Skip to content

Commit e400726

Browse files
author
Dhruv Garg
committed
tpch partitions for 100g, 250g and diff max executors
1 parent db90d0c commit e400726

File tree

1 file changed

+84
-59
lines changed

1 file changed

+84
-59
lines changed

data/tpch_partitioning_analysis.ipynb

Lines changed: 84 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 10,
5+
"execution_count": 2,
66
"metadata": {},
77
"outputs": [],
88
"source": [
@@ -12,7 +12,7 @@
1212
},
1313
{
1414
"cell_type": "code",
15-
"execution_count": 11,
15+
"execution_count": 3,
1616
"metadata": {},
1717
"outputs": [],
1818
"source": [
@@ -24,7 +24,7 @@
2424
},
2525
{
2626
"cell_type": "code",
27-
"execution_count": 12,
27+
"execution_count": 4,
2828
"metadata": {},
2929
"outputs": [],
3030
"source": [
@@ -51,7 +51,7 @@
5151
},
5252
{
5353
"cell_type": "code",
54-
"execution_count": 13,
54+
"execution_count": 5,
5555
"metadata": {},
5656
"outputs": [],
5757
"source": [
@@ -73,7 +73,7 @@
7373
},
7474
{
7575
"cell_type": "code",
76-
"execution_count": 14,
76+
"execution_count": 6,
7777
"metadata": {},
7878
"outputs": [],
7979
"source": [
@@ -84,7 +84,7 @@
8484
},
8585
{
8686
"cell_type": "code",
87-
"execution_count": 15,
87+
"execution_count": 7,
8888
"metadata": {},
8989
"outputs": [],
9090
"source": [
@@ -103,7 +103,7 @@
103103
},
104104
{
105105
"cell_type": "code",
106-
"execution_count": 16,
106+
"execution_count": 8,
107107
"metadata": {},
108108
"outputs": [],
109109
"source": [
@@ -117,48 +117,53 @@
117117
},
118118
{
119119
"cell_type": "code",
120-
"execution_count": 17,
120+
"execution_count": 9,
121+
"metadata": {},
122+
"outputs": [],
123+
"source": [
124+
"# Input params to create buckets\n",
125+
"json_path = \"/home/dgarg39/erdos-scheduling-simulator/profiles/workload/tpch/cloudlab/cloudlab_22query_tpch_profiles.json\"\n",
126+
"bucket_size=8000\n",
127+
"dataset_size=\"100g\"\n",
128+
"max_executors=200\n",
129+
"min_task_runtime_ms=12000\n",
130+
"\n",
131+
"buckets = analyze_tpch_queries(json_path=json_path, bucket_size=bucket_size, dataset_size=dataset_size, max_executors=max_executors, min_task_runtime_ms=min_task_runtime_ms)"
132+
]
133+
},
134+
{
135+
"cell_type": "markdown",
136+
"metadata": {},
137+
"source": [
138+
"#### 100g dataset, varying the executors: 75, 100, 200"
139+
]
140+
},
141+
{
142+
"cell_type": "code",
143+
"execution_count": 10,
121144
"metadata": {},
122145
"outputs": [
123146
{
124147
"data": {
125148
"text/plain": [
126-
"{'easy': [],\n",
127-
" 'medium': ['q1',\n",
128-
" 'q2',\n",
129-
" 'q6',\n",
130-
" 'q11',\n",
131-
" 'q12',\n",
132-
" 'q13',\n",
133-
" 'q14',\n",
134-
" 'q15',\n",
135-
" 'q16',\n",
136-
" 'q19',\n",
137-
" 'q20',\n",
138-
" 'q22'],\n",
139-
" 'hard': ['q3', 'q4', 'q5', 'q7', 'q8', 'q9', 'q10', 'q17', 'q18', 'q21']}"
149+
"{'easy': ['q11', 'q13', 'q14', 'q15', 'q19', 'q20', 'q22'],\n",
150+
" 'medium': ['q1', 'q2', 'q4', 'q6', 'q10', 'q12', 'q16', 'q17', 'q18'],\n",
151+
" 'hard': ['q3', 'q5', 'q7', 'q8', 'q9', 'q21']}"
140152
]
141153
},
142-
"execution_count": 17,
154+
"execution_count": 10,
143155
"metadata": {},
144156
"output_type": "execute_result"
145157
}
146158
],
147159
"source": [
148-
"# Input params to create buckets\n",
149-
"json_path = \"/home/dgarg39/erdos-scheduling-simulator/profiles/workload/tpch/cloudlab/cloudlab_22query_tpch_profiles.json\"\n",
150-
"bucket_size=8000\n",
151-
"dataset_size=\"100g\"\n",
152-
"max_executors=200\n",
153-
"min_task_runtime_ms=12000\n",
154-
"\n",
155-
"buckets = analyze_tpch_queries(json_path=json_path, bucket_size=bucket_size, dataset_size=dataset_size, max_executors=max_executors, min_task_runtime_ms=min_task_runtime_ms)\n",
160+
"buckets = analyze_tpch_queries(json_path=json_path, bucket_size=5500, dataset_size=\"100g\", max_executors=75, min_task_runtime_ms=12000)\n",
156161
"buckets"
157162
]
158163
},
159164
{
160165
"cell_type": "code",
161-
"execution_count": 18,
166+
"execution_count": 11,
162167
"metadata": {},
163168
"outputs": [
164169
{
@@ -169,7 +174,7 @@
169174
" 'hard': ['q3', 'q5', 'q7', 'q8', 'q9', 'q18', 'q21']}"
170175
]
171176
},
172-
"execution_count": 18,
177+
"execution_count": 11,
173178
"metadata": {},
174179
"output_type": "execute_result"
175180
}
@@ -181,80 +186,100 @@
181186
},
182187
{
183188
"cell_type": "code",
184-
"execution_count": 19,
189+
"execution_count": 12,
185190
"metadata": {},
186191
"outputs": [
187192
{
188193
"data": {
189194
"text/plain": [
190-
"{'easy': ['q2', 'q11', 'q13', 'q16', 'q19', 'q22'],\n",
191-
" 'medium': ['q1', 'q4', 'q6', 'q10', 'q12', 'q14', 'q15', 'q20'],\n",
195+
"{'easy': ['q6', 'q11', 'q13', 'q19', 'q22'],\n",
196+
" 'medium': ['q1', 'q2', 'q4', 'q10', 'q12', 'q14', 'q15', 'q16', 'q20'],\n",
192197
" 'hard': ['q3', 'q5', 'q7', 'q8', 'q9', 'q17', 'q18', 'q21']}"
193198
]
194199
},
195-
"execution_count": 19,
200+
"execution_count": 12,
196201
"metadata": {},
197202
"output_type": "execute_result"
198203
}
199204
],
200205
"source": [
201-
"buckets = analyze_tpch_queries(json_path=json_path, bucket_size=7500, dataset_size=\"100g\", max_executors=100, min_task_runtime_ms=12000)\n",
206+
"buckets = analyze_tpch_queries(json_path=json_path, bucket_size=10000, dataset_size=\"100g\", max_executors=200, min_task_runtime_ms=12000)\n",
202207
"buckets"
203208
]
204209
},
210+
{
211+
"cell_type": "markdown",
212+
"metadata": {},
213+
"source": [
214+
"#### 250g dataset, varying the executors: 75, 100, 200"
215+
]
216+
},
205217
{
206218
"cell_type": "code",
207-
"execution_count": 20,
219+
"execution_count": 13,
208220
"metadata": {},
209221
"outputs": [
210222
{
211223
"data": {
212224
"text/plain": [
213-
"{'easy': ['q13', 'q16', 'q22'],\n",
214-
" 'medium': ['q2', 'q6', 'q11', 'q12', 'q14', 'q15', 'q19', 'q20'],\n",
215-
" 'hard': ['q1',\n",
216-
" 'q3',\n",
217-
" 'q4',\n",
218-
" 'q5',\n",
219-
" 'q7',\n",
220-
" 'q8',\n",
221-
" 'q9',\n",
222-
" 'q10',\n",
223-
" 'q17',\n",
224-
" 'q18',\n",
225-
" 'q21']}"
225+
"{'easy': ['q2', 'q11', 'q13', 'q16', 'q19', 'q22'],\n",
226+
" 'medium': ['q1', 'q6', 'q7', 'q10', 'q12', 'q14', 'q15', 'q20'],\n",
227+
" 'hard': ['q3', 'q4', 'q5', 'q8', 'q9', 'q17', 'q18', 'q21']}"
226228
]
227229
},
228-
"execution_count": 20,
230+
"execution_count": 13,
229231
"metadata": {},
230232
"output_type": "execute_result"
231233
}
232234
],
233235
"source": [
234-
"buckets = analyze_tpch_queries(json_path=json_path, bucket_size=9000, dataset_size=\"250g\", max_executors=75, min_task_runtime_ms=12000)\n",
236+
"buckets = analyze_tpch_queries(json_path=json_path, bucket_size=11500, dataset_size=\"250g\", max_executors=75, min_task_runtime_ms=12000)\n",
235237
"buckets"
236238
]
237239
},
238240
{
239241
"cell_type": "code",
240-
"execution_count": 21,
242+
"execution_count": 14,
241243
"metadata": {},
242244
"outputs": [
243245
{
244246
"data": {
245247
"text/plain": [
246248
"{'easy': ['q2', 'q11', 'q13', 'q16', 'q19', 'q22'],\n",
247-
" 'medium': ['q1', 'q6', 'q7', 'q10', 'q12', 'q14', 'q15', 'q20'],\n",
248-
" 'hard': ['q3', 'q4', 'q5', 'q8', 'q9', 'q17', 'q18', 'q21']}"
249+
" 'medium': ['q1', 'q6', 'q10', 'q12', 'q14', 'q15', 'q20'],\n",
250+
" 'hard': ['q3', 'q4', 'q5', 'q7', 'q8', 'q9', 'q17', 'q18', 'q21']}"
249251
]
250252
},
251-
"execution_count": 21,
253+
"execution_count": 14,
252254
"metadata": {},
253255
"output_type": "execute_result"
254256
}
255257
],
256258
"source": [
257-
"buckets = analyze_tpch_queries(json_path=json_path, bucket_size=11500, dataset_size=\"250g\", max_executors=75, min_task_runtime_ms=12000)\n",
259+
"buckets = analyze_tpch_queries(json_path=json_path, bucket_size=15000, dataset_size=\"250g\", max_executors=100, min_task_runtime_ms=12000)\n",
260+
"buckets"
261+
]
262+
},
263+
{
264+
"cell_type": "code",
265+
"execution_count": 15,
266+
"metadata": {},
267+
"outputs": [
268+
{
269+
"data": {
270+
"text/plain": [
271+
"{'easy': ['q1', 'q2', 'q6', 'q11', 'q13', 'q16', 'q22'],\n",
272+
" 'medium': ['q4', 'q7', 'q10', 'q12', 'q14', 'q15', 'q19', 'q20'],\n",
273+
" 'hard': ['q3', 'q5', 'q8', 'q9', 'q17', 'q18', 'q21']}"
274+
]
275+
},
276+
"execution_count": 15,
277+
"metadata": {},
278+
"output_type": "execute_result"
279+
}
280+
],
281+
"source": [
282+
"buckets = analyze_tpch_queries(json_path=json_path, bucket_size=15000, dataset_size=\"250g\", max_executors=200, min_task_runtime_ms=12000)\n",
258283
"buckets"
259284
]
260285
},

0 commit comments

Comments
 (0)