|
2 | 2 | "cells": [
|
3 | 3 | {
|
4 | 4 | "cell_type": "code",
|
5 |
| - "execution_count": 10, |
| 5 | + "execution_count": 2, |
6 | 6 | "metadata": {},
|
7 | 7 | "outputs": [],
|
8 | 8 | "source": [
|
|
12 | 12 | },
|
13 | 13 | {
|
14 | 14 | "cell_type": "code",
|
15 |
| - "execution_count": 11, |
| 15 | + "execution_count": 3, |
16 | 16 | "metadata": {},
|
17 | 17 | "outputs": [],
|
18 | 18 | "source": [
|
|
24 | 24 | },
|
25 | 25 | {
|
26 | 26 | "cell_type": "code",
|
27 |
| - "execution_count": 12, |
| 27 | + "execution_count": 4, |
28 | 28 | "metadata": {},
|
29 | 29 | "outputs": [],
|
30 | 30 | "source": [
|
|
51 | 51 | },
|
52 | 52 | {
|
53 | 53 | "cell_type": "code",
|
54 |
| - "execution_count": 13, |
| 54 | + "execution_count": 5, |
55 | 55 | "metadata": {},
|
56 | 56 | "outputs": [],
|
57 | 57 | "source": [
|
|
73 | 73 | },
|
74 | 74 | {
|
75 | 75 | "cell_type": "code",
|
76 |
| - "execution_count": 14, |
| 76 | + "execution_count": 6, |
77 | 77 | "metadata": {},
|
78 | 78 | "outputs": [],
|
79 | 79 | "source": [
|
|
84 | 84 | },
|
85 | 85 | {
|
86 | 86 | "cell_type": "code",
|
87 |
| - "execution_count": 15, |
| 87 | + "execution_count": 7, |
88 | 88 | "metadata": {},
|
89 | 89 | "outputs": [],
|
90 | 90 | "source": [
|
|
103 | 103 | },
|
104 | 104 | {
|
105 | 105 | "cell_type": "code",
|
106 |
| - "execution_count": 16, |
| 106 | + "execution_count": 8, |
107 | 107 | "metadata": {},
|
108 | 108 | "outputs": [],
|
109 | 109 | "source": [
|
|
117 | 117 | },
|
118 | 118 | {
|
119 | 119 | "cell_type": "code",
|
120 |
| - "execution_count": 17, |
| 120 | + "execution_count": 9, |
| 121 | + "metadata": {}, |
| 122 | + "outputs": [], |
| 123 | + "source": [ |
| 124 | + "# Input params to create buckets\n", |
| 125 | + "json_path = \"/home/dgarg39/erdos-scheduling-simulator/profiles/workload/tpch/cloudlab/cloudlab_22query_tpch_profiles.json\"\n", |
| 126 | + "bucket_size=8000\n", |
| 127 | + "dataset_size=\"100g\"\n", |
| 128 | + "max_executors=200\n", |
| 129 | + "min_task_runtime_ms=12000\n", |
| 130 | + "\n", |
| 131 | + "buckets = analyze_tpch_queries(json_path=json_path, bucket_size=bucket_size, dataset_size=dataset_size, max_executors=max_executors, min_task_runtime_ms=min_task_runtime_ms)" |
| 132 | + ] |
| 133 | + }, |
| 134 | + { |
| 135 | + "cell_type": "markdown", |
| 136 | + "metadata": {}, |
| 137 | + "source": [ |
| 138 | + "#### 100g dataset, varying the executors: 75, 100, 200" |
| 139 | + ] |
| 140 | + }, |
| 141 | + { |
| 142 | + "cell_type": "code", |
| 143 | + "execution_count": 10, |
121 | 144 | "metadata": {},
|
122 | 145 | "outputs": [
|
123 | 146 | {
|
124 | 147 | "data": {
|
125 | 148 | "text/plain": [
|
126 |
| - "{'easy': [],\n", |
127 |
| - " 'medium': ['q1',\n", |
128 |
| - " 'q2',\n", |
129 |
| - " 'q6',\n", |
130 |
| - " 'q11',\n", |
131 |
| - " 'q12',\n", |
132 |
| - " 'q13',\n", |
133 |
| - " 'q14',\n", |
134 |
| - " 'q15',\n", |
135 |
| - " 'q16',\n", |
136 |
| - " 'q19',\n", |
137 |
| - " 'q20',\n", |
138 |
| - " 'q22'],\n", |
139 |
| - " 'hard': ['q3', 'q4', 'q5', 'q7', 'q8', 'q9', 'q10', 'q17', 'q18', 'q21']}" |
| 149 | + "{'easy': ['q11', 'q13', 'q14', 'q15', 'q19', 'q20', 'q22'],\n", |
| 150 | + " 'medium': ['q1', 'q2', 'q4', 'q6', 'q10', 'q12', 'q16', 'q17', 'q18'],\n", |
| 151 | + " 'hard': ['q3', 'q5', 'q7', 'q8', 'q9', 'q21']}" |
140 | 152 | ]
|
141 | 153 | },
|
142 |
| - "execution_count": 17, |
| 154 | + "execution_count": 10, |
143 | 155 | "metadata": {},
|
144 | 156 | "output_type": "execute_result"
|
145 | 157 | }
|
146 | 158 | ],
|
147 | 159 | "source": [
|
148 |
| - "# Input params to create buckets\n", |
149 |
| - "json_path = \"/home/dgarg39/erdos-scheduling-simulator/profiles/workload/tpch/cloudlab/cloudlab_22query_tpch_profiles.json\"\n", |
150 |
| - "bucket_size=8000\n", |
151 |
| - "dataset_size=\"100g\"\n", |
152 |
| - "max_executors=200\n", |
153 |
| - "min_task_runtime_ms=12000\n", |
154 |
| - "\n", |
155 |
| - "buckets = analyze_tpch_queries(json_path=json_path, bucket_size=bucket_size, dataset_size=dataset_size, max_executors=max_executors, min_task_runtime_ms=min_task_runtime_ms)\n", |
| 160 | + "buckets = analyze_tpch_queries(json_path=json_path, bucket_size=5500, dataset_size=\"100g\", max_executors=75, min_task_runtime_ms=12000)\n", |
156 | 161 | "buckets"
|
157 | 162 | ]
|
158 | 163 | },
|
159 | 164 | {
|
160 | 165 | "cell_type": "code",
|
161 |
| - "execution_count": 18, |
| 166 | + "execution_count": 11, |
162 | 167 | "metadata": {},
|
163 | 168 | "outputs": [
|
164 | 169 | {
|
|
169 | 174 | " 'hard': ['q3', 'q5', 'q7', 'q8', 'q9', 'q18', 'q21']}"
|
170 | 175 | ]
|
171 | 176 | },
|
172 |
| - "execution_count": 18, |
| 177 | + "execution_count": 11, |
173 | 178 | "metadata": {},
|
174 | 179 | "output_type": "execute_result"
|
175 | 180 | }
|
|
181 | 186 | },
|
182 | 187 | {
|
183 | 188 | "cell_type": "code",
|
184 |
| - "execution_count": 19, |
| 189 | + "execution_count": 12, |
185 | 190 | "metadata": {},
|
186 | 191 | "outputs": [
|
187 | 192 | {
|
188 | 193 | "data": {
|
189 | 194 | "text/plain": [
|
190 |
| - "{'easy': ['q2', 'q11', 'q13', 'q16', 'q19', 'q22'],\n", |
191 |
| - " 'medium': ['q1', 'q4', 'q6', 'q10', 'q12', 'q14', 'q15', 'q20'],\n", |
| 195 | + "{'easy': ['q6', 'q11', 'q13', 'q19', 'q22'],\n", |
| 196 | + " 'medium': ['q1', 'q2', 'q4', 'q10', 'q12', 'q14', 'q15', 'q16', 'q20'],\n", |
192 | 197 | " 'hard': ['q3', 'q5', 'q7', 'q8', 'q9', 'q17', 'q18', 'q21']}"
|
193 | 198 | ]
|
194 | 199 | },
|
195 |
| - "execution_count": 19, |
| 200 | + "execution_count": 12, |
196 | 201 | "metadata": {},
|
197 | 202 | "output_type": "execute_result"
|
198 | 203 | }
|
199 | 204 | ],
|
200 | 205 | "source": [
|
201 |
| - "buckets = analyze_tpch_queries(json_path=json_path, bucket_size=7500, dataset_size=\"100g\", max_executors=100, min_task_runtime_ms=12000)\n", |
| 206 | + "buckets = analyze_tpch_queries(json_path=json_path, bucket_size=10000, dataset_size=\"100g\", max_executors=200, min_task_runtime_ms=12000)\n", |
202 | 207 | "buckets"
|
203 | 208 | ]
|
204 | 209 | },
|
| 210 | + { |
| 211 | + "cell_type": "markdown", |
| 212 | + "metadata": {}, |
| 213 | + "source": [ |
| 214 | + "#### 250g dataset, varying the executors: 75, 100, 200" |
| 215 | + ] |
| 216 | + }, |
205 | 217 | {
|
206 | 218 | "cell_type": "code",
|
207 |
| - "execution_count": 20, |
| 219 | + "execution_count": 13, |
208 | 220 | "metadata": {},
|
209 | 221 | "outputs": [
|
210 | 222 | {
|
211 | 223 | "data": {
|
212 | 224 | "text/plain": [
|
213 |
| - "{'easy': ['q13', 'q16', 'q22'],\n", |
214 |
| - " 'medium': ['q2', 'q6', 'q11', 'q12', 'q14', 'q15', 'q19', 'q20'],\n", |
215 |
| - " 'hard': ['q1',\n", |
216 |
| - " 'q3',\n", |
217 |
| - " 'q4',\n", |
218 |
| - " 'q5',\n", |
219 |
| - " 'q7',\n", |
220 |
| - " 'q8',\n", |
221 |
| - " 'q9',\n", |
222 |
| - " 'q10',\n", |
223 |
| - " 'q17',\n", |
224 |
| - " 'q18',\n", |
225 |
| - " 'q21']}" |
| 225 | + "{'easy': ['q2', 'q11', 'q13', 'q16', 'q19', 'q22'],\n", |
| 226 | + " 'medium': ['q1', 'q6', 'q7', 'q10', 'q12', 'q14', 'q15', 'q20'],\n", |
| 227 | + " 'hard': ['q3', 'q4', 'q5', 'q8', 'q9', 'q17', 'q18', 'q21']}" |
226 | 228 | ]
|
227 | 229 | },
|
228 |
| - "execution_count": 20, |
| 230 | + "execution_count": 13, |
229 | 231 | "metadata": {},
|
230 | 232 | "output_type": "execute_result"
|
231 | 233 | }
|
232 | 234 | ],
|
233 | 235 | "source": [
|
234 |
| - "buckets = analyze_tpch_queries(json_path=json_path, bucket_size=9000, dataset_size=\"250g\", max_executors=75, min_task_runtime_ms=12000)\n", |
| 236 | + "buckets = analyze_tpch_queries(json_path=json_path, bucket_size=11500, dataset_size=\"250g\", max_executors=75, min_task_runtime_ms=12000)\n", |
235 | 237 | "buckets"
|
236 | 238 | ]
|
237 | 239 | },
|
238 | 240 | {
|
239 | 241 | "cell_type": "code",
|
240 |
| - "execution_count": 21, |
| 242 | + "execution_count": 14, |
241 | 243 | "metadata": {},
|
242 | 244 | "outputs": [
|
243 | 245 | {
|
244 | 246 | "data": {
|
245 | 247 | "text/plain": [
|
246 | 248 | "{'easy': ['q2', 'q11', 'q13', 'q16', 'q19', 'q22'],\n",
|
247 |
| - " 'medium': ['q1', 'q6', 'q7', 'q10', 'q12', 'q14', 'q15', 'q20'],\n", |
248 |
| - " 'hard': ['q3', 'q4', 'q5', 'q8', 'q9', 'q17', 'q18', 'q21']}" |
| 249 | + " 'medium': ['q1', 'q6', 'q10', 'q12', 'q14', 'q15', 'q20'],\n", |
| 250 | + " 'hard': ['q3', 'q4', 'q5', 'q7', 'q8', 'q9', 'q17', 'q18', 'q21']}" |
249 | 251 | ]
|
250 | 252 | },
|
251 |
| - "execution_count": 21, |
| 253 | + "execution_count": 14, |
252 | 254 | "metadata": {},
|
253 | 255 | "output_type": "execute_result"
|
254 | 256 | }
|
255 | 257 | ],
|
256 | 258 | "source": [
|
257 |
| - "buckets = analyze_tpch_queries(json_path=json_path, bucket_size=11500, dataset_size=\"250g\", max_executors=75, min_task_runtime_ms=12000)\n", |
| 259 | + "buckets = analyze_tpch_queries(json_path=json_path, bucket_size=15000, dataset_size=\"250g\", max_executors=100, min_task_runtime_ms=12000)\n", |
| 260 | + "buckets" |
| 261 | + ] |
| 262 | + }, |
| 263 | + { |
| 264 | + "cell_type": "code", |
| 265 | + "execution_count": 15, |
| 266 | + "metadata": {}, |
| 267 | + "outputs": [ |
| 268 | + { |
| 269 | + "data": { |
| 270 | + "text/plain": [ |
| 271 | + "{'easy': ['q1', 'q2', 'q6', 'q11', 'q13', 'q16', 'q22'],\n", |
| 272 | + " 'medium': ['q4', 'q7', 'q10', 'q12', 'q14', 'q15', 'q19', 'q20'],\n", |
| 273 | + " 'hard': ['q3', 'q5', 'q8', 'q9', 'q17', 'q18', 'q21']}" |
| 274 | + ] |
| 275 | + }, |
| 276 | + "execution_count": 15, |
| 277 | + "metadata": {}, |
| 278 | + "output_type": "execute_result" |
| 279 | + } |
| 280 | + ], |
| 281 | + "source": [ |
| 282 | + "buckets = analyze_tpch_queries(json_path=json_path, bucket_size=15000, dataset_size=\"250g\", max_executors=200, min_task_runtime_ms=12000)\n", |
258 | 283 | "buckets"
|
259 | 284 | ]
|
260 | 285 | },
|
|
0 commit comments