@@ -5,9 +5,6 @@
 from pathlib import Path
 from dataclasses import dataclass
 
-SPARK_MIRROR_PATH = str(Path("../spark_mirror").resolve())
-TPCH_SPARK_PATH = str(Path("../tpch-spark").resolve())
-
 
 def bang(cmd, dry_run, stdout=subprocess.PIPE, stderr=subprocess.PIPE):
     cmd = [str(part) for part in cmd]
@@ -29,6 +26,8 @@ def must(cmd, dry_run, stdout=subprocess.PIPE, stderr=subprocess.PIPE):
 @dataclass
 class Service:
     service_args: any
+    spark_mirror_path: Path
+    spark_master_ip: str
     output_dir: Path
     dry_run: bool
 
@@ -59,22 +58,22 @@ def __enter__(self):
         # launch spark master and worker
         self._master = must(
             [
-                f"{SPARK_MIRROR_PATH}/sbin/start-master.sh",
-                *("--host", "130.207.125.81"),
+                f"{self.spark_mirror_path}/sbin/start-master.sh",
+                *("--host", self.spark_master_ip),
                 *(
                     "--properties-file",
-                    f"{SPARK_MIRROR_PATH}/conf/spark-dg-config.conf",
+                    f"{self.spark_mirror_path}/conf/spark-dg-config.conf",
                 ),
             ],
             self.dry_run,
         )
         self._worker = must(
             [
-                f"{SPARK_MIRROR_PATH}/sbin/start-worker.sh",
-                "spark://130.207.125.81:7077",
+                f"{self.spark_mirror_path}/sbin/start-worker.sh",
+                f"spark://{self.spark_master_ip}:7077",
                 *(
                     "--properties-file",
-                    f"{SPARK_MIRROR_PATH}/conf/spark-dg-config.conf",
+                    f"{self.spark_mirror_path}/conf/spark-dg-config.conf",
                 ),
             ],
             self.dry_run,
@@ -90,9 +89,9 @@ def clean(self):
         if self._service:
             self._service.wait()
         if self._master:
-            must([f"{SPARK_MIRROR_PATH}/sbin/stop-master.sh"], self.dry_run)
+            must([f"{self.spark_mirror_path}/sbin/stop-master.sh"], self.dry_run)
         if self._worker:
-            must([f"{SPARK_MIRROR_PATH}/sbin/stop-worker.sh"], self.dry_run)
+            must([f"{self.spark_mirror_path}/sbin/stop-worker.sh"], self.dry_run)
 
     def __exit__(self, type, value, traceback):
         self.clean()
@@ -101,6 +100,9 @@ def __exit__(self, type, value, traceback):
 @dataclass
 class Launcher:
     launcher_args: any
+    spark_mirror_path: Path
+    spark_master_ip: str
+    tpch_spark_path: Path
     output_dir: Path
     dry_run: bool
 
@@ -113,8 +115,9 @@ def launch(self):
             [
                 *("python3", "-u", "-m", "rpc.launch_tpch_queries"),
                 *self.launcher_args,
-                *("--spark-mirror-path", SPARK_MIRROR_PATH),
-                *("--tpch-spark-path", TPCH_SPARK_PATH),
+                *("--spark-master-ip", self.spark_master_ip),
+                *("--spark-mirror-path", self.spark_mirror_path),
+                *("--tpch-spark-path", self.tpch_spark_path),
             ],
             self.dry_run,
             stdout=f_out,
@@ -127,19 +130,27 @@ class Experiment:
     name: str
     service_args: any
     launcher_args: any
-    args: any
 
-    def run(self):
-        output_dir = self.args.output_dir / self.name
+    def run(self, args):
+        output_dir = args.output_dir / self.name
         if not output_dir.exists():
             output_dir.mkdir(parents=True)
 
         with Service(
             service_args=self.service_args,
+            spark_mirror_path=args.spark_mirror_path,
+            spark_master_ip=args.spark_master_ip,
             output_dir=output_dir,
-            dry_run=self.args.dry_run,
+            dry_run=args.dry_run,
         ) as s:
-            Launcher(self.launcher_args, output_dir, self.args.dry_run).launch()
+            Launcher(
+                launcher_args=self.launcher_args,
+                spark_mirror_path=args.spark_mirror_path,
+                spark_master_ip=args.spark_master_ip,
+                tpch_spark_path=args.tpch_spark_path,
+                output_dir=output_dir,
+                dry_run=args.dry_run,
+            ).launch()
 
 
 def main():
@@ -149,6 +160,24 @@ def main():
         action="store_true",
         help="Prints commands that will be executed for each experiment",
     )
+    parser.add_argument(
+        "--spark-mirror-path",
+        type=Path,
+        required=True,
+        help="Path to spark-mirror repository",
+    )
+    parser.add_argument(
+        "--spark-master-ip",
+        type=str,
+        required=True,
+        help="IP address of node running Spark master",
+    )
+    parser.add_argument(
+        "--tpch-spark-path",
+        type=Path,
+        required=True,
+        help="Path to TPC-H Spark repository",
+    )
     parser.add_argument("--output-dir", type=Path, default=Path("exp-output"))
     args = parser.parse_args()
 
@@ -183,19 +212,6 @@ def main():
         *("--scheduler_plan_ahead_no_consideration_gap", 1),
     ]
     experiments = [
-        Experiment(
-            name="edf-q300-hard",
-            service_args=[
-                *base_args,
-                *edf_args,
-                *variance_args,
-            ],
-            launcher_args=[
-                *("--num_queries", 300),
-                *("--variable_arrival_rate", 0.052),
-            ],
-            args=args,
-        ),
         Experiment(
             name="dsched-q300-hard",
             service_args=[
@@ -207,14 +223,13 @@
                 *("--num_queries", 300),
                 *("--variable_arrival_rate", 0.052),
             ],
-            args=args,
         ),
     ]
 
     for i, experiment in enumerate(experiments):
         try:
             print(f"=== {experiment.name} ({i + 1}/{len(experiments)}) ===")
-            experiment.run()
+            experiment.run(args)
             print("=== done ===")
         except Exception as e:
             print(traceback.format_exc())
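
A note on the helpers this diff leans on: bang and must appear here only by signature (the latter via the second hunk's context line), and their bodies are not part of this commit. The sketch below is a plausible reading, assuming bang echoes the command and wraps subprocess.Popen, skips the spawn under dry runs, and that must shares that behavior for commands whose handles the caller keeps, as Service does before wait()ing on and stopping them. Only the two signatures and the str(part) normalization come from the diff; everything else is an assumption.

import subprocess

def bang(cmd, dry_run, stdout=subprocess.PIPE, stderr=subprocess.PIPE):
    cmd = [str(part) for part in cmd]  # from the diff: Path objects etc. become argv strings
    print(" ".join(cmd))  # assumed: matches the "prints commands that will be executed" help text
    if dry_run:
        return None  # assumed: a dry run only prints, never spawns a process
    return subprocess.Popen(cmd, stdout=stdout, stderr=stderr)

def must(cmd, dry_run, stdout=subprocess.PIPE, stderr=subprocess.PIPE):
    # Assumed: launches like bang; Service keeps the returned handles
    # (_master, _worker) and guards its stop-*.sh cleanup on their truthiness.
    return bang(cmd, dry_run, stdout=stdout, stderr=stderr)

Under this reading, Service.__enter__ starts the Spark master and worker through the mirror repository's own sbin scripts, and clean() only issues the matching stop scripts when a handle exists, which keeps dry runs side-effect free.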
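With the module-level SPARK_MIRROR_PATH and TPCH_SPARK_PATH constants gone, all three values must now be supplied on the command line; each new flag is required=True, so argparse rejects an invocation that omits any of them. A hypothetical invocation (the script name and argument values are placeholders, not from this commit) would look like: python3 run_experiments.py --spark-mirror-path ~/spark_mirror --tpch-spark-path ~/tpch-spark --spark-master-ip 10.0.0.1. The practical effect is that the runner no longer assumes ../spark_mirror, ../tpch-spark, or the hardcoded master address 130.207.125.81, so the same script can drive different checkouts and clusters.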