@@ -192,17 +192,13 @@ def init_health_status(self) -> None:
192
192
)
193
193
self .worker_ready_signal .value [self .local_rank % self .max_chips_per_node ] = 1
194
194
195
- if self .parallel_config .local_data_parallel_id == 0 :
196
- current_suffix = self .parallel_config .engine_pid
197
- else :
198
- current_suffix = self .parallel_config .engine_worker_queue_port
199
195
# init worker_healthy_live_signal
200
196
workers_alive = np .zeros (shape = [min (array_size , self .parallel_config .tensor_parallel_size )], dtype = np .int32 )
201
197
self .worker_healthy_live_signal = IPCSignal (
202
198
name = "worker_healthy_live_signal" ,
203
199
array = workers_alive ,
204
200
dtype = np .int32 ,
205
- suffix = current_suffix ,
201
+ suffix = self . parallel_config . engine_worker_queue_port ,
206
202
create = False ,
207
203
)
208
204
local_rank = self .local_rank % self .parallel_config .tensor_parallel_size
@@ -214,7 +210,7 @@ def init_health_status(self) -> None:
214
210
name = "model_weights_status" ,
215
211
array = workers_model_weights ,
216
212
dtype = np .int32 ,
217
- suffix = current_suffix ,
213
+ suffix = self . parallel_config . engine_worker_queue_port ,
218
214
create = False ,
219
215
)
220
216
@@ -224,7 +220,7 @@ def init_health_status(self) -> None:
224
220
name = "exist_task_signal" ,
225
221
array = workers_exist_task ,
226
222
dtype = np .int32 ,
227
- suffix = current_suffix ,
223
+ suffix = self . parallel_config . engine_worker_queue_port ,
228
224
create = False ,
229
225
)
230
226
@@ -234,7 +230,7 @@ def init_health_status(self) -> None:
234
230
name = "exist_swapped_task_signal" ,
235
231
array = workers_swapped_task ,
236
232
dtype = np .int32 ,
237
- suffix = current_suffix ,
233
+ suffix = self . parallel_config . engine_worker_queue_port ,
238
234
create = False ,
239
235
)
240
236
@@ -244,7 +240,7 @@ def init_health_status(self) -> None:
244
240
name = "exist_prefill_task_signal" ,
245
241
array = exist_prefill_task_signal_data ,
246
242
dtype = np .int32 ,
247
- suffix = current_suffix ,
243
+ suffix = self . parallel_config . engine_worker_queue_port ,
248
244
create = False ,
249
245
)
250
246
0 commit comments