Skip to content

Commit 684f6c9

Browse files
committed
solve comment
1 parent f57a1ca commit 684f6c9

File tree

2 files changed

+4
-4
lines changed

2 files changed

+4
-4
lines changed

fastdeploy/worker/gpu_model_runner.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,7 +1040,7 @@ def initialize_forward_meta(self):
10401040
only_decode_use_cudagraph = self.use_cudagraph and if_only_decode
10411041

10421042
# Update config about moe for better performance
1043-
# TODO:Modifying the config at runtime is not appropriate; it needs to be moved to forward_meta. It will be used in MoEMethodBase.apply()
1043+
# TODO(wanglongzhi): Modifying the config at runtime is not appropriate; it needs to be moved to forward_meta. It will be used in MoEMethodBase.apply()
10441044
if self.fd_config.parallel_config.use_ep and self.fd_config.parallel_config.splitwise_role == "mixed":
10451045
self.fd_config.parallel_config.moe_phase.phase = "decode" if if_only_decode else "prefill"
10461046

@@ -1411,7 +1411,7 @@ def _update_chunked_prefill(self, tasks):
14111411
self.proposer.update_task_chunk_prefill(task)
14121412
task.chunk_idx += 1
14131413

1414-
def capture_model(self, prefill_only: bool = False) -> None:
1414+
def capture_model(self) -> None:
14151415
"""
14161416
Trigger CUDA Graph capture for all shapes in cuda graph capture list
14171417
"""
@@ -1422,7 +1422,7 @@ def capture_model(self, prefill_only: bool = False) -> None:
14221422
expected_decode_len = 1
14231423
capture_sizes = self.cudagraph_capture_sizes.copy()
14241424

1425-
if prefill_only:
1425+
if self.fd_config.graph_opt_config.cudagraph_only_prefill:
14261426
for num_tokens in sorted(capture_sizes, reverse=True):
14271427
self._dummy_run(
14281428
num_tokens=num_tokens,

fastdeploy/worker/gpu_worker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ def graph_optimize_and_warm_up_model(self) -> None:
210210
if self.model_runner.graph_opt_level >= 1:
211211
self.model_runner.sot_warmup()
212212
# Trigger CUDA graph capture
213-
self.model_runner.capture_model(prefill_only=self.fd_config.graph_opt_config.cudagraph_only_prefill)
213+
self.model_runner.capture_model()
214214

215215
def check_health(self) -> bool:
216216
""" """

0 commit comments

Comments
 (0)