Skip to content

Commit 4cb53ec

Browse files
authored
fix: log warning when disable cuda graph (#5209)
1 parent 456b008 commit 4cb53ec

File tree

3 files changed

+13
-7
lines changed

3 files changed

+13
-7
lines changed

python/sglang/srt/model_executor/cuda_graph_runner.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -269,10 +269,10 @@ def __init__(self, model_runner: ModelRunner):
269269
raise Exception(
270270
f"Capture cuda graph failed: {e}\n"
271271
"Possible solutions:\n"
272-
"1. disable cuda graph by --disable-cuda-graph\n"
273-
"2. set --mem-fraction-static to a smaller value (e.g., 0.8 or 0.7)\n"
272+
"1. set --mem-fraction-static to a smaller value (e.g., 0.8 or 0.7)\n"
273+
"2. set --cuda-graph-max-bs to a smaller value (e.g., 32)\n"
274274
"3. disable torch compile by not using --enable-torch-compile\n"
275-
"4. set --cuda-graph-max-bs to a smaller value (e.g., 32)\n"
275+
"4. disable cuda graph by --disable-cuda-graph\n"
276276
"Open an issue on GitHub https://github.com/sgl-project/sglang/issues/new/choose \n"
277277
)
278278

python/sglang/srt/model_executor/model_runner.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -924,6 +924,12 @@ def init_cuda_graphs(self):
924924
return
925925

926926
if self.server_args.disable_cuda_graph:
927+
logger.warning(
928+
"\n\nCUDA Graph is DISABLED.\n"
929+
"This will cause significant performance degradation.\n"
930+
"CUDA Graph should almost never be disabled in most usage scenarios.\n"
931+
"If you encounter OOM issues, please try setting --mem-fraction-static to a lower value (such as 0.8 or 0.7) instead of disabling CUDA Graph.\n"
932+
)
927933
return
928934

929935
tic = time.time()

python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -84,10 +84,10 @@ def __init__(self, eagle_worker: EAGLEWorker):
8484
raise Exception(
8585
f"Capture cuda graph failed: {e}\n"
8686
"Possible solutions:\n"
87-
"1. disable cuda graph by --disable-cuda-graph\n"
88-
"2. set --mem-fraction-static to a smaller value (e.g., 0.8 or 0.7)\n"
89-
"3. disable torch compile by not using --enable-torch-compile\n"
90-
"4. specify --dtype to the same dtype (e.g. bfloat16)\n"
87+
"1. set --mem-fraction-static to a smaller value (e.g., 0.8 or 0.7)\n"
88+
"2. disable torch compile by not using --enable-torch-compile\n"
89+
"3. specify --dtype to the same dtype (e.g. bfloat16)\n"
90+
"4. disable cuda graph by --disable-cuda-graph\n"
9191
"Open an issue on GitHub https://github.com/sgl-project/sglang/issues/new/choose \n"
9292
)
9393

0 commit comments

Comments
 (0)