Skip to content

Commit 6c6596e

Browse files
Alcanderian and xwu-intel
authored and committed
[fix] fix determine_n_share_experts_fusion (sgl-project#6118)
1 parent 4f604f3 commit 6c6596e

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

python/sglang/srt/models/deepseek_v2.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1486,22 +1486,24 @@ def determine_n_share_experts_fusion(
14861486
if self.n_share_experts_fusion > 0:
14871487
# Only Deepseek V3/R1 can use shared experts fusion optimization now.
14881488
if (
1489-
self.config.architectures[0] != architecture
1489+
not _is_cuda
1490+
or self.config.architectures[0] != architecture
14901491
or self.config.n_routed_experts != 256
14911492
):
14921493
self.n_share_experts_fusion = 0
14931494
global_server_args_dict["n_share_experts_fusion"] = 0
14941495
log_info_on_rank0(
14951496
logger,
1496-
"Only Deepseek V3/R1 can use shared experts fusion optimization. Shared experts fusion optimization is disabled.",
1497+
"Only Deepseek V3/R1 on NV-platform can use shared experts fusion optimization. Shared experts fusion optimization is disabled.",
14971498
)
14981499
else:
14991500
assert (
15001501
self.n_share_experts_fusion == self.tp_size
15011502
), f"Shared experts fusion optimization is enabled in DeepSeek V3/R1, set it to {self.tp_size} can get best optimized performace."
15021503
elif self.n_share_experts_fusion == 0:
15031504
if (
1504-
torch.cuda.get_device_capability("cuda") >= (9, 0)
1505+
_is_cuda
1506+
and torch.cuda.get_device_capability("cuda") >= (9, 0)
15051507
and self.config.architectures[0] == architecture
15061508
and self.config.n_routed_experts == 256
15071509
and (not global_server_args_dict["enable_deepep_moe"])

0 commit comments

Comments (0)