File tree Expand file tree Collapse file tree 1 file changed +5
-3
lines changed Expand file tree Collapse file tree 1 file changed +5
-3
lines changed Original file line number Diff line number Diff line change @@ -1486,22 +1486,24 @@ def determine_n_share_experts_fusion(
1486
1486
if self .n_share_experts_fusion > 0 :
1487
1487
# Only Deepseek V3/R1 can use shared experts fusion optimization now.
1488
1488
if (
1489
- self .config .architectures [0 ] != architecture
1489
+ not _is_cuda
1490
+ or self .config .architectures [0 ] != architecture
1490
1491
or self .config .n_routed_experts != 256
1491
1492
):
1492
1493
self .n_share_experts_fusion = 0
1493
1494
global_server_args_dict ["n_share_experts_fusion" ] = 0
1494
1495
log_info_on_rank0 (
1495
1496
logger ,
1496
- "Only Deepseek V3/R1 can use shared experts fusion optimization. Shared experts fusion optimization is disabled." ,
1497
+ "Only Deepseek V3/R1 on NV-platform can use shared experts fusion optimization. Shared experts fusion optimization is disabled." ,
1497
1498
)
1498
1499
else :
1499
1500
assert (
1500
1501
self .n_share_experts_fusion == self .tp_size
1501
1502
), f"Shared experts fusion optimization is enabled in DeepSeek V3/R1, set it to { self .tp_size } can get best optimized performance."
1502
1503
elif self .n_share_experts_fusion == 0 :
1503
1504
if (
1504
- torch .cuda .get_device_capability ("cuda" ) >= (9 , 0 )
1505
+ _is_cuda
1506
+ and torch .cuda .get_device_capability ("cuda" ) >= (9 , 0 )
1505
1507
and self .config .architectures [0 ] == architecture
1506
1508
and self .config .n_routed_experts == 256
1507
1509
and (not global_server_args_dict ["enable_deepep_moe" ])
You can’t perform that action at this time.
0 commit comments