We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a4c14cf, commit d172eb4. Copy full SHA for d172eb4.
python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py
@@ -84,9 +84,7 @@ def get_deepep_buffer(
             num_nvl_bytes,
             num_rdma_bytes,
             low_latency_mode=deepep_mode.enable_low_latency(),
-            num_qps_per_rank=(
-                num_experts // group.size() if deepep_mode.enable_low_latency() else 1
-            ),
+            num_qps_per_rank=(max(num_experts // group.size(), Buffer.num_sms // 2)),
         )
         return cls._buffer
 
0 commit comments