Skip to content

Commit 74e7e45

Browse files
hebiao064 and zminglei authored
Fix DEEPEP BF16 compatibility for Deepseek Style model like GLM 4.5 (sgl-project#8469)
Co-authored-by: Minglei Zhu <mingleizhu1122@gmail.com>
1 parent 1466c1b commit 74e7e45

File tree

1 file changed

+1
-6
lines changed
  • python/sglang/srt/layers/moe/ep_moe

1 file changed

+1
-6
lines changed

python/sglang/srt/layers/moe/ep_moe/layer.py

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -800,11 +800,6 @@ def __init__(
800800
routed_scaling_factor=routed_scaling_factor,
801801
)
802802
self.deepep_mode = deepep_mode
803-
if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM:
804-
assert self.use_fp8_w8a8, (
805-
"DeepGEMM requires an fp8_w8a8 model; "
806-
"alternatively, you can disable DeepGEMM by turning off the ENABLE_JIT_DEEPGEMM environment variable."
807-
)
808803

809804
# TODO: move to the beginning of the file
810805
from sglang.srt.distributed.parallel_state import get_tp_group
@@ -897,7 +892,7 @@ def moe_impl(self, dispatch_output: DispatchOutput):
897892
# in forward_aiter, we skip token permutation and unpermutation, which have been fused inside aiter kernel
898893
return self.forward_aiter(dispatch_output)
899894
if dispatch_output.format.is_deepep_normal():
900-
if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM:
895+
if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM and self.use_fp8_w8a8:
901896
return self.forward_deepgemm_contiguous(dispatch_output)
902897
else:
903898
return self.forward_normal(dispatch_output)

0 commit comments

Comments
 (0)