1 file changed: +1 -6 lines changed
python/sglang/srt/layers/moe/ep_moe
@@ -800,11 +800,6 @@ def __init__(
800
800
routed_scaling_factor = routed_scaling_factor ,
801
801
)
802
802
self .deepep_mode = deepep_mode
803
- if deep_gemm_wrapper .ENABLE_JIT_DEEPGEMM :
804
- assert self .use_fp8_w8a8 , (
805
- "DeepGEMM requires an fp8_w8a8 model; "
806
- "alternatively, you can disable DeepGEMM by turning off the ENABLE_JIT_DEEPGEMM environment variable."
807
- )
808
803
809
804
# TODO: move to the beginning of the file
810
805
from sglang .srt .distributed .parallel_state import get_tp_group
@@ -897,7 +892,7 @@ def moe_impl(self, dispatch_output: DispatchOutput):
897
892
# in forward_aiter, we skip token permutation and unpermutation, which have been fused inside aiter kernel
898
893
return self .forward_aiter (dispatch_output )
899
894
if dispatch_output .format .is_deepep_normal ():
900
- if deep_gemm_wrapper .ENABLE_JIT_DEEPGEMM :
895
+ if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM and self.use_fp8_w8a8:
901
896
return self .forward_deepgemm_contiguous (dispatch_output )
902
897
else :
903
898
return self .forward_normal (dispatch_output )
You can’t perform that action at this time.
0 commit comments