Skip to content

Commit 1c1f8a1

Browse files
kkHuang-amd and wunhuang authored
Combine fp4.py and mxfp4.py into one file and support dynamic mxfp4 quantization in mxfp4.py (sgl-project#9049)
Co-authored-by: wunhuang <wunhuang@amd.com>
1 parent 384f8ab commit 1c1f8a1

File tree

7 files changed

+760
-557
lines changed

7 files changed

+760
-557
lines changed

python/sglang/srt/layers/moe/fused_moe_triton/layer.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,7 @@ def weight_loader(
474474
not expert_id
475475
and self.quant_config is not None
476476
and self.quant_config.get_name() == "mxfp4"
477+
and self.quant_config.is_static_cfg()
477478
):
478479
if "bias" in weight_name:
479480
dim1 = loaded_weight.shape[1]
@@ -724,7 +725,11 @@ def weight_loader_fused(
724725
) -> None:
725726
tp_rank = self.moe_tp_rank
726727

727-
if self.quant_config is not None and self.quant_config.get_name() == "mxfp4":
728+
if (
729+
self.quant_config is not None
730+
and self.quant_config.get_name() == "mxfp4"
731+
and self.quant_config.is_static_cfg()
732+
):
728733
if "bias" in weight_name:
729734
dim1 = loaded_weight.shape[1]
730735
param.data[:, :dim1].copy_(loaded_weight)

python/sglang/srt/layers/quantization/__init__.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,6 @@ def override_quantization_method(self, *args, **kwargs):
4848
from sglang.srt.layers.quantization.compressed_tensors.compressed_tensors import (
4949
CompressedTensorsConfig,
5050
)
51-
from sglang.srt.utils import is_cuda, is_hip, mxfp_supported
52-
53-
is_mxfp_supported = mxfp_supported()
54-
if is_mxfp_supported:
55-
from sglang.srt.layers.quantization.fp4 import MxFp4Config
56-
5751
from sglang.srt.layers.quantization.fp8 import Fp8Config
5852
from sglang.srt.layers.quantization.gptq import GPTQConfig, GPTQMarlinConfig
5953
from sglang.srt.layers.quantization.modelopt_quant import (
@@ -67,6 +61,9 @@ def override_quantization_method(self, *args, **kwargs):
6761
from sglang.srt.layers.quantization.w4afp8 import W4AFp8Config
6862
from sglang.srt.layers.quantization.w8a8_fp8 import W8A8Fp8Config
6963
from sglang.srt.layers.quantization.w8a8_int8 import W8A8Int8Config
64+
from sglang.srt.utils import is_cuda, is_hip, mxfp_supported
65+
66+
_is_mxfp_supported = mxfp_supported()
7067

7168
if TYPE_CHECKING:
7269
from sglang.srt.layers.moe.topk import TopKOutput
@@ -98,11 +95,13 @@ def override_quantization_method(self, *args, **kwargs):
9895
"mxfp4": Mxfp4Config,
9996
}
10097
)
101-
elif is_mxfp_supported and is_hip():
98+
elif _is_mxfp_supported and is_hip():
99+
from sglang.srt.layers.quantization.quark.quark import QuarkConfig
100+
102101
BASE_QUANTIZATION_METHODS.update(
103102
{
104-
"quark": MxFp4Config,
105-
"mxfp4": MxFp4Config,
103+
"quark": QuarkConfig,
104+
"mxfp4": Mxfp4Config,
106105
}
107106
)
108107
# VLLM-dependent quantization methods

0 commit comments

Comments (0)