We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 81523c6 commit fc3b8edCopy full SHA for fc3b8ed
fastdeploy/model_executor/layers/moe/fused_moe_wint2_backend.py
@@ -20,6 +20,7 @@
20
import fastdeploy
21
from fastdeploy.distributed.communication_op import \
22
tensor_model_parallel_all_reduce
23
+
24
from ..quantization.quant_base import QuantMethodBase
25
from ..utils import create_and_set_parameter, get_tensor
26
@@ -223,7 +224,6 @@ def apply(
223
224
)
225
226
from fastdeploy.model_executor.ops.gpu import moe_expert_reduce
-
227
fused_moe_out = moe_expert_reduce(
228
ffn_out,
229
topk_weights,
0 commit comments