Skip to content

Commit c81a3f9

Browse files
fix conflict
1 parent 7299c1e commit c81a3f9

File tree

2 files changed

+5
-3
lines changed

2 files changed

+5
-3
lines changed

fastdeploy/model_executor/layers/moe/ep.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def __init__(
 83  83   # prefill engine
 84  84   self.prefill_deepep_engine = deep_ep.Buffer(
 85  85   self.group,
 86     - int(1e9),
     86 + int(5e8),
 87  87   0,
 88  88   low_latency_mode=False,
 89  89   num_qps_per_rank=1,
@@ -97,7 +97,7 @@ def __init__(
 97  97   elif moe_phase.phase == "prefill":
 98  98   self.prefill_deepep_engine = deep_ep.Buffer(
 99  99   self.group,
100     - int(1e9),
    100 + int(5e8),
101 101   0,
102 102   low_latency_mode=False,
103 103   num_qps_per_rank=1,
@@ -261,7 +261,6 @@ def __init__(
261 261   num_max_dispatch_tokens_per_rank=num_max_dispatch_tokens_per_rank,
262 262   hidden=hidden,
263 263   num_experts=num_experts + redundant_experts_num,
264     - moe_phase=moe_phase,
265 264   ep_size=ep_size,
266 265   ep_rank=ep_rank,
267 266   splitwise_role=splitwise_role,

fastdeploy/model_executor/layers/moe/fused_moe_backend_base.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ def init_ep(self, layer: nn.Layer) -> None:
 53  53   layer.fd_config.parallel_config.splitwise_role,
 54  54   layer.ep_size,
 55  55   layer.ep_rank,
     56 + layer.fd_config.model_config.redundant_experts_num,
 56  57   )
 57  58   self.ep_decoder_runner = EPDecoderRunner(
 58  59   layer.top_k,
@@ -75,6 +76,7 @@ def init_ep(self, layer: nn.Layer) -> None:
 75  76   layer.fd_config.parallel_config.splitwise_role,
 76  77   layer.ep_size,
 77  78   layer.ep_rank,
     79 + layer.fd_config.model_config.redundant_experts_num,
 78  80   )
 79  81   else:
 80  82   from .ep import EPDecoderRunner
@@ -87,6 +89,7 @@ def init_ep(self, layer: nn.Layer) -> None:
 87  89   layer.fd_config.parallel_config.splitwise_role,
 88  90   layer.ep_size,
 89  91   layer.ep_rank,
     92 + layer.fd_config.model_config.redundant_experts_num,
 90  93   )
 91  94
 92  95   def process_loaded_weights(self, layer, weights) -> None:

0 commit comments

Comments (0)