File tree Expand file tree Collapse file tree 2 files changed +5
-3
lines changed
fastdeploy/model_executor/layers/moe Expand file tree Collapse file tree 2 files changed +5
-3
lines changed Original file line number Diff line number Diff line change @@ -83,7 +83,7 @@ def __init__(
83
83
# prefill engine
84
84
self .prefill_deepep_engine = deep_ep .Buffer (
85
85
self .group ,
86
- int (1e9 ),
86
+ int (5e8 ),
87
87
0 ,
88
88
low_latency_mode = False ,
89
89
num_qps_per_rank = 1 ,
@@ -97,7 +97,7 @@ def __init__(
97
97
elif moe_phase .phase == "prefill" :
98
98
self .prefill_deepep_engine = deep_ep .Buffer (
99
99
self .group ,
100
- int (1e9 ),
100
+ int (5e8 ),
101
101
0 ,
102
102
low_latency_mode = False ,
103
103
num_qps_per_rank = 1 ,
@@ -261,7 +261,6 @@ def __init__(
261
261
num_max_dispatch_tokens_per_rank = num_max_dispatch_tokens_per_rank ,
262
262
hidden = hidden ,
263
263
num_experts = num_experts + redundant_experts_num ,
264
- moe_phase = moe_phase ,
265
264
ep_size = ep_size ,
266
265
ep_rank = ep_rank ,
267
266
splitwise_role = splitwise_role ,
Original file line number Diff line number Diff line change @@ -53,6 +53,7 @@ def init_ep(self, layer: nn.Layer) -> None:
53
53
layer .fd_config .parallel_config .splitwise_role ,
54
54
layer .ep_size ,
55
55
layer .ep_rank ,
56
+ layer .fd_config .model_config .redundant_experts_num ,
56
57
)
57
58
self .ep_decoder_runner = EPDecoderRunner (
58
59
layer .top_k ,
@@ -75,6 +76,7 @@ def init_ep(self, layer: nn.Layer) -> None:
75
76
layer .fd_config .parallel_config .splitwise_role ,
76
77
layer .ep_size ,
77
78
layer .ep_rank ,
79
+ layer .fd_config .model_config .redundant_experts_num ,
78
80
)
79
81
else :
80
82
from .ep import EPDecoderRunner
@@ -87,6 +89,7 @@ def init_ep(self, layer: nn.Layer) -> None:
87
89
layer .fd_config .parallel_config .splitwise_role ,
88
90
layer .ep_size ,
89
91
layer .ep_rank ,
92
+ layer .fd_config .model_config .redundant_experts_num ,
90
93
)
91
94
92
95
def process_loaded_weights (self , layer , weights ) -> None :
You can’t perform that action at this time.
0 commit comments