Skip to content

Commit 30c47d3

Browse files
for ci
1 parent 3a3a263 commit 30c47d3

File tree

2 files changed

+28
-0
lines changed

2 files changed

+28
-0
lines changed

fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -736,6 +736,8 @@ def process_prequanted_weights(self, layer: nn.Layer, state_dict):
736736
"""
737737
Paddle cutlass process prequanted weights.
738738
"""
739+
if state_dict is None:
740+
return
739741
up_gate_proj_expert_weight_key = layer.weight_key_map.get("up_gate_proj_expert_weight_key", None)
740742
down_proj_expert_weight_key = layer.weight_key_map.get("down_proj_expert_weight_key", None)
741743
up_gate_proj_expert_weight_scale_key = layer.weight_key_map.get("up_gate_proj_expert_weight_scale_key", None)

test/layers/test_w4afp8.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import paddle
2+
from paddle import nn
3+
4+
from fastdeploy.model_executor.layers.moe.fused_moe_cutlass_backend import (
5+
CutlassW4AFP8MoEMethod,
6+
)
7+
8+
9+
class W4AFP8Layer(nn.Layer):
    """Minimal stand-in layer used to drive ``CutlassW4AFP8MoEMethod`` in this test.

    It exposes an empty ``weight_key_map``, ``ep_size = 1`` and a stub
    ``load_experts_weight`` that never touches a real checkpoint.
    """

    def __init__(self):
        super().__init__()
        self.ep_size = 1
        # Empty on purpose: every ``weight_key_map.get(...)`` lookup yields its default.
        self.weight_key_map = {}

    def load_experts_weight(self, state_dict, up_gate_proj_expert_weight_key, down_proj_expert_weight_key):
        """Return a 4-tuple of zero tensors, each of shape [64, 128, 128].

        All three arguments are accepted for signature compatibility and ignored.
        """
        placeholder_shape = [64, 128, 128]
        return tuple(paddle.zeros(placeholder_shape) for _ in range(4))
22+
23+
24+
def test_process_prequanted_weights_without_state_dict():
    """Check that ``process_prequanted_weights`` tolerates ``state_dict=None``.

    The method is expected to return early (yielding ``None``) instead of
    trying to read weights from a missing state dict.
    """
    w4afp8 = CutlassW4AFP8MoEMethod({})
    layer = W4AFP8Layer()
    assert w4afp8.process_prequanted_weights(layer, None) is None


if __name__ == "__main__":
    # Keep the file runnable as a plain script, as it was before the body
    # was wrapped in a test function.
    test_process_prequanted_weights_without_state_dict()

0 commit comments

Comments
 (0)