Skip to content

Commit 59142ee

Browse files
zhyncs, RunkaiTao
authored and committed
fix: gemma 3 not use softcap (sgl-project#5622)
1 parent 4ee6328 commit 59142ee

File tree

4 files changed

+17
-2
lines changed

4 files changed

+17
-2
lines changed

python/sglang/srt/configs/model_config.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,11 @@ def __init__(
7878
logger.info(
7979
"Multimodal is disabled for Llama4. To enable it, set --enable-llama4-multimodal."
8080
)
81+
elif self.hf_config.architectures[0] == "Gemma3ForConditionalGeneration":
82+
enable_multimodal = False
83+
logger.info(
84+
"Multimodal is disabled for Gemma3. To enable it, set --enable-gemma3-multimodal."
85+
)
8186
else:
8287
enable_multimodal = True
8388

python/sglang/srt/models/gemma3_causal.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -189,7 +189,7 @@ def __init__(
189189
self.scaling,
190190
num_kv_heads=self.num_kv_heads,
191191
layer_id=layer_id,
192-
logit_cap=getattr(self.config, "attn_logit_softcapping", None),
192+
logit_cap=0.0,
193193
# Module must also define `get_attention_sliding_window_size` to correctly initialize
194194
# attention backend in `ForwardBatch`.
195195
sliding_window_size=self.sliding_window,

python/sglang/srt/server_args.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -154,6 +154,7 @@ class ServerArgs:
154154
disable_outlines_disk_cache: bool = False
155155
disable_custom_all_reduce: bool = False
156156
enable_llama4_multimodal: Optional[bool] = None
157+
enable_gemma3_multimodal: Optional[bool] = None
157158
disable_overlap_schedule: bool = False
158159
enable_mixed_chunk: bool = False
159160
enable_dp_attention: bool = False
@@ -285,7 +286,9 @@ def __post_init__(self):
285286
if self.grammar_backend is None:
286287
self.grammar_backend = "xgrammar"
287288

288-
self.enable_multimodal: Optional[bool] = self.enable_llama4_multimodal
289+
self.enable_multimodal: Optional[bool] = (
290+
self.enable_llama4_multimodal or self.enable_gemma3_multimodal
291+
)
289292

290293
# Data parallelism attention
291294
if self.enable_dp_attention:
@@ -984,6 +987,12 @@ def add_cli_args(parser: argparse.ArgumentParser):
984987
action="store_true",
985988
help="Enable the multimodal functionality for Llama-4.",
986989
)
990+
parser.add_argument(
991+
"--enable-gemma3-multimodal",
992+
default=ServerArgs.enable_gemma3_multimodal,
993+
action="store_true",
994+
help="Enable the multimodal functionality for Gemma-3.",
995+
)
987996
parser.add_argument(
988997
"--disable-overlap-schedule",
989998
action="store_true",

python/sglang/srt/utils.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1971,6 +1971,7 @@ def is_fa3_default_architecture(hf_config):
19711971
"LlamaForCausalLM",
19721972
"MistralForCausalLM",
19731973
"Gemma2ForCausalLM",
1974+
"Gemma3ForConditionalGeneration",
19741975
}
19751976
return architectures[0] in default_archs
19761977

0 commit comments

Comments
 (0)