
Commit 7e831ef

Fix chat template handling for OpenAI serving (sgl-project#8635)
Signed-off-by: Xinyuan Tong <justinning0323@outlook.com>
Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
1 parent 20b5563 commit 7e831ef

File tree: 7 files changed, +83 -156 lines

python/sglang/srt/conversation.py
Lines changed: 0 additions & 112 deletions

@@ -954,20 +954,6 @@ def generate_chat_conv(
     )
 )
 
-register_conv_template(
-    Conversation(
-        name="mimo-vl",
-        system_message="You are MiMo, an AI assistant developed by Xiaomi.",
-        system_template="<|im_start|>system\n{system_message}",
-        roles=("<|im_start|>user", "<|im_start|>assistant"),
-        sep="<|im_end|>\n",
-        sep_style=SeparatorStyle.ADD_NEW_LINE_SINGLE,
-        stop_str=["<|im_end|>"],
-        image_token="<|vision_start|><|image_pad|><|vision_end|>",
-    )
-)
-
-
 register_conv_template(
     Conversation(
         name="qwen2-audio",
@@ -981,51 +967,11 @@ def generate_chat_conv(
     )
 )
 
-register_conv_template(
-    Conversation(
-        name="llama_4_vision",
-        system_message="You are a helpful language and vision assistant. You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language.",
-        system_template="<|header_start|>system<|header_end|>\n\n{system_message}<|eot|>",
-        roles=("user", "assistant"),
-        sep_style=SeparatorStyle.LLAMA4,
-        sep="",
-        stop_str="<|eot|>",
-        image_token="<|image|>",
-    )
-)
-
-register_conv_template(
-    Conversation(
-        name="step3-vl",
-        system_message="<|begin▁of▁sentence|>You are a helpful assistant",
-        system_template="{system_message}\n",
-        roles=(
-            "<|BOT|>user\n",
-            "<|BOT|>assistant\n<think>\n",
-        ),
-        sep="<|EOT|>",
-        sep_style=SeparatorStyle.NO_COLON_SINGLE,
-        stop_str="<|EOT|>",
-        image_token="<im_patch>",
-        # add_bos=True,
-    )
-)
-
 
 @register_conv_template_matching_function
 def match_internvl(model_path: str):
     if re.search(r"internvl", model_path, re.IGNORECASE):
         return "internvl-2-5"
-    if re.search(r"intern.*s1", model_path, re.IGNORECASE):
-        return "interns1"
-
-
-@register_conv_template_matching_function
-def match_llama_vision(model_path: str):
-    if re.search(r"llama.*3\.2.*vision", model_path, re.IGNORECASE):
-        return "llama_3_vision"
-    if re.search(r"llama.*4.*", model_path, re.IGNORECASE):
-        return "llama_4_vision"
 
 
 @register_conv_template_matching_function
@@ -1040,22 +986,6 @@ def match_vicuna(model_path: str):
         return "vicuna_v1.1"
 
 
-@register_conv_template_matching_function
-def match_llama2_chat(model_path: str):
-    if re.search(
-        r"llama-2.*chat|codellama.*instruct",
-        model_path,
-        re.IGNORECASE,
-    ):
-        return "llama-2"
-
-
-@register_conv_template_matching_function
-def match_mistral(model_path: str):
-    if re.search(r"pixtral|(mistral|mixtral).*instruct", model_path, re.IGNORECASE):
-        return "mistral"
-
-
 @register_conv_template_matching_function
 def match_deepseek_vl(model_path: str):
     if re.search(r"deepseek.*vl2", model_path, re.IGNORECASE):
@@ -1064,12 +994,6 @@ def match_deepseek_vl(model_path: str):
 
 
 @register_conv_template_matching_function
 def match_qwen_chat_ml(model_path: str):
-    if re.search(r"gme.*qwen.*vl", model_path, re.IGNORECASE):
-        return "gme-qwen2-vl"
-    if re.search(r"qwen.*vl", model_path, re.IGNORECASE):
-        return "qwen2-vl"
-    if re.search(r"qwen.*audio", model_path, re.IGNORECASE):
-        return "qwen2-audio"
     if re.search(
         r"llava-v1\.6-34b|llava-v1\.6-yi-34b|llava-next-video-34b|llava-onevision-qwen2",
         model_path,
@@ -1078,12 +1002,6 @@ def match_qwen_chat_ml(model_path: str):
         return "chatml-llava"
 
 
-@register_conv_template_matching_function
-def match_gemma3_instruct(model_path: str):
-    if re.search(r"gemma-3.*it", model_path, re.IGNORECASE):
-        return "gemma-it"
-
-
 @register_conv_template_matching_function
 def match_openbmb_minicpm(model_path: str):
     if re.search(r"minicpm-v", model_path, re.IGNORECASE):
@@ -1092,37 +1010,7 @@ def match_openbmb_minicpm(model_path: str):
         return "minicpmo"
 
 
-@register_conv_template_matching_function
-def match_moonshot_kimivl(model_path: str):
-    if re.search(r"kimi.*vl", model_path, re.IGNORECASE):
-        return "kimi-vl"
-
-
-@register_conv_template_matching_function
-def match_devstral(model_path: str):
-    if re.search(r"devstral", model_path, re.IGNORECASE):
-        return "devstral"
-
-
 @register_conv_template_matching_function
 def match_phi_4_mm(model_path: str):
     if "phi-4-multimodal" in model_path.lower():
         return "phi-4-mm"
-
-
-@register_conv_template_matching_function
-def match_vila(model_path: str):
-    if re.search(r"vila", model_path, re.IGNORECASE):
-        return "chatml"
-
-
-@register_conv_template_matching_function
-def match_mimo_vl(model_path: str):
-    if re.search(r"mimo.*vl", model_path, re.IGNORECASE):
-        return "mimo-vl"
-
-
-# @register_conv_template_matching_function
-# def match_step3(model_path: str):
-#     if re.search(r"step3", model_path, re.IGNORECASE):
-#         return "step3-vl"
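Note: with these matchers removed, model paths that previously hit them (MiMo-VL, Llama 4 vision, Llama-2 chat, Mistral/Pixtral, Gemma-3 it, Kimi-VL, Devstral, VILA) no longer resolve to a hand-written SGLang conversation template. A minimal sketch of the fall-through, reusing the removed match_mimo_vl (the model path below is illustrative):

```python
import re

# The removed matcher, reproduced only to show what changes:
def match_mimo_vl(model_path: str):
    if re.search(r"mimo.*vl", model_path, re.IGNORECASE):
        return "mimo-vl"

# Before this commit: the matcher fired and the built-in "mimo-vl" template won.
assert match_mimo_vl("XiaomiMiMo/MiMo-VL-7B") == "mimo-vl"
# After this commit: no matcher returns a name, so the TemplateManager falls
# back to the model's own HuggingFace chat template (see template_manager.py).
```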

python/sglang/srt/managers/template_manager.py
Lines changed: 25 additions & 22 deletions

@@ -84,26 +84,27 @@ def load_chat_template(
         if chat_template_arg:
             self._load_explicit_chat_template(tokenizer_manager, chat_template_arg)
         else:
-            # Try HuggingFace template first
-            hf_template = self._resolve_hf_chat_template(tokenizer_manager)
-            if hf_template:
-                self._jinja_template_content_format = (
-                    detect_jinja_template_content_format(hf_template)
-                )
-                logger.info(
-                    f"Using default HuggingFace chat template with detected content format: {self._jinja_template_content_format}"
-                )
-                return
-
-            # Fallback to SGLang template guessing
+            # Guess chat template from model path
             self.guess_chat_template_from_model_path(model_path)
 
-            # Set default format if no template was found
+            # If no pre-defined template was found, fallback to HuggingFace template
             if self._chat_template_name is None:
-                self._jinja_template_content_format = "string"
-                logger.info(
-                    "No chat template found, defaulting to 'string' content format"
-                )
+                # Try HuggingFace template first
+                hf_template = self._resolve_hf_chat_template(tokenizer_manager)
+                if hf_template:
+                    # override the chat template
+                    tokenizer_manager.tokenizer.chat_template = hf_template
+                    self._jinja_template_content_format = (
+                        detect_jinja_template_content_format(hf_template)
+                    )
+                    logger.info(
+                        f"Using default HuggingFace chat template with detected content format: {self._jinja_template_content_format}"
+                    )
+                    return
+
+            # Default to string content format if no template was found
+            self._jinja_template_content_format = "string"
+            logger.info("No chat template found, defaulting to 'string' content format")
 
     def _load_explicit_chat_template(
         self, tokenizer_manager, chat_template_arg: str
@@ -257,13 +258,15 @@ def _resolve_hf_chat_template(self, tokenizer_manager) -> Optional[str]:
 
         Returns the chat template string if found, None otherwise.
         """
-        tokenizer = tokenizer_manager.tokenizer
-
-        # Try to get AutoTokenizer chat template
         try:
-            return tokenizer.get_chat_template()
+            if processor := tokenizer_manager.processor:
+                if hasattr(processor, "chat_template") and processor.chat_template:
+                    return processor.chat_template
+            if tokenizer := tokenizer_manager.tokenizer:
+                if hasattr(tokenizer, "chat_template") and tokenizer.chat_template:
+                    return tokenizer.chat_template
         except Exception as e:
-            logger.debug(f"Error getting chat template via get_chat_template(): {e}")
+            logger.debug(f"Error getting chat template: {e}")
 
         logger.debug("No HuggingFace chat template found")
         return None
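
The net effect is a new precedence order: an explicit --chat-template argument, then a built-in SGLang template guessed from the model path, then the HuggingFace processor/tokenizer chat template, and finally the plain "string" content format. A minimal, self-contained sketch of that order (the function and its arguments are stand-ins, not the real manager API):

```python
from typing import Optional

def resolve_chat_template(
    chat_template_arg: Optional[str],
    builtin_name: Optional[str],  # result of guessing from the model path
    hf_template: Optional[str],   # processor/tokenizer chat_template, if any
) -> str:
    """Mirror load_chat_template's precedence: explicit > builtin > HF > string."""
    if chat_template_arg:
        return "explicit"
    if builtin_name is not None:
        return "builtin"
    if hf_template:
        return "huggingface"
    return "string"

# A model with no registered SGLang template but a tokenizer-provided Jinja
# template now resolves to "huggingface" instead of "string":
assert resolve_chat_template(None, None, "{{ messages }}") == "huggingface"
```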

python/sglang/srt/managers/tokenizer_manager.py
Lines changed: 2 additions & 2 deletions

@@ -225,10 +225,10 @@ def __init__(
             self.tokenizer = get_tokenizer_from_processor(self.processor)
             os.environ["TOKENIZERS_PARALLELISM"] = "false"
         else:
-            self.mm_processor = None
+            self.mm_processor = self.processor = None
 
         if server_args.skip_tokenizer_init:
-            self.tokenizer = self.processor = None
+            self.tokenizer = None
         else:
             self.tokenizer = get_tokenizer(
                 server_args.tokenizer_path,
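
This swap matters for the fallback above: previously --skip-tokenizer-init also cleared self.processor, so _resolve_hf_chat_template could never see processor.chat_template. A toy illustration of the invariant the two changed lines establish (ToyManager is a stand-in for the real TokenizerManager):

```python
class ToyManager:
    def __init__(self, is_multimodal: bool, skip_tokenizer_init: bool):
        if is_multimodal:
            self.processor = object()      # real code loads a HF processor
            self.mm_processor = object()
        else:
            # processor is now cleared together with mm_processor ...
            self.mm_processor = self.processor = None
        if skip_tokenizer_init:
            # ... so skipping tokenizer init no longer wipes self.processor
            self.tokenizer = None
        else:
            self.tokenizer = object()      # real code calls get_tokenizer(...)

m = ToyManager(is_multimodal=True, skip_tokenizer_init=True)
assert m.processor is not None and m.tokenizer is None
```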

python/sglang/srt/multimodal/processors/gemma3n.py
Lines changed: 0 additions & 7 deletions

@@ -12,7 +12,6 @@
 # limitations under the License.
 # ==============================================================================
 
-import re
 from typing import Dict, List, Optional, Union
 
 from sglang.srt.managers.multimodal_processor import (
@@ -38,14 +37,8 @@ def __init__(self, hf_config, server_args, _processor, *args, **kwargs):
         self.mm_tokens = MultimodalSpecialTokens(
             image_token="<image_soft_token>",
             image_token_id=hf_config.image_token_id,
-            image_token_regex=re.compile(
-                r"<start_of_image>(?:(?:<image_soft_token>)*<end_of_image>)?"
-            ),
             audio_token="<audio_soft_token>",
             audio_token_id=hf_config.audio_token_id,
-            audio_token_regex=re.compile(
-                r"<start_of_audio>(?:(?:<audio_soft_token>)*<end_of_audio>)?"
-            ),
         ).build(_processor)
 
     async def process_mm_data_async(
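
For reference, the removed patterns matched a whole <start_of_image>...<end_of_image> run rather than a single placeholder token; a quick demonstration of what one of them covered (assuming the Gemma-3n special tokens shown above):

```python
import re

# One of the two removed patterns:
image_run = re.compile(r"<start_of_image>(?:(?:<image_soft_token>)*<end_of_image>)?")

text = "<start_of_image><image_soft_token><image_soft_token><end_of_image>"
assert image_run.match(text) is not None

# After this commit, MultimodalSpecialTokens is built only from the literal
# "<image_soft_token>" / "<audio_soft_token>" strings and their token ids.
```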

test/srt/test_vision_openai_server_a.py
Lines changed: 18 additions & 1 deletion

@@ -31,6 +31,8 @@ def setUpClass(cls):
             other_args=[
                 "--mem-fraction-static",
                 "0.35",
+                "--cuda-graph-max-bs",
+                "4",
             ],
         )
         cls.base_url += "/v1"
@@ -53,6 +55,8 @@ def setUpClass(cls):
             other_args=[
                 "--mem-fraction-static",
                 "0.35",
+                "--cuda-graph-max-bs",
+                "4",
             ],
         )
         cls.base_url += "/v1"
@@ -76,6 +80,8 @@ def setUpClass(cls):
                 "--context-length",
                 "300",
                 "--mem-fraction-static=0.75",
+                "--cuda-graph-max-bs",
+                "4",
             ],
         )
         cls.base_url += "/v1"
@@ -149,6 +155,8 @@ def setUpClass(cls):
                 "--trust-remote-code",
                 "--mem-fraction-static",
                 "0.35",
+                "--cuda-graph-max-bs",
+                "4",
             ],
         )
         cls.base_url += "/v1"
@@ -164,7 +172,11 @@ def setUpClass(cls):
             cls.model,
             cls.base_url,
             timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
-            other_args=["--trust-remote-code"],
+            other_args=[
+                "--trust-remote-code",
+                "--cuda-graph-max-bs",
+                "4",
+            ],
         )
         cls.base_url += "/v1"
 
@@ -183,6 +195,8 @@ def setUpClass(cls):
                 "--trust-remote-code",
                 "--mem-fraction-static",
                 "0.65",
+                "--cuda-graph-max-bs",
+                "4",
             ],
         )
         cls.base_url += "/v1"
@@ -207,10 +221,13 @@ def setUpClass(cls):
                 "--trust-remote-code",
                 "--mem-fraction-static",
                 "0.6",
+                "--cuda-graph-max-bs",
+                "4",
             ],
         )
         cls.base_url += "/v1"
 
 
 if __name__ == "__main__":
+    del TestOpenAIVisionServer
     unittest.main()
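
The added `del TestOpenAIVisionServer` is a common unittest idiom: unittest.main() collects every TestCase subclass still bound at module scope, so deleting the shared base class keeps it from being run on its own. A minimal sketch of the pattern (class names are illustrative):

```python
import unittest

class BaseServerTest(unittest.TestCase):
    """Shared test logic; subclasses point it at a concrete model."""
    model = None

    def test_model_is_set(self):
        self.assertIsNotNone(self.model)

class TestModelA(BaseServerTest):
    model = "model-a"

if __name__ == "__main__":
    del BaseServerTest  # hide the base class from test collection
    unittest.main()
```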
