Skip to content

Commit 581e7dc

Browse files
authored
GLM-4.5 Model Support Follow-up (sgl-project#8445)
1 parent 484d0e0 commit 581e7dc

File tree

6 files changed

+44
-15
lines changed

6 files changed

+44
-15
lines changed

python/sglang/srt/function_call/glm4_moe_detector.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,7 @@ def build_ebnf(self, tools: List[Tool]):
156156
tools,
157157
individual_call_start_token=self.bot_token,
158158
individual_call_end_token=self.eot_token,
159-
# GLM4Moe is not compatible with multiple tool_calls under tool_choice condition: it will output unlimited tool_calls...
160-
# tool_call_separator="\\n",
159+
tool_call_separator="\\n",
161160
function_format="xml",
162161
call_rule_fmt='"{name}" "\\n" {arguments_rule} "\\n"',
163162
key_value_rule_fmt='"<arg_key>{key}</arg_key>" "\\n" "<arg_value>" {valrule} "</arg_value>"',

python/sglang/srt/function_call/qwen3_coder_detector.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,4 +148,5 @@ def build_ebnf(self, tools: List[Tool]):
148148
function_format="xml",
149149
call_rule_fmt='"<function={name}>\\n" {arguments_rule} "\\n</function>"',
150150
key_value_rule_fmt='"<parameter={key}>\\n" {valrule} "\\n</parameter>"',
151+
key_value_separator="\\n",
151152
)

test/srt/openai_server/features/test_enable_thinking.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ def test_stream_chat_completion_without_reasoning(self):
189189
)
190190

191191

192-
## Skip for ci test
192+
# Skip for ci test
193193
# class TestGLM45EnableThinking(TestEnableThinking):
194194
# @classmethod
195195
# def setUpClass(cls):

test/srt/openai_server/function_call/test_openai_function_calling.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -913,7 +913,7 @@ def test_pythonic_tool_call_streaming(self):
913913
)
914914

915915

916-
## Skip for ci test
916+
# Skip for ci test
917917
# class TestGLM45ServerFunctionCalling(TestOpenAIServerFunctionCalling):
918918
# @classmethod
919919
# def setUpClass(cls):

test/srt/openai_server/function_call/test_tool_choice.py

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def get_test_messages(self):
135135
return [
136136
{
137137
"role": "user",
138-
"content": "Answer the following questions as best you can:\n\nYou will be given a trace of thinking process in the following format.\n\nQuestion: the input question you must answer\nTOOL: think about what to do, and choose a tool to use ONLY IF there are defined tools\nOBSERVATION: the result of the tool call or the observation of the current task, NEVER include this in your response, this information will be provided\n... (this TOOL/OBSERVATION can repeat N times)\nANSWER: If you know the answer to the original question, require for more information, \nif the previous conversation history already contains the answer, \nor you don't know the answer and there are no defined tools or all available tools are not helpful, respond with the answer without mentioning anything else.\nYou may use light Markdown formatting to improve clarity (e.g. lists, **bold**, *italics*), but keep it minimal and unobtrusive.\n\nYour task is to respond with the next step to take, based on the traces, \nor answer the question if you have enough information.\n\nQuestion: what is the weather in top 5 populated cities in the US?\n\nTraces:\n\n\nThese are some additional instructions that you should follow:",
138+
"content": "Answer the following questions as best you can:\n\nYou will be given a trace of thinking process in the following format.\n\nQuestion: the input question you must answer\nTOOL: think about what to do, and choose a tool to use ONLY IF there are defined tools\nOBSERVATION: the result of the tool call or the observation of the current task, NEVER include this in your response, this information will be provided\n... (this TOOL/OBSERVATION can repeat N times)\nANSWER: If you know the answer to the original question, require for more information, \nif the previous conversation history already contains the answer, \nor you don't know the answer and there are no defined tools or all available tools are not helpful, respond with the answer without mentioning anything else.\nYou may use light Markdown formatting to improve clarity (e.g. lists, **bold**, *italics*), but keep it minimal and unobtrusive.\n\nYour task is to respond with the next step to take, based on the traces, \nor answer the question if you have enough information.\n\nQuestion: what is the weather in top 5 populated cities in the US in celsius?\n\nTraces:\n\n\nThese are some additional instructions that you should follow:",
139139
}
140140
]
141141

@@ -203,7 +203,7 @@ def test_tool_choice_auto_non_streaming(self):
203203
response = self.client.chat.completions.create(
204204
model=self.model_name,
205205
messages=messages,
206-
max_tokens=400,
206+
max_tokens=2048,
207207
tools=tools,
208208
tool_choice="auto",
209209
stream=False,
@@ -220,7 +220,7 @@ def test_tool_choice_auto_streaming(self):
220220
response = self.client.chat.completions.create(
221221
model=self.model_name,
222222
messages=messages,
223-
max_tokens=400,
223+
max_tokens=2048,
224224
tools=tools,
225225
tool_choice="auto",
226226
stream=True,
@@ -248,7 +248,7 @@ def test_tool_choice_required_non_streaming(self):
248248
response = self.client.chat.completions.create(
249249
model=self.model_name,
250250
messages=messages,
251-
max_tokens=400,
251+
max_tokens=2048,
252252
temperature=0.2,
253253
tools=tools,
254254
tool_choice="required",
@@ -268,7 +268,7 @@ def test_tool_choice_required_streaming(self):
268268
response = self.client.chat.completions.create(
269269
model=self.model_name,
270270
messages=messages,
271-
max_tokens=400,
271+
max_tokens=2048,
272272
tools=tools,
273273
tool_choice="required",
274274
stream=True,
@@ -294,7 +294,7 @@ def test_tool_choice_specific_function_non_streaming(self):
294294
response = self.client.chat.completions.create(
295295
model=self.model_name,
296296
messages=messages,
297-
max_tokens=200,
297+
max_tokens=2048,
298298
tools=tools,
299299
tool_choice=tool_choice,
300300
stream=False,
@@ -318,7 +318,7 @@ def test_tool_choice_specific_function_streaming(self):
318318
response = self.client.chat.completions.create(
319319
model=self.model_name,
320320
messages=messages,
321-
max_tokens=200,
321+
max_tokens=2048,
322322
tools=tools,
323323
tool_choice=tool_choice,
324324
stream=True,
@@ -351,7 +351,7 @@ def test_multi_tool_scenario_auto(self):
351351
response = self.client.chat.completions.create(
352352
model=self.model_name,
353353
messages=messages,
354-
max_tokens=400,
354+
max_tokens=2048,
355355
temperature=0.2,
356356
tools=tools,
357357
tool_choice="auto",
@@ -392,7 +392,7 @@ def test_multi_tool_scenario_required(self):
392392
response = self.client.chat.completions.create(
393393
model=self.model_name,
394394
messages=messages,
395-
max_tokens=400,
395+
max_tokens=2048,
396396
temperature=0.2,
397397
tools=tools,
398398
tool_choice="required",
@@ -450,7 +450,7 @@ def test_error_handling_invalid_tool_choice(self):
450450
response = self.client.chat.completions.create(
451451
model=self.model_name,
452452
messages=messages,
453-
max_tokens=200,
453+
max_tokens=2048,
454454
tools=tools,
455455
tool_choice=tool_choice,
456456
stream=False,
@@ -517,5 +517,34 @@ def setUpClass(cls):
517517
cls.tokenizer = get_tokenizer(cls.model)
518518

519519

520+
# Skip for ci test
521+
# class TestToolChoiceGLM45(TestToolChoiceLlama32):
522+
# @classmethod
523+
# def setUpClass(cls):
524+
# # Replace with the model name needed for testing; if not required, reuse DEFAULT_SMALL_MODEL_NAME_FOR_TEST
525+
# cls.model = "THUDM/GLM-4.5"
526+
# cls.base_url = DEFAULT_URL_FOR_TEST
527+
# cls.api_key = "sk-123456"
528+
529+
# # Start the local OpenAI Server. If necessary, you can add other parameters such as --enable-tools.
530+
# cls.process = popen_launch_server(
531+
# cls.model,
532+
# cls.base_url,
533+
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
534+
# api_key=cls.api_key,
535+
# other_args=[
536+
# # If your server needs extra parameters to test function calling, please add them here.
537+
# "--tool-call-parser",
538+
# "glm45",
539+
# "--reasoning-parser",
540+
# "glm45",
541+
# "--tp-size",
542+
# "8"
543+
# ],
544+
# )
545+
# cls.base_url += "/v1"
546+
# cls.tokenizer = get_tokenizer(cls.model)
547+
548+
520549
if __name__ == "__main__":
521550
unittest.main()

test/srt/test_function_call_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2068,7 +2068,7 @@ def test_streaming_multiple_tool_calls(self):
20682068
tool_calls[1]["parameters"], '{"city": "Shanghai", "date": "2024-06-28"}'
20692069
)
20702070

2071-
def test_tool_call_completion(self):
2071+
def test_tool_call_id(self):
20722072
"""Test that the buffer and state are reset after a tool call is completed."""
20732073
chunks = [
20742074
"<tool_call>get_weather\n",

0 commit comments

Comments (0)