Skip to content

Commit 581e7dc

Browse files
authored
GLM-4.5 Model Support Follow-up (sgl-project#8445)
1 parent 484d0e0 commit 581e7dc

File tree

6 files changed

+44
-15
lines changed

6 files changed

+44
-15
lines changed

python/sglang/srt/function_call/glm4_moe_detector.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,7 @@ def build_ebnf(self, tools: List[Tool]):
156156
tools,
157157
individual_call_start_token=self.bot_token,
158158
individual_call_end_token=self.eot_token,
159-
# GLM4Moe is not compatible with multiple tool_calls under tool_choice condition: it will output unlimited tool_calls...
160-
# tool_call_separator="\\n",
159+
tool_call_separator="\\n",
161160
function_format="xml",
162161
call_rule_fmt='"{name}" "\\n" {arguments_rule} "\\n"',
163162
key_value_rule_fmt='"<arg_key>{key}</arg_key>" "\\n" "<arg_value>" {valrule} "</arg_value>"',

python/sglang/srt/function_call/qwen3_coder_detector.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,4 +148,5 @@ def build_ebnf(self, tools: List[Tool]):
148148
function_format="xml",
149149
call_rule_fmt='"<function={name}>\\n" {arguments_rule} "\\n</function>"',
150150
key_value_rule_fmt='"<parameter={key}>\\n" {valrule} "\\n</parameter>"',
151+
key_value_separator="\\n",
151152
)

test/srt/openai_server/features/test_enable_thinking.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ def test_stream_chat_completion_without_reasoning(self):
189189
)
190190

191191

192-
## Skip for ci test
192+
# Skip for ci test
193193
# class TestGLM45EnableThinking(TestEnableThinking):
194194
# @classmethod
195195
# def setUpClass(cls):

test/srt/openai_server/function_call/test_openai_function_calling.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -913,7 +913,7 @@ def test_pythonic_tool_call_streaming(self):
913913
)
914914

915915

916-
## Skip for ci test
916+
# Skip for ci test
917917
# class TestGLM45ServerFunctionCalling(TestOpenAIServerFunctionCalling):
918918
# @classmethod
919919
# def setUpClass(cls):

test/srt/openai_server/function_call/test_tool_choice.py

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def get_test_messages(self):
135135
return [
136136
{
137137
"role": "user",
138-
"content": "Answer the following questions as best you can:\n\nYou will be given a trace of thinking process in the following format.\n\nQuestion: the input question you must answer\nTOOL: think about what to do, and choose a tool to use ONLY IF there are defined tools\nOBSERVATION: the result of the tool call or the observation of the current task, NEVER include this in your response, this information will be provided\n... (this TOOL/OBSERVATION can repeat N times)\nANSWER: If you know the answer to the original question, require for more information, \nif the previous conversation history already contains the answer, \nor you don't know the answer and there are no defined tools or all available tools are not helpful, respond with the answer without mentioning anything else.\nYou may use light Markdown formatting to improve clarity (e.g. lists, **bold**, *italics*), but keep it minimal and unobtrusive.\n\nYour task is to respond with the next step to take, based on the traces, \nor answer the question if you have enough information.\n\nQuestion: what is the weather in top 5 populated cities in the US?\n\nTraces:\n\n\nThese are some additional instructions that you should follow:",
138+
"content": "Answer the following questions as best you can:\n\nYou will be given a trace of thinking process in the following format.\n\nQuestion: the input question you must answer\nTOOL: think about what to do, and choose a tool to use ONLY IF there are defined tools\nOBSERVATION: the result of the tool call or the observation of the current task, NEVER include this in your response, this information will be provided\n... (this TOOL/OBSERVATION can repeat N times)\nANSWER: If you know the answer to the original question, require for more information, \nif the previous conversation history already contains the answer, \nor you don't know the answer and there are no defined tools or all available tools are not helpful, respond with the answer without mentioning anything else.\nYou may use light Markdown formatting to improve clarity (e.g. lists, **bold**, *italics*), but keep it minimal and unobtrusive.\n\nYour task is to respond with the next step to take, based on the traces, \nor answer the question if you have enough information.\n\nQuestion: what is the weather in top 5 populated cities in the US in celsius?\n\nTraces:\n\n\nThese are some additional instructions that you should follow:",
139139
}
140140
]
141141

@@ -203,7 +203,7 @@ def test_tool_choice_auto_non_streaming(self):
203203
response = self.client.chat.completions.create(
204204
model=self.model_name,
205205
messages=messages,
206-
max_tokens=400,
206+
max_tokens=2048,
207207
tools=tools,
208208
tool_choice="auto",
209209
stream=False,
@@ -220,7 +220,7 @@ def test_tool_choice_auto_streaming(self):
220220
response = self.client.chat.completions.create(
221221
model=self.model_name,
222222
messages=messages,
223-
max_tokens=400,
223+
max_tokens=2048,
224224
tools=tools,
225225
tool_choice="auto",
226226
stream=True,
@@ -248,7 +248,7 @@ def test_tool_choice_required_non_streaming(self):
248248
response = self.client.chat.completions.create(
249249
model=self.model_name,
250250
messages=messages,
251-
max_tokens=400,
251+
max_tokens=2048,
252252
temperature=0.2,
253253
tools=tools,
254254
tool_choice="required",
@@ -268,7 +268,7 @@ def test_tool_choice_required_streaming(self):
268268
response = self.client.chat.completions.create(
269269
model=self.model_name,
270270
messages=messages,
271-
max_tokens=400,
271+
max_tokens=2048,
272272
tools=tools,
273273
tool_choice="required",
274274
stream=True,
@@ -294,7 +294,7 @@ def test_tool_choice_specific_function_non_streaming(self):
294294
response = self.client.chat.completions.create(
295295
model=self.model_name,
296296
messages=messages,
297-
max_tokens=200,
297+
max_tokens=2048,
298298
tools=tools,
299299
tool_choice=tool_choice,
300300
stream=False,
@@ -318,7 +318,7 @@ def test_tool_choice_specific_function_streaming(self):
318318
response = self.client.chat.completions.create(
319319
model=self.model_name,
320320
messages=messages,
321-
max_tokens=200,
321+
max_tokens=2048,
322322
tools=tools,
323323
tool_choice=tool_choice,
324324
stream=True,
@@ -351,7 +351,7 @@ def test_multi_tool_scenario_auto(self):
351351
response = self.client.chat.completions.create(
352352
model=self.model_name,
353353
messages=messages,
354-
max_tokens=400,
354+
max_tokens=2048,
355355
temperature=0.2,
356356
tools=tools,
357357
tool_choice="auto",
@@ -392,7 +392,7 @@ def test_multi_tool_scenario_required(self):
392392
response = self.client.chat.completions.create(
393393
model=self.model_name,
394394
messages=messages,
395-
max_tokens=400,
395+
max_tokens=2048,
396396
temperature=0.2,
397397
tools=tools,
398398
tool_choice="required",
@@ -450,7 +450,7 @@ def test_error_handling_invalid_tool_choice(self):
450450
response = self.client.chat.completions.create(
451451
model=self.model_name,
452452
messages=messages,
453-
max_tokens=200,
453+
max_tokens=2048,
454454
tools=tools,
455455
tool_choice=tool_choice,
456456
stream=False,
@@ -517,5 +517,34 @@ def setUpClass(cls):
517517
cls.tokenizer = get_tokenizer(cls.model)
518518

519519

520+
# Skip for ci test
521+
# class TestToolChoiceGLM45(TestToolChoiceLlama32):
522+
# @classmethod
523+
# def setUpClass(cls):
524+
# # Replace with the model name needed for testing; if not required, reuse DEFAULT_SMALL_MODEL_NAME_FOR_TEST
525+
# cls.model = "THUDM/GLM-4.5"
526+
# cls.base_url = DEFAULT_URL_FOR_TEST
527+
# cls.api_key = "sk-123456"
528+
529+
# # Start the local OpenAI Server. If necessary, you can add other parameters such as --enable-tools.
530+
# cls.process = popen_launch_server(
531+
# cls.model,
532+
# cls.base_url,
533+
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
534+
# api_key=cls.api_key,
535+
# other_args=[
536+
# # If your server needs extra parameters to test function calling, please add them here.
537+
# "--tool-call-parser",
538+
# "glm45",
539+
# "--reasoning-parser",
540+
# "glm45",
541+
# "--tp-size",
542+
# "8"
543+
# ],
544+
# )
545+
# cls.base_url += "/v1"
546+
# cls.tokenizer = get_tokenizer(cls.model)
547+
548+
520549
if __name__ == "__main__":
521550
unittest.main()

test/srt/test_function_call_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2068,7 +2068,7 @@ def test_streaming_multiple_tool_calls(self):
20682068
tool_calls[1]["parameters"], '{"city": "Shanghai", "date": "2024-06-28"}'
20692069
)
20702070

2071-
def test_tool_call_completion(self):
2071+
def test_tool_call_id(self):
20722072
"""Test that the buffer and state are reset after a tool call is completed."""
20732073
chunks = [
20742074
"<tool_call>get_weather\n",

0 commit comments

Comments (0)