From 73c97a22fe6efb7e9bce417fe706073f6d8a3a9b Mon Sep 17 00:00:00 2001
From: luukunn <981429396@qq.com>
Date: Fri, 15 Aug 2025 11:18:15 +0800
Subject: [PATCH 01/11] fix chat_template_args

---
 fastdeploy/engine/engine.py            | 5 +----
 fastdeploy/entrypoints/llm.py          | 7 ++-----
 fastdeploy/input/ernie_processor.py    | 3 ++-
 fastdeploy/input/ernie_vl_processor.py | 3 ++-
 fastdeploy/input/text_processor.py     | 3 ++-
 5 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/fastdeploy/engine/engine.py b/fastdeploy/engine/engine.py
index db3bdefffe..fa7be2f98c 100644
--- a/fastdeploy/engine/engine.py
+++ b/fastdeploy/engine/engine.py
@@ -465,10 +465,7 @@ def add_requests(self, task, sampling_params=None, **kwargs):
         request.sampling_params = sampling_params
         request.preprocess_start_time = time.time()
 
-        enable_thinking = None
-        if kwargs is not None:
-            enable_thinking = kwargs.get("enable_thinking", None)
-        request = self.data_processor.process_request(request, self.cfg.max_model_len, enable_thinking=enable_thinking)
+        request = self.data_processor.process_request(request, self.cfg.max_model_len, **kwargs)
         request.prompt_token_ids_len = len(request.prompt_token_ids)
         request.need_prefill_tokens = request.prompt_token_ids_len
         input_ids_len = request.prompt_token_ids_len
diff --git a/fastdeploy/entrypoints/llm.py b/fastdeploy/entrypoints/llm.py
index 001cfad3e0..1b0d9ddac4 100644
--- a/fastdeploy/entrypoints/llm.py
+++ b/fastdeploy/entrypoints/llm.py
@@ -248,7 +248,7 @@ def _add_request(
         self,
         prompts,
         sampling_params,
-        chat_template_kwargs: Optional[dict[str, Any]] = None,
+        **kwargs,
     ):
         """
         添加一个请求到 LLM Engine，并返回该请求的 ID。
@@ -289,10 +289,7 @@ def _add_request(
                 current_sampling_params = sampling_params[i]
             else:
                 current_sampling_params = sampling_params
-            enable_thinking = None
-            if chat_template_kwargs is not None:
-                enable_thinking = chat_template_kwargs.get("enable_thinking", None)
-            self.llm_engine.add_requests(tasks, current_sampling_params, enable_thinking=enable_thinking)
+            self.llm_engine.add_requests(tasks, current_sampling_params, **kwargs)
         return req_ids
 
     def _decode_token(self, token_id: int) -> str:
diff --git a/fastdeploy/input/ernie_processor.py b/fastdeploy/input/ernie_processor.py
index e4424a0b8c..4ff4d5e142 100644
--- a/fastdeploy/input/ernie_processor.py
+++ b/fastdeploy/input/ernie_processor.py
@@ -90,6 +90,7 @@ def process_request(self, request, max_model_len=None, **kwargs):
         request = self._apply_default_parameters(request)
         if request.get("eos_token_ids") is None or len(request.eos_token_ids) == 0:
             request.eos_token_ids = self.eos_token_ids
+        request.enable_thinking = kwargs.get("chat_template_kwargs", {}).get("enable_thinking")
         stop_sequences = request.get("stop", [])
         if stop_sequences is not None and len(stop_sequences) != 0:
             stop_seqs, stop_seqs_len = self.update_stop_seq(stop_sequences)
@@ -140,7 +141,7 @@ def process_request_dict(self, request, max_model_len=None):
         request = self._apply_default_parameters(request)
         if not request.get("eos_token_ids"):
             request["eos_token_ids"] = self.eos_token_ids
-
+        request["enable_thinking"] = request.get("chat_template_kwargs", {}).get("enable_thinking")
         # processing stop_sequences
         stop_sequences = request.get("stop", [])
         if stop_sequences:
diff --git a/fastdeploy/input/ernie_vl_processor.py b/fastdeploy/input/ernie_vl_processor.py
index e8239f7adb..7560115535 100644
--- a/fastdeploy/input/ernie_vl_processor.py
+++ b/fastdeploy/input/ernie_vl_processor.py
@@ -110,7 +110,7 @@ def set_value(req, key, value):
     def process_request(self, request, max_model_len=None, **kwargs):
         """process the input data"""
         task = request.to_dict()
-        task["enable_thinking"] = kwargs.get("enable_thinking", True)
+        task["chat_template_kwargs"] = kwargs.get("chat_template_kwargs", {})
         self.process_request_dict(task, max_model_len)
         request = Request.from_dict(task)
         request = self._apply_default_parameters(request)
@@ -198,6 +198,7 @@ def process_request_dict(self, request, max_model_len=None):
         request = self._apply_default_parameters(request)
         if not request.get("eos_token_ids"):
             request["eos_token_ids"] = self.eos_token_ids
+        request["enable_thinking"] = request.get("chat_template_kwargs", {}).get("enable_thinking")
 
         stop_sequences = request.get("stop", [])
         if stop_sequences:
diff --git a/fastdeploy/input/text_processor.py b/fastdeploy/input/text_processor.py
index e842e964b3..10f5081b4d 100644
--- a/fastdeploy/input/text_processor.py
+++ b/fastdeploy/input/text_processor.py
@@ -207,7 +207,7 @@ def process_request(self, request, max_model_len=None, **kwargs):
         request = self._apply_default_parameters(request)
         if request.get("eos_token_ids") is None or len(request.eos_token_ids) == 0:
             request.eos_token_ids = self.eos_token_ids
-
+        request.enable_thinking = kwargs.get("chat_template_kwargs", {}).get("enable_thinking")
         stop_sequences = request.get("stop", [])
         if stop_sequences is not None and len(stop_sequences) != 0:
             stop_seqs, stop_seqs_len = self.update_stop_seq(stop_sequences)
@@ -254,6 +254,7 @@ def process_request_dict(self, request, max_model_len=None, **kwargs):
         request = self._apply_default_parameters(request)
         if not request.get("eos_token_ids"):
             request["eos_token_ids"] = self.eos_token_ids
+        request["enable_thinking"] = request.get("chat_template_kwargs", {}).get("enable_thinking")
 
         # processing stop_sequences
         stop_sequences = request.get("stop", [])

From 6a8eefc35e7a8f072730302a90747a6be6622b5e Mon Sep 17 00:00:00 2001
From: luukunn <981429396@qq.com>
Date: Fri, 15 Aug 2025 14:31:10 +0800
Subject: [PATCH 02/11] fix args

---
 fastdeploy/input/ernie_processor.py    | 7 ++++++-
 fastdeploy/input/ernie_vl_processor.py | 6 +++++-
 fastdeploy/input/text_processor.py     | 6 +++++-
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/fastdeploy/input/ernie_processor.py b/fastdeploy/input/ernie_processor.py
index 4ff4d5e142..766feea479 100644
--- a/fastdeploy/input/ernie_processor.py
+++ b/fastdeploy/input/ernie_processor.py
@@ -90,6 +90,7 @@ def process_request(self, request, max_model_len=None, **kwargs):
         request = self._apply_default_parameters(request)
         if request.get("eos_token_ids") is None or len(request.eos_token_ids) == 0:
             request.eos_token_ids = self.eos_token_ids
+
         request.enable_thinking = kwargs.get("chat_template_kwargs", {}).get("enable_thinking")
         stop_sequences = request.get("stop", [])
         if stop_sequences is not None and len(stop_sequences) != 0:
@@ -141,7 +142,11 @@ def process_request_dict(self, request, max_model_len=None):
         request = self._apply_default_parameters(request)
         if not request.get("eos_token_ids"):
             request["eos_token_ids"] = self.eos_token_ids
-        request["enable_thinking"] = request.get("chat_template_kwargs", {}).get("enable_thinking")
+        chat_template_kwargs = request.get("chat_template_kwargs")
+        if chat_template_kwargs:
+            for k, v in chat_template_kwargs.items():
+                if k not in request:
+                    request[k] = v
         # processing stop_sequences
         stop_sequences = request.get("stop", [])
         if stop_sequences:
diff --git a/fastdeploy/input/ernie_vl_processor.py b/fastdeploy/input/ernie_vl_processor.py
index 7560115535..04f29fa19e 100644
--- a/fastdeploy/input/ernie_vl_processor.py
+++ b/fastdeploy/input/ernie_vl_processor.py
@@ -198,7 +198,11 @@ def process_request_dict(self, request, max_model_len=None):
         request = self._apply_default_parameters(request)
         if not request.get("eos_token_ids"):
             request["eos_token_ids"] = self.eos_token_ids
-        request["enable_thinking"] = request.get("chat_template_kwargs", {}).get("enable_thinking")
+        chat_template_kwargs = request.get("chat_template_kwargs")
+        if chat_template_kwargs:
+            for k, v in chat_template_kwargs.items():
+                if k not in request:
+                    request[k] = v
 
         stop_sequences = request.get("stop", [])
         if stop_sequences:
diff --git a/fastdeploy/input/text_processor.py b/fastdeploy/input/text_processor.py
index 10f5081b4d..f67c922ef9 100644
--- a/fastdeploy/input/text_processor.py
+++ b/fastdeploy/input/text_processor.py
@@ -207,7 +207,11 @@ def process_request(self, request, max_model_len=None, **kwargs):
         request = self._apply_default_parameters(request)
         if request.get("eos_token_ids") is None or len(request.eos_token_ids) == 0:
             request.eos_token_ids = self.eos_token_ids
-        request.enable_thinking = kwargs.get("chat_template_kwargs", {}).get("enable_thinking")
+        chat_template_kwargs = request.get("chat_template_kwargs")
+        if chat_template_kwargs:
+            for k, v in chat_template_kwargs.items():
+                if k not in request:
+                    request[k] = v
         stop_sequences = request.get("stop", [])
         if stop_sequences is not None and len(stop_sequences) != 0:
             stop_seqs, stop_seqs_len = self.update_stop_seq(stop_sequences)

From 0446cc72f177341f8941ff11490a00f8f5ab37b5 Mon Sep 17 00:00:00 2001
From: luukunn <981429396@qq.com>
Date: Fri, 15 Aug 2025 15:09:24 +0800
Subject: [PATCH 03/11] add offline

---
 fastdeploy/input/ernie_processor.py    |  8 +++++++-
 fastdeploy/input/ernie_vl_processor.py |  2 +-
 fastdeploy/input/text_processor.py     | 11 +++++------
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/fastdeploy/input/ernie_processor.py b/fastdeploy/input/ernie_processor.py
index 766feea479..2e60ef325c 100644
--- a/fastdeploy/input/ernie_processor.py
+++ b/fastdeploy/input/ernie_processor.py
@@ -109,7 +109,13 @@ def process_request(self, request, max_model_len=None, **kwargs):
             request.prompt_token_ids = token_ids
             data_processor_logger.info(f"req_id:{request.request_id}, tokens:{tokens}, token_ids: {token_ids}")
         else:
-            request.prompt_token_ids = self.messages2ids(request.to_dict())
+            task = request.to_dict()
+            chat_template_kwargs = request.get("chat_template_kwargs")
+            if chat_template_kwargs:
+                for k, v in chat_template_kwargs.items():
+                    if k not in task:
+                        task[k] = v
+            request.prompt_token_ids = self.messages2ids(task)
 
         if len(request.prompt_token_ids) == 0:
             raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
diff --git a/fastdeploy/input/ernie_vl_processor.py b/fastdeploy/input/ernie_vl_processor.py
index 04f29fa19e..296b07b753 100644
--- a/fastdeploy/input/ernie_vl_processor.py
+++ b/fastdeploy/input/ernie_vl_processor.py
@@ -110,7 +110,7 @@ def set_value(req, key, value):
     def process_request(self, request, max_model_len=None, **kwargs):
         """process the input data"""
         task = request.to_dict()
-        task["chat_template_kwargs"] = kwargs.get("chat_template_kwargs", {})
+        task["chat_template_kwargs"] = kwargs.get("chat_template_kwargs")
         self.process_request_dict(task, max_model_len)
         request = Request.from_dict(task)
         request = self._apply_default_parameters(request)
diff --git a/fastdeploy/input/text_processor.py b/fastdeploy/input/text_processor.py
index f67c922ef9..a3cc0c0353 100644
--- a/fastdeploy/input/text_processor.py
+++ b/fastdeploy/input/text_processor.py
@@ -207,11 +207,6 @@ def process_request(self, request, max_model_len=None, **kwargs):
         request = self._apply_default_parameters(request)
         if request.get("eos_token_ids") is None or len(request.eos_token_ids) == 0:
             request.eos_token_ids = self.eos_token_ids
-        chat_template_kwargs = request.get("chat_template_kwargs")
-        if chat_template_kwargs:
-            for k, v in chat_template_kwargs.items():
-                if k not in request:
-                    request[k] = v
         stop_sequences = request.get("stop", [])
         if stop_sequences is not None and len(stop_sequences) != 0:
             stop_seqs, stop_seqs_len = self.update_stop_seq(stop_sequences)
@@ -225,7 +220,11 @@ def process_request(self, request, max_model_len=None, **kwargs):
             if self.tokenizer.chat_template is None:
                 raise ValueError("This model does not support chat_template.")
             task = request.to_dict()
-            task["enable_thinking"] = kwargs.get("enable_thinking", True)
+            chat_template_kwargs = kwargs.get("chat_template_kwargs")
+            if chat_template_kwargs:
+                for k, v in chat_template_kwargs.items():
+                    if k not in task:
+                        task[k] = v
             request.prompt_token_ids = self.messages2ids(task)
         else:
             raise ValueError(f"The request should have `input_ids`, `text` or `messages`: {request}.")

From 670ec7bf923f0d690009f19b25f311090f499a0e Mon Sep 17 00:00:00 2001
From: luukunn <981429396@qq.com>
Date: Fri, 15 Aug 2025 15:18:38 +0800
Subject: [PATCH 04/11] add offline

---
 fastdeploy/input/ernie_processor.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fastdeploy/input/ernie_processor.py b/fastdeploy/input/ernie_processor.py
index 2e60ef325c..9f54b7284e 100644
--- a/fastdeploy/input/ernie_processor.py
+++ b/fastdeploy/input/ernie_processor.py
@@ -90,8 +90,6 @@ def process_request(self, request, max_model_len=None, **kwargs):
         request = self._apply_default_parameters(request)
         if request.get("eos_token_ids") is None or len(request.eos_token_ids) == 0:
             request.eos_token_ids = self.eos_token_ids
-
-        request.enable_thinking = kwargs.get("chat_template_kwargs", {}).get("enable_thinking")
         stop_sequences = request.get("stop", [])
         if stop_sequences is not None and len(stop_sequences) != 0:
             stop_seqs, stop_seqs_len = self.update_stop_seq(stop_sequences)

From 27c9336812fabf7f05957a32c53718002e42710f Mon Sep 17 00:00:00 2001
From: luukunn <981429396@qq.com>
Date: Fri, 15 Aug 2025 15:23:02 +0800
Subject: [PATCH 05/11] fix

---
 fastdeploy/input/text_processor.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fastdeploy/input/text_processor.py b/fastdeploy/input/text_processor.py
index a3cc0c0353..c4235265eb 100644
--- a/fastdeploy/input/text_processor.py
+++ b/fastdeploy/input/text_processor.py
@@ -257,7 +257,6 @@ def process_request_dict(self, request, max_model_len=None, **kwargs):
         request = self._apply_default_parameters(request)
         if not request.get("eos_token_ids"):
             request["eos_token_ids"] = self.eos_token_ids
-        request["enable_thinking"] = request.get("chat_template_kwargs", {}).get("enable_thinking")
 
         # processing stop_sequences
         stop_sequences = request.get("stop", [])
@@ -275,6 +274,11 @@ def process_request_dict(self, request, max_model_len=None, **kwargs):
         elif "messages" in request:
             if self.tokenizer.chat_template is None:
                 raise ValueError("This model does not support chat_template.")
+            chat_template_kwargs = request.get("chat_template_kwargs")
+            if chat_template_kwargs:
+                for k, v in chat_template_kwargs.items():
+                    if k not in request:
+                        request[k] = v
             request["prompt_token_ids"] = self.messages2ids(request)
         else:
             raise ValueError(f"Request must contain 'prompt_token_ids', 'prompt', or 'messages': {request}")

From 6320f29ccc561b66bf4e1f66de43b927f0694e5b Mon Sep 17 00:00:00 2001
From: luukunn <981429396@qq.com>
Date: Fri, 15 Aug 2025 15:48:23 +0800
Subject: [PATCH 06/11] fix

---
 fastdeploy/input/ernie_processor.py    | 25 ++++++++++++++++---------
 fastdeploy/input/ernie_vl_processor.py | 13 ++++++++-----
 fastdeploy/input/text_processor.py     | 18 ++++++++++++------
 3 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/fastdeploy/input/ernie_processor.py b/fastdeploy/input/ernie_processor.py
index 9f54b7284e..95e64711fe 100644
--- a/fastdeploy/input/ernie_processor.py
+++ b/fastdeploy/input/ernie_processor.py
@@ -108,11 +108,14 @@ def process_request(self, request, max_model_len=None, **kwargs):
             data_processor_logger.info(f"req_id:{request.request_id}, tokens:{tokens}, token_ids: {token_ids}")
         else:
             task = request.to_dict()
-            chat_template_kwargs = request.get("chat_template_kwargs")
+            chat_template_kwargs = kwargs.get("chat_template_kwargs")
             if chat_template_kwargs:
-                for k, v in chat_template_kwargs.items():
-                    if k not in task:
-                        task[k] = v
+                if isinstance(chat_template_kwargs, dict):
+                    for k, v in chat_template_kwargs.items():
+                        if k not in task:
+                            task[k] = v
+                else:
+                    raise ValueError("Invalid input: chat_template_kwargs must be a dict")
             request.prompt_token_ids = self.messages2ids(task)
 
         if len(request.prompt_token_ids) == 0:
@@ -146,11 +149,7 @@ def process_request_dict(self, request, max_model_len=None):
         request = self._apply_default_parameters(request)
         if not request.get("eos_token_ids"):
             request["eos_token_ids"] = self.eos_token_ids
-        chat_template_kwargs = request.get("chat_template_kwargs")
-        if chat_template_kwargs:
-            for k, v in chat_template_kwargs.items():
-                if k not in request:
-                    request[k] = v
+
         # processing stop_sequences
         stop_sequences = request.get("stop", [])
         if stop_sequences:
@@ -172,6 +171,14 @@ def process_request_dict(self, request, max_model_len=None):
             req_id = request.get("request_id", None)
             data_processor_logger.info(f"req_id:{req_id}, tokens:{tokens}, token_ids: {token_ids}")
         else:
+            chat_template_kwargs = request.get("chat_template_kwargs")
+            if chat_template_kwargs:
+                if isinstance(chat_template_kwargs, dict):
+                    for k, v in chat_template_kwargs.items():
+                        if k not in request:
+                            request[k] = v
+                else:
+                    raise ValueError("Invalid input: chat_template_kwargs must be a dict")
             request["prompt_token_ids"] = self.messages2ids(request)
             if len(request["prompt_token_ids"]) == 0:
                 raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
diff --git a/fastdeploy/input/ernie_vl_processor.py b/fastdeploy/input/ernie_vl_processor.py
index 296b07b753..45af576172 100644
--- a/fastdeploy/input/ernie_vl_processor.py
+++ b/fastdeploy/input/ernie_vl_processor.py
@@ -198,11 +198,6 @@ def process_request_dict(self, request, max_model_len=None):
         request = self._apply_default_parameters(request)
         if not request.get("eos_token_ids"):
             request["eos_token_ids"] = self.eos_token_ids
-        chat_template_kwargs = request.get("chat_template_kwargs")
-        if chat_template_kwargs:
-            for k, v in chat_template_kwargs.items():
-                if k not in request:
-                    request[k] = v
 
         stop_sequences = request.get("stop", [])
         if stop_sequences:
@@ -222,6 +217,14 @@ def process_request_dict(self, request, max_model_len=None):
         elif request.get("messages"):
             messages = request["messages"]
             self._check_mm_limits(messages)
+            chat_template_kwargs = request.get("chat_template_kwargs")
+            if chat_template_kwargs:
+                if isinstance(chat_template_kwargs, dict):
+                    for k, v in chat_template_kwargs.items():
+                        if k not in request:
+                            request[k] = v
+                else:
+                    raise ValueError("Invalid input: chat_template_kwargs must be a dict")
             outputs = self.ernie_processor.request2ids(request)
         else:
             raise ValueError(f"Request must contain 'prompt', or 'messages': {request}")
diff --git a/fastdeploy/input/text_processor.py b/fastdeploy/input/text_processor.py
index c4235265eb..123bf5b435 100644
--- a/fastdeploy/input/text_processor.py
+++ b/fastdeploy/input/text_processor.py
@@ -222,9 +222,12 @@ def process_request(self, request, max_model_len=None, **kwargs):
             task = request.to_dict()
             chat_template_kwargs = kwargs.get("chat_template_kwargs")
             if chat_template_kwargs:
-                for k, v in chat_template_kwargs.items():
-                    if k not in task:
-                        task[k] = v
+                if isinstance(chat_template_kwargs, dict):
+                    for k, v in chat_template_kwargs.items():
+                        if k not in task:
+                            task[k] = v
+                else:
+                    raise ValueError("Invalid input: chat_template_kwargs must be a dict")
             request.prompt_token_ids = self.messages2ids(task)
         else:
             raise ValueError(f"The request should have `input_ids`, `text` or `messages`: {request}.")
@@ -276,9 +279,12 @@ def process_request_dict(self, request, max_model_len=None, **kwargs):
                 raise ValueError("This model does not support chat_template.")
             chat_template_kwargs = request.get("chat_template_kwargs")
             if chat_template_kwargs:
-                for k, v in chat_template_kwargs.items():
-                    if k not in request:
-                        request[k] = v
+                if isinstance(chat_template_kwargs, dict):
+                    for k, v in chat_template_kwargs.items():
+                        if k not in request:
+                            request[k] = v
+                else:
+                    raise ValueError("Invalid input: chat_template_kwargs must be a dict")
             request["prompt_token_ids"] = self.messages2ids(request)
         else:
             raise ValueError(f"Request must contain 'prompt_token_ids', 'prompt', or 'messages': {request}")

From 59f45e659660b0b2e1e3188201ce64214f586319 Mon Sep 17 00:00:00 2001
From: luukunn <981429396@qq.com>
Date: Fri, 15 Aug 2025 17:48:53 +0800
Subject: [PATCH 07/11] fix default enable_thinking value

---
 fastdeploy/input/ernie_processor.py    | 8 ++------
 fastdeploy/input/ernie_vl_processor.py | 1 +
 fastdeploy/input/text_processor.py     | 2 ++
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/fastdeploy/input/ernie_processor.py b/fastdeploy/input/ernie_processor.py
index 95e64711fe..bbf795397e 100644
--- a/fastdeploy/input/ernie_processor.py
+++ b/fastdeploy/input/ernie_processor.py
@@ -266,9 +266,7 @@ def process_response_dict_normal(self, response_dict, **kwargs):
         delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id)
         if is_end:
             full_text = previous_texts + delta_text
-            if self.reasoning_parser and (
-                enable_thinking or self.reasoning_parser.__class__.__name__ == "ErnieX1ReasoningParser"
-            ):
+            if self.reasoning_parser and enable_thinking:
                 reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict)
                 response_dict["outputs"]["text"] = text
                 response_dict["outputs"]["reasoning_content"] = reasoning_content
@@ -305,9 +303,7 @@ def process_response_dict_streaming(self, response_dict, **kwargs):
             token_ids = token_ids[:-1]
         delta_text, previous_token_ids, previous_texts = self.ids2tokens(token_ids, req_id)
         response_dict["outputs"]["raw_prediction"] = delta_text
-        if self.reasoning_parser and (
-            enable_thinking or self.reasoning_parser.__class__.__name__ == "ErnieX1ReasoningParser"
-        ):
+        if self.reasoning_parser and enable_thinking:
             reasoning_content, text = self.reasoning_parser.extract_reasoning_content_streaming(
                 previous_texts,
                 previous_texts + delta_text,
diff --git a/fastdeploy/input/ernie_vl_processor.py b/fastdeploy/input/ernie_vl_processor.py
index 45af576172..e74e5d9861 100644
--- a/fastdeploy/input/ernie_vl_processor.py
+++ b/fastdeploy/input/ernie_vl_processor.py
@@ -225,6 +225,7 @@ def process_request_dict(self, request, max_model_len=None):
                         request[k] = v
                 else:
                     raise ValueError("Invalid input: chat_template_kwargs must be a dict")
+            request.setdefault("enable_thinking", True)
             outputs = self.ernie_processor.request2ids(request)
         else:
             raise ValueError(f"Request must contain 'prompt', or 'messages': {request}")
diff --git a/fastdeploy/input/text_processor.py b/fastdeploy/input/text_processor.py
index 123bf5b435..4b8b2e9ebb 100644
--- a/fastdeploy/input/text_processor.py
+++ b/fastdeploy/input/text_processor.py
@@ -228,6 +228,7 @@ def process_request(self, request, max_model_len=None, **kwargs):
                             task[k] = v
                 else:
                     raise ValueError("Invalid input: chat_template_kwargs must be a dict")
+            task.setdefault("enable_thinking", True)
             request.prompt_token_ids = self.messages2ids(task)
         else:
             raise ValueError(f"The request should have `input_ids`, `text` or `messages`: {request}.")
@@ -285,6 +286,7 @@ def process_request_dict(self, request, max_model_len=None, **kwargs):
                             request[k] = v
                 else:
                     raise ValueError("Invalid input: chat_template_kwargs must be a dict")
+            request.setdefault("enable_thinking", True)
             request["prompt_token_ids"] = self.messages2ids(request)
         else:
             raise ValueError(f"Request must contain 'prompt_token_ids', 'prompt', or 'messages': {request}")

From af57373cc68bf070467e261761fdd56bde00d426 Mon Sep 17 00:00:00 2001
From: luukunn <981429396@qq.com>
Date: Fri, 15 Aug 2025 18:13:44 +0800
Subject: [PATCH 08/11] fix default enable_thinking value

---
 fastdeploy/input/ernie_processor.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fastdeploy/input/ernie_processor.py b/fastdeploy/input/ernie_processor.py
index bbf795397e..95e64711fe 100644
--- a/fastdeploy/input/ernie_processor.py
+++ b/fastdeploy/input/ernie_processor.py
@@ -266,7 +266,9 @@ def process_response_dict_normal(self, response_dict, **kwargs):
         delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id)
         if is_end:
             full_text = previous_texts + delta_text
-            if self.reasoning_parser and enable_thinking:
+            if self.reasoning_parser and (
+                enable_thinking or self.reasoning_parser.__class__.__name__ == "ErnieX1ReasoningParser"
+            ):
                 reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict)
                 response_dict["outputs"]["text"] = text
                 response_dict["outputs"]["reasoning_content"] = reasoning_content
@@ -303,7 +305,9 @@ def process_response_dict_streaming(self, response_dict, **kwargs):
             token_ids = token_ids[:-1]
         delta_text, previous_token_ids, previous_texts = self.ids2tokens(token_ids, req_id)
         response_dict["outputs"]["raw_prediction"] = delta_text
-        if self.reasoning_parser and enable_thinking:
+        if self.reasoning_parser and (
+            enable_thinking or self.reasoning_parser.__class__.__name__ == "ErnieX1ReasoningParser"
+        ):
             reasoning_content, text = self.reasoning_parser.extract_reasoning_content_streaming(
                 previous_texts,
                 previous_texts + delta_text,

From 26430bdeb1c86963b6fbeefc376a6d30d93262db Mon Sep 17 00:00:00 2001
From: luukunn <981429396@qq.com>
Date: Fri, 15 Aug 2025 21:00:24 +0800
Subject: [PATCH 09/11] modify condition

---
 fastdeploy/input/ernie_processor.py    | 18 ++++++------------
 fastdeploy/input/ernie_vl_processor.py |  9 +++------
 fastdeploy/input/text_processor.py     | 18 ++++++------------
 3 files changed, 15 insertions(+), 30 deletions(-)

diff --git a/fastdeploy/input/ernie_processor.py b/fastdeploy/input/ernie_processor.py
index 95e64711fe..86a1dd5ed8 100644
--- a/fastdeploy/input/ernie_processor.py
+++ b/fastdeploy/input/ernie_processor.py
@@ -110,12 +110,9 @@ def process_request(self, request, max_model_len=None, **kwargs):
             task = request.to_dict()
             chat_template_kwargs = kwargs.get("chat_template_kwargs")
             if chat_template_kwargs:
-                if isinstance(chat_template_kwargs, dict):
-                    for k, v in chat_template_kwargs.items():
-                        if k not in task:
-                            task[k] = v
-                else:
-                    raise ValueError("Invalid input: chat_template_kwargs must be a dict")
+                for k, v in chat_template_kwargs.items():
+                    if k not in task:
+                        task[k] = v
             request.prompt_token_ids = self.messages2ids(task)
 
             if len(request.prompt_token_ids) == 0:
@@ -173,12 +170,9 @@ def process_request_dict(self, request, max_model_len=None):
         else:
             chat_template_kwargs = request.get("chat_template_kwargs")
             if chat_template_kwargs:
-                if isinstance(chat_template_kwargs, dict):
-                    for k, v in chat_template_kwargs.items():
-                        if k not in request:
-                            request[k] = v
-                else:
-                    raise ValueError("Invalid input: chat_template_kwargs must be a dict")
+                for k, v in chat_template_kwargs.items():
+                    if k not in request:
+                        request[k] = v
             request["prompt_token_ids"] = self.messages2ids(request)
             if len(request["prompt_token_ids"]) == 0:
                 raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
diff --git a/fastdeploy/input/ernie_vl_processor.py b/fastdeploy/input/ernie_vl_processor.py
index e74e5d9861..ec2002afaf 100644
--- a/fastdeploy/input/ernie_vl_processor.py
+++ b/fastdeploy/input/ernie_vl_processor.py
@@ -219,12 +219,9 @@ def process_request_dict(self, request, max_model_len=None):
             self._check_mm_limits(messages)
             chat_template_kwargs = request.get("chat_template_kwargs")
             if chat_template_kwargs:
-                if isinstance(chat_template_kwargs, dict):
-                    for k, v in chat_template_kwargs.items():
-                        if k not in request:
-                            request[k] = v
-                else:
-                    raise ValueError("Invalid input: chat_template_kwargs must be a dict")
+                for k, v in chat_template_kwargs.items():
+                    if k not in request:
+                        request[k] = v
             request.setdefault("enable_thinking", True)
             outputs = self.ernie_processor.request2ids(request)
         else:
diff --git a/fastdeploy/input/text_processor.py b/fastdeploy/input/text_processor.py
index 4b8b2e9ebb..0f82fd0031 100644
--- a/fastdeploy/input/text_processor.py
+++ b/fastdeploy/input/text_processor.py
@@ -222,12 +222,9 @@ def process_request(self, request, max_model_len=None, **kwargs):
             task = request.to_dict()
             chat_template_kwargs = kwargs.get("chat_template_kwargs")
             if chat_template_kwargs:
-                if isinstance(chat_template_kwargs, dict):
-                    for k, v in chat_template_kwargs.items():
-                        if k not in task:
-                            task[k] = v
-                else:
-                    raise ValueError("Invalid input: chat_template_kwargs must be a dict")
+                for k, v in chat_template_kwargs.items():
+                    if k not in task:
+                        task[k] = v
             task.setdefault("enable_thinking", True)
             request.prompt_token_ids = self.messages2ids(task)
         else:
@@ -280,12 +277,9 @@ def process_request_dict(self, request, max_model_len=None, **kwargs):
             raise ValueError("This model does not support chat_template.")
             chat_template_kwargs = request.get("chat_template_kwargs")
             if chat_template_kwargs:
-                if isinstance(chat_template_kwargs, dict):
-                    for k, v in chat_template_kwargs.items():
-                        if k not in request:
-                            request[k] = v
-                else:
-                    raise ValueError("Invalid input: chat_template_kwargs must be a dict")
+                for k, v in chat_template_kwargs.items():
+                    if k not in request:
+                        request[k] = v
             request.setdefault("enable_thinking", True)
             request["prompt_token_ids"] = self.messages2ids(request)
         else:
             raise ValueError(f"Request must contain 'prompt_token_ids', 'prompt', or 'messages': {request}")

From f18a3b140649b37c23800eb1ed59971ec7889f71 Mon Sep 17 00:00:00 2001
From: luukunn <981429396@qq.com>
Date: Fri, 15 Aug 2025 21:11:43 +0800
Subject: [PATCH 10/11] Revert "modify condition"

This reverts commit 26430bdeb1c86963b6fbeefc376a6d30d93262db.

---
 fastdeploy/input/ernie_processor.py    | 18 ++++++++++++------
 fastdeploy/input/ernie_vl_processor.py |  9 ++++++---
 fastdeploy/input/text_processor.py     | 18 ++++++++++++------
 3 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/fastdeploy/input/ernie_processor.py b/fastdeploy/input/ernie_processor.py
index 86a1dd5ed8..95e64711fe 100644
--- a/fastdeploy/input/ernie_processor.py
+++ b/fastdeploy/input/ernie_processor.py
@@ -110,9 +110,12 @@ def process_request(self, request, max_model_len=None, **kwargs):
             task = request.to_dict()
             chat_template_kwargs = kwargs.get("chat_template_kwargs")
             if chat_template_kwargs:
-                for k, v in chat_template_kwargs.items():
-                    if k not in task:
-                        task[k] = v
+                if isinstance(chat_template_kwargs, dict):
+                    for k, v in chat_template_kwargs.items():
+                        if k not in task:
+                            task[k] = v
+                else:
+                    raise ValueError("Invalid input: chat_template_kwargs must be a dict")
             request.prompt_token_ids = self.messages2ids(task)
 
             if len(request.prompt_token_ids) == 0:
@@ -170,9 +173,12 @@ def process_request_dict(self, request, max_model_len=None):
         else:
             chat_template_kwargs = request.get("chat_template_kwargs")
             if chat_template_kwargs:
-                for k, v in chat_template_kwargs.items():
-                    if k not in request:
-                        request[k] = v
+                if isinstance(chat_template_kwargs, dict):
+                    for k, v in chat_template_kwargs.items():
+                        if k not in request:
+                            request[k] = v
+                else:
+                    raise ValueError("Invalid input: chat_template_kwargs must be a dict")
             request["prompt_token_ids"] = self.messages2ids(request)
             if len(request["prompt_token_ids"]) == 0:
                 raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
diff --git a/fastdeploy/input/ernie_vl_processor.py b/fastdeploy/input/ernie_vl_processor.py
index ec2002afaf..e74e5d9861 100644
--- a/fastdeploy/input/ernie_vl_processor.py
+++ b/fastdeploy/input/ernie_vl_processor.py
@@ -219,9 +219,12 @@ def process_request_dict(self, request, max_model_len=None):
             self._check_mm_limits(messages)
             chat_template_kwargs = request.get("chat_template_kwargs")
             if chat_template_kwargs:
-                for k, v in chat_template_kwargs.items():
-                    if k not in request:
-                        request[k] = v
+                if isinstance(chat_template_kwargs, dict):
+                    for k, v in chat_template_kwargs.items():
+                        if k not in request:
+                            request[k] = v
+                else:
+                    raise ValueError("Invalid input: chat_template_kwargs must be a dict")
             request.setdefault("enable_thinking", True)
             outputs = self.ernie_processor.request2ids(request)
         else:
diff --git a/fastdeploy/input/text_processor.py b/fastdeploy/input/text_processor.py
index 0f82fd0031..4b8b2e9ebb 100644
--- a/fastdeploy/input/text_processor.py
+++ b/fastdeploy/input/text_processor.py
@@ -222,9 +222,12 @@ def process_request(self, request, max_model_len=None, **kwargs):
             task = request.to_dict()
             chat_template_kwargs = kwargs.get("chat_template_kwargs")
             if chat_template_kwargs:
-                for k, v in chat_template_kwargs.items():
-                    if k not in task:
-                        task[k] = v
+                if isinstance(chat_template_kwargs, dict):
+                    for k, v in chat_template_kwargs.items():
+                        if k not in task:
+                            task[k] = v
+                else:
+                    raise ValueError("Invalid input: chat_template_kwargs must be a dict")
             task.setdefault("enable_thinking", True)
             request.prompt_token_ids = self.messages2ids(task)
         else:
@@ -277,9 +280,12 @@ def process_request_dict(self, request, max_model_len=None, **kwargs):
             raise ValueError("This model does not support chat_template.")
             chat_template_kwargs = request.get("chat_template_kwargs")
             if chat_template_kwargs:
-                for k, v in chat_template_kwargs.items():
-                    if k not in request:
-                        request[k] = v
+                if isinstance(chat_template_kwargs, dict):
+                    for k, v in chat_template_kwargs.items():
+                        if k not in request:
+                            request[k] = v
+                else:
+                    raise ValueError("Invalid input: chat_template_kwargs must be a dict")
             request.setdefault("enable_thinking", True)
             request["prompt_token_ids"] = self.messages2ids(request)
         else:
             raise ValueError(f"Request must contain 'prompt_token_ids', 'prompt', or 'messages': {request}")

From 50f9b40d5c9eacca9467dbd8ea0872c1be969481 Mon Sep 17 00:00:00 2001
From: luukunn <981429396@qq.com>
Date: Mon, 18 Aug 2025 16:24:15 +0800
Subject: [PATCH 11/11] fix unit test

---
 test/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py b/test/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py
index add02ccfb5..df579c34d7 100644
--- a/test/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py
+++ b/test/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py
@@ -508,6 +508,7 @@ def test_chat_with_thinking(openai_client, capsys):
         extra_body={"chat_template_kwargs": {"enable_thinking": False}},
     )
     assert response.choices[0].message.reasoning_content is None
+    assert "</think>" not in response.choices[0].message.content
 
     # enable thinking, streaming
     reasoning_max_tokens = 3
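
Reviewer note: the client-visible behavior this series converges on can be sanity-checked with a short script. This is a minimal sketch, not part of the series: it assumes the openai Python SDK v1 pointed at an already-running FastDeploy OpenAI-compatible server, the base URL, API key, and model name are placeholders, and the request and assertions mirror the ones added to test_EB_VL_Lite_serving.py in PATCH 11/11.

    import openai

    # Placeholder endpoint and credentials; point these at a running
    # FastDeploy OpenAI-compatible server.
    client = openai.OpenAI(base_url="http://localhost:8180/v1", api_key="EMPTY")

    response = client.chat.completions.create(
        model="default",  # placeholder model name
        messages=[{"role": "user", "content": "Hello"}],
        # chat_template_kwargs travels in extra_body; the input processors
        # merge its keys into the request (only when a key is not already
        # set), so enable_thinking reaches both the chat template and the
        # reasoning parser.
        extra_body={"chat_template_kwargs": {"enable_thinking": False}},
    )

    # With thinking disabled, no reasoning content should come back, and the
    # visible answer should not contain a stray closing think tag.
    assert response.choices[0].message.reasoning_content is None
    assert "</think>" not in response.choices[0].message.content

Passing a non-dict here (e.g. extra_body={"chat_template_kwargs": "x"}) should surface the ValueError("Invalid input: chat_template_kwargs must be a dict") introduced in PATCH 06/11 and restored by the revert in PATCH 10/11.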