
[BugFix] Support echo in the completion API #3245


Merged — 58 commits merged into develop from wenxin-tools-551 on Aug 19, 2025
Commits (58)
48f92eb
wenxin-tools-511: fix the issue that v1/completion could not echo the prompt.
AuferGachet Aug 4, 2025
2dc63fb
Support echo for multiple prompts
AuferGachet Aug 6, 2025
5be9250
Support streaming echo with multiple prompts
AuferGachet Aug 6, 2025
c5c387a
Add unit tests for echo support in the completion API
AuferGachet Aug 6, 2025
528cf32
Merge branch 'PaddlePaddle:develop' into wenxin-tools-551
AuferGachet Aug 6, 2025
145014c
pre-commit
AuferGachet Aug 6, 2025
1d1be93
Remove redundant test file
AuferGachet Aug 6, 2025
9c96016
Fix the unit-test method for completion-API echo support
AuferGachet Aug 6, 2025
8d705ff
Add unit-test file
AuferGachet Aug 7, 2025
1bfda37
Add unit tests
AuferGachet Aug 7, 2025
674a4d5
unittest
AuferGachet Aug 7, 2025
1e221fd
Add unit tests
AuferGachet Aug 7, 2025
925a8f5
Merge branch 'develop' into wenxin-tools-551
AuferGachet Aug 7, 2025
2557b4e
Fix unit tests
AuferGachet Aug 8, 2025
055e683
Remove unnecessary assert.
AuferGachet Aug 8, 2025
d5c3711
Merge branch 'develop' into wenxin-tools-551
AuferGachet Aug 8, 2025
37fdf61
Resubmit
AuferGachet Aug 9, 2025
bead3a7
Resubmit
AuferGachet Aug 9, 2025
c61318d
Update test method
AuferGachet Aug 9, 2025
fad5b7c
ut
AuferGachet Aug 9, 2025
cb1a191
Unit test to verify the approach is correct
AuferGachet Aug 9, 2025
60ca4c2
Unit test to verify the approach is correct
AuferGachet Aug 10, 2025
0dfa82d
Unit test to verify the approach is correct (3)
AuferGachet Aug 10, 2025
23bd28f
Refine unit-test code, narrowing the test scope.
AuferGachet Aug 11, 2025
d8b3000
Refine unit-test code (2), narrowing the test scope.
AuferGachet Aug 11, 2025
ac624d2
Refine unit-test code (3), narrowing the test scope.
AuferGachet Aug 12, 2025
3118bed
Merge branch 'develop' into wenxin-tools-551
AuferGachet Aug 12, 2025
c727540
support 'echo' in chat/completion.
AuferGachet Aug 12, 2025
cd5ffba
Remove unused comments.
AuferGachet Aug 12, 2025
6b84794
Merge branch 'develop' into wenxin-tools-551
AuferGachet Aug 12, 2025
bf83824
update
AuferGachet Aug 12, 2025
c99f80c
update
AuferGachet Aug 12, 2025
669c13e
update
AuferGachet Aug 12, 2025
d742458
update
AuferGachet Aug 12, 2025
e355b7f
Merge branch 'develop' into wenxin-tools-551
AuferGachet Aug 12, 2025
9661815
Merge branch 'develop' into wenxin-tools-551
AuferGachet Aug 12, 2025
f1a5f5d
update
AuferGachet Aug 12, 2025
8643f25
update
AuferGachet Aug 12, 2025
47b102e
Merge branch 'develop' into wenxin-tools-551
AuferGachet Aug 12, 2025
65dcedd
Merge branch 'develop' into wenxin-tools-551
AuferGachet Aug 13, 2025
edeab55
Merge branch 'develop' into wenxin-tools-551
AuferGachet Aug 13, 2025
4ff77c5
update
AuferGachet Aug 13, 2025
a28f93d
Merge branch 'develop' into wenxin-tools-551
AuferGachet Aug 13, 2025
d15aff4
Merge branch 'develop' into wenxin-tools-551
AuferGachet Aug 13, 2025
a446766
Merge branch 'develop' into wenxin-tools-551
AuferGachet Aug 13, 2025
95e0103
Add unit tests for token ids
AuferGachet Aug 14, 2025
0b7758c
update
AuferGachet Aug 14, 2025
1ca7fd2
Merge branch 'develop' into wenxin-tools-551
AuferGachet Aug 14, 2025
0f5f538
Merge branch 'develop' into wenxin-tools-551
AuferGachet Aug 14, 2025
5753994
Merge branch 'develop' into wenxin-tools-551
AuferGachet Aug 14, 2025
1ce5384
Merge branch 'develop' into wenxin-tools-551
AuferGachet Aug 14, 2025
89d8659
Merge branch 'develop' into wenxin-tools-551
AuferGachet Aug 18, 2025
747110e
Fix index error
AuferGachet Aug 18, 2025
d2f7b9c
Fix `
AuferGachet Aug 18, 2025
d31c79d
Merge branch 'develop' into wenxin-tools-551
AuferGachet Aug 18, 2025
5cb685e
Fix index error
AuferGachet Aug 18, 2025
51fb6be
update
AuferGachet Aug 18, 2025
cfc1c0f
Merge branch 'develop' into wenxin-tools-551
AuferGachet Aug 18, 2025
21 changes: 14 additions & 7 deletions fastdeploy/entrypoints/openai/serving_completion.py
@@ -241,6 +241,14 @@ async def completion_full_generator(
        if dealer is not None:
            dealer.close()

+    async def _echo_back_prompt(self, request, res, idx):
+        if res["outputs"].get("send_idx", -1) == 0 and request.echo:
+            if isinstance(request.prompt, list):
+                prompt_text = request.prompt[idx]
+            else:
+                prompt_text = request.prompt
+            res["outputs"]["text"] = prompt_text + (res["outputs"]["text"] or "")
+
    def calc_finish_reason(self, max_tokens, token_num, output, tool_called):
        if max_tokens is None or token_num != max_tokens:
            if tool_called or output.get("tool_call"):
@@ -338,6 +346,7 @@ async def completion_stream_generator(
                else:
                    arrival_time = res["metrics"]["arrival_time"] - inference_start_time[idx]

+                await self._echo_back_prompt(request, res, idx)
                output = res["outputs"]
                output_top_logprobs = output["top_logprobs"]
                logprobs_res: Optional[CompletionLogprobs] = None
@@ -450,7 +459,7 @@ def request_output_to_completion_response(
            final_res = final_res_batch[idx]
            prompt_token_ids = prompt_batched_token_ids[idx]
            assert prompt_token_ids is not None
-            prompt_text = final_res["prompt"]
+            prompt_text = request.prompt
            completion_token_ids = completion_batched_token_ids[idx]

            output = final_res["outputs"]

            if request.echo:
                assert prompt_text is not None
-                if request.max_tokens == 0:
-                    token_ids = prompt_token_ids
-                    output_text = prompt_text
+                token_ids = [*prompt_token_ids, *output["token_ids"]]
+                if isinstance(prompt_text, list):
+                    output_text = prompt_text[idx] + output["text"]
                else:
-                    token_ids = [*prompt_token_ids, *output["token_ids"]]
-                    output_text = prompt_text + output["text"]
+                    output_text = str(prompt_text) + output["text"]
            else:
                token_ids = output["token_ids"]
                output_text = output["text"]

            finish_reason = self.calc_finish_reason(request.max_tokens, final_res["output_token_ids"], output, False)

            choice_data = CompletionResponseChoice(
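For context, this change makes the OpenAI-compatible v1/completions endpoint honor the `echo` flag: in non-streaming responses the prompt is prepended to each choice's text, and in streaming responses it is prepended only to a choice's first chunk (the one whose internal `send_idx` is 0), so it is never repeated. A minimal client-side sketch, assuming a FastDeploy server is already running at a hypothetical http://localhost:8000/v1 with a hypothetical model name:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")  # hypothetical endpoint

# Non-streaming: each choice echoes its own prompt before the generated text.
resp = client.completions.create(
    model="my-model",  # hypothetical model name
    prompt=["prompt1", "prompt2"],
    max_tokens=10,
    echo=True,
)
print(resp.choices[0].text)  # "prompt1 ..." - the prompt followed by the completion

# Streaming: only the first chunk of the choice carries the prompt.
for chunk in client.completions.create(
    model="my-model",
    prompt="prompt1",
    max_tokens=10,
    echo=True,
    stream=True,
):
    print(chunk.choices[0].text, end="")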
177 changes: 177 additions & 0 deletions test/entrypoints/openai/test_completion_echo.py
@@ -0,0 +1,177 @@
import unittest
from unittest.mock import MagicMock, patch

from fastdeploy.entrypoints.openai.serving_completion import (
    CompletionRequest,
    OpenAIServingCompletion,
)


# Standalone re-implementation of the `_echo_back_prompt` logic with generic
# placeholder names, kept as a reference for the tests below.
class YourClass:
    async def _1(self, a, b, c):
        if b["outputs"].get("send_idx", -1) == 0 and a.echo:
            if isinstance(a.prompt, list):
                text = a.prompt[c]
            else:
                text = a.prompt
            b["outputs"]["text"] = text + (b["outputs"]["text"] or "")


class TestCompletionEcho(unittest.IsolatedAsyncioTestCase):
    def setUp(self):
        self.mock_engine = MagicMock()
        self.completion_handler = None

    def test_single_prompt_non_streaming(self):
        """Single prompt, non-streaming response."""
        self.completion_handler = OpenAIServingCompletion(self.mock_engine, pid=123, ips=None, max_waiting_time=30)

        request = CompletionRequest(prompt="test prompt", max_tokens=10, echo=True, logprobs=1)

        mock_output = {
            "outputs": {
                "text": " generated text",
                "token_ids": [1, 2, 3],
                "top_logprobs": {"token1": -0.1, "token2": -0.2},
                "finished": True,
            },
            "output_token_ids": 3,
        }
        self.mock_engine.generate.return_value = [mock_output]

        response = self.completion_handler.request_output_to_completion_response(
            final_res_batch=[mock_output],
            request=request,
            request_id="test_id",
            created_time=12345,
            model_name="test_model",
            prompt_batched_token_ids=[[1, 2]],
            completion_batched_token_ids=[[3, 4, 5]],
            text_after_process_list=["test prompt"],
        )

        self.assertEqual(response.choices[0].text, "test prompt generated text")

    async def test_echo_back_prompt_and_streaming(self):
        """Test _echo_back_prompt and the prompt-concatenation logic for streaming responses."""
        self.completion_handler = OpenAIServingCompletion(self.mock_engine, pid=123, ips=None, max_waiting_time=30)

        request = CompletionRequest(prompt="test prompt", max_tokens=10, stream=True, echo=True)

        mock_response = {"outputs": {"text": "test output", "token_ids": [1, 2, 3], "finished": True}}

        with patch.object(self.completion_handler, "_echo_back_prompt") as mock_echo:

            def mock_echo_side_effect(req, res, idx):
                res["outputs"]["text"] = req.prompt + res["outputs"]["text"]

            mock_echo.side_effect = mock_echo_side_effect

            await self.completion_handler._echo_back_prompt(request, mock_response, 0)

            mock_echo.assert_called_once_with(request, mock_response, 0)

        self.assertEqual(mock_response["outputs"]["text"], "test prompttest output")
        self.assertEqual(request.prompt, "test prompt")

    def test_multi_prompt_non_streaming(self):
        """Multiple prompts, non-streaming response."""
        self.completion_handler = OpenAIServingCompletion(self.mock_engine, pid=123, ips=None, max_waiting_time=30)

        request = CompletionRequest(prompt=["prompt1", "prompt2"], max_tokens=10, echo=True)

        mock_outputs = [
            {
                "outputs": {"text": " response1", "token_ids": [1, 2], "top_logprobs": None, "finished": True},
                "output_token_ids": 2,
            },
            {
                "outputs": {"text": " response2", "token_ids": [3, 4], "top_logprobs": None, "finished": True},
                "output_token_ids": 2,
            },
        ]
        self.mock_engine.generate.return_value = mock_outputs

        response = self.completion_handler.request_output_to_completion_response(
            final_res_batch=mock_outputs,
            request=request,
            request_id="test_id",
            created_time=12345,
            model_name="test_model",
            prompt_batched_token_ids=[[1], [2]],
            completion_batched_token_ids=[[1, 2], [3, 4]],
            text_after_process_list=["prompt1", "prompt2"],
        )

        self.assertEqual(len(response.choices), 2)
        self.assertEqual(response.choices[0].text, "prompt1 response1")
        self.assertEqual(response.choices[1].text, "prompt2 response2")

    async def test_multi_prompt_streaming(self):
        self.completion_handler = OpenAIServingCompletion(self.mock_engine, pid=123, ips=None, max_waiting_time=30)

        request = CompletionRequest(prompt=["prompt1", "prompt2"], max_tokens=10, stream=True, echo=True)

        mock_responses = [
            {"outputs": {"text": " response1", "token_ids": [1, 2], "finished": True}},
            {"outputs": {"text": " response2", "token_ids": [3, 4], "finished": True}},
        ]

        with patch.object(self.completion_handler, "_echo_back_prompt") as mock_echo:

            def mock_echo_side_effect(req, res, idx):
                res["outputs"]["text"] = req.prompt[idx] + res["outputs"]["text"]

            mock_echo.side_effect = mock_echo_side_effect

            await self.completion_handler._echo_back_prompt(request, mock_responses[0], 0)
            await self.completion_handler._echo_back_prompt(request, mock_responses[1], 1)

            self.assertEqual(mock_echo.call_count, 2)
            mock_echo.assert_any_call(request, mock_responses[0], 0)
            mock_echo.assert_any_call(request, mock_responses[1], 1)

        self.assertEqual(mock_responses[0]["outputs"]["text"], "prompt1 response1")
        self.assertEqual(mock_responses[1]["outputs"]["text"], "prompt2 response2")
        self.assertEqual(request.prompt, ["prompt1", "prompt2"])

    async def test_echo_back_prompt_and_streaming1(self):
        request = CompletionRequest(echo=True, prompt=["Hello", "World"])
        res = {"outputs": {"send_idx": 0, "text": "!"}}
        idx = 0

        instance = OpenAIServingCompletion(self.mock_engine, pid=123, ips=None, max_waiting_time=30)
        await instance._echo_back_prompt(request, res, idx)
        self.assertEqual(res["outputs"]["text"], "Hello!")

    async def test_1_prompt_is_string_and_send_idx_is_0(self):
        request = CompletionRequest(echo=True, prompt="Hello")
        res = {"outputs": {"send_idx": 0, "text": "!"}}
        idx = 0

        instance = OpenAIServingCompletion(self.mock_engine, pid=123, ips=None, max_waiting_time=30)
        await instance._echo_back_prompt(request, res, idx)
        self.assertEqual(res["outputs"]["text"], "Hello!")

    async def test_1_send_idx_is_not_0(self):
        request = CompletionRequest(echo=True, prompt="Hello")
        res = {"outputs": {"send_idx": 1, "text": "!"}}
        idx = 0

        instance = OpenAIServingCompletion(self.mock_engine, pid=123, ips=None, max_waiting_time=30)
        await instance._echo_back_prompt(request, res, idx)
        self.assertEqual(res["outputs"]["text"], "!")

    async def test_1_echo_is_false(self):
        """When echo is False, _echo_back_prompt must not prepend the prompt."""
        request = CompletionRequest(echo=False, prompt="Hello")
        res = {"outputs": {"send_idx": 0, "text": "!"}}
        idx = 0

        instance = OpenAIServingCompletion(self.mock_engine, pid=123, ips=None, max_waiting_time=30)
        await instance._echo_back_prompt(request, res, idx)
        self.assertEqual(res["outputs"]["text"], "!")


if __name__ == "__main__":
    unittest.main()
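The tests mock the engine entirely, so they should run standalone, e.g. with `python -m pytest test/entrypoints/openai/test_completion_echo.py` (assuming pytest is installed; plain `python -m unittest` also works, since the file uses `unittest.IsolatedAsyncioTestCase`).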
5 changes: 2 additions & 3 deletions test/entrypoints/openai/test_serving_completion.py
@@ -55,7 +55,6 @@ def test_request_output_to_completion_response(self):
        openai_serving_completion = OpenAIServingCompletion(engine_client, "pid", "ips", 360)
        final_res_batch: List[RequestOutput] = [
            {
-                "prompt": "Hello, world!",
                "outputs": {
                    "token_ids": [1, 2, 3],
                    "text": " world!",
"output_token_ids": 3,
},
{
"prompt": "Hello, world!",
"outputs": {
"token_ids": [4, 5, 6],
"text": " world!",
        ]

        request: CompletionRequest = Mock()
+        request.prompt = "Hello, world!"
        request.echo = True
        request_id = "test_request_id"
        created_time = 1655136000
        model_name = "test_model"
        prompt_batched_token_ids = [[1, 2, 3], [4, 5, 6]]
        completion_batched_token_ids = [[7, 8, 9], [10, 11, 12]]

        completion_response = openai_serving_completion.request_output_to_completion_response(
            final_res_batch=final_res_batch,
            request=request,
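Because request_output_to_completion_response now reads the echoed prompt from `request.prompt` rather than from each result's `"prompt"` entry, the mocked request gains a `prompt` attribute and the per-result `"prompt"` fields are dropped.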