@@ -224,6 +224,15 @@ async def completion_full_generator(
224
224
if dealer is not None :
225
225
dealer .close ()
226
226
227
def calc_finish_reason(self, max_tokens, token_num, output):
    """Map a finished generation to its OpenAI-style finish reason.

    Returns ``"length"`` when the generated token count hit the
    ``max_tokens`` budget, ``"tool_calls"`` when the ernie_x1 reasoning
    parser marked the output as a tool call, and ``"stop"`` otherwise.
    """
    # Budget exhausted: the request's max_tokens cap was reached exactly.
    if max_tokens is not None and token_num == max_tokens:
        return "length"
    # Natural stop — distinguish a tool-call stop when the ernie_x1
    # parser is active and the output itself flags one.
    ended_in_tool_call = (
        self.engine_client.reasoning_parser == "ernie_x1"
        and output.get("finish_reason", "") == "tool_calls"
    )
    return "tool_calls" if ended_in_tool_call else "stop"
227
236
async def completion_stream_generator (
228
237
self ,
229
238
request : CompletionRequest ,
@@ -324,19 +333,13 @@ async def completion_stream_generator(
324
333
logprobs = logprobs_res ,
325
334
)
326
335
)
327
- if res ["finished" ]:
328
- if request .max_tokens is None or output_tokens [idx ] + 1 != request .max_tokens :
329
- chunk .choices [0 ].finish_reason = "stop"
330
- if (
331
- self .engine_client .reasoning_parser == "ernie_x1"
332
- and output .get ("finish_reason" , "" ) == "tool_calls"
333
- ):
334
- chunk .choices [0 ].finish_reason = "tool_calls"
335
- else :
336
- chunk .choices [0 ].finish_reason = "length"
337
-
338
336
output_tokens [idx ] += 1
339
337
338
+ if res ["finished" ]:
339
+ choices [- 1 ].finish_reason = self .calc_finish_reason (
340
+ request .max_tokens , output_tokens [idx ], output
341
+ )
342
+
340
343
if len (choices ) == max_streaming_response_tokens or res ["finished" ]:
341
344
chunk = CompletionStreamResponse (
342
345
id = request_id ,
@@ -421,6 +424,11 @@ def request_output_to_completion_response(
421
424
token_ids = output ["token_ids" ]
422
425
output_text = output ["text" ]
423
426
427
+ num_generated_tokens += final_res ["output_token_ids" ]
428
+ num_prompt_tokens += len (prompt_token_ids )
429
+
430
+ finish_reason = self .calc_finish_reason (request .max_tokens , final_res ["output_token_ids" ], output )
431
+
424
432
choice_data = CompletionResponseChoice (
425
433
token_ids = token_ids ,
426
434
index = len (choices ),
@@ -430,14 +438,10 @@ def request_output_to_completion_response(
430
438
reasoning_content = output .get ("reasoning_content" ),
431
439
tool_calls = output .get ("tool_call_content" ),
432
440
logprobs = aggregated_logprobs ,
433
- finish_reason = None ,
441
+ finish_reason = finish_reason ,
434
442
)
435
443
choices .append (choice_data )
436
444
437
- num_generated_tokens += final_res ["output_token_ids" ]
438
-
439
- num_prompt_tokens += len (prompt_token_ids )
440
-
441
445
usage = UsageInfo (
442
446
prompt_tokens = num_prompt_tokens ,
443
447
completion_tokens = num_generated_tokens ,
0 commit comments