@@ -234,6 +234,15 @@ async def completion_full_generator(
234
234
if dealer is not None :
235
235
dealer .close ()
236
236
237
def calc_finish_reason(self, max_tokens, token_num, output):
    """Map a finished generation onto an OpenAI-style finish reason.

    Returns "length" when the request's token budget was exhausted,
    "tool_calls" when the ernie_x1 reasoning parser reported a tool
    call in the output, and "stop" otherwise.

    Args:
        max_tokens: token budget from the request, or None for unlimited.
        token_num: number of tokens actually generated for this choice.
        output: per-request output dict; its "finish_reason" entry is
            consulted only under the ernie_x1 reasoning parser.
    """
    # Budget exhausted -> generation was truncated.
    if max_tokens is not None and token_num == max_tokens:
        return "length"
    # ernie_x1's reasoning parser can flag a tool call in the raw output.
    is_tool_call = (
        self.engine_client.reasoning_parser == "ernie_x1"
        and output.get("finish_reason", "") == "tool_calls"
    )
    return "tool_calls" if is_tool_call else "stop"
237
246
async def completion_stream_generator (
238
247
self ,
239
248
request : CompletionRequest ,
@@ -334,19 +343,13 @@ async def completion_stream_generator(
334
343
logprobs = logprobs_res ,
335
344
)
336
345
)
337
- if res ["finished" ]:
338
- if request .max_tokens is None or output_tokens [idx ] + 1 != request .max_tokens :
339
- chunk .choices [0 ].finish_reason = "stop"
340
- if (
341
- self .engine_client .reasoning_parser == "ernie_x1"
342
- and output .get ("finish_reason" , "" ) == "tool_calls"
343
- ):
344
- chunk .choices [0 ].finish_reason = "tool_calls"
345
- else :
346
- chunk .choices [0 ].finish_reason = "length"
347
-
348
346
output_tokens [idx ] += 1
349
347
348
+ if res ["finished" ]:
349
+ choices [- 1 ].finish_reason = self .calc_finish_reason (
350
+ request .max_tokens , output_tokens [idx ], output
351
+ )
352
+
350
353
if len (choices ) == max_streaming_response_tokens or res ["finished" ]:
351
354
chunk = CompletionStreamResponse (
352
355
id = request_id ,
@@ -433,6 +436,11 @@ def request_output_to_completion_response(
433
436
token_ids = output ["token_ids" ]
434
437
output_text = output ["text" ]
435
438
439
+ num_generated_tokens += final_res ["output_token_ids" ]
440
+ num_prompt_tokens += len (prompt_token_ids )
441
+
442
+ finish_reason = self .calc_finish_reason (request .max_tokens , final_res ["output_token_ids" ], output )
443
+
436
444
choice_data = CompletionResponseChoice (
437
445
token_ids = token_ids ,
438
446
index = len (choices ),
@@ -442,14 +450,10 @@ def request_output_to_completion_response(
442
450
reasoning_content = output .get ("reasoning_content" ),
443
451
tool_calls = output .get ("tool_call_content" ),
444
452
logprobs = aggregated_logprobs ,
445
- finish_reason = None ,
453
+ finish_reason = finish_reason ,
446
454
)
447
455
choices .append (choice_data )
448
456
449
- num_generated_tokens += final_res ["output_token_ids" ]
450
-
451
- num_prompt_tokens += len (prompt_token_ids )
452
-
453
457
usage = UsageInfo (
454
458
prompt_tokens = num_prompt_tokens ,
455
459
completion_tokens = num_generated_tokens ,
0 commit comments