Commit 9634f1a

feat(log):add_request_and_response_log
1 parent ce1d494 commit 9634f1a

File tree

3 files changed: +27 -3 lines changed

fastdeploy/entrypoints/openai/api_server.py

Lines changed: 2 additions & 0 deletions
@@ -197,6 +197,7 @@ async def create_chat_completion(request: ChatCompletionRequest):
     """
     Create a chat completion for the provided prompt and parameters.
     """
+    api_server_logger.info(f"Chat Received request: {request.model_dump_json()}")
     if app.state.dynamic_load_weight:
         status, msg = app.state.engine_client.is_workers_alive()
         if not status:
@@ -218,6 +219,7 @@ async def create_completion(request: CompletionRequest):
     """
     Create a completion for the provided prompt and parameters.
     """
+    api_server_logger.info(f"Completion Received request: {request.model_dump_json()}")
     if app.state.dynamic_load_weight:
         status, msg = app.state.engine_client.is_workers_alive()
         if not status:
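
Note: for readers skimming the diff, here is a minimal, self-contained sketch of the request-logging pattern these two hunks add — log the validated request body as JSON at the top of the endpoint, before any routing or weight checks. The app, logger, and request model below are illustrative stand-ins, not FastDeploy's actual objects; only the logging call mirrors the diff.

import logging

from fastapi import FastAPI
from pydantic import BaseModel

logging.basicConfig(level=logging.INFO)
api_server_logger = logging.getLogger("api_server")  # stand-in for FastDeploy's logger

app = FastAPI()


class ChatCompletionRequest(BaseModel):
    # Minimal fields for illustration only
    model: str
    messages: list[dict]
    stream: bool = False


@app.post("/v1/chat/completions")
async def create_chat_completion(request: ChatCompletionRequest):
    # Same call shape as the diff: model_dump_json() serializes the validated
    # Pydantic model, so the log line records exactly what the client sent.
    api_server_logger.info(f"Chat Received request: {request.model_dump_json()}")
    return {"received": True}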

fastdeploy/entrypoints/openai/serving_chat.py

Lines changed: 8 additions & 1 deletion
@@ -225,6 +225,7 @@ async def chat_completion_stream_generator(
                     prompt_tokens_details=PromptTokenUsageInfo(cached_tokens=num_cached_tokens),
                 )
                 yield f"data: {chunk.model_dump_json(exclude_unset=True)} \n\n"
+                api_server_logger.info(f"Chat Streaming response send_idx 0: {chunk.model_dump_json()}")
                 first_iteration = False

                 output = res["outputs"]
@@ -258,6 +259,7 @@ async def chat_completion_stream_generator(
                     logprobs=logprobs_res,
                     arrival_time=arrival_time,
                 )
+
                 if res["finished"]:
                     num_choices -= 1
                     work_process_metrics.e2e_request_latency.observe(
@@ -291,6 +293,9 @@ async def chat_completion_stream_generator(
                 if len(choices) == max_streaming_response_tokens or res["finished"]:
                     chunk.choices = choices
                     yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
+                    # Log the last packet
+                    if res["finished"]:
+                        api_server_logger.info(f"Chat Streaming response last send: {chunk.model_dump_json()}")
                     choices = []

             if choices:
@@ -449,13 +454,15 @@ async def chat_completion_full_generator(
             prompt_tokens_details=PromptTokenUsageInfo(cached_tokens=final_res.get("num_cached_tokens", 0)),
         )
         work_process_metrics.e2e_request_latency.observe(time.time() - final_res["metrics"]["request_start_time"])
-        return ChatCompletionResponse(
+        res = ChatCompletionResponse(
             id=request_id,
             created=created_time,
             model=model_name,
             choices=choices,
             usage=usage,
         )
+        api_server_logger.info(f"Chat response: {res.model_dump_json()}")
+        return res

     def build_logprobs_response(
         self,
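
Note: the non-streaming change in chat_completion_full_generator follows a simple pattern — bind the response object to a local variable, log its JSON, then return it, so the log captures exactly what the client receives. A hedged sketch of that pattern, with a stand-in response model and helper rather than FastDeploy's real ChatCompletionResponse:

import logging

from pydantic import BaseModel

api_server_logger = logging.getLogger("api_server")  # stand-in logger


class ChatCompletionResponse(BaseModel):
    # Simplified stand-in for the real response model
    id: str
    model: str
    choices: list[dict]


def build_and_log_response(request_id: str, model_name: str, choices: list[dict]) -> ChatCompletionResponse:
    # Before the change, the response expression was returned directly; binding
    # it first makes it loggable without changing what is returned.
    res = ChatCompletionResponse(id=request_id, model=model_name, choices=choices)
    api_server_logger.info(f"Chat response: {res.model_dump_json()}")
    return res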

fastdeploy/entrypoints/openai/serving_completion.py

Lines changed: 17 additions & 2 deletions
@@ -190,8 +190,7 @@ async def completion_full_generator(
                         valid_results[rid] = data
                         num_choices -= 1
                         break
-
-            return self.request_output_to_completion_response(
+            res = self.request_output_to_completion_response(
                 final_res_batch=valid_results,
                 request=request,
                 request_id=request_id,
@@ -200,6 +199,8 @@ async def completion_full_generator(
                 prompt_batched_token_ids=prompt_batched_token_ids,
                 completion_batched_token_ids=completion_batched_token_ids,
             )
+            api_server_logger.info(f"Completion response: {res.model_dump_json()}")
+            return res
         except Exception as e:
             api_server_logger.error(f"Error in completion_full_generator: {e}", exc_info=True)
             raise
@@ -278,6 +279,9 @@ async def completion_stream_generator(
                         ],
                     )
                     yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
+                    api_server_logger.info(
+                        f"Completion Streaming response send_idx 0: {chunk.model_dump_json()}"
+                    )
                     first_iteration[idx] = False

                 self.engine_client.data_processor.process_response_dict(res, stream=True)
@@ -313,6 +317,16 @@ async def completion_stream_generator(

                 output_tokens[idx] += 1

+                send_idx = output.get("send_idx")
+                # Only log when send_idx is explicitly 0
+                if send_idx == 0 and not request.return_token_ids:
+                    chunk_temp = chunk
+                    chunk_temp.choices = choices
+                    api_server_logger.info(
+                        f"Completion Streaming response send_idx 0: {chunk_temp.model_dump_json()}"
+                    )
+                    del chunk_temp
+
                 if len(choices) == max_streaming_response_tokens or res["finished"]:
                     chunk = CompletionStreamResponse(
                         id=request_id,
@@ -337,6 +351,7 @@ async def completion_stream_generator(
                         ),
                     )
                     yield f"data: {usage_chunk.model_dump_json(exclude_unset=True)}\n\n"
+                    api_server_logger.info(f"Completion Streaming response last send: {chunk.model_dump_json()}")
                 if choices:
                     chunk.choices = choices
                     yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
