diff --git a/fastdeploy/output/token_processor.py b/fastdeploy/output/token_processor.py index 66c481e0f9..f28ed04438 100644 --- a/fastdeploy/output/token_processor.py +++ b/fastdeploy/output/token_processor.py @@ -463,8 +463,9 @@ def _process_batch_output(self): if recovery_stop: llm_logger.info(f"recovery stop signal found at task {task_id}") if not recovery_stop and token_id < 0: - if task_id in self.resource_manager.to_be_rescheduled_request_id_set: - self.resource_manager.reschedule_preempt_task(task_id) + if envs.ENABLE_V1_KVCACHE_SCHEDULER: + if task_id in self.resource_manager.to_be_rescheduled_request_id_set: + self.resource_manager.reschedule_preempt_task(task_id) continue if task.get("prefill_chunk_info", None) is not None: