PaddlePaddle · xiegegege · Aug 13, 2025 · Aug 13, 2025
diff --git a/fastdeploy/engine/sched/resource_manager_v1.py b/fastdeploy/engine/sched/resource_manager_v1.py
@@ -142,26 +142,31 @@ def _get_num_new_tokens(self, request, token_budget):
 
         input_ids_lst = request.prompt_token_ids + request.output_token_ids
         input_ids = paddle.to_tensor(input_ids_lst, dtype="int64")
-        grid_thw = []
-        for one in inputs["grid_thw"]:
-            if one[0] == 1:
-                grid_thw.append(one)
-            else:
-                grid_thw.extend([[2, one[1], one[2]]] * (one[0] // 2))
-
+        input_ids = paddle.to_tensor(input_ids_lst, dtype="int64")
         image_patch_id = inputs["image_patch_id"]
-        grid_thw = paddle.to_tensor(grid_thw, dtype="int64")
+
         if request.multimodal_img_boundaries is None:
+            grid_thw = []
+            for one in inputs["grid_thw"]:
+                if one[0] == 1:
+                    grid_thw.append(one)
+                else:
+                    grid_thw.extend([[2, one[1], one[2]]] * (one[0] // 2))
+
+            grid_thw = paddle.to_tensor(grid_thw, dtype="int64")
             from fastdeploy.model_executor.ops.gpu import get_img_boundaries
 
             request.multimodal_img_boundaries = get_img_boundaries(
                 task_input_ids=input_ids, grid_thw=grid_thw, image_patch_id=image_patch_id
             ).numpy()
 
+            grid_thw = grid_thw.numpy().reshape([-1, 3])
+            inputs["grid_thw"] = grid_thw
+
+        grid_thw = inputs["grid_thw"]
         img_boundaries_idx = request.multimodal_img_boundaries[0]
         img_num_per_boundary = request.multimodal_img_boundaries[1]
         ori_prompt_len = img_boundaries_idx[-1].item()
-        grid_thw = grid_thw.numpy().reshape([-1, 3])
         pre_end_idx = request.num_computed_tokens
         new_end_idx = pre_end_idx + num_new_tokens
         if new_end_idx < ori_prompt_len and input_ids[new_end_idx - 1] == image_patch_id: