@@ -142,26 +142,31 @@ def _get_num_new_tokens(self, request, token_budget):
142
142
143
143
input_ids_lst = request .prompt_token_ids + request .output_token_ids
144
144
input_ids = paddle .to_tensor (input_ids_lst , dtype = "int64" )
145
- grid_thw = []
146
- for one in inputs ["grid_thw" ]:
147
- if one [0 ] == 1 :
148
- grid_thw .append (one )
149
- else :
150
- grid_thw .extend ([[2 , one [1 ], one [2 ]]] * (one [0 ] // 2 ))
151
-
145
+ input_ids = paddle .to_tensor (input_ids_lst , dtype = "int64" )
152
146
image_patch_id = inputs ["image_patch_id" ]
153
- grid_thw = paddle . to_tensor ( grid_thw , dtype = "int64" )
147
+
154
148
if request .multimodal_img_boundaries is None :
149
+ grid_thw = []
150
+ for one in inputs ["grid_thw" ]:
151
+ if one [0 ] == 1 :
152
+ grid_thw .append (one )
153
+ else :
154
+ grid_thw .extend ([[2 , one [1 ], one [2 ]]] * (one [0 ] // 2 ))
155
+
156
+ grid_thw = paddle .to_tensor (grid_thw , dtype = "int64" )
155
157
from fastdeploy .model_executor .ops .gpu import get_img_boundaries
156
158
157
159
request .multimodal_img_boundaries = get_img_boundaries (
158
160
task_input_ids = input_ids , grid_thw = grid_thw , image_patch_id = image_patch_id
159
161
).numpy ()
160
162
163
+ grid_thw = grid_thw .numpy ().reshape ([- 1 , 3 ])
164
+ inputs ["grid_thw" ] = grid_thw
165
+
166
+ grid_thw = inputs ["grid_thw" ]
161
167
img_boundaries_idx = request .multimodal_img_boundaries [0 ]
162
168
img_num_per_boundary = request .multimodal_img_boundaries [1 ]
163
169
ori_prompt_len = img_boundaries_idx [- 1 ].item ()
164
- grid_thw = grid_thw .numpy ().reshape ([- 1 , 3 ])
165
170
pre_end_idx = request .num_computed_tokens
166
171
new_end_idx = pre_end_idx + num_new_tokens
167
172
if new_end_idx < ori_prompt_len and input_ids [new_end_idx - 1 ] == image_patch_id :
0 commit comments