Hello!
I want to compare Unicorn with our method on VOT2020. Here is my `trackers.ini` entry:
```ini
[unicorn]
label = unicorn
protocol = traxpython
command = import tools.run_vot as run_vot; run_vot.run_vot2020('unicorn_vos', 'unicorn_track_r50_mask')  # Set the tracker name and the parameter name
# Specify a path to the trax python wrapper if it is not visible (separate by ; if using multiple paths)
paths = /media/wuhan/disk1/wh_code_backup/Unicorn
# Additional environment paths
env_PATH = /home/wuhan/anaconda3/envs/unicorn/bin/python;${PATH}
```
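For context, the `command` line above ends up driving the toolkit's standard trax loop. A minimal sketch of that loop based on the `vot` Python wrapper shipped with the VOT toolkit (the `UnicornTracker` class and `bbox_from_mask` helper are hypothetical stand-ins for illustration, not repo code):

```python
import cv2
import numpy as np
import vot  # trax-based wrapper shipped with the VOT toolkit


def bbox_from_mask(mask):
    """Hypothetical helper: (x, y, w, h) box around the nonzero mask pixels."""
    ys, xs = np.nonzero(mask)
    return [xs.min(), ys.min(), xs.max() - xs.min() + 1, ys.max() - ys.min() + 1]


handle = vot.VOT("mask")              # VOT2020 evaluates segmentation masks
init_mask = handle.region()           # initial binary mask from the toolkit
image = cv2.imread(handle.frame())

tracker = UnicornTracker()            # hypothetical stand-in for unicorn_vos
tracker.initialize(image, {
    "init_object_ids": ["1"],
    "sequence_object_ids": ["1"],
    "init_bbox": bbox_from_mask(init_mask),
    "init_mask": init_mask,
})

while True:
    imagefile = handle.frame()
    if not imagefile:                 # sequence finished
        break
    image = cv2.imread(imagefile)
    out = tracker.track(image, info={})
    handle.report((out["segmentation"] == 1).astype(np.uint8))
```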
I also modified `Unicorn/external/lib/test/tracker/unicorn_vos.py`:
```python
def initialize(self, image, info: dict):
    self.frame_id = 0
    # process init_info
    self.init_object_ids = info["init_object_ids"]
    self.sequence_object_ids = info["sequence_object_ids"]
    # assert self.init_object_ids == self.sequence_object_ids
    # forward the reference frame once
    """resize the original image and transform the coordinates"""
    self.H, self.W, _ = image.shape
    ref_frame_t, r = self.preprocessor.process(image, self.input_size)
    """forward the network"""
    with torch.no_grad():
        _, self.out_dict_pre = self.model(imgs=ref_frame_t, mode="backbone")  # backbone output (previous frame) (b, 3, H, W)
    self.dh, self.dw = self.out_dict_pre["h"] * 2, self.out_dict_pre["w"] * 2  # STRIDE = 8
    """get initial label mask (K, H/8*W/8)"""
    self.lbs_pre_dict = {}
    self.state_pre_dict = {}
    for obj_id in self.init_object_ids:
        self.state_pre_dict[obj_id] = info["init_bbox"]
        init_box = torch.tensor(info["init_bbox"]).view(-1)
        init_box[2:] += init_box[:2]  # (x1, y1, x2, y2)
        init_box_rsz = init_box * r  # coordinates on the resized image
        self.lbs_pre_dict[obj_id] = F.interpolate(
            get_label_map(init_box_rsz, self.input_size[0], self.input_size[1]),
            scale_factor=1/8, mode="bilinear", align_corners=False)[0].flatten(-2).to(self.device)  # (1, H/8*W/8)
    """deal with new-incoming instances"""
    self.out_dict_pre_new = []  # a list containing out_dict for new incoming instances
    self.obj_ids_new = []
```
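`get_label_map` is a Unicorn helper; the sketch below is my own illustration of the step it performs here, assuming it rasterizes the init box into a binary map that is then downsampled by the backbone stride and flattened:

```python
import torch
import torch.nn.functional as F


def box_label_map(box_xyxy, H, W):
    """Rasterize an (x1, y1, x2, y2) box into a binary map of shape (1, 1, H, W)."""
    lbl = torch.zeros(1, 1, H, W)
    x1, y1, x2, y2 = [int(round(float(v))) for v in box_xyxy]
    lbl[:, :, max(y1, 0):min(y2, H), max(x1, 0):min(x2, W)] = 1.0
    return lbl


# e.g. a 150x100 init box on a 640x1024 resized input
init_box_rsz = torch.tensor([200.0, 100.0, 350.0, 200.0])
lbl = box_label_map(init_box_rsz, 640, 1024)

# downsample by the backbone stride (8) and flatten, as in initialize()
lbl_s8 = F.interpolate(lbl, scale_factor=1/8, mode="bilinear",
                       align_corners=False)[0].flatten(-2)  # (1, H/8*W/8)
print(lbl_s8.shape)  # torch.Size([1, 10240])
```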
And the `track` method:

```python
def track(self, image, info: dict = None, bboxes=None, scores=None, gt_box=None):
    self.frame_id += 1
    """resize the original image and transform the coordinates"""
    cur_frame_t, r = self.preprocessor.process(image, self.input_size)
    with torch.no_grad():
        with torch.cuda.amp.autocast(enabled=False):
            fpn_outs_cur, out_dict_cur = self.model(imgs=cur_frame_t, mode="backbone")  # backbone output (current frame)
            # deal with instances from the first frame
            final_mask_dict, inst_scores = self.get_mask_results(fpn_outs_cur, out_dict_cur, self.out_dict_pre, r, self.init_object_ids)
            # deal with instances from the intermediate frames
            for (out_dict_pre, init_object_ids) in zip(self.out_dict_pre_new, self.obj_ids_new):
                final_mask_dict_tmp, inst_scores_tmp = self.get_mask_results(fpn_outs_cur, out_dict_cur, out_dict_pre, r, init_object_ids)
                final_mask_dict.update(final_mask_dict_tmp)
                inst_scores = np.concatenate([inst_scores, inst_scores_tmp])
            # deal with instances from the current frame
            if "init_object_ids" in info.keys():
                self.out_dict_pre_new.append(out_dict_cur)
                self.obj_ids_new.append(info["init_object_ids"])
                inst_scores_tmp = np.ones((len(info["init_object_ids"]),))
                inst_scores = np.concatenate([inst_scores, inst_scores_tmp])
                for obj_id in info["init_object_ids"]:
                    self.state_pre_dict[obj_id] = info["init_bbox"]
                    init_box = torch.tensor(info["init_bbox"]).view(-1)
                    init_box[2:] += init_box[:2]  # (x1, y1, x2, y2)
                    init_box_rsz = init_box * r  # coordinates on the resized image
                    self.lbs_pre_dict[obj_id] = F.interpolate(
                        get_label_map(init_box_rsz, self.input_size[0], self.input_size[1]),
                        scale_factor=1/8, mode="bilinear", align_corners=False)[0].flatten(-2).to(self.device)  # (1, H/8*W/8)
                    final_mask_dict[obj_id] = info["init_mask"]
    # deal with overlapped masks
    cur_obj_ids = copy.deepcopy(self.init_object_ids)
    for obj_ids_inter in self.obj_ids_new:
        cur_obj_ids += obj_ids_inter
    if "init_object_ids" in info.keys():
        cur_obj_ids += info["init_object_ids"]
    # soft aggregation
    cur_obj_ids_int = [int(x) for x in cur_obj_ids]
    mask_merge = np.zeros((self.H, self.W, max(cur_obj_ids_int) + 1))  # (H, W, N+1)
    tmp_list = []
    for cur_id in cur_obj_ids:
        mask_merge[:, :, int(cur_id)] = final_mask_dict[cur_id]
        tmp_list.append(final_mask_dict[cur_id])
    back_prob = np.prod(1 - np.stack(tmp_list, axis=-1), axis=-1, keepdims=False)
    mask_merge[:, :, 0] = back_prob
    mask_merge_final = np.argmax(mask_merge, axis=-1)  # (H, W)
    for cur_id in cur_obj_ids:
        final_mask_dict[cur_id] = (mask_merge_final == int(cur_id))
    """get the final result"""
    final_mask = np.zeros((self.H, self.W), dtype=np.uint8)
    # for obj_id in cur_obj_ids:
    #     final_mask[final_mask_dict[obj_id] == 1] = int(obj_id)
    final_mask = mask_merge_final
    return {"segmentation": final_mask}
```
But the tracking and segmentation result is `0, 0, 0, 0`.
Can you help me?