
Commit aa23661

Bugfix: Fixed error when copying items from different datasets (#10)
* Refactor image retrieval logic in images_get_list
* Refactor annotation copying and logging for improved clarity and error handling
* Enhance image cloning logic to handle name conflicts with improved resolution options and logging
* Refactor image cloning logic to eliminate duplicates and improve conflict resolution handling
1 parent 6ce7976 commit aa23661

2 files changed (+98 −42 lines)

src/api_utils.py

Lines changed: 8 additions & 9 deletions
@@ -73,12 +73,14 @@ def images_get_list(api: sly.Api, dataset_id, image_ids=None):
         ApiField.PATH_ORIGINAL,
         ApiField.CREATED_BY_ID[0][0],
     ]
-    filters = None
-    if image_ids is not None:
-        filters = [{"field": ApiField.ID, "operator": "in", "value": image_ids}]
-    img_infos = api.image.get_list(
-        dataset_id, filters=filters, fields=api_fields, force_metadata_for_links=False
-    )
+    if image_ids is None:
+        img_infos = api.image.get_list(
+            dataset_id, fields=api_fields, force_metadata_for_links=False
+        )
+    else:
+        img_infos = api.image.get_info_by_id_batch(
+            ids=image_ids, fields=api_fields, force_metadata_for_links=False
+        )
     return img_infos
 
 
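The old path always went through api.image.get_list(dataset_id, filters=...), which is scoped to a single dataset, so explicitly requested ids living in other datasets could not be resolved there, which is presumably the error behind this fix. A minimal sketch of the resulting calling pattern (helper name hypothetical; SDK calls as used in the diff):

    import supervisely as sly

    def fetch_image_infos(api: sly.Api, dataset_id: int, image_ids=None):
        # No explicit ids: list everything in the given dataset.
        if image_ids is None:
            return api.image.get_list(dataset_id, force_metadata_for_links=False)
        # Explicit ids: fetch by id, independent of which dataset each image is in.
        return api.image.get_info_by_id_batch(ids=image_ids, force_metadata_for_links=False)
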
@@ -92,7 +94,6 @@ def create_dataset(
     created_at: Optional[str] = None,
     updated_at: Optional[str] = None,
     created_by: Optional[int] = None,
-    custom_data: Optional[Dict] = None,
 ):
     effective_name = api.dataset._get_effective_new_name(
         project_id=project_id,
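custom_data handling is removed from create_dataset entirely (the parameter above and its use in the next hunk); the src/main.py hunks below apply it after creation through api.dataset.update instead. A minimal sketch of the new split, assuming the helper returns the created DatasetInfo (variable names illustrative):

    created = create_dataset(api, project_id=project_id, name=name)  # no custom_data here
    if custom_data:
        # applied in a follow-up request, as src/main.py now does
        api.dataset.update(created.id, custom_data=custom_data)
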
@@ -112,8 +113,6 @@ def create_dataset(
         data[ApiField.UPDATED_AT] = updated_at
     if created_by is not None:
         data[ApiField.CREATED_BY_ID[0][0]] = created_by
-    if custom_data is not None:
-        data[ApiField.CUSTOM_DATA] = custom_data
     try:
         response = api.post(
             "datasets.add",
src/main.py

Lines changed: 90 additions & 33 deletions
@@ -31,7 +31,7 @@
 executor = ThreadPoolExecutor(max_workers=5)
 merged_meta = None
 TASK_ID = None
-cancel_deletion = False # flag to cancel deletion of the source items
+cancel_deletion = False  # flag to cancel deletion of the source items
 
 if sly.is_development():
     api.app.workflow.enable()
@@ -393,6 +393,22 @@ def clone_images_with_annotations(
         image_infos = [info for info in image_infos if info.name not in existing]
         if progress_cb is not None:
             progress_cb(len_before - len(image_infos))
+    src_existing = set()
+    if options[JSONKEYS.CONFLICT_RESOLUTION_MODE] in [
+        JSONKEYS.CONFLICT_SKIP,
+        JSONKEYS.CONFLICT_REPLACE,
+    ]:
+        len_before = len(image_infos)
+        non_duplicate = []
+        for image_info in image_infos:
+            if image_info.name not in src_existing:
+                non_duplicate.append(image_info)
+            src_existing.add(image_info.name)
+        image_infos = non_duplicate
+        if progress_cb is not None:
+            progress_cb(len_before - len(image_infos))
+        if len(image_infos) != len_before:
+            logger.info("Some images were skipped due to name conflicts within source images.")
 
     if len(image_infos) == 0:
         return []
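Besides names that clash with the destination, this new block also drops duplicate names inside the source list itself, keeping only the first occurrence per name. A tiny worked example of the idiom (names hypothetical):

    names = ["a.jpg", "b.jpg", "a.jpg"]  # incoming source image names
    seen, unique = set(), []
    for name in names:
        if name not in seen:
            unique.append(name)
        seen.add(name)
    # unique == ["a.jpg", "b.jpg"]; the dropped duplicate is counted
    # through progress_cb and noted in the log, as in the loop above
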
@@ -407,22 +423,29 @@ def _copy_imgs(
         return infos, uploaded
 
     def _copy_anns(src: List[sly.ImageInfo], dst: List[sly.ImageInfo]):
-        try:
-            api.annotation.copy_batch_by_ids(
-                [i.id for i in src],
-                [i.id for i in dst],
-                save_source_date=options[JSONKEYS.PRESERVE_SRC_DATE],
-            )
-        except Exception as e:
-            if "Some users are not members of the destination group" in str(e):
-                raise ValueError(
-                    "Unable to copy annotations. Annotation creator is not a member of the destination team."
-                ) from e
-            else:
-                raise e
+        by_dataset = defaultdict(list)
+        for src_info, dst_info in zip(src, dst):
+            by_dataset[src_info.dataset_id].append((src_info, dst_info))
+        for pairs in by_dataset.values():
+            src_ids = [info[0].id for info in pairs]
+            dst_ids = [info[1].id for info in pairs]
+            try:
+                api.annotation.copy_batch_by_ids(
+                    src_ids,
+                    dst_ids,
+                    save_source_date=options[JSONKEYS.PRESERVE_SRC_DATE],
+                )
+            except Exception as e:
+                if "Some users are not members of the destination group" in str(e):
+                    raise ValueError(
+                        "Unable to copy annotations. Annotation creator is not a member of the destination team."
+                    ) from e
+                else:
+                    raise e
 
         return src, dst
 
+    reserved_names = set(existing.keys())
     to_rename = {}  # {new_name: old_name}
     upload_images_tasks = []
     for src_image_infos_batch in sly.batched(image_infos, UPLOAD_IMAGES_BATCH_SIZE):
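This rewrite is the heart of the fix: the single copy_batch_by_ids call previously received source ids that could span several datasets, which evidently failed when items came from different datasets (the bug in the commit title). Grouping the (src, dst) pairs by src_info.dataset_id guarantees each call sees ids from exactly one source dataset. A self-contained sketch of the grouping step (the Info tuple is a hypothetical stand-in for sly.ImageInfo):

    from collections import defaultdict
    from typing import NamedTuple

    class Info(NamedTuple):  # minimal stand-in for sly.ImageInfo
        id: int
        dataset_id: int

    src = [Info(1, 10), Info(2, 11), Info(3, 10)]
    dst = [Info(7, 99), Info(8, 99), Info(9, 99)]

    by_dataset = defaultdict(list)
    for s, d in zip(src, dst):
        by_dataset[s.dataset_id].append((s, d))

    for pairs in by_dataset.values():
        src_ids = [s.id for s, _ in pairs]
        dst_ids = [d.id for _, d in pairs]
        # one annotation-copy call per source dataset:
        # dataset 10 -> ids [1, 3] to [7, 9]; dataset 11 -> id [2] to [8]
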
@@ -434,12 +457,32 @@ def _copy_anns(src: List[sly.ImageInfo], dst: List[sly.ImageInfo]):
             JSONKEYS.CONFLICT_REPLACE,
         ]:
             for i, name in enumerate(names):
-                if name in existing:
-                    names[i] = (
-                        ".".join(name.split(".")[:-1]) + "_" + now + "." + name.split(".")[-1]
-                    )
+                j = 0
+                if name in reserved_names:
+                    new_name = name
+                    while new_name in reserved_names:
+                        if j == 0:
+                            new_name = (
+                                ".".join(name.split(".")[:-1])
+                                + "_"
+                                + now
+                                + "."
+                                + name.split(".")[-1]
+                            )
+                        else:
+                            new_name = (
+                                ".".join(name.split(".")[:-1])
+                                + "_"
+                                + now
+                                + f"_{j}"
+                                + "."
+                                + name.split(".")[-1]
+                            )
+                        j += 1
+                    names[i] = new_name
                     if options[JSONKEYS.CONFLICT_RESOLUTION_MODE] == JSONKEYS.CONFLICT_REPLACE:
                         to_rename[names[i]] = name
+                    reserved_names.add(new_name)
         upload_images_tasks.append(
             executor.submit(
                 _copy_imgs,
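Conflict renaming now probes until it finds a free name, trying <stem>_<now>.<ext> first and then <stem>_<now>_<j>.<ext> for j = 1, 2, and so on, and every assigned name joins reserved_names so later images in the same run cannot take it. A compact near-equivalent of the loop:

    def resolve_name(name: str, now: str, reserved: set) -> str:
        stem, ext = ".".join(name.split(".")[:-1]), name.split(".")[-1]
        new_name, j = name, 0
        while new_name in reserved:
            suffix = f"_{now}" if j == 0 else f"_{now}_{j}"
            new_name = f"{stem}{suffix}.{ext}"
            j += 1
        reserved.add(new_name)
        return new_name

    reserved = {"img.jpg", "img_2024.jpg"}
    resolve_name("img.jpg", "2024", reserved)  # -> "img_2024_1.jpg"
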
@@ -645,21 +688,20 @@ def _copy_anns(
             sf_idx_to_remove.reverse()
             for idx in sf_idx_to_remove:
                 ann.spatial_figures.pop(idx)
-            run_in_executor(
-                api.volume.figure.download_sf_geometries, mask_ids, mask_paths)
+            run_in_executor(api.volume.figure.download_sf_geometries, mask_ids, mask_paths)
             tasks.append(
                 executor.submit(
                     api.volume.annotation.append, dst_info.id, ann, key_id_map, volume_info=dst_info
                 )
             )
-
+
     for task in as_completed(tasks):
         task.result()
     progress_masks = tqdm(total=len(mask_paths), desc="Uploading Mask 3D geometries")
     for file in mask_paths:
-        with open(file, 'rb') as f:
+        with open(file, "rb") as f:
             key = UUID(os.path.basename(f.name))
-            api.volume.figure.upload_sf_geometries([key] , {key:f.read()}, key_id_map)
+            api.volume.figure.upload_sf_geometries([key], {key: f.read()}, key_id_map)
             progress_masks.update(1)
     progress_masks.close()
     if set_csm_warning:
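Aside from formatting, this hunk keeps the Mask 3D round trip intact: spatial-figure geometries are downloaded to files named after their UUID keys, then re-uploaded with the key parsed back out of the filename. A small sketch of that keying (path hypothetical):

    import os
    from uuid import UUID

    path = "/tmp/geometries/7c9e6679-7425-40de-944b-e07fc1f90ae7"
    with open(path, "rb") as f:
        key = UUID(os.path.basename(f.name))  # the filename is the figure's UUID
        payload = {key: f.read()}
        # api.volume.figure.upload_sf_geometries([key], payload, key_id_map)
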
@@ -1057,7 +1099,9 @@ def _create_rec(
         dataset_info, created_info, conflict_resolution_result=conflict_resolution_result
     )
     if dataset_info.custom_data:
-        run_in_executor(api.dataset.update, created_id, custom_data=dataset_info.custom_data)
+        run_in_executor(
+            api.dataset.update, created_id, custom_data=dataset_info.custom_data
+        )
     logger.info(
         "Created Dataset",
         extra={
@@ -1328,7 +1372,9 @@ def replace_dataset(src_dataset_info: sly.DatasetInfo, dst_dataset_info: sly.Dat
     """Remove src_dataset_info and change name of dst_dataset_info to src_dataset_info.name"""
     api.dataset.update(src_dataset_info.id, name=src_dataset_info.name + "__to_remove")
     api.dataset.remove(src_dataset_info.id)
-    return api.dataset.update(dst_dataset_info.id, name=src_dataset_info.name, custom_data=src_dataset_info.custom_data)
+    return api.dataset.update(
+        dst_dataset_info.id, name=src_dataset_info.name, custom_data=src_dataset_info.custom_data
+    )
 
 
 def run_in_executor(func, *args, **kwargs):
@@ -1370,7 +1416,7 @@ def copy_project_with_replace(
         parent_id=dst_dataset_id,
         created_at=src_project_info.created_at if perserve_date else None,
         updated_at=src_project_info.updated_at if perserve_date else None,
-        created_by=src_project_info.created_by_id if perserve_date else None,
+        created_by=src_project_info.created_by_id if perserve_date else None,
     )
     existing_datasets = find_children_in_tree(datasets_tree, parent_id=dst_dataset_id)
     created_datasets.append(
@@ -1669,9 +1715,12 @@ def move_project(
             "No datasets created. Skipping deletion", extra={"project_id": src_project_info.id}
         )
         return []
-
+
     if cancel_deletion:
-        logger.info("The source project will not be removed because some of its entities cannot be moved.", extra={"project_id": src_project_info.id})
+        logger.info(
+            "The source project will not be removed because some of its entities cannot be moved.",
+            extra={"project_id": src_project_info.id},
+        )
     else:
         logger.info("Removing source project", extra={"project_id": src_project_info.id})
         run_in_executor(api.project.remove, src_project_info.id)
@@ -1749,9 +1798,12 @@ def move_datasets_tree(
     if len(datasets_to_remove) == 0:
         logger.info("No datasets to remove", extra={"dataset_id": dst_dataset_id})
         return creted_datasets
-
+
     if cancel_deletion:
-        logger.info("The source datasets will not be removed because some of its entities cannot be moved.", extra={"dataset_id": dst_dataset_id})
+        logger.info(
+            "The source datasets will not be removed because some of its entities cannot be moved.",
+            extra={"dataset_id": dst_dataset_id},
+        )
     else:
         logger.info(
             "Removing source datasets",
@@ -1829,9 +1881,12 @@ def move_items_to_dataset(
         options=options,
         progress_cb=progress_cb,
         src_infos=item_infos,
-    )
+    )
     if cancel_deletion or len(created_item_infos) < len(item_infos):
-        logger.info("Some items were not moved. Skipping deletion of source items", extra={"dataset_id": dst_dataset_id})
+        logger.info(
+            "Some items were not moved. Skipping deletion of source items",
+            extra={"dataset_id": dst_dataset_id},
+        )
     else:
         delete_items(item_infos)
         cancel_deletion = False
@@ -2265,7 +2320,9 @@ def transfer_from_dataset(
         f"Dataset created with ID: {target_dataset.id} and name '{target_dataset.name}'"
     )
     if src_dataset.custom_data:
-        run_in_executor(api.dataset.update, target_dataset.id, custom_data=src_dataset.custom_data)
+        run_in_executor(
+            api.dataset.update, target_dataset.id, custom_data=src_dataset.custom_data
+        )
         logger.info(f"Dataset custom data has been updated")
     else:
         raise NotImplementedError(
