|
| 1 | +#!/usr/bin/env python |
| 2 | +import os |
| 3 | +import sys |
| 4 | +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) |
| 5 | +import open3d as o3d |
| 6 | +import numpy as np |
| 7 | +from glob import glob |
| 8 | +import argparse |
| 9 | +import zstandard as zstd |
| 10 | +import msgpack |
| 11 | +import msgpack_numpy |
| 12 | +msgpack_numpy.patch() |
| 13 | + |
| 14 | +from multiprocessing import Pool |
| 15 | + |
def write_compressed_msgpack(data, path, level=22, threads=0):
    """Serialize *data* with msgpack and write it zstd-compressed to *path*.

    Args:
        data: Any msgpack-serializable object; numpy arrays are handled via
            the ``msgpack_numpy.patch()`` applied at module import time.
        path: Destination file path.
        level: zstd compression level (22 is the maximum).
        threads: Number of zstd worker threads (0 = single-threaded).
    """
    packed = msgpack.packb(data, use_bin_type=True)
    compressor = zstd.ZstdCompressor(level=level, threads=threads)
    with open(path, 'wb') as out:
        print('writing', path)
        out.write(compressor.compress(packed))
| 21 | + |
| 22 | + |
def read_compressed_msgpack(path, decompressor=None):
    """Read a zstd-compressed msgpack file and return the deserialized object.

    Args:
        path: Path to the ``.msgpack.zst`` file.
        decompressor: Optional pre-built ``zstd.ZstdDecompressor`` to reuse;
            a fresh one is created when omitted.
    """
    decomp = zstd.ZstdDecompressor() if decompressor is None else decompressor
    with open(path, 'rb') as f:
        raw = f.read()
    return msgpack.unpackb(decomp.decompress(raw), raw=False)
| 29 | + |
| 30 | + |
def select_good_meshes(info_dict, data_dir):
    """Select meshes that pass all quality flags and carry a usable license.

    Args:
        info_dict: Mapping from mesh id (str) to its metadata dict; each entry
            must provide the quality-flag keys below plus 'License', 'title',
            'Thing ID' and 'author'.
        data_dir: Thingi10k dataset root containing a 'raw_meshes' directory.

    Returns:
        (selected_meshes, attribution): list of mesh file paths and a parallel
        list of attribution strings for the license file.
    """
    raw_meshes_dir = os.path.join(data_dir, 'raw_meshes')
    selected_meshes = []
    attribution = []
    # Every one of these metadata flags must match for a mesh to be accepted.
    selection = {
        'Closed': 'TRUE',
        'Single Component': 'TRUE',
        'No duplicated faces': 'TRUE',
        'No degenerate faces': 'TRUE',
        'Vertex manifold': 'TRUE',
        'oriented': '1',
        'solid': '1',
    }
    # Only meshes under these licenses may be redistributed with attribution.
    licenses = (
        'Creative Commons - Attribution - Share Alike',
        'Creative Commons - Attribution',
        'Creative Commons - Public Domain Dedication',
        'Public Domain'
    )

    keys = sorted(info_dict.keys())
    # Drop known-bad file ids. Guard the removal: the original unconditional
    # list.remove() raised ValueError when the id was absent from info_dict.
    for bad_id in ('112965',):
        if bad_id in keys:
            keys.remove(bad_id)

    for key in keys:
        info = info_dict[key]
        if all(info[k] == v for k, v in selection.items()) and info['License'] in licenses:
            attribution.append('"{}"({}) by {} is licensed under {}'.format(
                info['title'].strip(), info['Thing ID'], info['author'], info['License']))
            # NOTE(review): assumes exactly one raw_meshes/<id>.* file exists
            # per selected id — glob(...)[0] raises IndexError otherwise.
            selected_meshes.append(glob(os.path.join(raw_meshes_dir, key + '.*'))[0])

    return selected_meshes, attribution
| 69 | + |
| 70 | + |
def create_data(mesh_paths, output_path):
    """Load, normalize and pack a batch of meshes into one compressed record.

    Each mesh is scaled by the maximum distance from the first convex-hull
    vertex to any other hull vertex, then translated so its axis-aligned
    bounding box is centered at the origin. Meshes that fail to load or
    normalize are reported and skipped.

    Args:
        mesh_paths: Iterable of mesh file paths readable by open3d.
        output_path: Destination ``.msgpack.zst`` record path.
    """
    data = []
    for path in mesh_paths:
        try:
            mesh = o3d.io.read_triangle_mesh(path)
            vertices = np.asarray(mesh.vertices)
            triangles = np.asarray(mesh.triangles)

            mesh_id = os.path.basename(path)

            # Normalization scale: max distance from the first hull vertex to
            # every other hull vertex (an approximation of the mesh extent).
            hull = mesh.compute_convex_hull()[0]
            hull_vertices = np.asarray(hull.vertices)
            scale = np.max(np.linalg.norm(hull_vertices - hull_vertices[0], axis=1))

            vertices /= scale
            # Center the axis-aligned bounding box at the origin.
            center = 0.5 * (vertices.max(axis=0) + vertices.min(axis=0))
            vertices -= center

            feat_dict = {
                'mesh_id': mesh_id,
                'vertices': vertices.astype(np.float32),
                'triangles': triangles.astype(np.int32),
            }

            data.append(feat_dict)
        except Exception as err:
            # Fix: the captured exception was previously discarded; include it
            # so failed meshes can be diagnosed from the log.
            print("Failed to generate data for", path, "error:", err)

    write_compressed_msgpack(data, output_path)
| 102 | + |
| 103 | + |
| 104 | + |
def main():
    """Build the Thingi10k train/valid records and the attribution file."""
    parser = argparse.ArgumentParser(description="Create data files for training",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--data_dir", type=str, required=True, help="The path to the Thingi10k dataset root.")
    parser.add_argument("--output_dir", type=str, default=os.path.join(os.path.dirname(__file__), 't10k'), help="The path to the output dir")
    parser.add_argument("--attribution_file_only", action="store_true", help="Create only the attribution file")

    args = parser.parse_args()

    script_dir = os.path.dirname(__file__)
    info_dict = read_compressed_msgpack(os.path.join(script_dir, 'thingi10k_info.msgpack.zst'))

    meshes, attributions = select_good_meshes(info_dict, args.data_dir)

    valid_output_dir = os.path.join(args.output_dir, 'valid')
    train_output_dir = os.path.join(args.output_dir, 'train')
    for directory in (args.output_dir, valid_output_dir, train_output_dir):
        os.makedirs(directory, exist_ok=True)

    # The attribution file is named after the output directory.
    attribution_file = "{}_attribution.txt".format(os.path.basename(args.output_dir))
    with open(os.path.join(args.output_dir, attribution_file), 'w') as f:
        f.write("\n".join(attributions))

    if args.attribution_file_only:
        return

    # Split the selection into 100 roughly equal records; the first 5 records
    # become the validation set, the rest the training set.
    meshes_sublists = [[str(m) for m in chunk] for chunk in np.array_split(meshes, 100)]
    print('objects per record', len(meshes_sublists[0]))
    output_paths = []
    for idx in range(len(meshes_sublists)):
        dest_dir = valid_output_dir if idx < 5 else train_output_dir
        output_paths.append(os.path.join(dest_dir, 'thingi10k_{0:03d}.msgpack.zst'.format(idx)))

    # Process the records in parallel; each worker writes one record file.
    with Pool(16) as pool:
        pool.starmap(create_data, list(zip(meshes_sublists, output_paths)))
| 139 | + |
| 140 | + |
# Script entry point: run the dataset build when executed directly.
if __name__ == '__main__':
    main()
0 commit comments