From db3eebca3e9633632ebfc0521a399e40ee55e98d Mon Sep 17 00:00:00 2001 From: Matvey Ivanov Date: Wed, 23 Jul 2025 15:35:29 +0200 Subject: [PATCH 1/5] changes to enable edge rendering using Freestyle and save files in the bop format --- .../python/renderer/RendererUtility.py | 833 +++++++++++++---- blenderproc/python/writer/BopWriterUtility.py | 867 ++++++++++++------ .../bop_challenge/main_tless_random.py | 267 ++++-- 3 files changed, 1438 insertions(+), 529 deletions(-) diff --git a/blenderproc/python/renderer/RendererUtility.py b/blenderproc/python/renderer/RendererUtility.py index 84e9fd6a7..cff6a0d11 100644 --- a/blenderproc/python/renderer/RendererUtility.py +++ b/blenderproc/python/renderer/RendererUtility.py @@ -1,13 +1,15 @@ -"""Provides functionality to render a color, normal, depth and distance image.""" +"""Provides functionality to render a color, normal, depth, edge and distance image.""" from contextlib import contextmanager import os import threading -from typing import IO, Union, Dict, List, Set, Optional, Any +from typing import IO, Union, Dict, List, Tuple, Set, Optional, Any import math import sys import platform import time +import tempfile +import cv2 import mathutils import bpy @@ -21,6 +23,7 @@ from blenderproc.python.utility.DefaultConfig import DefaultConfig from blenderproc.python.utility.Utility import Utility, stdout_redirected from blenderproc.python.writer.WriterUtility import _WriterUtility +from blenderproc.python.types.MeshObjectUtility import MeshObject def set_denoiser(denoiser: Optional[str]): @@ -53,24 +56,35 @@ def set_denoiser(denoiser: Optional[str]): denoise_node = nodes.new("CompositorNodeDenoise") # Link nodes - render_layer_node = Utility.get_the_one_node_with_type(nodes, 'CompositorNodeRLayers') - composite_node = Utility.get_the_one_node_with_type(nodes, 'CompositorNodeComposite') - Utility.insert_node_instead_existing_link(links, - render_layer_node.outputs['Image'], - denoise_node.inputs['Image'], - denoise_node.outputs['Image'], - composite_node.inputs['Image']) - - links.new(render_layer_node.outputs['DiffCol'], denoise_node.inputs['Albedo']) - links.new(render_layer_node.outputs['Normal'], denoise_node.inputs['Normal']) + render_layer_node = Utility.get_the_one_node_with_type( + nodes, "CompositorNodeRLayers" + ) + composite_node = Utility.get_the_one_node_with_type( + nodes, "CompositorNodeComposite" + ) + Utility.insert_node_instead_existing_link( + links, + render_layer_node.outputs["Image"], + denoise_node.inputs["Image"], + denoise_node.outputs["Image"], + composite_node.inputs["Image"], + ) + + links.new(render_layer_node.outputs["DiffCol"], denoise_node.inputs["Albedo"]) + links.new(render_layer_node.outputs["Normal"], denoise_node.inputs["Normal"]) else: raise Exception("No such denoiser: " + denoiser) -def set_light_bounces(diffuse_bounces: Optional[int] = None, glossy_bounces: Optional[int] = None, - ao_bounces_render: Optional[int] = None, max_bounces: Optional[int] = None, - transmission_bounces: Optional[int] = None, transparent_max_bounces: Optional[int] = None, - volume_bounces: Optional[int] = None): +def set_light_bounces( + diffuse_bounces: Optional[int] = None, + glossy_bounces: Optional[int] = None, + ao_bounces_render: Optional[int] = None, + max_bounces: Optional[int] = None, + transmission_bounces: Optional[int] = None, + transparent_max_bounces: Optional[int] = None, + volume_bounces: Optional[int] = None, +): """ Sets the number of light bounces that should be used by the raytracing renderer. 
Default values are defined in DefaultConfig.py @@ -101,7 +115,7 @@ def set_light_bounces(diffuse_bounces: Optional[int] = None, glossy_bounces: Opt def set_cpu_threads(num_threads: int): - """ Sets the number of CPU cores to use simultaneously while rendering. + """Sets the number of CPU cores to use simultaneously while rendering. :param num_threads: The number of threads to use. If 0 is given the number is automatically detected based on the cpu cores. @@ -115,7 +129,7 @@ def set_cpu_threads(num_threads: int): def toggle_stereo(enable: bool): - """ Enables/Disables stereoscopy. + """Enables/Disables stereoscopy. :param enable: True, if stereoscopy should be enabled. """ @@ -123,8 +137,9 @@ def toggle_stereo(enable: bool): if enable: bpy.context.scene.render.views_format = "STEREO_3D" + def toggle_light_tree(enable: bool): - """ Enables/Disables blender's light tree for rendering. + """Enables/Disables blender's light tree for rendering. Enabling the light tree reduces the noise in scenes with many point lights, however it increases the render time per sample. @@ -134,21 +149,24 @@ def toggle_light_tree(enable: bool): """ bpy.context.scene.cycles.use_light_tree = enable + def set_simplify_subdivision_render(simplify_subdivision_render: int): - """ Sets global maximum subdivision level during rendering to speedup rendering. + """Sets global maximum subdivision level during rendering to speedup rendering. :param simplify_subdivision_render: The maximum subdivision level. If 0 is given, simplification of scene is disabled. """ if simplify_subdivision_render > 0: bpy.context.scene.render.use_simplify = True - bpy.context.scene.render.simplify_subdivision_render = simplify_subdivision_render + bpy.context.scene.render.simplify_subdivision_render = ( + simplify_subdivision_render + ) else: bpy.context.scene.render.use_simplify = False def set_noise_threshold(noise_threshold: float): - """ Configures the adaptive sampling, the noise threshold is typically between 0.1 and 0.001. + """Configures the adaptive sampling, the noise threshold is typically between 0.1 and 0.001. Adaptive sampling automatically decreases the number of samples per pixel based on estimated level of noise. We do not recommend setting the noise threshold value to zero and therefore turning off the adaptive sampling. @@ -167,7 +185,7 @@ def set_noise_threshold(noise_threshold: float): def set_max_amount_of_samples(samples: int): - """ Sets the maximum number of samples to render for each pixel. + """Sets the maximum number of samples to render for each pixel. This maximum amount is usually not reached if the noise threshold is low enough. If the noise threshold was set to 0, then only the maximum number of samples is used (We do not recommend this). @@ -176,11 +194,15 @@ def set_max_amount_of_samples(samples: int): bpy.context.scene.cycles.samples = samples -def enable_distance_output(activate_antialiasing: bool, output_dir: Optional[str] = None, - file_prefix: str = "distance_", - output_key: str = "distance", antialiasing_distance_max: float = None, - convert_to_depth: bool = False): - """ Enables writing distance images. +def enable_distance_output( + activate_antialiasing: bool, + output_dir: Optional[str] = None, + file_prefix: str = "distance_", + output_key: str = "distance", + antialiasing_distance_max: float = None, + convert_to_depth: bool = False, +): + """Enables writing distance images. 
:param activate_antialiasing: If this is True the final image will be anti-aliased @@ -193,18 +215,26 @@ def enable_distance_output(activate_antialiasing: bool, output_dir: Optional[str image to a depth image """ if not activate_antialiasing: - return enable_depth_output(activate_antialiasing, output_dir, file_prefix, output_key, convert_to_distance=True) + return enable_depth_output( + activate_antialiasing, + output_dir, + file_prefix, + output_key, + convert_to_distance=True, + ) if output_dir is None: output_dir = Utility.get_temporary_directory() if antialiasing_distance_max is None: antialiasing_distance_max = DefaultConfig.antialiasing_distance_max if GlobalStorage.is_in_storage("distance_output_is_enabled"): - msg = "The distance enable function can not be called twice. Either you called it twice or you used the " \ - "enable_depth_output with activate_antialiasing=True, which internally calls this function. This is " \ - "currently not supported, but there is an easy way to solve this, you can use the " \ - "bproc.postprocessing.dist2depth and depth2dist function on the output of the renderer and generate " \ - "the antialiased depth image yourself." + msg = ( + "The distance enable function can not be called twice. Either you called it twice or you used the " + "enable_depth_output with activate_antialiasing=True, which internally calls this function. This is " + "currently not supported, but there is an easy way to solve this, you can use the " + "bproc.postprocessing.dist2depth and depth2dist function on the output of the renderer and generate " + "the antialiased depth image yourself." + ) raise RuntimeError(msg) GlobalStorage.add("distance_output_is_enabled", True) @@ -215,7 +245,9 @@ def enable_distance_output(activate_antialiasing: bool, output_dir: Optional[str tree = bpy.context.scene.node_tree links = tree.links # Use existing render layer - render_layer_node = Utility.get_the_one_node_with_type(tree.nodes, 'CompositorNodeRLayers') + render_layer_node = Utility.get_the_one_node_with_type( + tree.nodes, "CompositorNodeRLayers" + ) # Set mist pass limits bpy.context.scene.world.mist_settings.start = 0 @@ -225,11 +257,11 @@ def enable_distance_output(activate_antialiasing: bool, output_dir: Optional[str bpy.context.view_layer.use_pass_mist = True # Enable distance pass # Create a mapper node to map from 0-1 to SI units mapper_node = tree.nodes.new("CompositorNodeMapRange") - links.new(render_layer_node.outputs["Mist"], mapper_node.inputs['Value']) + links.new(render_layer_node.outputs["Mist"], mapper_node.inputs["Value"]) # map the values 0-1 to range distance_start to distance_range - mapper_node.inputs['From Max'].default_value = 1.0 - mapper_node.inputs['To Min'].default_value = 0 - mapper_node.inputs['To Max'].default_value = antialiasing_distance_max + mapper_node.inputs["From Max"].default_value = 1.0 + mapper_node.inputs["To Min"].default_value = 0 + mapper_node.inputs["To Max"].default_value = antialiasing_distance_max # Build output node output_file = tree.nodes.new("CompositorNodeOutputFile") @@ -243,23 +275,30 @@ def enable_distance_output(activate_antialiasing: bool, output_dir: Optional[str combine_color = tree.nodes.new("CompositorNodeCombineColor") combine_color.mode = "HSV" links.new(mapper_node.outputs["Value"], combine_color.inputs[2]) - + # Feed the Z-Buffer or Mist output of the render layer to the input of the file IO layer - links.new(combine_color.outputs["Image"], output_file.inputs['Image']) - - Utility.add_output_entry({ - "key": output_key, - 
"path": os.path.join(output_dir, file_prefix) + "%04d" + ".exr", - "version": "2.0.0", - "trim_redundant_channels": True, - "convert_to_depth": convert_to_depth - }) + links.new(combine_color.outputs["Image"], output_file.inputs["Image"]) + + Utility.add_output_entry( + { + "key": output_key, + "path": os.path.join(output_dir, file_prefix) + "%04d" + ".exr", + "version": "2.0.0", + "trim_redundant_channels": True, + "convert_to_depth": convert_to_depth, + } + ) return None -def enable_depth_output(activate_antialiasing: bool, output_dir: Optional[str] = None, file_prefix: str = "depth_", - output_key: str = "depth", antialiasing_distance_max: float = None, - convert_to_distance: bool = False): +def enable_depth_output( + activate_antialiasing: bool, + output_dir: Optional[str] = None, + file_prefix: str = "depth_", + output_key: str = "depth", + antialiasing_distance_max: float = None, + convert_to_distance: bool = False, +): """ Enables writing depth images. Depth images will be written in the form of .exr files during the next rendering. @@ -274,17 +313,25 @@ def enable_depth_output(activate_antialiasing: bool, output_dir: Optional[str] = image to a distance image """ if activate_antialiasing: - return enable_distance_output(activate_antialiasing, output_dir, file_prefix, output_key, - antialiasing_distance_max, convert_to_depth=True) + return enable_distance_output( + activate_antialiasing, + output_dir, + file_prefix, + output_key, + antialiasing_distance_max, + convert_to_depth=True, + ) if output_dir is None: output_dir = Utility.get_temporary_directory() if GlobalStorage.is_in_storage("depth_output_is_enabled"): - msg = "The depth enable function can not be called twice. Either you called it twice or you used the " \ - "enable_distance_output with activate_antialiasing=False, which internally calls this function. This " \ - "is currently not supported, but there is an easy way to solve this, you can use the " \ - "bproc.postprocessing.dist2depth and depth2dist function on the output of the renderer and generate " \ - "the antialiased distance image yourself." + msg = ( + "The depth enable function can not be called twice. Either you called it twice or you used the " + "enable_distance_output with activate_antialiasing=False, which internally calls this function. This " + "is currently not supported, but there is an easy way to solve this, you can use the " + "bproc.postprocessing.dist2depth and depth2dist function on the output of the renderer and generate " + "the antialiased distance image yourself." 
+ ) raise RuntimeError(msg) GlobalStorage.add("depth_output_is_enabled", True) @@ -294,7 +341,9 @@ def enable_depth_output(activate_antialiasing: bool, output_dir: Optional[str] = tree = bpy.context.scene.node_tree links = tree.links # Use existing render layer - render_layer_node = Utility.get_the_one_node_with_type(tree.nodes, 'CompositorNodeRLayers') + render_layer_node = Utility.get_the_one_node_with_type( + tree.nodes, "CompositorNodeRLayers" + ) # Enable z-buffer pass bpy.context.view_layer.use_pass_z = True @@ -311,23 +360,28 @@ def enable_depth_output(activate_antialiasing: bool, output_dir: Optional[str] = combine_color = tree.nodes.new("CompositorNodeCombineColor") combine_color.mode = "HSV" links.new(render_layer_node.outputs["Depth"], combine_color.inputs[2]) - + # Feed the Z-Buffer RGB output from the Combine Color node to the input of the file IO layer links.new(combine_color.outputs["Image"], output_file.inputs["Image"]) - Utility.add_output_entry({ - "key": output_key, - "path": os.path.join(output_dir, file_prefix) + "%04d" + ".exr", - "version": "2.0.0", - "trim_redundant_channels": True, - "convert_to_distance": convert_to_distance - }) + Utility.add_output_entry( + { + "key": output_key, + "path": os.path.join(output_dir, file_prefix) + "%04d" + ".exr", + "version": "2.0.0", + "trim_redundant_channels": True, + "convert_to_distance": convert_to_distance, + } + ) return None -def enable_normals_output(output_dir: Optional[str] = None, file_prefix: str = "normals_", - output_key: str = "normals"): - """ Enables writing normal images. +def enable_normals_output( + output_dir: Optional[str] = None, + file_prefix: str = "normals_", + output_key: str = "normals", +): + """Enables writing normal images. Normal images will be written in the form of .exr files during the next rendering. 
@@ -345,7 +399,9 @@ def enable_normals_output(output_dir: Optional[str] = None, file_prefix: str = " links = tree.links # Use existing render layer - render_layer_node = Utility.get_the_one_node_with_type(tree.nodes, 'CompositorNodeRLayers') + render_layer_node = Utility.get_the_one_node_with_type( + tree.nodes, "CompositorNodeRLayers" + ) separate_rgba = tree.nodes.new("CompositorNodeSepRGBA") space_between_nodes_x = 200 @@ -391,14 +447,18 @@ def enable_normals_output(output_dir: Optional[str] = None, file_prefix: str = " channel_results[channel] = second_add # set the matrix accordingly - rot_around_x_axis = mathutils.Matrix.Rotation(math.radians(-90.0), 4, 'X') + rot_around_x_axis = mathutils.Matrix.Rotation(math.radians(-90.0), 4, "X") for frame in range(bpy.context.scene.frame_start, bpy.context.scene.frame_end): used_rotation_matrix = CameraUtility.get_camera_pose(frame) @ rot_around_x_axis for row_index in range(3): for column_index in range(3): current_multiply = multiplication_values[row_index][column_index] - current_multiply.inputs[1].default_value = used_rotation_matrix[column_index][row_index] - current_multiply.inputs[1].keyframe_insert(data_path='default_value', frame=frame) + current_multiply.inputs[1].default_value = used_rotation_matrix[ + column_index + ][row_index] + current_multiply.inputs[1].keyframe_insert( + data_path="default_value", frame=frame + ) offset = 8 * space_between_nodes_x for index, channel in enumerate(c_channels): multiply = tree.nodes.new("CompositorNodeMath") @@ -430,19 +490,24 @@ def enable_normals_output(output_dir: Optional[str] = None, file_prefix: str = " output_file.location.x = space_between_nodes_x * 15 links.new(combine_rgba.outputs["Image"], output_file.inputs["Image"]) - Utility.add_output_entry({ - "key": output_key, - "path": os.path.join(output_dir, file_prefix) + "%04d" + ".exr", - "version": "2.0.0" - }) - - -def enable_segmentation_output(map_by: Union[str, List[str]] = "category_id", - default_values: Optional[Dict[str, Any]] = None, - pass_alpha_threshold: float = 0.05, - output_dir: Optional[str] = None, - file_prefix: str = "segmap_", output_key: str = "segmap"): - """ Enables segmentation output by certain keys. + Utility.add_output_entry( + { + "key": output_key, + "path": os.path.join(output_dir, file_prefix) + "%04d" + ".exr", + "version": "2.0.0", + } + ) + + +def enable_segmentation_output( + map_by: Union[str, List[str]] = "category_id", + default_values: Optional[Dict[str, Any]] = None, + pass_alpha_threshold: float = 0.05, + output_dir: Optional[str] = None, + file_prefix: str = "segmap_", + output_key: str = "segmap", +): + """Enables segmentation output by certain keys. The key instances is used, if a mapping of every object in the scene to an integer is requested. These integers are assigned randomly and do not follow any system. They are consisted for one rendering call. 
@@ -476,24 +541,26 @@ def enable_segmentation_output(map_by: Union[str, List[str]] = "category_id", tree = bpy.context.scene.node_tree links = tree.links - render_layer_node = tree.nodes.get('Render Layers') + render_layer_node = tree.nodes.get("Render Layers") if output_dir is None: output_dir = Utility.get_temporary_directory() - output_node = tree.nodes.new('CompositorNodeOutputFile') + output_node = tree.nodes.new("CompositorNodeOutputFile") output_node.base_path = output_dir output_node.format.file_format = "OPEN_EXR" output_node.file_slots.values()[0].path = file_prefix - Utility.add_output_entry({ - "key": output_key, - "path": os.path.join(output_dir, file_prefix) + "%04d" + ".exr", - "version": "3.0.0", - "trim_redundant_channels": True, - "is_semantic_segmentation": True, - "semantic_segmentation_mapping": map_by, - "semantic_segmentation_default_values": default_values - }) + Utility.add_output_entry( + { + "key": output_key, + "path": os.path.join(output_dir, file_prefix) + "%04d" + ".exr", + "version": "3.0.0", + "trim_redundant_channels": True, + "is_semantic_segmentation": True, + "semantic_segmentation_mapping": map_by, + "semantic_segmentation_default_values": default_values, + } + ) # Feed the output through 'Combine Color' node, to create 3 channel RGB grayscale image as a lot of # EXR readers don't support single float channel EXR files and Blender writes depth as a single @@ -501,16 +568,21 @@ def enable_segmentation_output(map_by: Union[str, List[str]] = "category_id", combine_color = tree.nodes.new("CompositorNodeCombineColor") combine_color.mode = "HSV" links.new(render_layer_node.outputs["IndexOB"], combine_color.inputs[2]) - + links.new(combine_color.outputs["Image"], output_node.inputs["Image"]) # set the threshold low to avoid noise in alpha materials - bpy.context.scene.view_layers["ViewLayer"].pass_alpha_threshold = pass_alpha_threshold + bpy.context.scene.view_layers["ViewLayer"].pass_alpha_threshold = ( + pass_alpha_threshold + ) -def enable_diffuse_color_output(output_dir: Optional[str] = None, file_prefix: str = "diffuse_", - output_key: str = "diffuse"): - """ Enables writing diffuse color (albedo) images. +def enable_diffuse_color_output( + output_dir: Optional[str] = None, + file_prefix: str = "diffuse_", + output_key: str = "diffuse", +): + """Enables writing diffuse color (albedo) images. Diffuse color images will be written in the form of .png files during the next rendering. 
@@ -527,39 +599,45 @@ def enable_diffuse_color_output(output_dir: Optional[str] = None, file_prefix: s links = tree.links bpy.context.view_layer.use_pass_diffuse_color = True - render_layer_node = Utility.get_the_one_node_with_type(tree.nodes, 'CompositorNodeRLayers') + render_layer_node = Utility.get_the_one_node_with_type( + tree.nodes, "CompositorNodeRLayers" + ) final_output = render_layer_node.outputs["DiffCol"] - output_file = tree.nodes.new('CompositorNodeOutputFile') + output_file = tree.nodes.new("CompositorNodeOutputFile") output_file.base_path = output_dir output_file.format.file_format = "PNG" output_file.file_slots.values()[0].path = file_prefix - links.new(final_output, output_file.inputs['Image']) + links.new(final_output, output_file.inputs["Image"]) - Utility.add_output_entry({ - "key": output_key, - "path": os.path.join(output_dir, file_prefix) + "%04d" + ".png", - "version": "2.0.0" - }) + Utility.add_output_entry( + { + "key": output_key, + "path": os.path.join(output_dir, file_prefix) + "%04d" + ".png", + "version": "2.0.0", + } + ) def map_file_format_to_file_ending(file_format: str) -> str: - """ Returns the files endings for a given blender output format. + """Returns the files endings for a given blender output format. :param file_format: The blender file format. :return: The file ending. """ - if file_format == 'PNG': + if file_format == "PNG": return ".png" - if file_format == 'JPEG': + if file_format == "JPEG": return ".jpg" - if file_format == 'OPEN_EXR': + if file_format == "OPEN_EXR": return ".exr" raise RuntimeError(f"Unknown Image Type {file_format}") -def _progress_bar_thread(pipe_out: int, stdout: IO, total_frames: int, num_samples: int): - """ The thread rendering the progress bar +def _progress_bar_thread( + pipe_out: int, stdout: IO, total_frames: int, num_samples: int +): + """The thread rendering the progress bar :param pipe_out: The pipe output delivering blenders debug messages. :param stdout: The stdout to which the progress bar should be written. 
@@ -576,7 +654,9 @@ def _progress_bar_thread(pipe_out: int, stdout: IO, total_frames: int, num_sampl # Initializes progress bar using given stdout with Progress(*columns, console=Console(file=stdout), transient=True) as progress: complete_task = progress.add_task("[green]Total", total=total_frames, status="") - frame_task = progress.add_task("[yellow]Current frame", total=num_samples, status="") + frame_task = progress.add_task( + "[yellow]Current frame", total=num_samples, status="" + ) # Continuously read blenders debug messages current_line = "" @@ -595,22 +675,36 @@ def _progress_bar_thread(pipe_out: int, stdout: IO, total_frames: int, num_sampl # Check if its a line we can use (starts with "Fra:") if current_line.startswith("Fra:"): # Extract current frame number and use it to set the progress bar - frame_number = int(current_line.split()[0][len("Fra:"):]) + frame_number = int(current_line.split()[0][len("Fra:") :]) frames_completed = frame_number - starting_frame_number progress.update(complete_task, completed=frames_completed) - progress.update(complete_task, status=f"Rendering frame {frames_completed + 1} of {total_frames}") + progress.update( + complete_task, + status=f"Rendering frame {frames_completed + 1} of {total_frames}", + ) # Split line into columns status_columns = [col.strip() for col in current_line.split("|")] if "Scene, ViewLayer" in status_columns: # If we are currently at "Scene, ViewLayer", use everything afterwards - status = " | ".join(status_columns[status_columns.index("Scene, ViewLayer") + 1:]) + status = " | ".join( + status_columns[ + status_columns.index("Scene, ViewLayer") + 1 : + ] + ) # If we are currently rendering, update the progress if status.startswith("Sample"): - progress.update(frame_task, completed=int(status[len("Sample"):].split("/", maxsplit=1)[0])) + progress.update( + frame_task, + completed=int( + status[len("Sample") :].split("/", maxsplit=1)[0] + ), + ) elif "Compositing" in status_columns: # If we are at "Compositing", use everything afterwards including "Compositing" - status = " | ".join(status_columns[status_columns.index("Compositing"):]) + status = " | ".join( + status_columns[status_columns.index("Compositing") :] + ) # Set render progress to complete progress.update(frame_task, completed=num_samples) else: @@ -626,8 +720,10 @@ def _progress_bar_thread(pipe_out: int, stdout: IO, total_frames: int, num_sampl @contextmanager -def _render_progress_bar(pipe_out: int, pipe_in: int, stdout: IO, total_frames: int, enabled: bool = True): - """ Shows a progress bar visualizing the render progress. +def _render_progress_bar( + pipe_out: int, pipe_in: int, stdout: IO, total_frames: int, enabled: bool = True +): + """Shows a progress bar visualizing the render progress. :param pipe_out: The pipe output delivering blenders debug messages. :param pipe_in: The input of the pipe, necessary to send the end character. @@ -636,14 +732,16 @@ def _render_progress_bar(pipe_out: int, pipe_in: int, stdout: IO, total_frames: :param enabled: If False, no progress bar is shown. 
""" if enabled: - thread = threading.Thread(target=_progress_bar_thread, - args=(pipe_out, stdout, total_frames, bpy.context.scene.cycles.samples)) + thread = threading.Thread( + target=_progress_bar_thread, + args=(pipe_out, stdout, total_frames, bpy.context.scene.cycles.samples), + ) thread.start() try: yield finally: # Send final character, so the thread knows to stop - w = os.fdopen(pipe_in, 'w') + w = os.fdopen(pipe_in, "w") w.write("\b") w.close() thread.join() @@ -651,10 +749,15 @@ def _render_progress_bar(pipe_out: int, pipe_in: int, stdout: IO, total_frames: yield -def render(output_dir: Optional[str] = None, file_prefix: str = "rgb_", output_key: Optional[str] = "colors", - load_keys: Optional[Set[str]] = None, return_data: bool = True, - keys_with_alpha_channel: Optional[Set[str]] = None, - verbose: bool = False) -> Dict[str, Union[np.ndarray, List[np.ndarray]]]: +def render( + output_dir: Optional[str] = None, + file_prefix: str = "rgb_", + output_key: Optional[str] = "colors", + load_keys: Optional[Set[str]] = None, + return_data: bool = True, + keys_with_alpha_channel: Optional[Set[str]] = None, + verbose: bool = False, +) -> Dict[str, Union[np.ndarray, List[np.ndarray]]]: """ Render all frames. This will go through all frames from scene.frame_start to scene.frame_end and render each of them. @@ -672,16 +775,23 @@ def render(output_dir: Optional[str] = None, file_prefix: str = "rgb_", output_k if output_dir is None: output_dir = Utility.get_temporary_directory() if load_keys is None: - load_keys = {'colors', 'distance', 'normals', 'diffuse', 'depth', 'segmap'} - keys_with_alpha_channel = {'colors'} if bpy.context.scene.render.film_transparent else None + load_keys = {"colors", "distance", "normals", "diffuse", "depth", "segmap"} + keys_with_alpha_channel = ( + {"colors"} if bpy.context.scene.render.film_transparent else None + ) if output_key is not None: - Utility.add_output_entry({ - "key": output_key, - "path": os.path.join(output_dir, file_prefix) + "%04d" + - map_file_format_to_file_ending(bpy.context.scene.render.image_settings.file_format), - "version": "2.0.0" - }) + Utility.add_output_entry( + { + "key": output_key, + "path": os.path.join(output_dir, file_prefix) + + "%04d" + + map_file_format_to_file_ending( + bpy.context.scene.render.image_settings.file_format + ), + "version": "2.0.0", + } + ) load_keys.add(output_key) bpy.context.scene.render.filepath = os.path.join(output_dir, file_prefix) @@ -689,13 +799,19 @@ def render(output_dir: Optional[str] = None, file_prefix: str = "rgb_", output_k # Skip if there is nothing to render if bpy.context.scene.frame_end != bpy.context.scene.frame_start: if len(get_all_blender_mesh_objects()) == 0: - raise Exception("There are no mesh-objects to render, " - "please load an object before invoking the renderer.") + raise Exception( + "There are no mesh-objects to render, " + "please load an object before invoking the renderer." 
+ ) # Print what is rendered total_frames = bpy.context.scene.frame_end - bpy.context.scene.frame_start if load_keys: - registered_output_keys = [output["key"] for output in Utility.get_registered_outputs()] - keys_to_render = sorted([key for key in load_keys if key in registered_output_keys]) + registered_output_keys = [ + output["key"] for output in Utility.get_registered_outputs() + ] + keys_to_render = sorted( + [key for key in load_keys if key in registered_output_keys] + ) print(f"Rendering {total_frames} frames of {', '.join(keys_to_render)}...") # As frame_end is pointing to the next free frame, decrease it by one, as @@ -706,7 +822,9 @@ def render(output_dir: Optional[str] = None, file_prefix: str = "rgb_", output_k pipe_out, pipe_in = os.pipe() begin = time.time() with stdout_redirected(pipe_in, enabled=not verbose) as stdout: - with _render_progress_bar(pipe_out, pipe_in, stdout, total_frames, enabled=not verbose): + with _render_progress_bar( + pipe_out, pipe_in, stdout, total_frames, enabled=not verbose + ): bpy.ops.render.render(animation=True, write_still=True) # Close Pipes to prevent having unclosed file handles @@ -723,17 +841,29 @@ def render(output_dir: Optional[str] = None, file_prefix: str = "rgb_", output_k # Revert changes bpy.context.scene.frame_end += 1 else: - raise RuntimeError("No camera poses have been registered, therefore nothing can be rendered. A camera " - "pose can be registered via bproc.camera.add_camera_pose().") - - return _WriterUtility.load_registered_outputs(load_keys, keys_with_alpha_channel) if return_data else {} - - -def set_output_format(file_format: Optional[str] = None, color_depth: Optional[int] = None, - enable_transparency: Optional[bool] = None, jpg_quality: Optional[int] = None, - view_transform: Optional[str] = None, look: Optional[str] = None, - exposure: Optional[float] = None, gamma: Optional[float] = None): - """ Sets the output format to use for rendering. Default values defined in DefaultConfig.py. + raise RuntimeError( + "No camera poses have been registered, therefore nothing can be rendered. A camera " + "pose can be registered via bproc.camera.add_camera_pose()." + ) + + return ( + _WriterUtility.load_registered_outputs(load_keys, keys_with_alpha_channel) + if return_data + else {} + ) + + +def set_output_format( + file_format: Optional[str] = None, + color_depth: Optional[int] = None, + enable_transparency: Optional[bool] = None, + jpg_quality: Optional[int] = None, + view_transform: Optional[str] = None, + look: Optional[str] = None, + exposure: Optional[float] = None, + gamma: Optional[float] = None, +): + """Sets the output format to use for rendering. Default values defined in DefaultConfig.py. :param file_format: The file format to use, e.q. "PNG", "JPEG" or "OPEN_EXR". :param color_depth: The color depth. 
@@ -748,7 +878,9 @@ def set_output_format(file_format: Optional[str] = None, color_depth: Optional[i if enable_transparency is not None: # In case a previous renderer changed these settings # Store as RGB by default unless the user specifies store_alpha as true in yaml - bpy.context.scene.render.image_settings.color_mode = "RGBA" if enable_transparency else "RGB" + bpy.context.scene.render.image_settings.color_mode = ( + "RGBA" if enable_transparency else "RGB" + ) # set the background as transparent if transparent_background is true in yaml bpy.context.scene.render.film_transparent = enable_transparency if file_format is not None: @@ -768,9 +900,12 @@ def set_output_format(file_format: Optional[str] = None, color_depth: Optional[i bpy.context.scene.view_settings.gamma = gamma -def enable_motion_blur(motion_blur_length: float = 0.5, rolling_shutter_type: str = "NONE", - rolling_shutter_length: float = 0.1): - """ Enables motion blur and sets rolling shutter. +def enable_motion_blur( + motion_blur_length: float = 0.5, + rolling_shutter_type: str = "NONE", + rolling_shutter_length: float = 0.1, +): + """Enables motion blur and sets rolling shutter. :param motion_blur_length: Time taken in frames between shutter open and close. :param rolling_shutter_type: Type of rolling shutter effect. If "NONE", rolling shutter is disabled. @@ -784,13 +919,13 @@ def enable_motion_blur(motion_blur_length: float = 0.5, rolling_shutter_type: st def render_init(): - """ Initializes the renderer. + """Initializes the renderer. This enables the cycles renderer and sets some options to speedup rendering. """ bpy.context.scene.render.resolution_percentage = 100 # Lightning settings to reduce training time - bpy.context.scene.render.engine = 'CYCLES' + bpy.context.scene.render.engine = "CYCLES" bpy.context.scene.cycles.debug_bvh_type = "STATIC_BVH" bpy.context.scene.cycles.debug_use_spatial_splits = True @@ -799,7 +934,7 @@ def render_init(): def disable_all_denoiser(): - """ Disables all denoiser. + """Disables all denoiser. At the moment this includes the cycles and the intel denoiser. """ @@ -813,9 +948,11 @@ def disable_all_denoiser(): links = bpy.context.scene.node_tree.links # Go through all existing denoiser nodes - for denoiser_node in Utility.get_nodes_with_type(nodes, 'CompositorNodeDenoise'): - in_node = denoiser_node.inputs['Image'] - out_node = denoiser_node.outputs['Image'] + for denoiser_node in Utility.get_nodes_with_type( + nodes, "CompositorNodeDenoise" + ): + in_node = denoiser_node.inputs["Image"] + out_node = denoiser_node.outputs["Image"] # If it is fully included into the node tree if in_node.is_linked and out_node.is_linked: @@ -830,7 +967,7 @@ def disable_all_denoiser(): def set_world_background(color: List[float], strength: float = 1): - """ Sets the color of blenders world background + """Sets the color of blenders world background :param color: A three-dimensional list specifying the new color in floats. :param strength: The strength of the emitted background light. 
@@ -841,21 +978,24 @@ def set_world_background(color: List[float], strength: float = 1): links = world.node_tree.links # Unlink any incoming link that would overwrite the default value - if len(nodes.get("Background").inputs['Color'].links) > 0: - links.remove(nodes.get("Background").inputs['Color'].links[0]) + if len(nodes.get("Background").inputs["Color"].links) > 0: + links.remove(nodes.get("Background").inputs["Color"].links[0]) - nodes.get("Background").inputs['Strength'].default_value = strength - nodes.get("Background").inputs['Color'].default_value = color + [1] + nodes.get("Background").inputs["Strength"].default_value = strength + nodes.get("Background").inputs["Color"].default_value = color + [1] def enable_experimental_features(): - """ Enables experimental cycles features. """ - bpy.context.scene.cycles.feature_set = 'EXPERIMENTAL' + """Enables experimental cycles features.""" + bpy.context.scene.cycles.feature_set = "EXPERIMENTAL" -def set_render_devices(use_only_cpu: bool = False, desired_gpu_device_type: Union[str, List[str]] = None, - desired_gpu_ids: Union[int, List[int]] = None): - """ Configures the devices to use for rendering. +def set_render_devices( + use_only_cpu: bool = False, + desired_gpu_device_type: Union[str, List[str]] = None, + desired_gpu_ids: Union[int, List[int]] = None, +): + """Configures the devices to use for rendering. :param use_only_cpu: If True, only the cpu is used for rendering. :param desired_gpu_device_type: One or multiple GPU device types to consider. If multiple are given, @@ -873,7 +1013,9 @@ def set_render_devices(use_only_cpu: bool = False, desired_gpu_device_type: Unio mac_version = platform.mac_ver()[0] mac_version_numbers = [int(ele) for ele in mac_version.split(".")] # On recent macs, use METAL, otherwise use cpu only - if (mac_version_numbers[0] == 12 and mac_version_numbers[1] >= 3) or mac_version_numbers[0] > 12: + if ( + mac_version_numbers[0] == 12 and mac_version_numbers[1] >= 3 + ) or mac_version_numbers[0] > 12: desired_gpu_device_type = ["METAL"] else: desired_gpu_device_type = [] @@ -892,12 +1034,14 @@ def set_render_devices(use_only_cpu: bool = False, desired_gpu_device_type: Unio if not desired_gpu_device_type or use_only_cpu: # Use only CPU bpy.context.scene.cycles.device = "CPU" - bpy.context.preferences.addons['cycles'].preferences.compute_device_type = "NONE" + bpy.context.preferences.addons["cycles"].preferences.compute_device_type = ( + "NONE" + ) print("Using only the CPU for rendering") else: # Use GPU bpy.context.scene.cycles.device = "GPU" - preferences = bpy.context.preferences.addons['cycles'].preferences + preferences = bpy.context.preferences.addons["cycles"].preferences # Go over all specified device types found = False @@ -906,25 +1050,344 @@ def set_render_devices(use_only_cpu: bool = False, desired_gpu_device_type: Unio devices = preferences.get_devices_for_type(device_type) if devices: # Set device type - bpy.context.preferences.addons['cycles'].preferences.compute_device_type = device_type + bpy.context.preferences.addons[ + "cycles" + ].preferences.compute_device_type = device_type # Go over all devices with that type found = False for i, device in enumerate(devices): # Only use gpus with specified ids if desired_gpu_ids is None or i in desired_gpu_ids: - print(f"Device {device.name} of type {device.type} found and used.") + print( + f"Device {device.name} of type {device.type} found and used." 
+                        )
                         device.use = True
                         found = True
                     else:
                         device.use = False
                 if not found:
-                    raise RuntimeError(f"The specified gpu ids lead to no selected gpu at all. Valid gpu ids are "
-                                       f"{list(range(len(devices)))}")
+                    raise RuntimeError(
+                        f"The specified gpu ids lead to no selected gpu at all. Valid gpu ids are "
+                        f"{list(range(len(devices)))}"
+                    )
                 break
     if not found:
         bpy.context.scene.cycles.device = "CPU"
-        bpy.context.preferences.addons['cycles'].preferences.compute_device_type = "NONE"
+        bpy.context.preferences.addons["cycles"].preferences.compute_device_type = (
+            "NONE"
+        )
         print("Using only the CPU for rendering")
+
+
+def load_edge_render(temp_filepath: str) -> np.ndarray:
+    """
+    Loads an edge render image from a temporary file and returns it as a numpy array.
+
+    :param temp_filepath: Path to the temporary image file to load. Must exist and be readable.
+    :return: The image as a numpy array.
+    """
+    if not os.path.isfile(temp_filepath):
+        raise FileNotFoundError(
+            f"Temporary edge image render not found at: {temp_filepath}"
+        )
+
+    # Read the image back as a NumPy array, loading the alpha channel if present
+    temp_img = cv2.imread(temp_filepath, cv2.IMREAD_UNCHANGED)
+
+    # Remove the temporary file
+    os.remove(temp_filepath)
+
+    return temp_img
+
+
+def freestyle_config(
+    line_thickness: float,
+    crease_angle: float,
+    view_layer: bpy.types.ViewLayer,
+    scene: bpy.types.Scene,
+) -> None:
+    """
+    Configures Blender Freestyle settings for stylized edge rendering.
+
+    :param line_thickness: Thickness of the rendered lines in pixels.
+    :param crease_angle: Crease angle in degrees used to detect and render sharp edges.
+    :param view_layer: The Blender ViewLayer where Freestyle is configured.
+    :param scene: The Blender Scene associated with the rendering. Used to enable Freestyle globally.
+    """
+    # Enable Freestyle rendering
+    scene.render.use_freestyle = True
+
+    # Get or create a Freestyle settings object
+    freestyle_settings = view_layer.freestyle_settings
+    freestyle_settings.as_render_pass = True  # Output as separate pass
+    freestyle_settings.use_smoothness = False
+    freestyle_settings.use_culling = True  # Enable edge culling to speed up rendering
+    freestyle_settings.crease_angle = np.deg2rad(crease_angle)  # Set the crease angle
+
+    # Ensure a Line Set exists
+    if not freestyle_settings.linesets:
+        line_set = freestyle_settings.linesets.new(name="TargetEdges")
+    else:
+        line_set = freestyle_settings.linesets[0]
+
+    line_set.select_external_contour = False
+    line_set.select_material_boundary = False
+
+    # Ensure a Line Style exists
+    if not line_set.linestyle:
+        linestyle = bpy.data.linestyles.new(name="TargetEdgeStyle")
+        line_set.linestyle = linestyle  # Attach the new linestyle
+    else:
+        linestyle = line_set.linestyle
+
+    # Customize Line Style
+    linestyle.use_chaining = True  # Ensures edges are properly connected
+    linestyle.chaining = "PLAIN"  # Prevents sketchy overlapping
+    linestyle.thickness = line_thickness
+    linestyle.color = (0, 0, 0)  # Black edges
+    linestyle.alpha = 1.0
+    linestyle.use_dashed_line = False  # Avoid unnecessary complexity
+
+    # Set Edge Types: Only render essential edges
+    line_set.select_silhouette = True  # Keeps silhouette edges
+    line_set.select_border = False  # Ignore outer object borders
+    line_set.select_crease = True  # Keep sharp creases
+    line_set.select_contour = True  # Main outlines
+    line_set.select_edge_mark = True  # Include edges explicitly marked as Freestyle edges
+    line_set.visibility = "VISIBLE"  # Ensures only visible edges are considered
+
+
+def freestyle_render_config(scene: bpy.types.Scene) -> None:
+    """
+    Sets up the compositor node tree for Freestyle edge rendering output.
+
+    :param scene: The Blender Scene to configure. Enables use of nodes and sets up a node tree
+                  to output the Freestyle render pass as a PNG with RGBA channels.
+    """
+    scene.use_nodes = True
+    tree = scene.node_tree
+
+    # Check if this setup already exists to avoid duplicates
+    # (must happen before the node tree is cleared)
+    if any(n.name == "FreestyleComposite" for n in tree.nodes):
+        print("Nodes for FreestyleComposite already configured!")
+        return  # Already configured
+
+    tree.nodes.clear()
+
+    scene.render.image_settings.color_mode = "RGBA"
+    scene.render.image_settings.file_format = "PNG"
+
+    # Render Layers (includes all render passes)
+    render_layers = tree.nodes.new(type="CompositorNodeRLayers")
+    render_layers.name = "FreestyleRenderLayers"
+    render_layers.location = (-300, 0)
+
+    # Composite output node
+    composite = tree.nodes.new(type="CompositorNodeComposite")
+    composite.name = "FreestyleComposite"
+    composite.location = (200, 0)
+
+    # Connect Freestyle pass to output
+    tree.links.new(render_layers.outputs["Freestyle"], composite.inputs["Image"])
+
+
+def remap_target_objects_to_scene_by_geometry(
+    original_targets: List[MeshObject],
+    target_scene: bpy.types.Scene,
+    location_tol: float = 1e-4,
+    size_tol: float = 1e-4,
+) -> List[MeshObject]:
+    """
+    Attempts to remap a list of mesh objects to equivalent objects in a different scene based on geometry.
+
+    :param original_targets: List of MeshObject instances to remap. These are the original objects to match.
+    :param target_scene: The Blender Scene to search for matching objects.
+    :param location_tol: Tolerance for comparing object world-space locations. Default is 1e-4.
+    :param size_tol: Tolerance for comparing object dimensions. Default is 1e-4.
+    :return: A list of MeshObject instances from the target scene that match the originals by geometry.
+    """
+    remapped_targets = []
+
+    # Create a list of candidate objects in the target scene
+    candidate_objs = list(target_scene.objects)
+
+    for original in original_targets:
+        orig_loc = original.blender_obj.matrix_world.translation
+        orig_size = original.blender_obj.dimensions
+
+        # Find best match by comparing position and size
+        best_match = None
+        for candidate in candidate_objs:
+            cand_loc = candidate.matrix_world.translation
+            cand_size = candidate.dimensions
+
+            loc_diff = (cand_loc - orig_loc).length
+            size_diff = (cand_size - orig_size).length
+
+            if loc_diff < location_tol and size_diff < size_tol:
+                best_match = candidate
+                break  # Found a confident match
+
+        if best_match:
+            new_mesh_obj = MeshObject(best_match)
+            remapped_targets.append(new_mesh_obj)
+        else:
+            print(f"No geometry-based match found for {original.get_name()}")
+
+    return remapped_targets
+
+
+def get_mesh_stats(mesh: MeshObject) -> Tuple[str, int, int, int]:
+    """
+    Returns basic statistics of a mesh object.
+
+    :param mesh: The MeshObject instance to analyze. It must be the active object, as the mode is toggled.
+    :return: A tuple containing the mesh name, number of vertices, number of edges, and number of faces.
+    """
+    bpy.ops.object.mode_set(mode="OBJECT")  # Necessary to get up-to-date stats
+    mesh_data = mesh.blender_obj.data
+    mesh_stats = (
+        mesh.get_name(),
+        len(mesh_data.vertices),
+        len(mesh_data.edges),
+        len(mesh_data.polygons),
+    )
+    bpy.ops.object.mode_set(mode="EDIT")  # Return to edit mode for selection
+    return mesh_stats
+
+
+def reduce_object_complexity(
+    meshes: List[MeshObject], dissolve_angle: float, connect_non_planar_angle: float
+) -> List[MeshObject]:
+    """
+    Reduces mesh complexity by dissolving small-angle geometry and splitting non-planar faces.
+
+    :param meshes: A list of MeshObject instances to simplify.
+    :param dissolve_angle: Angle in degrees used to dissolve limited geometry (e.g., nearly collinear edges).
+    :param connect_non_planar_angle: Angle in degrees used to split non-planar faces into simpler geometry.
+    :return: A list of simplified MeshObject instances, with printed stats on reduction ratios.
+    """
+    if not meshes:
+        print("No meshes -> No mesh optimization")
+        return []
+
+    reduced_meshes = []
+
+    for mesh in meshes:
+        # Make the object active first, since get_mesh_stats toggles the mode of the active object
+        bpy.context.view_layer.objects.active = mesh.blender_obj
+        orig_mesh_stats = get_mesh_stats(mesh)
+        # Enter edit mode for the object to perform geometry operations
+        bpy.ops.object.mode_set(mode="EDIT")
+
+        # Dissolve limited with an angle
+        bpy.ops.mesh.select_all(action="SELECT")
+        bpy.ops.mesh.dissolve_limited(
+            angle_limit=np.deg2rad(dissolve_angle), use_dissolve_boundaries=False
+        )
+
+        # Split non-planar faces with an angle
+        bpy.ops.mesh.vert_connect_nonplanar(
+            angle_limit=np.deg2rad(connect_non_planar_angle)
+        )
+        bpy.ops.mesh.delete_loose(use_faces=True, use_verts=True, use_edges=True)
+
+        opt_mesh_stats = get_mesh_stats(mesh)
+        opt_ratios = tuple(
+            (e1 / e2) * 100 for e1, e2 in zip(opt_mesh_stats[1:], orig_mesh_stats[1:])
+        )
+        print(
+            f"Optimized {orig_mesh_stats[0]}: reduced to {opt_ratios[0]:.1f}% of vertices, "
+            f"{opt_ratios[1]:.1f}% of edges, {opt_ratios[2]:.1f}% of faces\n"
+        )
+
+        reduced_meshes.append(mesh)
+
+    # Leave edit mode so subsequent rendering operates on the committed mesh data
+    bpy.ops.object.mode_set(mode="OBJECT")
+
+    return reduced_meshes
+
+
+def render_edges(
+    target_objects: List[MeshObject], camera_poses: List[np.ndarray]
+) -> List[np.ndarray]:
+    """
+    Renders only the Freestyle edge pass for the given target objects from multiple camera poses.
+
+    :param target_objects: List of MeshObject instances to render as stylized edges.
+    :param camera_poses: List of 4x4 camera pose matrices (world transforms) to render from.
+    :return: A list of NumPy arrays, each representing a rendered edge image.
+    """
+    if not camera_poses:
+        print("No camera poses passed to render_edges function...")
+        return []
+    if not target_objects:
+        print("No target objects passed to render_edges function...")
+        return []
+
+    # Get current scene and duplicate it
+    original_scene = bpy.context.scene
+    bpy.ops.scene.new(type="FULL_COPY")
+    # Now the active scene is the duplicated one
+    edge_scene = bpy.context.scene
+    edge_scene.name = "Freestyle"
+    edge_view_layer = edge_scene.view_layers[0]  # get the duplicated view layer
+    edge_scene.render.engine = "BLENDER_EEVEE_NEXT"
+
+    freestyle_config(
+        line_thickness=1.0,
+        crease_angle=160,
+        view_layer=edge_view_layer,
+        scene=edge_scene,
+    )
+
+    freestyle_render_config(edge_scene)
+
+    edge_target_objects = remap_target_objects_to_scene_by_geometry(
+        target_objects, edge_scene
+    )
+    edge_target_objects = reduce_object_complexity(
+        edge_target_objects, dissolve_angle=4, connect_non_planar_angle=5
+    )
+
+    edge_images = []  # List to store rendered edge images
+    for pose_id, cam_pose in enumerate(camera_poses):
+        edge_scene.frame_set(pose_id)  # Forces Freestyle to recompute scene
+        # Hide all objects
+        for obj in edge_scene.objects:
+            obj.hide_render = True
+            obj.visible_camera = False
+
+        # Update camera transformation
+        edge_scene.camera.matrix_world = cam_pose
+        edge_view_layer.update()
+
+        for edge_target_object in edge_target_objects:
+            # Enable only the target object for rendering
+            edge_target_object.blender_obj.hide_render = False
+            edge_target_object.blender_obj.visible_camera = True
+
+            # Create a temporary file to store the rendered image
+            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
+                temp_filepath = temp_file.name
+
+            # Set Blender's output path to the temporary file
+            edge_scene.render.filepath = temp_filepath
+
+            # Render the Freestyle pass for this object
+            bpy.ops.render.render(write_still=True)
+            edge_img = load_edge_render(temp_filepath)
+            edge_images.append(edge_img)
+
+            # Disable the target object again after rendering
+            edge_target_object.blender_obj.hide_render = True
+            edge_target_object.blender_obj.visible_camera = False
+
+    # Switch back to original scene
+    bpy.context.window.scene = original_scene
+    bpy.data.scenes.remove(edge_scene)
+
+    return edge_images  # Return the list of rendered edge images
diff --git a/blenderproc/python/writer/BopWriterUtility.py b/blenderproc/python/writer/BopWriterUtility.py
index 3a748ed52..59739f9d4 100644
--- a/blenderproc/python/writer/BopWriterUtility.py
+++ b/blenderproc/python/writer/BopWriterUtility.py
@@ -4,6 +4,7 @@
 import json
 from multiprocessing import Pool
 import os
+from pathlib import Path
 import glob
 import trimesh
 from typing import List, Optional, Dict, Tuple
@@ -21,20 +22,35 @@
 from blenderproc.python.writer.WriterUtility import _WriterUtility
 from blenderproc.python.types.LinkUtility import Link
 from blenderproc.python.utility.SetupUtility import SetupUtility
-from blenderproc.python.utility.MathUtility import change_target_coordinate_frame_of_transformation_matrix
+from blenderproc.python.utility.MathUtility import (
+    change_target_coordinate_frame_of_transformation_matrix,
+)
 
 # EGL is not available under windows
 if sys.platform in ["linux", "linux2"]:
-    os.environ['PYOPENGL_PLATFORM'] = 'egl'
-
-
-def write_bop(output_dir: str, target_objects: Optional[List[MeshObject]] = None, - depths: List[np.ndarray] = None, colors: List[np.ndarray] = None, - color_file_format: str = "PNG", dataset: str = "", append_to_existing_output: bool = True, - depth_scale: float = 1.0, jpg_quality: int = 95, save_world2cam: bool = True, - ignore_dist_thres: float = 100., m2mm: Optional[bool] = None, annotation_unit: str = 'mm', - frames_per_chunk: int = 1000, calc_mask_info_coco: bool = True, delta: float = 0.015, - num_worker: Optional[int] = 0): + os.environ["PYOPENGL_PLATFORM"] = "egl" + + +def write_bop( + output_dir: str, + target_objects: Optional[List[MeshObject]] = None, + depths: List[np.ndarray] = None, + colors: List[np.ndarray] = None, + edges: List[np.ndarray] = None, + color_file_format: str = "PNG", + dataset: str = "", + append_to_existing_output: bool = True, + depth_scale: float = 1.0, + jpg_quality: int = 95, + save_world2cam: bool = True, + ignore_dist_thres: float = 100.0, + m2mm: Optional[bool] = None, + annotation_unit: str = "mm", + frames_per_chunk: int = 1000, + calc_mask_info_coco: bool = True, + delta: float = 0.015, + num_worker: Optional[int] = 0, +): """Write the BOP data :param output_dir: Path to the output directory. @@ -42,6 +58,7 @@ def write_bop(output_dir: str, target_objects: Optional[List[MeshObject]] = None from specified dataset :param depths: List of depth images in m to save :param colors: List of color images to save + :param edges: List of edge images to save :param color_file_format: File type to save color images. Available: "PNG", "JPEG" :param jpg_quality: If color_file_format is "JPEG", save with the given quality. :param dataset: Only save annotations for objects of the specified bop dataset. Saves all object poses if undefined. @@ -65,8 +82,8 @@ def write_bop(output_dir: str, target_objects: Optional[List[MeshObject]] = None # Output paths. dataset_dir = os.path.join(output_dir, dataset) - chunks_dir = os.path.join(dataset_dir, 'train_pbr') - camera_path = os.path.join(dataset_dir, 'camera.json') + chunks_dir = os.path.join(dataset_dir, "train_pbr") + camera_path = os.path.join(dataset_dir, "camera.json") # Create the output directory structure. if not os.path.exists(dataset_dir): @@ -80,8 +97,10 @@ def write_bop(output_dir: str, target_objects: Optional[List[MeshObject]] = None dataset_objects = target_objects for obj in dataset_objects: if obj.is_hidden(): - print(f"WARNING: The given object {obj.get_name()} is hidden. However, the bop writer will still add " - "coco annotations for it. If this is not desired, don't pass the object to the bop writer.") + print( + f"WARNING: The given object {obj.get_name()} is hidden. However, the bop writer will still add " + "coco annotations for it. If this is not desired, don't pass the object to the bop writer." + ) elif dataset: dataset_objects = [] for obj in get_all_mesh_objects(): @@ -96,23 +115,27 @@ def write_bop(output_dir: str, target_objects: Optional[List[MeshObject]] = None # Check if there is any object from the specified dataset. if not dataset_objects: - raise RuntimeError(f"The scene does not contain any object from the specified dataset: {dataset}. " - f"Either remove the dataset parameter or assign custom property 'bop_dataset_name'" - f" to selected objects") + raise RuntimeError( + f"The scene does not contain any object from the specified dataset: {dataset}. 
" + f"Either remove the dataset parameter or assign custom property 'bop_dataset_name'" + f" to selected objects" + ) if calc_mask_info_coco: # It might be that a chunk dir already exists where the writer appends frames. # If one (or multiple) more chunk dirs are created to save the rendered frames to, # mask/info/coco annotations need to be calculated for all of them - chunk_dirs = sorted(glob.glob(os.path.join(chunks_dir, '*'))) + chunk_dirs = sorted(glob.glob(os.path.join(chunks_dir, "*"))) chunk_dirs = [d for d in chunk_dirs if os.path.isdir(d)] last_chunk_dir = sorted(chunk_dirs)[-1] if chunk_dirs else None starting_chunk_id = 0 starting_frame_id = 0 if last_chunk_dir: - last_chunk_gt_fpath = os.path.join(last_chunk_dir, 'scene_gt.json') - chunk_gt = _BopWriterUtility.load_json(last_chunk_gt_fpath, keys_to_int=True) + last_chunk_gt_fpath = os.path.join(last_chunk_dir, "scene_gt.json") + chunk_gt = _BopWriterUtility.load_json( + last_chunk_gt_fpath, keys_to_int=True + ) # Current chunk and frame ID's. starting_chunk_id = int(os.path.basename(last_chunk_dir)) @@ -124,48 +147,72 @@ def write_bop(output_dir: str, target_objects: Optional[List[MeshObject]] = None # Save the data. _BopWriterUtility.write_camera(camera_path, depth_scale=depth_scale) - assert annotation_unit in ['m', 'dm', 'cm', 'mm'], (f"Invalid annotation unit: `{annotation_unit}`. Supported " - f"are 'm', 'dm', 'cm', 'mm'") - annotation_scale = {'m': 1., 'dm': 10., 'cm': 100., 'mm': 1000.}[annotation_unit] + assert annotation_unit in ["m", "dm", "cm", "mm"], ( + f"Invalid annotation unit: `{annotation_unit}`. Supported " + f"are 'm', 'dm', 'cm', 'mm'" + ) + annotation_scale = {"m": 1.0, "dm": 10.0, "cm": 100.0, "mm": 1000.0}[ + annotation_unit + ] if m2mm is not None: - warnings.warn("WARNING: `m2mm` is deprecated, please use `annotation_scale='mm'` instead!") - annotation_scale = 1000. - _BopWriterUtility.write_frames(chunks_dir, dataset_objects=dataset_objects, depths=depths, colors=colors, - color_file_format=color_file_format, frames_per_chunk=frames_per_chunk, - annotation_scale=annotation_scale, ignore_dist_thres=ignore_dist_thres, - save_world2cam=save_world2cam, depth_scale=depth_scale, jpg_quality=jpg_quality) + warnings.warn( + "WARNING: `m2mm` is deprecated, please use `annotation_scale='mm'` instead!" 
+        )
+        annotation_scale = 1000.0
+    _BopWriterUtility.write_frames(
+        chunks_dir,
+        dataset_objects=dataset_objects,
+        depths=depths,
+        colors=colors,
+        color_file_format=color_file_format,
+        frames_per_chunk=frames_per_chunk,
+        annotation_scale=annotation_scale,
+        ignore_dist_thres=ignore_dist_thres,
+        save_world2cam=save_world2cam,
+        depth_scale=depth_scale,
+        jpg_quality=jpg_quality,
+    )
+    # Determine which chunk directories the frames were written to. They are needed
+    # both for writing the edge images and for the mask/info/coco annotations.
+    chunk_dirs = sorted(glob.glob(os.path.join(chunks_dir, "*")))
+    chunk_dirs = [d for d in chunk_dirs if os.path.isdir(d)]
+    if calc_mask_info_coco:
+        # starting_chunk_id is only computed when mask/info/coco annotations are
+        # requested, so the slicing can only be applied in that case
+        chunk_dir_ids = [os.path.basename(d) for d in chunk_dirs]
+        chunk_dirs = chunk_dirs[chunk_dir_ids.index(f"{starting_chunk_id:06d}") :]
+    if edges:
+        _BopWriterUtility.write_edges(chunk_dirs=chunk_dirs, edges=edges)
 
     if calc_mask_info_coco:
         # Set up the bop toolkit
-        SetupUtility.setup_pip(["git+https://github.com/thodan/bop_toolkit", "PyOpenGL==3.1.0"])
+        SetupUtility.setup_pip(
+            ["git+https://github.com/thodan/bop_toolkit", "PyOpenGL==3.1.0"]
+        )
 
         # determine which objects to add to the vsipy renderer
        # for numpy>=1.20, np.float is deprecated: https://numpy.org/doc/stable/release/1.20.0-notes.html#deprecations
         np.float = float
 
-        # Determine for which directories mask_info_coco has to be calculated
-        chunk_dirs = sorted(glob.glob(os.path.join(chunks_dir, '*')))
-        chunk_dirs = [d for d in chunk_dirs if os.path.isdir(d)]
-        chunk_dir_ids = [d.split('/')[-1] for d in chunk_dirs]
-        chunk_dirs = chunk_dirs[chunk_dir_ids.index(f"{starting_chunk_id:06d}"):]
-
         # convert all objects to trimesh objects
         trimesh_objects = {}
         for obj in dataset_objects:
-            if obj.get_cp('category_id') in trimesh_objects:
+            if obj.get_cp("category_id") in trimesh_objects:
                 continue
             if isinstance(obj, Link):
                 if not obj.visuals:
                     continue
                 if len(obj.visuals) > 1:
-                    warnings.warn('BOP Writer only supports saving annotations of one visual mesh per Link')
+                    warnings.warn(
+                        "BOP Writer only supports saving annotations of one visual mesh per Link"
+                    )
                 trimesh_obj = obj.mesh_as_trimesh()
                 # here we also add the scale factor of the objects. the position of the pyrender camera will change based
                 # on the initial scale factor of the objects and the saved annotation format
-                if not np.all(np.isclose(np.array(obj.blender_obj.scale), obj.blender_obj.scale[0])):
-                    print("WARNING: the scale is not the same across all dimensions, writing bop_toolkit annotations with "
-                          "the bop writer will fail!")
+                if not np.all(
+                    np.isclose(np.array(obj.blender_obj.scale), obj.blender_obj.scale[0])
+                ):
+                    print(
+                        "WARNING: the scale is not the same across all dimensions, writing bop_toolkit annotations with "
+                        "the bop writer will fail!"
+ ) + trimesh_objects[obj.get_cp("category_id")] = trimesh_obj # Create pool and init each worker width = bpy.context.scene.render.resolution_x @@ -174,17 +221,34 @@ def write_bop(output_dir: str, target_objects: Optional[List[MeshObject]] = None pool = None _BopWriterUtility._pyrender_init(width, height, trimesh_objects) else: - pool = Pool(num_worker, initializer=_BopWriterUtility._pyrender_init, initargs=[width, height, trimesh_objects]) - - _BopWriterUtility.calc_gt_masks(chunk_dirs=chunk_dirs, starting_frame_id=starting_frame_id, - annotation_scale=annotation_scale, delta=delta, pool=pool) - - _BopWriterUtility.calc_gt_info(chunk_dirs=chunk_dirs, starting_frame_id=starting_frame_id, - annotation_scale=annotation_scale, delta=delta, pool=pool) + pool = Pool( + num_worker, + initializer=_BopWriterUtility._pyrender_init, + initargs=[width, height, trimesh_objects], + ) + + _BopWriterUtility.calc_gt_masks( + chunk_dirs=chunk_dirs, + starting_frame_id=starting_frame_id, + annotation_scale=annotation_scale, + delta=delta, + pool=pool, + ) + + _BopWriterUtility.calc_gt_info( + chunk_dirs=chunk_dirs, + starting_frame_id=starting_frame_id, + annotation_scale=annotation_scale, + delta=delta, + pool=pool, + ) + + _BopWriterUtility.calc_gt_coco( + chunk_dirs=chunk_dirs, + dataset_objects=dataset_objects, + starting_frame_id=starting_frame_id, + ) - _BopWriterUtility.calc_gt_coco(chunk_dirs=chunk_dirs, dataset_objects=dataset_objects, - starting_frame_id=starting_frame_id) - if pool is not None: pool.close() pool.join() @@ -193,8 +257,10 @@ def write_bop(output_dir: str, target_objects: Optional[List[MeshObject]] = None _BopWriterUtility._pyrender_cleanup() -def bop_pose_to_pyrender_coordinate_system(cam_R_m2c: np.ndarray, cam_t_m2c: np.ndarray) -> np.ndarray: - """ Converts an object pose in bop format to pyrender camera coordinate system +def bop_pose_to_pyrender_coordinate_system( + cam_R_m2c: np.ndarray, cam_t_m2c: np.ndarray +) -> np.ndarray: + """Converts an object pose in bop format to pyrender camera coordinate system (https://pyrender.readthedocs.io/en/latest/examples/cameras.html). :param cam_R_m2c: 3x3 Rotation matrix. @@ -206,14 +272,16 @@ def bop_pose_to_pyrender_coordinate_system(cam_R_m2c: np.ndarray, cam_t_m2c: np. bop_pose[:3, :3] = cam_R_m2c bop_pose[:3, 3] = cam_t_m2c - return change_target_coordinate_frame_of_transformation_matrix(bop_pose, ["X", "-Y", "-Z"]) + return change_target_coordinate_frame_of_transformation_matrix( + bop_pose, ["X", "-Y", "-Z"] + ) class _BopWriterUtility: - """ Saves the synthesized dataset in the BOP format. The dataset is split - into chunks which are saved as individual "scenes". For more details - about the BOP format, visit the BOP toolkit docs: - https://github.com/thodan/bop_toolkit/blob/master/docs/bop_datasets_format.md + """Saves the synthesized dataset in the BOP format. The dataset is split + into chunks which are saved as individual "scenes". For more details + about the BOP format, visit the BOP toolkit docs: + https://github.com/thodan/bop_toolkit/blob/master/docs/bop_datasets_format.md """ @@ -229,9 +297,9 @@ def load_json(path, keys_to_int=False): # Keys to integers. 
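        # (Illustrative note, not part of the original patch: JSON object keys
        # are always strings, so frame ids read back from scene_gt.json arrive
        # as "0", "1", ...; the hook below maps them back to ints, turning
        # {"0": [...]} into {0: [...]} while leaving non-numeric keys alone.)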
def convert_keys_to_int(x): - return {int(k) if k.lstrip('-').isdigit() else k: v for k, v in x.items()} + return {int(k) if k.lstrip("-").isdigit() else k: v for k, v in x.items()} - with open(path, 'r', encoding="utf-8") as f: + with open(path, "r", encoding="utf-8") as f: if keys_to_int: content = json.load(f, object_hook=convert_keys_to_int) else: @@ -241,32 +309,32 @@ def convert_keys_to_int(x): @staticmethod def save_json(path, content): - """ Saves the content to a JSON file in a human-friendly format. + """Saves the content to a JSON file in a human-friendly format. From the BOP toolkit (https://github.com/thodan/bop_toolkit). :param path: Path to the output JSON file. :param content: Dictionary/list to save. """ text = "" - with open(path, 'w', encoding="utf-8") as file: + with open(path, "w", encoding="utf-8") as file: if isinstance(content, dict): - text += '{\n' + text += "{\n" content_sorted = sorted(content.items(), key=lambda x: x[0]) for elem_id, (k, v) in enumerate(content_sorted): text += f' "{k}": {json.dumps(v, sort_keys=True)}' if elem_id != len(content) - 1: - text += ',' - text += '\n' - text += '}' + text += "," + text += "\n" + text += "}" file.write(text) elif isinstance(content, list): - text += '[\n' + text += "[\n" for elem_id, elem in enumerate(content): - text += f' {json.dumps(elem, sort_keys=True)}' + text += f" {json.dumps(elem, sort_keys=True)}" if elem_id != len(content) - 1: - text += ',' - text += '\n' - text += ']' + text += "," + text += "\n" + text += "]" file.write(text) else: json.dump(content, file, sort_keys=True) @@ -280,40 +348,46 @@ def save_depth(path: str, im: np.ndarray): :param im: ndarray with the depth image to save. """ if not path.endswith(".png"): - raise ValueError('Only PNG format is currently supported.') + raise ValueError("Only PNG format is currently supported.") im[im > 65535] = 65535 im_uint16 = np.round(im).astype(np.uint16) # PyPNG library can save 16-bit PNG and is faster than imageio.imwrite(). w_depth = png.Writer(im.shape[1], im.shape[0], greyscale=True, bitdepth=16) - with open(path, 'wb') as f: + with open(path, "wb") as f: w_depth.write(f, np.reshape(im_uint16, (-1, im.shape[1]))) @staticmethod def write_camera(camera_path: str, depth_scale: float = 1.0): - """ Writes camera.json into dataset_dir. + """Writes camera.json into dataset_dir. :param camera_path: Path to camera.json :param depth_scale: Multiply the uint16 output depth image with this factor to get depth in mm. """ # Use second frame for reading intrinsics (due to backwards compatibility) bpy.context.scene.frame_set(1) - cam_K = _WriterUtility.get_cam_attribute(bpy.context.scene.camera, 'cam_K') - camera = {'cx': cam_K[0][2], - 'cy': cam_K[1][2], - 'depth_scale': depth_scale, - 'fx': cam_K[0][0], - 'fy': cam_K[1][1], - 'height': bpy.context.scene.render.resolution_y, - 'width': bpy.context.scene.render.resolution_x} + cam_K = _WriterUtility.get_cam_attribute(bpy.context.scene.camera, "cam_K") + camera = { + "cx": cam_K[0][2], + "cy": cam_K[1][2], + "depth_scale": depth_scale, + "fx": cam_K[0][0], + "fy": cam_K[1][1], + "height": bpy.context.scene.render.resolution_y, + "width": bpy.context.scene.render.resolution_x, + } _BopWriterUtility.save_json(camera_path, camera) @staticmethod - def get_frame_gt(dataset_objects: List[bpy.types.Mesh], unit_scaling: float, ignore_dist_thres: float, - destination_frame: Optional[List[str]] = None): - """ Returns GT pose annotations between active camera and objects. 
- + def get_frame_gt( + dataset_objects: List[bpy.types.Mesh], + unit_scaling: float, + ignore_dist_thres: float, + destination_frame: Optional[List[str]] = None, + ): + """Returns GT pose annotations between active camera and objects. + :param dataset_objects: Save annotations for these objects. :param unit_scaling: 1000. for outputting poses in mm :param ignore_dist_thres: Distance between camera and object after which object is ignored. @@ -324,8 +398,13 @@ def get_frame_gt(dataset_objects: List[bpy.types.Mesh], unit_scaling: float, ign if destination_frame is None: destination_frame = ["X", "-Y", "-Z"] - H_c2w_opencv = Matrix(_WriterUtility.get_cam_attribute(bpy.context.scene.camera, 'cam2world_matrix', - local_frame_change=destination_frame)) + H_c2w_opencv = Matrix( + _WriterUtility.get_cam_attribute( + bpy.context.scene.camera, + "cam2world_matrix", + local_frame_change=destination_frame, + ) + ) frame_gt = [] for obj in dataset_objects: @@ -333,11 +412,15 @@ def get_frame_gt(dataset_objects: List[bpy.types.Mesh], unit_scaling: float, ign if not obj.visuals: continue if len(obj.visuals) > 1: - warnings.warn('BOP Writer only supports saving poses of one visual mesh per Link') + warnings.warn( + "BOP Writer only supports saving poses of one visual mesh per Link" + ) H_m2w = Matrix(obj.get_visual_local2world_mats()[0]) else: H_m2w = Matrix(obj.get_local2world_mat()) - assert obj.has_cp("category_id"), f"{obj.get_name()} object has no custom property 'category_id'" + assert obj.has_cp( + "category_id" + ), f"{obj.get_name()} object has no custom property 'category_id'" cam_H_m2c = H_c2w_opencv.inverted() @ H_m2w cam_R_m2c = cam_H_m2c.to_quaternion().to_matrix() @@ -346,25 +429,39 @@ def get_frame_gt(dataset_objects: List[bpy.types.Mesh], unit_scaling: float, ign # ignore examples that fell through the plane if not np.linalg.norm(list(cam_t_m2c)) > ignore_dist_thres: cam_t_m2c = list(cam_t_m2c * unit_scaling) - frame_gt.append({ - 'cam_R_m2c': list(cam_R_m2c[0]) + list(cam_R_m2c[1]) + list(cam_R_m2c[2]), - 'cam_t_m2c': cam_t_m2c, - 'obj_id': obj.get_cp("category_id") if not isinstance(obj, Link) else obj.visuals[0].get_cp( - 'category_id') - }) + frame_gt.append( + { + "cam_R_m2c": list(cam_R_m2c[0]) + + list(cam_R_m2c[1]) + + list(cam_R_m2c[2]), + "cam_t_m2c": cam_t_m2c, + "obj_id": ( + obj.get_cp("category_id") + if not isinstance(obj, Link) + else obj.visuals[0].get_cp("category_id") + ), + } + ) else: - print('ignored obj, ', obj.get_cp("category_id"), 'because either ') - print('(1) it is further away than parameter "ignore_dist_thres: ",', ignore_dist_thres) - print('(e.g. because it fell through a plane during physics sim)') - print('or') - print('(2) the object pose has not been given in meters') + print("ignored obj, ", obj.get_cp("category_id"), "because either ") + print( + '(1) it is further away than parameter "ignore_dist_thres: ",', + ignore_dist_thres, + ) + print("(e.g. because it fell through a plane during physics sim)") + print("or") + print("(2) the object pose has not been given in meters") return frame_gt @staticmethod - def get_frame_camera(save_world2cam: bool, depth_scale: float = 1.0, unit_scaling: float = 1000., - destination_frame: Optional[List[str]] = None): - """ Returns camera parameters for the active camera. + def get_frame_camera( + save_world2cam: bool, + depth_scale: float = 1.0, + unit_scaling: float = 1000.0, + destination_frame: Optional[List[str]] = None, + ): + """Returns camera parameters for the active camera. 
:param save_world2cam: If true, camera to world transformations "cam_R_w2c", "cam_t_w2c" are saved in scene_camera.json @@ -376,31 +473,47 @@ def get_frame_camera(save_world2cam: bool, depth_scale: float = 1.0, unit_scalin if destination_frame is None: destination_frame = ["X", "-Y", "-Z"] - cam_K = _WriterUtility.get_cam_attribute(bpy.context.scene.camera, 'cam_K') + cam_K = _WriterUtility.get_cam_attribute(bpy.context.scene.camera, "cam_K") frame_camera_dict = { - 'cam_K': cam_K[0] + cam_K[1] + cam_K[2], - 'depth_scale': depth_scale + "cam_K": cam_K[0] + cam_K[1] + cam_K[2], + "depth_scale": depth_scale, } if save_world2cam: - H_c2w_opencv = Matrix(_WriterUtility.get_cam_attribute(bpy.context.scene.camera, 'cam2world_matrix', - local_frame_change=destination_frame)) + H_c2w_opencv = Matrix( + _WriterUtility.get_cam_attribute( + bpy.context.scene.camera, + "cam2world_matrix", + local_frame_change=destination_frame, + ) + ) H_w2c_opencv = H_c2w_opencv.inverted() R_w2c_opencv = H_w2c_opencv.to_quaternion().to_matrix() t_w2c_opencv = H_w2c_opencv.to_translation() * unit_scaling - frame_camera_dict['cam_R_w2c'] = list(R_w2c_opencv[0]) + list(R_w2c_opencv[1]) + list(R_w2c_opencv[2]) - frame_camera_dict['cam_t_w2c'] = list(t_w2c_opencv) + frame_camera_dict["cam_R_w2c"] = ( + list(R_w2c_opencv[0]) + list(R_w2c_opencv[1]) + list(R_w2c_opencv[2]) + ) + frame_camera_dict["cam_t_w2c"] = list(t_w2c_opencv) return frame_camera_dict @staticmethod - def write_frames(chunks_dir: str, dataset_objects: list, depths: List[np.ndarray], - colors: List[np.ndarray], color_file_format: str = "PNG", - depth_scale: float = 1.0, frames_per_chunk: int = 1000, annotation_scale: float = 1000., - ignore_dist_thres: float = 100., save_world2cam: bool = True, jpg_quality: int = 95): + def write_frames( + chunks_dir: str, + dataset_objects: list, + depths: List[np.ndarray], + colors: List[np.ndarray], + color_file_format: str = "PNG", + depth_scale: float = 1.0, + frames_per_chunk: int = 1000, + annotation_scale: float = 1000.0, + ignore_dist_thres: float = 100.0, + save_world2cam: bool = True, + jpg_quality: int = 95, + ): """Write each frame's ground truth into chunk directory in BOP format :param chunks_dir: Path to the output directory of the current chunk. @@ -418,18 +531,27 @@ def write_frames(chunks_dir: str, dataset_objects: list, depths: List[np.ndarray specified format (see `annotation_format` in `write_bop` for further details). :param frames_per_chunk: Number of frames saved in each chunk (called scene in BOP) """ - - # Format of the depth images. - depth_ext = '.png' - - rgb_tpath = os.path.join(chunks_dir, '{chunk_id:06d}', 'rgb', '{im_id:06d}' + '{im_type}') - depth_tpath = os.path.join(chunks_dir, '{chunk_id:06d}', 'depth', '{im_id:06d}' + depth_ext) - chunk_camera_tpath = os.path.join(chunks_dir, '{chunk_id:06d}', 'scene_camera.json') - chunk_gt_tpath = os.path.join(chunks_dir, '{chunk_id:06d}', 'scene_gt.json') + if not depths and not colors: + print("No depth and color information in write_frames. Returning...") + return + + # Format of the depth and edge images. 
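+        # (Illustrative note, not from the original patch: a stored 16-bit depth
+        # pixel d maps back to metric depth as depth_mm = d * depth_scale, so
+        # e.g. depth_scale = 0.1 gives 0.1 mm resolution and a range of about
+        # 6.5 m, since 65535 * 0.1 mm = 6553.5 mm.)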
+ depth_ext = ".png" + + rgb_tpath = os.path.join( + chunks_dir, "{chunk_id:06d}", "rgb", "{im_id:06d}" + "{im_type}" + ) + depth_tpath = os.path.join( + chunks_dir, "{chunk_id:06d}", "depth", "{im_id:06d}" + depth_ext + ) + chunk_camera_tpath = os.path.join( + chunks_dir, "{chunk_id:06d}", "scene_camera.json" + ) + chunk_gt_tpath = os.path.join(chunks_dir, "{chunk_id:06d}", "scene_gt.json") # Paths to the already existing chunk folders (such folders may exist # when appending to an existing dataset). - chunk_dirs = sorted(glob.glob(os.path.join(chunks_dir, '*'))) + chunk_dirs = sorted(glob.glob(os.path.join(chunks_dir, "*"))) chunk_dirs = [d for d in chunk_dirs if os.path.isdir(d)] # Get ID's of the last already existing chunk and frame. @@ -437,8 +559,10 @@ def write_frames(chunks_dir: str, dataset_objects: list, depths: List[np.ndarray curr_frame_id = 0 if len(chunk_dirs): last_chunk_dir = sorted(chunk_dirs)[-1] - last_chunk_gt_fpath = os.path.join(last_chunk_dir, 'scene_gt.json') - chunk_gt = _BopWriterUtility.load_json(last_chunk_gt_fpath, keys_to_int=True) + last_chunk_gt_fpath = os.path.join(last_chunk_dir, "scene_gt.json") + chunk_gt = _BopWriterUtility.load_json( + last_chunk_gt_fpath, keys_to_int=True + ) # Last chunk and frame ID's. last_chunk_id = int(os.path.basename(last_chunk_dir)) @@ -457,18 +581,24 @@ def write_frames(chunks_dir: str, dataset_objects: list, depths: List[np.ndarray if curr_frame_id != 0: # Load GT and camera info of the chunk we are appending to. chunk_gt = _BopWriterUtility.load_json( - chunk_gt_tpath.format(chunk_id=curr_chunk_id), keys_to_int=True) + chunk_gt_tpath.format(chunk_id=curr_chunk_id), keys_to_int=True + ) chunk_camera = _BopWriterUtility.load_json( - chunk_camera_tpath.format(chunk_id=curr_chunk_id), keys_to_int=True) + chunk_camera_tpath.format(chunk_id=curr_chunk_id), keys_to_int=True + ) # Go through all frames. num_new_frames = bpy.context.scene.frame_end - bpy.context.scene.frame_start if len(depths) != len(colors) != num_new_frames: - raise Exception("The amount of images stored in the depths/colors does not correspond to the amount" - "of images specified by frame_start to frame_end.") - - for frame_id in range(bpy.context.scene.frame_start, bpy.context.scene.frame_end): + raise Exception( + "The amount of images stored in the depths/colors does not correspond " + "to the amount of images specified by frame_start to frame_end." + ) + + for frame_id in range( + bpy.context.scene.frame_start, bpy.context.scene.frame_end + ): # Activate frame. bpy.context.scene.frame_set(frame_id) @@ -476,26 +606,38 @@ def write_frames(chunks_dir: str, dataset_objects: list, depths: List[np.ndarray if curr_frame_id == 0: chunk_gt = {} chunk_camera = {} - os.makedirs(os.path.dirname( - rgb_tpath.format(chunk_id=curr_chunk_id, im_id=0, im_type='PNG'))) - os.makedirs(os.path.dirname( - depth_tpath.format(chunk_id=curr_chunk_id, im_id=0))) + os.makedirs( + os.path.dirname( + rgb_tpath.format(chunk_id=curr_chunk_id, im_id=0, im_type="PNG") + ) + ) + os.makedirs( + os.path.dirname(depth_tpath.format(chunk_id=curr_chunk_id, im_id=0)) + ) # Get GT annotations and camera info for the current frame. 
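            # (Illustrative note, not from the original patch: each frame entry
            # appended to scene_gt.json has the form
            #   {"cam_R_m2c": [r11, ..., r33], "cam_t_m2c": [tx, ty, tz], "obj_id": <category_id>}
            # with the rotation flattened row-major and the translation given in
            # the requested annotation unit.)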
- chunk_gt[curr_frame_id] = _BopWriterUtility.get_frame_gt(dataset_objects, annotation_scale, - ignore_dist_thres) - chunk_camera[curr_frame_id] = _BopWriterUtility.get_frame_camera(save_world2cam, depth_scale, - annotation_scale) + chunk_gt[curr_frame_id] = _BopWriterUtility.get_frame_gt( + dataset_objects, annotation_scale, ignore_dist_thres + ) + chunk_camera[curr_frame_id] = _BopWriterUtility.get_frame_camera( + save_world2cam, depth_scale, annotation_scale + ) color_rgb = colors[frame_id] color_bgr = color_rgb.copy() color_bgr[..., :3] = color_bgr[..., :3][..., ::-1] - if color_file_format == 'PNG': - rgb_fpath = rgb_tpath.format(chunk_id=curr_chunk_id, im_id=curr_frame_id, im_type='.png') + if color_file_format == "PNG": + rgb_fpath = rgb_tpath.format( + chunk_id=curr_chunk_id, im_id=curr_frame_id, im_type=".png" + ) cv2.imwrite(rgb_fpath, color_bgr) - elif color_file_format == 'JPEG': - rgb_fpath = rgb_tpath.format(chunk_id=curr_chunk_id, im_id=curr_frame_id, im_type='.jpg') - cv2.imwrite(rgb_fpath, color_bgr, [int(cv2.IMWRITE_JPEG_QUALITY), jpg_quality]) + elif color_file_format == "JPEG": + rgb_fpath = rgb_tpath.format( + chunk_id=curr_chunk_id, im_id=curr_frame_id, im_type=".jpg" + ) + cv2.imwrite( + rgb_fpath, color_bgr, [int(cv2.IMWRITE_JPEG_QUALITY), jpg_quality] + ) depth = depths[frame_id] @@ -505,29 +647,37 @@ def write_frames(chunks_dir: str, dataset_objects: list, depths: List[np.ndarray depth_mm_scaled = depth_mm / float(depth_scale) # Save the scaled depth image. - depth_fpath = depth_tpath.format(chunk_id=curr_chunk_id, im_id=curr_frame_id) + depth_fpath = depth_tpath.format( + chunk_id=curr_chunk_id, im_id=curr_frame_id + ) _BopWriterUtility.save_depth(depth_fpath, depth_mm_scaled) # Save the chunk info if we are at the end of a chunk or at the last new frame. - if ((curr_frame_id + 1) % frames_per_chunk == 0) or \ - (frame_id == num_new_frames - 1): + if ((curr_frame_id + 1) % frames_per_chunk == 0) or ( + frame_id == num_new_frames - 1 + ): # Save GT annotations. - _BopWriterUtility.save_json(chunk_gt_tpath.format(chunk_id=curr_chunk_id), chunk_gt) + _BopWriterUtility.save_json( + chunk_gt_tpath.format(chunk_id=curr_chunk_id), chunk_gt + ) # Save camera info. - _BopWriterUtility.save_json(chunk_camera_tpath.format(chunk_id=curr_chunk_id), chunk_camera) + _BopWriterUtility.save_json( + chunk_camera_tpath.format(chunk_id=curr_chunk_id), chunk_camera + ) # Update ID's. curr_chunk_id += 1 curr_frame_id = 0 else: curr_frame_id += 1 - @staticmethod - def _pyrender_init(ren_width: int, ren_height: int, trimesh_objects: Dict[int, trimesh.Trimesh]): - """ Initializes a worker process for calc_gt_masks and calc_gt_info + def _pyrender_init( + ren_width: int, ren_height: int, trimesh_objects: Dict[int, trimesh.Trimesh] + ): + """Initializes a worker process for calc_gt_masks and calc_gt_info :param ren_width: The width of the images to render. :param ren_height: The height of the images to render. 
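For illustration (not part of the patch itself), the pyrender offscreen setup that `_pyrender_init` establishes can be reproduced standalone roughly as follows; the resolution, intrinsics, object and the EGL platform variable are made-up example values:

import os
os.environ.setdefault("PYOPENGL_PLATFORM", "egl")  # headless rendering, if EGL is available

import numpy as np
import trimesh
import pyrender

renderer = pyrender.OffscreenRenderer(viewport_width=640, viewport_height=480)
scene = pyrender.Scene()
# identity pose: the camera looks down -Z (OpenGL convention)
scene.add(pyrender.IntrinsicsCamera(fx=572.4, fy=573.6, cx=320.0, cy=240.0,
                                    znear=0.1, zfar=100000))

# a double-sided material, as in _pyrender_init, so non-watertight meshes still render
material = pyrender.MetallicRoughnessMaterial(doubleSided=True)
mesh = pyrender.Mesh.from_trimesh(trimesh.creation.box(extents=[0.1, 0.1, 0.1]),
                                  material=material)
pose = np.eye(4)
pose[2, 3] = -0.5  # half a metre in front of the camera
scene.add(mesh, pose=pose)

_, depth = renderer.render(scene)  # depth image in the mesh's units (here metres)
renderer.delete()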
@@ -535,30 +685,42 @@ def _pyrender_init(ren_width: int, ren_height: int, trimesh_objects: Dict[int, t
         """
         # pylint: disable=import-outside-toplevel
         # Import pyrender only inside the multiprocesses, otherwise this leads to an opengl error
-        # https://github.com/mmatl/pyrender/issues/200#issuecomment-1123713055
+        # https://github.com/mmatl/pyrender/issues/200#issuecomment-1123713055
         import pyrender
+
         # pylint: enable=import-outside-toplevel
         global renderer, renderer_large, dataset_objects
         dataset_objects = {}

         # Create renderer for calc_gt_masks
-        renderer = pyrender.OffscreenRenderer(viewport_width=ren_width, viewport_height=ren_height)
+        renderer = pyrender.OffscreenRenderer(
+            viewport_width=ren_width, viewport_height=ren_height
+        )

         # Create renderer for calc_gt_info
-        renderer_large = pyrender.OffscreenRenderer(viewport_width=ren_width * 3, viewport_height=ren_height * 3)
+        renderer_large = pyrender.OffscreenRenderer(
+            viewport_width=ren_width * 3, viewport_height=ren_height * 3
+        )

         # Create pyrender meshes
         for key in trimesh_objects.keys():
             # we need to create a double-sided material to be able to render non-watertight meshes
             # the other parameters are defaults, see
             # https://github.com/mmatl/pyrender/blob/master/pyrender/mesh.py#L216-L223
-            material = pyrender.MetallicRoughnessMaterial(alphaMode='BLEND', baseColorFactor=[0.3, 0.3, 0.3, 1.0],
-                                                          metallicFactor=0.2, roughnessFactor=0.8, doubleSided=True)
-            dataset_objects[key] = pyrender.Mesh.from_trimesh(mesh=trimesh_objects[key], material=material)
+            material = pyrender.MetallicRoughnessMaterial(
+                alphaMode="BLEND",
+                baseColorFactor=[0.3, 0.3, 0.3, 1.0],
+                metallicFactor=0.2,
+                roughnessFactor=0.8,
+                doubleSided=True,
+            )
+            dataset_objects[key] = pyrender.Mesh.from_trimesh(
+                mesh=trimesh_objects[key], material=material
+            )

     @staticmethod
     def _pyrender_cleanup():
-        """ Cleans up global renderer
-
+        """Cleans up global renderer
+
         This is only necessary when not using multiprocessing.
         """
         global renderer, renderer_large, dataset_objects
@@ -567,10 +729,18 @@ def _pyrender_cleanup():
         del dataset_objects

     @staticmethod
-    def _calc_gt_masks_iteration(annotation_scale: float, K: np.ndarray, delta: float, dist_im: np.ndarray, chunk_dir: str, im_id: int, gt_data: Tuple[int, Dict[str, int]]):
-        """ One iteration of calc_gt_masks(), executed inside a worker process.
+    def _calc_gt_masks_iteration(
+        annotation_scale: float,
+        K: np.ndarray,
+        delta: float,
+        dist_im: np.ndarray,
+        chunk_dir: str,
+        im_id: int,
+        gt_data: Tuple[int, Dict[str, int]],
+    ):
+        """One iteration of calc_gt_masks(), executed inside a worker process.
+
-
         :param annotation_scale: The scale factor applied to the calculated annotations (in [m]) to get them into
             the specified format (see `annotation_format` in `write_bop` for further details).
         :param K: The camera intrinsics to use.
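As context for the pose handling in these worker iterations (an illustrative sketch, not code from the patch): the OpenCV-to-OpenGL flip that `bop_pose_to_pyrender_coordinate_system` performs via the ["X", "-Y", "-Z"] target frame amounts to a left-multiplication with diag(1, -1, -1, 1); the pose values below are made up:

import numpy as np

cam_R_m2c = np.eye(3)                   # rotation from scene_gt.json (row-major 3x3)
cam_t_m2c = np.array([0.0, 0.0, 0.5])   # translation, here assumed already in metres

pose_cv = np.eye(4)                     # BOP/OpenCV camera: +Z forward, +Y down
pose_cv[:3, :3] = cam_R_m2c
pose_cv[:3, 3] = cam_t_m2c

flip = np.diag([1.0, -1.0, -1.0, 1.0])  # negate the Y and Z rows of the target frame
pose_gl = flip @ pose_cv                # pyrender/OpenGL camera: -Z forward, +Y up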
@@ -582,10 +752,12 @@ def _calc_gt_masks_iteration(annotation_scale: float, K: np.ndarray, delta: floa """ # pylint: disable=import-outside-toplevel # Import pyrender only inside the multiprocesses, otherwise this leads to an opengl error - # https://github.com/mmatl/pyrender/issues/200#issuecomment-1123713055 + # https://github.com/mmatl/pyrender/issues/200#issuecomment-1123713055 import pyrender + # This import is done inside to avoid having the requirement that BlenderProc depends on the bop_toolkit from bop_toolkit_lib import inout, misc, visibility + # pylint: enable=import-outside-toplevel global renderer, dataset_objects @@ -594,20 +766,23 @@ def _calc_gt_masks_iteration(annotation_scale: float, K: np.ndarray, delta: floa # Init pyrender camera fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] - camera = pyrender.IntrinsicsCamera(fx=fx, fy=fy, cx=cx, cy=cy, znear=0.1, zfar=100000) - + camera = pyrender.IntrinsicsCamera( + fx=fx, fy=fy, cx=cx, cy=cy, znear=0.1, zfar=100000 + ) + # create a new scene scene = pyrender.Scene() # add camera and current object scene.add(camera) - t = np.array(gt['cam_t_m2c']) + t = np.array(gt["cam_t_m2c"]) # rescale translation depending on initial saving format t /= annotation_scale - pose = bop_pose_to_pyrender_coordinate_system(cam_R_m2c=np.array(gt['cam_R_m2c']).reshape(3, 3), - cam_t_m2c=t) - scene.add(dataset_objects[gt['obj_id']], pose=pose) + pose = bop_pose_to_pyrender_coordinate_system( + cam_R_m2c=np.array(gt["cam_R_m2c"]).reshape(3, 3), cam_t_m2c=t + ) + scene.add(dataset_objects[gt["obj_id"]], pose=pose) # Render the depth image. _, depth_gt = renderer.render(scene=scene) @@ -620,23 +795,29 @@ def _calc_gt_masks_iteration(annotation_scale: float, K: np.ndarray, delta: floa # Mask of the visible part of the object silhouette. mask_visib = visibility.estimate_visib_mask_gt( - dist_im, dist_gt, delta, visib_mode='bop19') + dist_im, dist_gt, delta, visib_mode="bop19" + ) # Save the calculated masks. mask_path = os.path.join( - chunk_dir, 'mask', '{im_id:06d}_{gt_id:06d}.png').format(im_id=im_id, gt_id=gt_id) + chunk_dir, "mask", "{im_id:06d}_{gt_id:06d}.png" + ).format(im_id=im_id, gt_id=gt_id) inout.save_im(mask_path, 255 * mask.astype(np.uint8)) mask_visib_path = os.path.join( - chunk_dir, 'mask_visib', - '{im_id:06d}_{gt_id:06d}.png').format(im_id=im_id, gt_id=gt_id) + chunk_dir, "mask_visib", "{im_id:06d}_{gt_id:06d}.png" + ).format(im_id=im_id, gt_id=gt_id) inout.save_im(mask_visib_path, 255 * mask_visib.astype(np.uint8)) - @staticmethod - def calc_gt_masks(pool: Pool, chunk_dirs: List[str], starting_frame_id: int = 0, - annotation_scale: float = 1000., delta: float = 0.015): - """ Calculates the ground truth masks. + def calc_gt_masks( + pool: Pool, + chunk_dirs: List[str], + starting_frame_id: int = 0, + annotation_scale: float = 1000.0, + delta: float = 0.015, + ): + """Calculates the ground truth masks. From the BOP toolkit (https://github.com/thodan/bop_toolkit), with the difference of using pyrender for depth rendering. 
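The depth-to-distance conversion used in these functions comes from the bop_toolkit (`misc.depth_im_to_dist_im_fast`); as a plain-numpy sketch of the underlying idea (illustrative only, the toolkit implementation is the reference):

import numpy as np

def depth_im_to_dist_im_sketch(depth: np.ndarray, K: np.ndarray) -> np.ndarray:
    """Convert a depth image (per-pixel z) into a distance image (per-pixel ray length)."""
    h, w = depth.shape
    us, vs = np.meshgrid(np.arange(w), np.arange(h))
    # back-project every pixel to its viewing ray K^-1 [u, v, 1]^T
    rays = np.stack([(us - K[0, 2]) / K[0, 0],
                     (vs - K[1, 2]) / K[1, 1],
                     np.ones((h, w))], axis=-1)
    return depth * np.linalg.norm(rays, axis=-1)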
@@ -650,19 +831,28 @@ def calc_gt_masks(pool: Pool, chunk_dirs: List[str], starting_frame_id: int = 0, # This import is done inside to avoid having the requirement that BlenderProc depends on the bop_toolkit # pylint: disable=import-outside-toplevel from bop_toolkit_lib import inout, misc + # pylint: enable=import-outside-toplevel for dir_counter, chunk_dir in enumerate(chunk_dirs): - last_chunk_gt_fpath = os.path.join(chunk_dir, 'scene_gt.json') - last_chunk_camera_fpath = os.path.join(chunk_dir, 'scene_camera.json') - scene_gt = _BopWriterUtility.load_json(last_chunk_gt_fpath, keys_to_int=True) - scene_camera = _BopWriterUtility.load_json(last_chunk_camera_fpath, keys_to_int=True) + last_chunk_gt_fpath = os.path.join(chunk_dir, "scene_gt.json") + last_chunk_camera_fpath = os.path.join(chunk_dir, "scene_camera.json") + scene_gt = _BopWriterUtility.load_json( + last_chunk_gt_fpath, keys_to_int=True + ) + scene_camera = _BopWriterUtility.load_json( + last_chunk_camera_fpath, keys_to_int=True + ) # Create folders for the output masks (if they do not exist yet). - mask_dir_path = os.path.dirname(os.path.join(chunk_dir, 'mask', '000000_000000.png')) + mask_dir_path = os.path.dirname( + os.path.join(chunk_dir, "mask", "000000_000000.png") + ) misc.ensure_dir(mask_dir_path) - mask_visib_dir_path = os.path.dirname(os.path.join(chunk_dir, 'mask_visib', '000000_000000.png')) + mask_visib_dir_path = os.path.dirname( + os.path.join(chunk_dir, "mask_visib", "000000_000000.png") + ) misc.ensure_dir(mask_visib_dir_path) im_ids = sorted(scene_gt.keys()) @@ -673,26 +863,49 @@ def calc_gt_masks(pool: Pool, chunk_dirs: List[str], starting_frame_id: int = 0, for im_counter, im_id in enumerate(im_ids): if im_counter % 100 == 0: - misc.log(f'Calculating GT masks - {chunk_dir}, {im_counter}') + misc.log(f"Calculating GT masks - {chunk_dir}, {im_counter}") - K = np.array(scene_camera[im_id]['cam_K']).reshape(3, 3) + K = np.array(scene_camera[im_id]["cam_K"]).reshape(3, 3) # Load depth image. - depth_path = os.path.join( - chunk_dir, 'depth', '{im_id:06d}.png').format(im_id=im_id) + depth_path = os.path.join(chunk_dir, "depth", "{im_id:06d}.png").format( + im_id=im_id + ) depth_im = inout.load_depth(depth_path) - depth_im *= scene_camera[im_id]['depth_scale'] # to [mm] - depth_im /= 1000. # to [m] + depth_im *= scene_camera[im_id]["depth_scale"] # to [mm] + depth_im /= 1000.0 # to [m] dist_im = misc.depth_im_to_dist_im_fast(depth_im, K) map_fun = map if pool is None else pool.map - list(map_fun(partial(_BopWriterUtility._calc_gt_masks_iteration, annotation_scale, K, delta, dist_im, chunk_dir, im_id), enumerate(scene_gt[im_id]))) - + list( + map_fun( + partial( + _BopWriterUtility._calc_gt_masks_iteration, + annotation_scale, + K, + delta, + dist_im, + chunk_dir, + im_id, + ), + enumerate(scene_gt[im_id]), + ) + ) @staticmethod - def _calc_gt_info_iteration(annotation_scale: float, ren_cy_offset: int, ren_cx_offset: int, im_height: int, im_width: int, K: np.ndarray, delta: float, depth: np.ndarray, gt: Dict[str, int]): - """ One iteration of calc_gt_info(), executed inside a worker process. - + def _calc_gt_info_iteration( + annotation_scale: float, + ren_cy_offset: int, + ren_cx_offset: int, + im_height: int, + im_width: int, + K: np.ndarray, + delta: float, + depth: np.ndarray, + gt: Dict[str, int], + ): + """One iteration of calc_gt_info(), executed inside a worker process. 
+ :param annotation_scale: The scale factor applied to the calculated annotations (in [m]) to get them into the specified format (see `annotation_format` in `write_bop` for further details). :param ren_cy_offset: The y offset for cropping the rendered image. @@ -703,12 +916,13 @@ def _calc_gt_info_iteration(annotation_scale: float, ren_cy_offset: int, ren_cx_ :param delta: Tolerance used for estimation of the visibility masks. :param depth: The depth image of the frame. :param gt: Containing id of the object whose mask the worker should render - """ + """ # Import pyrender only inside the multiprocesses, otherwise this leads to an opengl error - # https://github.com/mmatl/pyrender/issues/200#issuecomment-1123713055 + # https://github.com/mmatl/pyrender/issues/200#issuecomment-1123713055 # pylint: disable=import-outside-toplevel import pyrender from bop_toolkit_lib import misc, visibility + # pylint: enable=import-outside-toplevel global renderer_large, dataset_objects, renderer @@ -722,27 +936,35 @@ def _calc_gt_info_iteration(annotation_scale: float, ren_cy_offset: int, ren_cx_ # Init pyrender camera fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] im_size = (depth.shape[1], depth.shape[0]) - camera = pyrender.IntrinsicsCamera(fx=fx, fy=fy, cx=cx+ren_cx_offset, cy=cy+ren_cy_offset, znear=0.1, - zfar=100000) - + camera = pyrender.IntrinsicsCamera( + fx=fx, + fy=fy, + cx=cx + ren_cx_offset, + cy=cy + ren_cy_offset, + znear=0.1, + zfar=100000, + ) + # create a new scene scene = pyrender.Scene() # add camera and current object scene.add(camera) - t = np.array(gt['cam_t_m2c']) + t = np.array(gt["cam_t_m2c"]) # rescale translation depending on initial saving format t /= annotation_scale - pose = bop_pose_to_pyrender_coordinate_system(cam_R_m2c=np.array(gt['cam_R_m2c']).reshape(3, 3), - cam_t_m2c=t) - scene.add(dataset_objects[gt['obj_id']], pose=pose) + pose = bop_pose_to_pyrender_coordinate_system( + cam_R_m2c=np.array(gt["cam_R_m2c"]).reshape(3, 3), cam_t_m2c=t + ) + scene.add(dataset_objects[gt["obj_id"]], pose=pose) # render the depth image _, depth_gt_large = renderer_large.render(scene=scene) depth_gt = depth_gt_large[ - ren_cy_offset:(ren_cy_offset + im_height), - ren_cx_offset:(ren_cx_offset + im_width)] + ren_cy_offset : (ren_cy_offset + im_height), + ren_cx_offset : (ren_cx_offset + im_width), + ] # Convert depth images to distance images. dist_gt = misc.depth_im_to_dist_im_fast(depth_gt, K) @@ -750,7 +972,8 @@ def _calc_gt_info_iteration(annotation_scale: float, ren_cy_offset: int, ren_cx_ # Estimation of the visibility mask. visib_gt = visibility.estimate_visib_mask_gt( - dist_im, dist_gt, delta, visib_mode='bop19') + dist_im, dist_gt, delta, visib_mode="bop19" + ) # Mask of the object in the GT pose. obj_mask_gt_large = depth_gt_large > 0 @@ -790,18 +1013,23 @@ def _calc_gt_info_iteration(annotation_scale: float, ren_cy_offset: int, ren_cx_ # Store the calculated info. 
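        # (Illustrative note, not from the original patch: e.g. an object whose
        # full silhouette covers 1000 px, of which 250 are visible, gets
        # visib_fract = 0.25; both boxes are [x, y, width, height] in pixels.)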
         return {
-            'px_count_all': int(px_count_all),
-            'px_count_valid': int(px_count_valid),
-            'px_count_visib': int(px_count_visib),
-            'visib_fract': float(visib_fract),
-            'bbox_obj': [int(e) for e in bbox],
-            'bbox_visib': [int(e) for e in bbox_visib]
+            "px_count_all": int(px_count_all),
+            "px_count_valid": int(px_count_valid),
+            "px_count_visib": int(px_count_visib),
+            "visib_fract": float(visib_fract),
+            "bbox_obj": [int(e) for e in bbox],
+            "bbox_visib": [int(e) for e in bbox_visib],
         }

     @staticmethod
-    def calc_gt_info(pool, chunk_dirs: List[str], starting_frame_id: int = 0,
-                     annotation_scale: float = 1000., delta: float = 0.015):
-        """ Calculates the ground truth masks.
+    def calc_gt_info(
+        pool,
+        chunk_dirs: List[str],
+        starting_frame_id: int = 0,
+        annotation_scale: float = 1000.0,
+        delta: float = 0.015,
+    ):
+        """Calculates the ground truth info (visible fractions and 2D bounding boxes).
         From the BOP toolkit (https://github.com/thodan/bop_toolkit), with the difference of using
         pyrender for depth rendering.

@@ -814,22 +1042,31 @@ def calc_gt_info(pool, chunk_dirs: List[str], starting_frame_id: int = 0,
         # This import is done inside to avoid having the requirement that BlenderProc depends on the bop_toolkit
         # pylint: disable=import-outside-toplevel
         from bop_toolkit_lib import inout, misc
+
         # pylint: enable=import-outside-toplevel

-        im_width, im_height = bpy.context.scene.render.resolution_x, bpy.context.scene.render.resolution_y
+        im_width, im_height = (
+            bpy.context.scene.render.resolution_x,
+            bpy.context.scene.render.resolution_y,
+        )
         ren_cx_offset, ren_cy_offset = im_width, im_height

         for dir_counter, chunk_dir in enumerate(chunk_dirs):
-            last_chunk_gt_fpath = os.path.join(chunk_dir, 'scene_gt.json')
-            last_chunk_camera_fpath = os.path.join(chunk_dir, 'scene_camera.json')
-            scene_gt = _BopWriterUtility.load_json(last_chunk_gt_fpath, keys_to_int=True)
-            scene_camera = _BopWriterUtility.load_json(last_chunk_camera_fpath, keys_to_int=True)
+            last_chunk_gt_fpath = os.path.join(chunk_dir, "scene_gt.json")
+            last_chunk_camera_fpath = os.path.join(chunk_dir, "scene_camera.json")
+            scene_gt = _BopWriterUtility.load_json(
+                last_chunk_gt_fpath, keys_to_int=True
+            )
+            scene_camera = _BopWriterUtility.load_json(
+                last_chunk_camera_fpath, keys_to_int=True
+            )

             # load existing gt info
             if dir_counter == 0 and starting_frame_id > 0:
                 misc.log(f"Loading gt info from existing chunk dir - {chunk_dir}")
-                scene_gt_info = _BopWriterUtility.load_json(os.path.join(chunk_dir, 'scene_gt_info.json'),
-                                                            keys_to_int=True)
+                scene_gt_info = _BopWriterUtility.load_json(
+                    os.path.join(chunk_dir, "scene_gt_info.json"), keys_to_int=True
+                )
             else:
                 scene_gt_info = {}

@@ -841,29 +1078,49 @@ def calc_gt_info(pool, chunk_dirs: List[str], starting_frame_id: int = 0,

             for im_counter, im_id in enumerate(im_ids):
                 if im_counter % 100 == 0:
-                    misc.log(f'Calculating GT info - {chunk_dir}, {im_counter}')
+                    misc.log(f"Calculating GT info - {chunk_dir}, {im_counter}")

                 # Load depth image.
-                depth_fpath = os.path.join(chunk_dir, 'depth', '{im_id:06d}.png').format(im_id=im_id)
+                depth_fpath = os.path.join(
+                    chunk_dir, "depth", "{im_id:06d}.png"
+                ).format(im_id=im_id)
                 assert os.path.isfile(depth_fpath)
                 depth = inout.load_depth(depth_fpath)
-                depth *= scene_camera[im_id]['depth_scale']  # Convert to [mm].
-                depth /= 1000.  # to [m]
+                depth *= scene_camera[im_id]["depth_scale"]  # Convert to [mm].
+                depth /= 1000.0  # to [m]

-                K = np.array(scene_camera[im_id]['cam_K']).reshape(3, 3)
+                K = np.array(scene_camera[im_id]["cam_K"]).reshape(3, 3)

                 map_fun = map if pool is None else pool.map
-                scene_gt_info[im_id] = list(map_fun(partial(_BopWriterUtility._calc_gt_info_iteration, annotation_scale, ren_cy_offset, ren_cx_offset, im_height, im_width, K, delta, depth), scene_gt[im_id]))
-
+                scene_gt_info[im_id] = list(
+                    map_fun(
+                        partial(
+                            _BopWriterUtility._calc_gt_info_iteration,
+                            annotation_scale,
+                            ren_cy_offset,
+                            ren_cx_offset,
+                            im_height,
+                            im_width,
+                            K,
+                            delta,
+                            depth,
+                        ),
+                        scene_gt[im_id],
+                    )
+                )

             # Save the info for the current scene.
-            scene_gt_info_path = os.path.join(chunk_dir, 'scene_gt_info.json')
+            scene_gt_info_path = os.path.join(chunk_dir, "scene_gt_info.json")
             misc.ensure_dir(os.path.dirname(scene_gt_info_path))
             inout.save_json(scene_gt_info_path, scene_gt_info)

     @staticmethod
-    def calc_gt_coco(chunk_dirs: List[str], dataset_objects: List[MeshObject], starting_frame_id: int = 0):
-        """ Calculates the COCO annotations.
+    def calc_gt_coco(
+        chunk_dirs: List[str],
+        dataset_objects: List[MeshObject],
+        starting_frame_id: int = 0,
+    ):
+        """Calculates the COCO annotations.
         From the BOP toolkit (https://github.com/thodan/bop_toolkit).

         :param chunk_dirs: List of directories to calculate the gt coco annotations for.
@@ -873,33 +1130,46 @@ def calc_gt_coco(chunk_dirs: List[str], dataset_objects: List[MeshObject], start
         # This import is done inside to avoid having the requirement that BlenderProc depends on the bop_toolkit
         # pylint: disable=import-outside-toplevel
         from bop_toolkit_lib import inout, misc, pycoco_utils
+
         # pylint: enable=import-outside-toplevel

         for dir_counter, chunk_dir in enumerate(chunk_dirs):
-            dataset_name = chunk_dir.split('/')[-3]
+            dataset_name = Path(chunk_dir).parents[1].name

-            CATEGORIES = [{'id': obj.get_cp('category_id'), 'name': str(obj.get_cp('category_id')), 'supercategory':
-                dataset_name} for obj in dataset_objects]
+            CATEGORIES = [
+                {
+                    "id": obj.get_cp("category_id"),
+                    "name": str(obj.get_cp("category_id")),
+                    "supercategory": dataset_name,
+                }
+                for obj in dataset_objects
+            ]
             # Remove all duplicate dicts from list.
# Ref: https://stackoverflow.com/questions/9427163/remove-duplicate-dict-in-list-in-python - CATEGORIES = list({frozenset(item.items()):item for item in CATEGORIES}.values()) + CATEGORIES = list( + {frozenset(item.items()): item for item in CATEGORIES}.values() + ) INFO = { - "description": dataset_name + '_train', + "description": dataset_name + "_train", "url": "https://github.com/thodan/bop_toolkit", "version": "0.1.0", "year": datetime.date.today().year, "contributor": "", - "date_created": datetime.datetime.utcnow().isoformat(' ') + "date_created": datetime.datetime.utcnow().isoformat(" "), } # load existing coco annotations if dir_counter == 0 and starting_frame_id > 0: - misc.log(f"Loading coco annotations from existing chunk dir - {chunk_dir}") - coco_scene_output = _BopWriterUtility.load_json(os.path.join(chunk_dir, 'scene_gt_coco.json')) + misc.log( + f"Loading coco annotations from existing chunk dir - {chunk_dir}" + ) + coco_scene_output = _BopWriterUtility.load_json( + os.path.join(chunk_dir, "scene_gt_coco.json") + ) if coco_scene_output["annotations"]: - segmentation_id = coco_scene_output["annotations"][-1]['id'] + 1 + segmentation_id = coco_scene_output["annotations"][-1]["id"] + 1 else: segmentation_id = 1 else: @@ -908,18 +1178,20 @@ def calc_gt_coco(chunk_dirs: List[str], dataset_objects: List[MeshObject], start "licenses": [], "categories": CATEGORIES, "images": [], - "annotations": [] + "annotations": [], } segmentation_id = 1 # Load info about the GT poses (e.g. visibility) for the current scene. - last_chunk_gt_fpath = os.path.join(chunk_dir, 'scene_gt.json') - scene_gt = _BopWriterUtility.load_json(last_chunk_gt_fpath, keys_to_int=True) - last_chunk_gt_info_fpath = os.path.join(chunk_dir, 'scene_gt_info.json') + last_chunk_gt_fpath = os.path.join(chunk_dir, "scene_gt.json") + scene_gt = _BopWriterUtility.load_json( + last_chunk_gt_fpath, keys_to_int=True + ) + last_chunk_gt_info_fpath = os.path.join(chunk_dir, "scene_gt_info.json") scene_gt_info = inout.load_json(last_chunk_gt_info_fpath, keys_to_int=True) # Output coco path - coco_gt_path = os.path.join(chunk_dir, 'scene_gt_coco.json') - misc.log(f'Calculating COCO annotations - {chunk_dir}') + coco_gt_path = os.path.join(chunk_dir, "scene_gt_coco.json") + misc.log(f"Calculating COCO annotations - {chunk_dir}") # Go through each view in scene_gt for scene_view, inst_list in scene_gt.items(): @@ -929,24 +1201,34 @@ def calc_gt_coco(chunk_dirs: List[str], dataset_objects: List[MeshObject], start if dir_counter == 0 and im_id < starting_frame_id: continue - img_path = os.path.join(chunk_dir, 'rgb', '{im_id:06d}.jpg').format(im_id=im_id) - relative_img_path = os.path.relpath(img_path, os.path.dirname(coco_gt_path)) - im_size = (bpy.context.scene.render.resolution_x, bpy.context.scene.render.resolution_y) - image_info = pycoco_utils.create_image_info(im_id, relative_img_path, im_size) + img_path = os.path.join(chunk_dir, "rgb", "{im_id:06d}.jpg").format( + im_id=im_id + ) + relative_img_path = os.path.relpath( + img_path, os.path.dirname(coco_gt_path) + ) + im_size = ( + bpy.context.scene.render.resolution_x, + bpy.context.scene.render.resolution_y, + ) + image_info = pycoco_utils.create_image_info( + im_id, relative_img_path, im_size + ) coco_scene_output["images"].append(image_info) gt_info = scene_gt_info[scene_view] # Go through each instance in view for idx, inst in enumerate(inst_list): - category_info = inst['obj_id'] - visibility = gt_info[idx]['visib_fract'] + category_info = inst["obj_id"] + visibility = 
gt_info[idx]["visib_fract"] # Add ignore flag for objects smaller than 10% visible ignore_gt = visibility < 0.1 mask_visib_p = os.path.join( - chunk_dir, 'mask_visib', - '{im_id:06d}_{gt_id:06d}.png').format(im_id=im_id, gt_id=idx) + chunk_dir, "mask_visib", "{im_id:06d}_{gt_id:06d}.png" + ).format(im_id=im_id, gt_id=idx) mask_full_p = os.path.join( - chunk_dir, 'mask', '{im_id:06d}_{gt_id:06d}.png').format(im_id=im_id, gt_id=idx) + chunk_dir, "mask", "{im_id:06d}_{gt_id:06d}.png" + ).format(im_id=im_id, gt_id=idx) binary_inst_mask_visib = inout.load_depth(mask_visib_p).astype(bool) if binary_inst_mask_visib.sum() < 1: @@ -956,16 +1238,73 @@ def calc_gt_coco(chunk_dirs: List[str], dataset_objects: List[MeshObject], start binary_inst_mask_full = inout.load_depth(mask_full_p).astype(bool) if binary_inst_mask_full.sum() < 1: continue - bounding_box = pycoco_utils.bbox_from_binary_mask(binary_inst_mask_full) + bounding_box = pycoco_utils.bbox_from_binary_mask( + binary_inst_mask_full + ) annotation_info = pycoco_utils.create_annotation_info( - segmentation_id, im_id, category_info, binary_inst_mask_visib, bounding_box, tolerance=2, - ignore=ignore_gt) + segmentation_id, + im_id, + category_info, + binary_inst_mask_visib, + bounding_box, + tolerance=2, + ignore=ignore_gt, + ) if annotation_info is not None: coco_scene_output["annotations"].append(annotation_info) segmentation_id += 1 - with open(coco_gt_path, 'w', encoding='utf-8') as output_json_file: + with open(coco_gt_path, "w", encoding="utf-8") as output_json_file: json.dump(coco_scene_output, output_json_file) + + @staticmethod + def write_edges(chunk_dirs: List[str], edges: List[np.ndarray]) -> None: + """Writes rendered edge images to files + + Args: + chunk_dirs (List[str]): contains path strings to chunk directories + edges (List[np.ndarray]): contains rendered edge images + """ + # This import is done inside to avoid having the requirement that BlenderProc depends on the bop_toolkit + # pylint: disable=import-outside-toplevel + from bop_toolkit_lib import misc + + misc.log("Saving edge renders to disk") + + for chunk_dir in chunk_dirs: + os_agnostic_path = os.path.normpath(chunk_dir) + edges_dir = os.path.join(os_agnostic_path, "edges") + misc.ensure_dir(edges_dir) + + # Load info about the GT poses (e.g. visibility) for the current scene. 
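+            # (Illustrative note, not from the original patch: scene_gt determines
+            # how many instances each view has; one edge image is written per
+            # instance, mirroring the mask/ layout, e.g. edges/000000_000000.png,
+            # edges/000000_000001.png, ...)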
+            scene_gt_fpath = os.path.join(os_agnostic_path, "scene_gt.json")
+            scene_gt = _BopWriterUtility.load_json(scene_gt_fpath, keys_to_int=True)
+
+            # Sort by im_id to ensure deterministic order
+            sorted_scene_views = sorted(scene_gt.items(), key=lambda x: int(x[0]))
+
+            edge_counter = 0  # Index in the 'edges' list
+
+            for scene_view_str, inst_list in sorted_scene_views:
+                im_id = int(scene_view_str)
+
+                for idx, _ in enumerate(inst_list):
+                    filename = f"{im_id:06d}_{idx:06d}.png"
+                    filepath = os.path.join(edges_dir, filename)
+
+                    # Skip if file already exists
+                    if os.path.exists(filepath):
+                        continue
+
+                    # Make sure we have a valid edge to write
+                    if edge_counter >= len(edges):
+                        break
+
+                    try:
+                        cv2.imwrite(filepath, edges[edge_counter])
+                    except Exception as e:
+                        print(f"Error writing {filename}: {e}")
+                    edge_counter += 1
diff --git a/examples/datasets/bop_challenge/main_tless_random.py b/examples/datasets/bop_challenge/main_tless_random.py
index c30c3d407..d596aa299 100644
--- a/examples/datasets/bop_challenge/main_tless_random.py
+++ b/examples/datasets/bop_challenge/main_tless_random.py
@@ -2,45 +2,91 @@
 import argparse
 import os
 import numpy as np
+from time import time
+
+
+start_time = time()

 parser = argparse.ArgumentParser()
-parser.add_argument('bop_parent_path', help="Path to the bop datasets parent directory")
-parser.add_argument('cc_textures_path', default="resources/cctextures", help="Path to downloaded cc textures")
-parser.add_argument('output_dir', help="Path to where the final files will be saved ")
-parser.add_argument('--num_scenes', type=int, default=2000, help="How many scenes with 25 images each to generate")
+parser.add_argument("bop_parent_path", help="Path to the bop datasets parent directory")
+parser.add_argument(
+    "cc_textures_path",
+    default="resources/cctextures",
+    help="Path to downloaded cc textures",
+)
+parser.add_argument("output_dir", help="Path to where the final files will be saved ")
+parser.add_argument(
+    "--num_scenes",
+    type=int,
+    default=2000,
+    help="How many scenes with 25 images each to generate",
+)
 args = parser.parse_args()

 bproc.init()

 # load bop objects into the scene
-target_bop_objs = bproc.loader.load_bop_objs(bop_dataset_path = os.path.join(args.bop_parent_path, 'tless'), model_type = 'cad', mm2m = True)
+target_bop_objs = bproc.loader.load_bop_objs(
+    bop_dataset_path=os.path.join(args.bop_parent_path, "tless"),
+    model_type="cad",
+    object_model_unit="mm",
+)

 # load distractor bop objects
-itodd_dist_bop_objs = bproc.loader.load_bop_objs(bop_dataset_path = os.path.join(args.bop_parent_path, 'itodd'), mm2m = True)
-ycbv_dist_bop_objs = bproc.loader.load_bop_objs(bop_dataset_path = os.path.join(args.bop_parent_path, 'ycbv'), mm2m = True)
-hb_dist_bop_objs = bproc.loader.load_bop_objs(bop_dataset_path = os.path.join(args.bop_parent_path, 'hb'), mm2m = True)
+itodd_dist_bop_objs = bproc.loader.load_bop_objs(
+    bop_dataset_path=os.path.join(args.bop_parent_path, "itodd"), object_model_unit="mm"
+)
+ycbv_dist_bop_objs = bproc.loader.load_bop_objs(
+    bop_dataset_path=os.path.join(args.bop_parent_path, "ycbv"), object_model_unit="mm"
+)
+hb_dist_bop_objs = bproc.loader.load_bop_objs(
+    bop_dataset_path=os.path.join(args.bop_parent_path, "hb"), object_model_unit="mm"
+)

 # load BOP dataset intrinsics
-bproc.loader.load_bop_intrinsics(bop_dataset_path = os.path.join(args.bop_parent_path, 'tless'))
+bproc.loader.load_bop_intrinsics(
+    bop_dataset_path=os.path.join(args.bop_parent_path, "tless")
+)

 # set shading and hide objects
-for obj in (target_bop_objs + itodd_dist_bop_objs + ycbv_dist_bop_objs + hb_dist_bop_objs):
-    obj.set_shading_mode('auto')
+for obj in (
+    target_bop_objs + itodd_dist_bop_objs + ycbv_dist_bop_objs + hb_dist_bop_objs
+):
+    obj.set_shading_mode("auto")
     obj.hide(True)
-
+
 # create room
-room_planes = [bproc.object.create_primitive('PLANE', scale=[2, 2, 1]),
-               bproc.object.create_primitive('PLANE', scale=[2, 2, 1], location=[0, -2, 2], rotation=[-1.570796, 0, 0]),
-               bproc.object.create_primitive('PLANE', scale=[2, 2, 1], location=[0, 2, 2], rotation=[1.570796, 0, 0]),
-               bproc.object.create_primitive('PLANE', scale=[2, 2, 1], location=[2, 0, 2], rotation=[0, -1.570796, 0]),
-               bproc.object.create_primitive('PLANE', scale=[2, 2, 1], location=[-2, 0, 2], rotation=[0, 1.570796, 0])]
+room_planes = [
+    bproc.object.create_primitive("PLANE", scale=[2, 2, 1]),
+    bproc.object.create_primitive(
+        "PLANE", scale=[2, 2, 1], location=[0, -2, 2], rotation=[-1.570796, 0, 0]
+    ),
+    bproc.object.create_primitive(
+        "PLANE", scale=[2, 2, 1], location=[0, 2, 2], rotation=[1.570796, 0, 0]
+    ),
+    bproc.object.create_primitive(
+        "PLANE", scale=[2, 2, 1], location=[2, 0, 2], rotation=[0, -1.570796, 0]
+    ),
+    bproc.object.create_primitive(
+        "PLANE", scale=[2, 2, 1], location=[-2, 0, 2], rotation=[0, 1.570796, 0]
+    ),
+]
 for plane in room_planes:
-    plane.enable_rigidbody(False, collision_shape='BOX', mass=1.0, friction = 100.0, linear_damping = 0.99, angular_damping = 0.99)
+    plane.enable_rigidbody(
+        False,
+        collision_shape="BOX",
+        mass=1.0,
+        friction=100.0,
+        linear_damping=0.99,
+        angular_damping=0.99,
+    )

 # sample light color and strength from ceiling
-light_plane = bproc.object.create_primitive('PLANE', scale=[3, 3, 1], location=[0, 0, 10])
-light_plane.set_name('light_plane')
-light_plane_material = bproc.material.create('light_material')
+light_plane = bproc.object.create_primitive(
+    "PLANE", scale=[3, 3, 1], location=[0, 0, 10]
+)
+light_plane.set_name("light_plane")
+light_plane_material = bproc.material.create("light_material")

 # sample point light on shell
 light_point = bproc.types.Light()
@@ -49,13 +95,20 @@

 # load cc_textures
 cc_textures = bproc.loader.load_ccmaterials(args.cc_textures_path)

-# Define a function that samples 6-DoF poses
-def sample_pose_func(obj: bproc.types.MeshObject):
-    min = np.random.uniform([-0.3, -0.3, 0.0], [-0.2, -0.2, 0.0])
-    max = np.random.uniform([0.2, 0.2, 0.4], [0.3, 0.3, 0.6])
-    obj.set_location(np.random.uniform(min, max))
-    obj.set_rotation_euler(bproc.sampler.uniformSO3())
-
+
+def sample_pose_func(sample_obj: bproc.types.MeshObject):
+    """
+    Randomly samples a 6-DoF pose for a given mesh object.
+
+    :param sample_obj: The MeshObject to transform. Location is sampled from a
+        bounded 3D space, and rotation is uniformly sampled over SO(3).
+ """ + min_val = np.random.uniform([-0.3, -0.3, 0.0], [-0.2, -0.2, 0.0]) + max_val = np.random.uniform([0.2, 0.2, 0.4], [0.3, 0.3, 0.6]) + sample_obj.set_location(np.random.uniform(min_val, max_val)) + sample_obj.set_rotation_euler(bproc.sampler.uniformSO3()) + + # activate depth rendering without antialiasing and set amount of samples for color rendering bproc.renderer.enable_depth_output(activate_antialiasing=False) bproc.renderer.set_max_amount_of_samples(50) @@ -63,33 +116,54 @@ def sample_pose_func(obj: bproc.types.MeshObject): for i in range(args.num_scenes): # Sample bop objects for a scene - sampled_target_bop_objs = list(np.random.choice(target_bop_objs, size=20, replace=False)) - sampled_distractor_bop_objs = list(np.random.choice(itodd_dist_bop_objs, size=2, replace=False)) - sampled_distractor_bop_objs += list(np.random.choice(ycbv_dist_bop_objs, size=2, replace=False)) - sampled_distractor_bop_objs += list(np.random.choice(hb_dist_bop_objs, size=2, replace=False)) + sampled_target_bop_objs = list( + np.random.choice(target_bop_objs, size=20, replace=False) + ) + sampled_distractor_bop_objs = list( + np.random.choice(itodd_dist_bop_objs, size=2, replace=False) + ) + sampled_distractor_bop_objs += list( + np.random.choice(ycbv_dist_bop_objs, size=2, replace=False) + ) + sampled_distractor_bop_objs += list( + np.random.choice(hb_dist_bop_objs, size=2, replace=False) + ) # Randomize materials and set physics - for obj in (sampled_target_bop_objs + sampled_distractor_bop_objs): + for obj in sampled_target_bop_objs + sampled_distractor_bop_objs: mat = obj.get_materials()[0] - if obj.get_cp("bop_dataset_name") in ['itodd', 'tless']: - grey_col = np.random.uniform(0.1, 0.9) - mat.set_principled_shader_value("Base Color", [grey_col, grey_col, grey_col, 1]) + if obj.get_cp("bop_dataset_name") in ["itodd", "tless"]: + grey_col = np.random.uniform(0.1, 0.9) + mat.set_principled_shader_value( + "Base Color", [grey_col, grey_col, grey_col, 1] + ) mat.set_principled_shader_value("Roughness", np.random.uniform(0, 0.5)) - if obj.get_cp("bop_dataset_name") == 'itodd': + if obj.get_cp("bop_dataset_name") == "itodd": mat.set_principled_shader_value("Metallic", np.random.uniform(0.5, 1.0)) - if obj.get_cp("bop_dataset_name") == 'tless': - mat.set_principled_shader_value("Specular IOR Level", np.random.uniform(0.3, 1.0)) + if obj.get_cp("bop_dataset_name") == "tless": + mat.set_principled_shader_value( + "Specular IOR Level", np.random.uniform(0.3, 1.0) + ) mat.set_principled_shader_value("Metallic", np.random.uniform(0, 0.5)) - obj.enable_rigidbody(True, mass=1.0, friction = 100.0, linear_damping = 0.99, angular_damping = 0.99) + obj.enable_rigidbody( + True, mass=1.0, friction=100.0, linear_damping=0.99, angular_damping=0.99 + ) obj.hide(False) - + # Sample two light sources - light_plane_material.make_emissive(emission_strength=np.random.uniform(3,6), - emission_color=np.random.uniform([0.5, 0.5, 0.5, 1.0], [1.0, 1.0, 1.0, 1.0])) + light_plane_material.make_emissive( + emission_strength=np.random.uniform(3, 6), + emission_color=np.random.uniform([0.5, 0.5, 0.5, 1.0], [1.0, 1.0, 1.0, 1.0]), + ) light_plane.replace_materials(light_plane_material) - light_point.set_color(np.random.uniform([0.5,0.5,0.5],[1,1,1])) - location = bproc.sampler.shell(center = [0, 0, 0], radius_min = 1, radius_max = 1.5, - elevation_min = 5, elevation_max = 89) + light_point.set_color(np.random.uniform([0.5, 0.5, 0.5], [1, 1, 1])) + location = bproc.sampler.shell( + center=[0, 0, 0], + radius_min=1, + radius_max=1.5, + 
+        elevation_min=5,
+        elevation_max=89,
+    )
     light_point.set_location(location)

     # sample CC Texture and assign to room planes
@@ -97,56 +171,89 @@ def sample_pose_func(obj: bproc.types.MeshObject):
     for plane in room_planes:
         plane.replace_materials(random_cc_texture)

+    # Sample object poses and check collisions
+    bproc.object.sample_poses(
+        objects_to_sample=sampled_target_bop_objs + sampled_distractor_bop_objs,
+        sample_pose_func=sample_pose_func,
+        max_tries=1000,
+    )

-    # Sample object poses and check collisions
-    bproc.object.sample_poses(objects_to_sample = sampled_target_bop_objs + sampled_distractor_bop_objs,
-                              sample_pose_func = sample_pose_func,
-                              max_tries = 1000)
-
     # Physics Positioning
-    bproc.object.simulate_physics_and_fix_final_poses(min_simulation_time=3,
-                                                      max_simulation_time=10,
-                                                      check_object_interval=1,
-                                                      substeps_per_frame = 20,
-                                                      solver_iters=25)
+    bproc.object.simulate_physics_and_fix_final_poses(
+        min_simulation_time=3,
+        max_simulation_time=10,
+        check_object_interval=1,
+        substeps_per_frame=20,
+        solver_iters=25,
+    )

     # BVH tree used for camera obstacle checks
-    bop_bvh_tree = bproc.object.create_bvh_tree_multi_objects(sampled_target_bop_objs + sampled_distractor_bop_objs)
-
+    bop_bvh_tree = bproc.object.create_bvh_tree_multi_objects(
+        sampled_target_bop_objs + sampled_distractor_bop_objs
+    )
+
+    camera_poses = []
     cam_poses = 0
     while cam_poses < 25:
         # Sample location
-        location = bproc.sampler.shell(center = [0, 0, 0],
-                                       radius_min = 0.65,
-                                       radius_max = 0.94,
-                                       elevation_min = 5,
-                                       elevation_max = 89)
+        location = bproc.sampler.shell(
+            center=[0, 0, 0],
+            radius_min=0.65,
+            radius_max=0.94,
+            elevation_min=5,
+            elevation_max=89,
+        )
         # Determine point of interest in scene as the object closest to the mean of a subset of objects
-        poi = bproc.object.compute_poi(np.random.choice(sampled_target_bop_objs, size=15, replace=False))
+        poi = bproc.object.compute_poi(
+            np.random.choice(sampled_target_bop_objs, size=15, replace=False)
+        )
         # Compute rotation based on vector going from location towards poi
-        rotation_matrix = bproc.camera.rotation_from_forward_vec(poi - location, inplane_rot=np.random.uniform(-3.14159, 3.14159))
+        rotation_matrix = bproc.camera.rotation_from_forward_vec(
+            poi - location, inplane_rot=np.random.uniform(-3.14159, 3.14159)
+        )
         # Add homog cam pose based on location and rotation
-        cam2world_matrix = bproc.math.build_transformation_mat(location, rotation_matrix)
-
-        # Check that obstacles are at least 0.3 meter away from the camera and make sure the view interesting enough
-        if bproc.camera.perform_obstacle_in_view_check(cam2world_matrix, {"min": 0.3}, bop_bvh_tree):
+        cam2world_matrix = bproc.math.build_transformation_mat(
+            location, rotation_matrix
+        )
+
+        # Check that obstacles are at least 0.3 meter away from the camera
+        # Make sure the view is interesting enough
+        if bproc.camera.perform_obstacle_in_view_check(
+            cam2world_matrix, {"min": 0.3}, bop_bvh_tree
+        ):
            # Persist camera pose
            bproc.camera.add_camera_pose(cam2world_matrix, frame=cam_poses)
+            camera_poses.append(cam2world_matrix)
            cam_poses += 1

-    # render the whole pipeline
+    # Render the whole pipeline
    data = bproc.renderer.render()
+    # Render only the edges
+    data["edges"] = bproc.renderer.render_edges(
+        target_objects=sampled_target_bop_objs, camera_poses=camera_poses
+    )

    # Write data in bop format
-    bproc.writer.write_bop(os.path.join(args.output_dir, 'bop_data'),
-                           target_objects = sampled_target_bop_objs,
-                           dataset = 'tless',
-                           depth_scale = 0.1,
-                           depths = data["depth"],
-                           colors = 
data["colors"], - color_file_format = "JPEG", - ignore_dist_thres = 10) - - for obj in (sampled_target_bop_objs + sampled_distractor_bop_objs): + bproc.writer.write_bop( + os.path.join(args.output_dir, "bop_data"), + target_objects=sampled_target_bop_objs, + dataset="tless", + depth_scale=0.1, + depths=data["depth"], + colors=data["colors"], + edges=data["edges"], + color_file_format="JPEG", + ignore_dist_thres=10, + ) + + for obj in sampled_target_bop_objs + sampled_distractor_bop_objs: obj.disable_rigidbody() obj.hide(True) + + +runtime = time() - start_time +# Convert runtime to minutes and seconds format +minutes = int(runtime // 60) +seconds = runtime % 60 + +# Print runtime in minutes and seconds format +print(f"{'#'*50}\nTook {minutes} minutes and {seconds:.2f} seconds\n{'#'*50}") From 3973377d05779d0da57d064cd8e9ef62b8c2b854 Mon Sep 17 00:00:00 2001 From: Matvey Ivanov Date: Thu, 24 Jul 2025 10:50:04 +0200 Subject: [PATCH 2/5] fixed formatting for easier RP merge --- blenderproc/api/renderer/__init__.py | 5 +- .../python/renderer/RendererUtility.py | 530 ++++-------- blenderproc/python/writer/BopWriterUtility.py | 806 ++++++------------ .../bop_challenge/main_tless_random.py | 260 ++---- 4 files changed, 536 insertions(+), 1065 deletions(-) diff --git a/blenderproc/api/renderer/__init__.py b/blenderproc/api/renderer/__init__.py index cdd2edf23..abb6d8839 100644 --- a/blenderproc/api/renderer/__init__.py +++ b/blenderproc/api/renderer/__init__.py @@ -1,8 +1,9 @@ from blenderproc.python.renderer.RendererUtility import set_denoiser, set_light_bounces, \ set_cpu_threads, toggle_stereo, set_simplify_subdivision_render, set_noise_threshold, \ set_max_amount_of_samples, enable_distance_output, enable_depth_output, enable_normals_output, \ - enable_diffuse_color_output, map_file_format_to_file_ending, render, set_output_format, enable_motion_blur, \ - enable_segmentation_output, set_world_background, set_render_devices, enable_experimental_features, toggle_light_tree + enable_diffuse_color_output, map_file_format_to_file_ending, render, render_edges, set_output_format, \ + enable_motion_blur, enable_segmentation_output, set_world_background, set_render_devices, \ + enable_experimental_features, toggle_light_tree from blenderproc.python.renderer.SegMapRendererUtility import render_segmap from blenderproc.python.renderer.FlowRendererUtility import render_optical_flow from blenderproc.python.renderer.NOCSRendererUtility import render_nocs diff --git a/blenderproc/python/renderer/RendererUtility.py b/blenderproc/python/renderer/RendererUtility.py index cff6a0d11..b8cca667a 100644 --- a/blenderproc/python/renderer/RendererUtility.py +++ b/blenderproc/python/renderer/RendererUtility.py @@ -56,35 +56,24 @@ def set_denoiser(denoiser: Optional[str]): denoise_node = nodes.new("CompositorNodeDenoise") # Link nodes - render_layer_node = Utility.get_the_one_node_with_type( - nodes, "CompositorNodeRLayers" - ) - composite_node = Utility.get_the_one_node_with_type( - nodes, "CompositorNodeComposite" - ) - Utility.insert_node_instead_existing_link( - links, - render_layer_node.outputs["Image"], - denoise_node.inputs["Image"], - denoise_node.outputs["Image"], - composite_node.inputs["Image"], - ) - - links.new(render_layer_node.outputs["DiffCol"], denoise_node.inputs["Albedo"]) - links.new(render_layer_node.outputs["Normal"], denoise_node.inputs["Normal"]) + render_layer_node = Utility.get_the_one_node_with_type(nodes, 'CompositorNodeRLayers') + composite_node = Utility.get_the_one_node_with_type(nodes, 
'CompositorNodeComposite') + Utility.insert_node_instead_existing_link(links, + render_layer_node.outputs['Image'], + denoise_node.inputs['Image'], + denoise_node.outputs['Image'], + composite_node.inputs['Image']) + + links.new(render_layer_node.outputs['DiffCol'], denoise_node.inputs['Albedo']) + links.new(render_layer_node.outputs['Normal'], denoise_node.inputs['Normal']) else: raise Exception("No such denoiser: " + denoiser) -def set_light_bounces( - diffuse_bounces: Optional[int] = None, - glossy_bounces: Optional[int] = None, - ao_bounces_render: Optional[int] = None, - max_bounces: Optional[int] = None, - transmission_bounces: Optional[int] = None, - transparent_max_bounces: Optional[int] = None, - volume_bounces: Optional[int] = None, -): +def set_light_bounces(diffuse_bounces: Optional[int] = None, glossy_bounces: Optional[int] = None, + ao_bounces_render: Optional[int] = None, max_bounces: Optional[int] = None, + transmission_bounces: Optional[int] = None, transparent_max_bounces: Optional[int] = None, + volume_bounces: Optional[int] = None): """ Sets the number of light bounces that should be used by the raytracing renderer. Default values are defined in DefaultConfig.py @@ -115,7 +104,7 @@ def set_light_bounces( def set_cpu_threads(num_threads: int): - """Sets the number of CPU cores to use simultaneously while rendering. + """ Sets the number of CPU cores to use simultaneously while rendering. :param num_threads: The number of threads to use. If 0 is given the number is automatically detected based on the cpu cores. @@ -129,7 +118,7 @@ def set_cpu_threads(num_threads: int): def toggle_stereo(enable: bool): - """Enables/Disables stereoscopy. + """ Enables/Disables stereoscopy. :param enable: True, if stereoscopy should be enabled. """ @@ -137,9 +126,8 @@ def toggle_stereo(enable: bool): if enable: bpy.context.scene.render.views_format = "STEREO_3D" - def toggle_light_tree(enable: bool): - """Enables/Disables blender's light tree for rendering. + """ Enables/Disables blender's light tree for rendering. Enabling the light tree reduces the noise in scenes with many point lights, however it increases the render time per sample. @@ -149,24 +137,21 @@ def toggle_light_tree(enable: bool): """ bpy.context.scene.cycles.use_light_tree = enable - def set_simplify_subdivision_render(simplify_subdivision_render: int): - """Sets global maximum subdivision level during rendering to speedup rendering. + """ Sets global maximum subdivision level during rendering to speedup rendering. :param simplify_subdivision_render: The maximum subdivision level. If 0 is given, simplification of scene is disabled. """ if simplify_subdivision_render > 0: bpy.context.scene.render.use_simplify = True - bpy.context.scene.render.simplify_subdivision_render = ( - simplify_subdivision_render - ) + bpy.context.scene.render.simplify_subdivision_render = simplify_subdivision_render else: bpy.context.scene.render.use_simplify = False def set_noise_threshold(noise_threshold: float): - """Configures the adaptive sampling, the noise threshold is typically between 0.1 and 0.001. + """ Configures the adaptive sampling, the noise threshold is typically between 0.1 and 0.001. Adaptive sampling automatically decreases the number of samples per pixel based on estimated level of noise. We do not recommend setting the noise threshold value to zero and therefore turning off the adaptive sampling. 
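# Hedged usage sketch (not part of the diff above) of the sampling and denoising
# knobs this hunk series reformats. These are the public bproc.renderer wrappers of
# the utilities in this file; the concrete values are illustrative assumptions, not
# recommendations from the patch.
import blenderproc as bproc

bproc.init()
bproc.renderer.set_denoiser("INTEL")          # wires the CompositorNodeDenoise node as above
bproc.renderer.set_noise_threshold(0.01)      # adaptive sampling; zero would disable it (not recommended)
bproc.renderer.set_max_amount_of_samples(50)  # upper bound, rarely reached with a low noise threshold
bproc.renderer.set_light_bounces(diffuse_bounces=3, glossy_bounces=3, max_bounces=3)
bproc.renderer.toggle_light_tree(True)        # fewer noisy samples in scenes with many point lights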
@@ -185,7 +170,7 @@ def set_noise_threshold(noise_threshold: float): def set_max_amount_of_samples(samples: int): - """Sets the maximum number of samples to render for each pixel. + """ Sets the maximum number of samples to render for each pixel. This maximum amount is usually not reached if the noise threshold is low enough. If the noise threshold was set to 0, then only the maximum number of samples is used (We do not recommend this). @@ -194,15 +179,11 @@ def set_max_amount_of_samples(samples: int): bpy.context.scene.cycles.samples = samples -def enable_distance_output( - activate_antialiasing: bool, - output_dir: Optional[str] = None, - file_prefix: str = "distance_", - output_key: str = "distance", - antialiasing_distance_max: float = None, - convert_to_depth: bool = False, -): - """Enables writing distance images. +def enable_distance_output(activate_antialiasing: bool, output_dir: Optional[str] = None, + file_prefix: str = "distance_", + output_key: str = "distance", antialiasing_distance_max: float = None, + convert_to_depth: bool = False): + """ Enables writing distance images. :param activate_antialiasing: If this is True the final image will be anti-aliased @@ -215,26 +196,18 @@ def enable_distance_output( image to a depth image """ if not activate_antialiasing: - return enable_depth_output( - activate_antialiasing, - output_dir, - file_prefix, - output_key, - convert_to_distance=True, - ) + return enable_depth_output(activate_antialiasing, output_dir, file_prefix, output_key, convert_to_distance=True) if output_dir is None: output_dir = Utility.get_temporary_directory() if antialiasing_distance_max is None: antialiasing_distance_max = DefaultConfig.antialiasing_distance_max if GlobalStorage.is_in_storage("distance_output_is_enabled"): - msg = ( - "The distance enable function can not be called twice. Either you called it twice or you used the " - "enable_depth_output with activate_antialiasing=True, which internally calls this function. This is " - "currently not supported, but there is an easy way to solve this, you can use the " - "bproc.postprocessing.dist2depth and depth2dist function on the output of the renderer and generate " - "the antialiased depth image yourself." - ) + msg = "The distance enable function can not be called twice. Either you called it twice or you used the " \ + "enable_depth_output with activate_antialiasing=True, which internally calls this function. This is " \ + "currently not supported, but there is an easy way to solve this, you can use the " \ + "bproc.postprocessing.dist2depth and depth2dist function on the output of the renderer and generate " \ + "the antialiased depth image yourself." 
raise RuntimeError(msg) GlobalStorage.add("distance_output_is_enabled", True) @@ -245,9 +218,7 @@ def enable_distance_output( tree = bpy.context.scene.node_tree links = tree.links # Use existing render layer - render_layer_node = Utility.get_the_one_node_with_type( - tree.nodes, "CompositorNodeRLayers" - ) + render_layer_node = Utility.get_the_one_node_with_type(tree.nodes, 'CompositorNodeRLayers') # Set mist pass limits bpy.context.scene.world.mist_settings.start = 0 @@ -257,11 +228,11 @@ def enable_distance_output( bpy.context.view_layer.use_pass_mist = True # Enable distance pass # Create a mapper node to map from 0-1 to SI units mapper_node = tree.nodes.new("CompositorNodeMapRange") - links.new(render_layer_node.outputs["Mist"], mapper_node.inputs["Value"]) + links.new(render_layer_node.outputs["Mist"], mapper_node.inputs['Value']) # map the values 0-1 to range distance_start to distance_range - mapper_node.inputs["From Max"].default_value = 1.0 - mapper_node.inputs["To Min"].default_value = 0 - mapper_node.inputs["To Max"].default_value = antialiasing_distance_max + mapper_node.inputs['From Max'].default_value = 1.0 + mapper_node.inputs['To Min'].default_value = 0 + mapper_node.inputs['To Max'].default_value = antialiasing_distance_max # Build output node output_file = tree.nodes.new("CompositorNodeOutputFile") @@ -275,30 +246,23 @@ def enable_distance_output( combine_color = tree.nodes.new("CompositorNodeCombineColor") combine_color.mode = "HSV" links.new(mapper_node.outputs["Value"], combine_color.inputs[2]) - + # Feed the Z-Buffer or Mist output of the render layer to the input of the file IO layer - links.new(combine_color.outputs["Image"], output_file.inputs["Image"]) - - Utility.add_output_entry( - { - "key": output_key, - "path": os.path.join(output_dir, file_prefix) + "%04d" + ".exr", - "version": "2.0.0", - "trim_redundant_channels": True, - "convert_to_depth": convert_to_depth, - } - ) + links.new(combine_color.outputs["Image"], output_file.inputs['Image']) + + Utility.add_output_entry({ + "key": output_key, + "path": os.path.join(output_dir, file_prefix) + "%04d" + ".exr", + "version": "2.0.0", + "trim_redundant_channels": True, + "convert_to_depth": convert_to_depth + }) return None -def enable_depth_output( - activate_antialiasing: bool, - output_dir: Optional[str] = None, - file_prefix: str = "depth_", - output_key: str = "depth", - antialiasing_distance_max: float = None, - convert_to_distance: bool = False, -): +def enable_depth_output(activate_antialiasing: bool, output_dir: Optional[str] = None, file_prefix: str = "depth_", + output_key: str = "depth", antialiasing_distance_max: float = None, + convert_to_distance: bool = False): """ Enables writing depth images. Depth images will be written in the form of .exr files during the next rendering. @@ -313,25 +277,17 @@ def enable_depth_output( image to a distance image """ if activate_antialiasing: - return enable_distance_output( - activate_antialiasing, - output_dir, - file_prefix, - output_key, - antialiasing_distance_max, - convert_to_depth=True, - ) + return enable_distance_output(activate_antialiasing, output_dir, file_prefix, output_key, + antialiasing_distance_max, convert_to_depth=True) if output_dir is None: output_dir = Utility.get_temporary_directory() if GlobalStorage.is_in_storage("depth_output_is_enabled"): - msg = ( - "The depth enable function can not be called twice. 
Either you called it twice or you used the " - "enable_distance_output with activate_antialiasing=False, which internally calls this function. This " - "is currently not supported, but there is an easy way to solve this, you can use the " - "bproc.postprocessing.dist2depth and depth2dist function on the output of the renderer and generate " - "the antialiased distance image yourself." - ) + msg = "The depth enable function can not be called twice. Either you called it twice or you used the " \ + "enable_distance_output with activate_antialiasing=False, which internally calls this function. This " \ + "is currently not supported, but there is an easy way to solve this, you can use the " \ + "bproc.postprocessing.dist2depth and depth2dist function on the output of the renderer and generate " \ + "the antialiased distance image yourself." raise RuntimeError(msg) GlobalStorage.add("depth_output_is_enabled", True) @@ -341,9 +297,7 @@ def enable_depth_output( tree = bpy.context.scene.node_tree links = tree.links # Use existing render layer - render_layer_node = Utility.get_the_one_node_with_type( - tree.nodes, "CompositorNodeRLayers" - ) + render_layer_node = Utility.get_the_one_node_with_type(tree.nodes, 'CompositorNodeRLayers') # Enable z-buffer pass bpy.context.view_layer.use_pass_z = True @@ -360,28 +314,23 @@ def enable_depth_output( combine_color = tree.nodes.new("CompositorNodeCombineColor") combine_color.mode = "HSV" links.new(render_layer_node.outputs["Depth"], combine_color.inputs[2]) - + # Feed the Z-Buffer RGB output from the Combine Color node to the input of the file IO layer links.new(combine_color.outputs["Image"], output_file.inputs["Image"]) - Utility.add_output_entry( - { - "key": output_key, - "path": os.path.join(output_dir, file_prefix) + "%04d" + ".exr", - "version": "2.0.0", - "trim_redundant_channels": True, - "convert_to_distance": convert_to_distance, - } - ) + Utility.add_output_entry({ + "key": output_key, + "path": os.path.join(output_dir, file_prefix) + "%04d" + ".exr", + "version": "2.0.0", + "trim_redundant_channels": True, + "convert_to_distance": convert_to_distance + }) return None -def enable_normals_output( - output_dir: Optional[str] = None, - file_prefix: str = "normals_", - output_key: str = "normals", -): - """Enables writing normal images. +def enable_normals_output(output_dir: Optional[str] = None, file_prefix: str = "normals_", + output_key: str = "normals"): + """ Enables writing normal images. Normal images will be written in the form of .exr files during the next rendering. 
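# Standalone bpy illustration (a sketch, not part of the patch) of the Combine Color
# workaround used for the depth pass above: many EXR readers cannot handle
# single-float-channel files, so the scalar Z value is packed into the V slot of an
# HSV Combine Color node before it reaches the file output node. The base path is a
# hypothetical example.
import bpy

scene = bpy.context.scene
scene.use_nodes = True
tree = scene.node_tree
bpy.context.view_layer.use_pass_z = True
render_layers = tree.nodes.new("CompositorNodeRLayers")
combine_color = tree.nodes.new("CompositorNodeCombineColor")
combine_color.mode = "HSV"
output_file = tree.nodes.new("CompositorNodeOutputFile")
output_file.base_path = "/tmp"
output_file.format.file_format = "OPEN_EXR"
tree.links.new(render_layers.outputs["Depth"], combine_color.inputs[2])   # scalar -> V channel
tree.links.new(combine_color.outputs["Image"], output_file.inputs["Image"])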
@@ -399,9 +348,7 @@ def enable_normals_output( links = tree.links # Use existing render layer - render_layer_node = Utility.get_the_one_node_with_type( - tree.nodes, "CompositorNodeRLayers" - ) + render_layer_node = Utility.get_the_one_node_with_type(tree.nodes, 'CompositorNodeRLayers') separate_rgba = tree.nodes.new("CompositorNodeSepRGBA") space_between_nodes_x = 200 @@ -447,18 +394,14 @@ def enable_normals_output( channel_results[channel] = second_add # set the matrix accordingly - rot_around_x_axis = mathutils.Matrix.Rotation(math.radians(-90.0), 4, "X") + rot_around_x_axis = mathutils.Matrix.Rotation(math.radians(-90.0), 4, 'X') for frame in range(bpy.context.scene.frame_start, bpy.context.scene.frame_end): used_rotation_matrix = CameraUtility.get_camera_pose(frame) @ rot_around_x_axis for row_index in range(3): for column_index in range(3): current_multiply = multiplication_values[row_index][column_index] - current_multiply.inputs[1].default_value = used_rotation_matrix[ - column_index - ][row_index] - current_multiply.inputs[1].keyframe_insert( - data_path="default_value", frame=frame - ) + current_multiply.inputs[1].default_value = used_rotation_matrix[column_index][row_index] + current_multiply.inputs[1].keyframe_insert(data_path='default_value', frame=frame) offset = 8 * space_between_nodes_x for index, channel in enumerate(c_channels): multiply = tree.nodes.new("CompositorNodeMath") @@ -490,24 +433,19 @@ def enable_normals_output( output_file.location.x = space_between_nodes_x * 15 links.new(combine_rgba.outputs["Image"], output_file.inputs["Image"]) - Utility.add_output_entry( - { - "key": output_key, - "path": os.path.join(output_dir, file_prefix) + "%04d" + ".exr", - "version": "2.0.0", - } - ) + Utility.add_output_entry({ + "key": output_key, + "path": os.path.join(output_dir, file_prefix) + "%04d" + ".exr", + "version": "2.0.0" + }) -def enable_segmentation_output( - map_by: Union[str, List[str]] = "category_id", - default_values: Optional[Dict[str, Any]] = None, - pass_alpha_threshold: float = 0.05, - output_dir: Optional[str] = None, - file_prefix: str = "segmap_", - output_key: str = "segmap", -): - """Enables segmentation output by certain keys. +def enable_segmentation_output(map_by: Union[str, List[str]] = "category_id", + default_values: Optional[Dict[str, Any]] = None, + pass_alpha_threshold: float = 0.05, + output_dir: Optional[str] = None, + file_prefix: str = "segmap_", output_key: str = "segmap"): + """ Enables segmentation output by certain keys. The key instances is used, if a mapping of every object in the scene to an integer is requested. These integers are assigned randomly and do not follow any system. They are consisted for one rendering call. 
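# Hedged usage sketch of the segmentation output whose node setup follows below.
# The map_by keys and the default_values entry are assumptions based on the
# docstring: "instance" requests the random per-object integer ids it describes,
# "category_id" the custom property of the same name.
import blenderproc as bproc

bproc.init()
# ... load objects and set the custom property, e.g. obj.set_cp("category_id", 1) ...
bproc.renderer.enable_segmentation_output(map_by=["instance", "category_id"],
                                          default_values={"category_id": 0})
data = bproc.renderer.render()  # segmentation maps come back alongside the color frames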
@@ -541,26 +479,24 @@ def enable_segmentation_output( tree = bpy.context.scene.node_tree links = tree.links - render_layer_node = tree.nodes.get("Render Layers") + render_layer_node = tree.nodes.get('Render Layers') if output_dir is None: output_dir = Utility.get_temporary_directory() - output_node = tree.nodes.new("CompositorNodeOutputFile") + output_node = tree.nodes.new('CompositorNodeOutputFile') output_node.base_path = output_dir output_node.format.file_format = "OPEN_EXR" output_node.file_slots.values()[0].path = file_prefix - Utility.add_output_entry( - { - "key": output_key, - "path": os.path.join(output_dir, file_prefix) + "%04d" + ".exr", - "version": "3.0.0", - "trim_redundant_channels": True, - "is_semantic_segmentation": True, - "semantic_segmentation_mapping": map_by, - "semantic_segmentation_default_values": default_values, - } - ) + Utility.add_output_entry({ + "key": output_key, + "path": os.path.join(output_dir, file_prefix) + "%04d" + ".exr", + "version": "3.0.0", + "trim_redundant_channels": True, + "is_semantic_segmentation": True, + "semantic_segmentation_mapping": map_by, + "semantic_segmentation_default_values": default_values + }) # Feed the output through 'Combine Color' node, to create 3 channel RGB grayscale image as a lot of # EXR readers don't support single float channel EXR files and Blender writes depth as a single @@ -568,21 +504,16 @@ def enable_segmentation_output( combine_color = tree.nodes.new("CompositorNodeCombineColor") combine_color.mode = "HSV" links.new(render_layer_node.outputs["IndexOB"], combine_color.inputs[2]) - + links.new(combine_color.outputs["Image"], output_node.inputs["Image"]) # set the threshold low to avoid noise in alpha materials - bpy.context.scene.view_layers["ViewLayer"].pass_alpha_threshold = ( - pass_alpha_threshold - ) + bpy.context.scene.view_layers["ViewLayer"].pass_alpha_threshold = pass_alpha_threshold -def enable_diffuse_color_output( - output_dir: Optional[str] = None, - file_prefix: str = "diffuse_", - output_key: str = "diffuse", -): - """Enables writing diffuse color (albedo) images. +def enable_diffuse_color_output(output_dir: Optional[str] = None, file_prefix: str = "diffuse_", + output_key: str = "diffuse"): + """ Enables writing diffuse color (albedo) images. Diffuse color images will be written in the form of .png files during the next rendering. @@ -599,45 +530,39 @@ def enable_diffuse_color_output( links = tree.links bpy.context.view_layer.use_pass_diffuse_color = True - render_layer_node = Utility.get_the_one_node_with_type( - tree.nodes, "CompositorNodeRLayers" - ) + render_layer_node = Utility.get_the_one_node_with_type(tree.nodes, 'CompositorNodeRLayers') final_output = render_layer_node.outputs["DiffCol"] - output_file = tree.nodes.new("CompositorNodeOutputFile") + output_file = tree.nodes.new('CompositorNodeOutputFile') output_file.base_path = output_dir output_file.format.file_format = "PNG" output_file.file_slots.values()[0].path = file_prefix - links.new(final_output, output_file.inputs["Image"]) + links.new(final_output, output_file.inputs['Image']) - Utility.add_output_entry( - { - "key": output_key, - "path": os.path.join(output_dir, file_prefix) + "%04d" + ".png", - "version": "2.0.0", - } - ) + Utility.add_output_entry({ + "key": output_key, + "path": os.path.join(output_dir, file_prefix) + "%04d" + ".png", + "version": "2.0.0" + }) def map_file_format_to_file_ending(file_format: str) -> str: - """Returns the files endings for a given blender output format. 
+ """ Returns the files endings for a given blender output format. :param file_format: The blender file format. :return: The file ending. """ - if file_format == "PNG": + if file_format == 'PNG': return ".png" - if file_format == "JPEG": + if file_format == 'JPEG': return ".jpg" - if file_format == "OPEN_EXR": + if file_format == 'OPEN_EXR': return ".exr" raise RuntimeError(f"Unknown Image Type {file_format}") -def _progress_bar_thread( - pipe_out: int, stdout: IO, total_frames: int, num_samples: int -): - """The thread rendering the progress bar +def _progress_bar_thread(pipe_out: int, stdout: IO, total_frames: int, num_samples: int): + """ The thread rendering the progress bar :param pipe_out: The pipe output delivering blenders debug messages. :param stdout: The stdout to which the progress bar should be written. @@ -654,9 +579,7 @@ def _progress_bar_thread( # Initializes progress bar using given stdout with Progress(*columns, console=Console(file=stdout), transient=True) as progress: complete_task = progress.add_task("[green]Total", total=total_frames, status="") - frame_task = progress.add_task( - "[yellow]Current frame", total=num_samples, status="" - ) + frame_task = progress.add_task("[yellow]Current frame", total=num_samples, status="") # Continuously read blenders debug messages current_line = "" @@ -675,36 +598,22 @@ def _progress_bar_thread( # Check if its a line we can use (starts with "Fra:") if current_line.startswith("Fra:"): # Extract current frame number and use it to set the progress bar - frame_number = int(current_line.split()[0][len("Fra:") :]) + frame_number = int(current_line.split()[0][len("Fra:"):]) frames_completed = frame_number - starting_frame_number progress.update(complete_task, completed=frames_completed) - progress.update( - complete_task, - status=f"Rendering frame {frames_completed + 1} of {total_frames}", - ) + progress.update(complete_task, status=f"Rendering frame {frames_completed + 1} of {total_frames}") # Split line into columns status_columns = [col.strip() for col in current_line.split("|")] if "Scene, ViewLayer" in status_columns: # If we are currently at "Scene, ViewLayer", use everything afterwards - status = " | ".join( - status_columns[ - status_columns.index("Scene, ViewLayer") + 1 : - ] - ) + status = " | ".join(status_columns[status_columns.index("Scene, ViewLayer") + 1:]) # If we are currently rendering, update the progress if status.startswith("Sample"): - progress.update( - frame_task, - completed=int( - status[len("Sample") :].split("/", maxsplit=1)[0] - ), - ) + progress.update(frame_task, completed=int(status[len("Sample"):].split("/", maxsplit=1)[0])) elif "Compositing" in status_columns: # If we are at "Compositing", use everything afterwards including "Compositing" - status = " | ".join( - status_columns[status_columns.index("Compositing") :] - ) + status = " | ".join(status_columns[status_columns.index("Compositing"):]) # Set render progress to complete progress.update(frame_task, completed=num_samples) else: @@ -720,10 +629,8 @@ def _progress_bar_thread( @contextmanager -def _render_progress_bar( - pipe_out: int, pipe_in: int, stdout: IO, total_frames: int, enabled: bool = True -): - """Shows a progress bar visualizing the render progress. +def _render_progress_bar(pipe_out: int, pipe_in: int, stdout: IO, total_frames: int, enabled: bool = True): + """ Shows a progress bar visualizing the render progress. :param pipe_out: The pipe output delivering blenders debug messages. 
:param pipe_in: The input of the pipe, necessary to send the end character. @@ -732,16 +639,14 @@ def _render_progress_bar( :param enabled: If False, no progress bar is shown. """ if enabled: - thread = threading.Thread( - target=_progress_bar_thread, - args=(pipe_out, stdout, total_frames, bpy.context.scene.cycles.samples), - ) + thread = threading.Thread(target=_progress_bar_thread, + args=(pipe_out, stdout, total_frames, bpy.context.scene.cycles.samples)) thread.start() try: yield finally: # Send final character, so the thread knows to stop - w = os.fdopen(pipe_in, "w") + w = os.fdopen(pipe_in, 'w') w.write("\b") w.close() thread.join() @@ -749,15 +654,10 @@ def _render_progress_bar( yield -def render( - output_dir: Optional[str] = None, - file_prefix: str = "rgb_", - output_key: Optional[str] = "colors", - load_keys: Optional[Set[str]] = None, - return_data: bool = True, - keys_with_alpha_channel: Optional[Set[str]] = None, - verbose: bool = False, -) -> Dict[str, Union[np.ndarray, List[np.ndarray]]]: +def render(output_dir: Optional[str] = None, file_prefix: str = "rgb_", output_key: Optional[str] = "colors", + load_keys: Optional[Set[str]] = None, return_data: bool = True, + keys_with_alpha_channel: Optional[Set[str]] = None, + verbose: bool = False) -> Dict[str, Union[np.ndarray, List[np.ndarray]]]: """ Render all frames. This will go through all frames from scene.frame_start to scene.frame_end and render each of them. @@ -775,23 +675,16 @@ def render( if output_dir is None: output_dir = Utility.get_temporary_directory() if load_keys is None: - load_keys = {"colors", "distance", "normals", "diffuse", "depth", "segmap"} - keys_with_alpha_channel = ( - {"colors"} if bpy.context.scene.render.film_transparent else None - ) + load_keys = {'colors', 'distance', 'normals', 'diffuse', 'depth', 'segmap'} + keys_with_alpha_channel = {'colors'} if bpy.context.scene.render.film_transparent else None if output_key is not None: - Utility.add_output_entry( - { - "key": output_key, - "path": os.path.join(output_dir, file_prefix) - + "%04d" - + map_file_format_to_file_ending( - bpy.context.scene.render.image_settings.file_format - ), - "version": "2.0.0", - } - ) + Utility.add_output_entry({ + "key": output_key, + "path": os.path.join(output_dir, file_prefix) + "%04d" + + map_file_format_to_file_ending(bpy.context.scene.render.image_settings.file_format), + "version": "2.0.0" + }) load_keys.add(output_key) bpy.context.scene.render.filepath = os.path.join(output_dir, file_prefix) @@ -799,19 +692,13 @@ def render( # Skip if there is nothing to render if bpy.context.scene.frame_end != bpy.context.scene.frame_start: if len(get_all_blender_mesh_objects()) == 0: - raise Exception( - "There are no mesh-objects to render, " - "please load an object before invoking the renderer." 
- ) + raise Exception("There are no mesh-objects to render, " + "please load an object before invoking the renderer.") # Print what is rendered total_frames = bpy.context.scene.frame_end - bpy.context.scene.frame_start if load_keys: - registered_output_keys = [ - output["key"] for output in Utility.get_registered_outputs() - ] - keys_to_render = sorted( - [key for key in load_keys if key in registered_output_keys] - ) + registered_output_keys = [output["key"] for output in Utility.get_registered_outputs()] + keys_to_render = sorted([key for key in load_keys if key in registered_output_keys]) print(f"Rendering {total_frames} frames of {', '.join(keys_to_render)}...") # As frame_end is pointing to the next free frame, decrease it by one, as @@ -822,9 +709,7 @@ def render( pipe_out, pipe_in = os.pipe() begin = time.time() with stdout_redirected(pipe_in, enabled=not verbose) as stdout: - with _render_progress_bar( - pipe_out, pipe_in, stdout, total_frames, enabled=not verbose - ): + with _render_progress_bar(pipe_out, pipe_in, stdout, total_frames, enabled=not verbose): bpy.ops.render.render(animation=True, write_still=True) # Close Pipes to prevent having unclosed file handles @@ -841,29 +726,17 @@ def render( # Revert changes bpy.context.scene.frame_end += 1 else: - raise RuntimeError( - "No camera poses have been registered, therefore nothing can be rendered. A camera " - "pose can be registered via bproc.camera.add_camera_pose()." - ) + raise RuntimeError("No camera poses have been registered, therefore nothing can be rendered. A camera " + "pose can be registered via bproc.camera.add_camera_pose().") - return ( - _WriterUtility.load_registered_outputs(load_keys, keys_with_alpha_channel) - if return_data - else {} - ) + return _WriterUtility.load_registered_outputs(load_keys, keys_with_alpha_channel) if return_data else {} -def set_output_format( - file_format: Optional[str] = None, - color_depth: Optional[int] = None, - enable_transparency: Optional[bool] = None, - jpg_quality: Optional[int] = None, - view_transform: Optional[str] = None, - look: Optional[str] = None, - exposure: Optional[float] = None, - gamma: Optional[float] = None, -): - """Sets the output format to use for rendering. Default values defined in DefaultConfig.py. +def set_output_format(file_format: Optional[str] = None, color_depth: Optional[int] = None, + enable_transparency: Optional[bool] = None, jpg_quality: Optional[int] = None, + view_transform: Optional[str] = None, look: Optional[str] = None, + exposure: Optional[float] = None, gamma: Optional[float] = None): + """ Sets the output format to use for rendering. Default values defined in DefaultConfig.py. :param file_format: The file format to use, e.q. "PNG", "JPEG" or "OPEN_EXR". :param color_depth: The color depth. 
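# Hedged sketch of a minimal render call against the loop reformatted above: at
# least one camera pose must be registered, otherwise render() raises the
# RuntimeError shown above. The scene file and output directory are hypothetical.
import numpy as np
import blenderproc as bproc

bproc.init()
objs = bproc.loader.load_obj("scene.obj")   # hypothetical scene file; add a light for a non-black image
bproc.camera.add_camera_pose(np.eye(4))     # one registered pose -> one rendered frame
data = bproc.renderer.render(output_dir="/tmp/bproc_rgb", file_prefix="rgb_")
print(len(data["colors"]), "color frames loaded")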
@@ -878,9 +751,7 @@ def set_output_format( if enable_transparency is not None: # In case a previous renderer changed these settings # Store as RGB by default unless the user specifies store_alpha as true in yaml - bpy.context.scene.render.image_settings.color_mode = ( - "RGBA" if enable_transparency else "RGB" - ) + bpy.context.scene.render.image_settings.color_mode = "RGBA" if enable_transparency else "RGB" # set the background as transparent if transparent_background is true in yaml bpy.context.scene.render.film_transparent = enable_transparency if file_format is not None: @@ -900,12 +771,9 @@ def set_output_format( bpy.context.scene.view_settings.gamma = gamma -def enable_motion_blur( - motion_blur_length: float = 0.5, - rolling_shutter_type: str = "NONE", - rolling_shutter_length: float = 0.1, -): - """Enables motion blur and sets rolling shutter. +def enable_motion_blur(motion_blur_length: float = 0.5, rolling_shutter_type: str = "NONE", + rolling_shutter_length: float = 0.1): + """ Enables motion blur and sets rolling shutter. :param motion_blur_length: Time taken in frames between shutter open and close. :param rolling_shutter_type: Type of rolling shutter effect. If "NONE", rolling shutter is disabled. @@ -919,13 +787,13 @@ def enable_motion_blur( def render_init(): - """Initializes the renderer. + """ Initializes the renderer. This enables the cycles renderer and sets some options to speedup rendering. """ bpy.context.scene.render.resolution_percentage = 100 # Lightning settings to reduce training time - bpy.context.scene.render.engine = "CYCLES" + bpy.context.scene.render.engine = 'CYCLES' bpy.context.scene.cycles.debug_bvh_type = "STATIC_BVH" bpy.context.scene.cycles.debug_use_spatial_splits = True @@ -934,7 +802,7 @@ def render_init(): def disable_all_denoiser(): - """Disables all denoiser. + """ Disables all denoiser. At the moment this includes the cycles and the intel denoiser. """ @@ -948,11 +816,9 @@ def disable_all_denoiser(): links = bpy.context.scene.node_tree.links # Go through all existing denoiser nodes - for denoiser_node in Utility.get_nodes_with_type( - nodes, "CompositorNodeDenoise" - ): - in_node = denoiser_node.inputs["Image"] - out_node = denoiser_node.outputs["Image"] + for denoiser_node in Utility.get_nodes_with_type(nodes, 'CompositorNodeDenoise'): + in_node = denoiser_node.inputs['Image'] + out_node = denoiser_node.outputs['Image'] # If it is fully included into the node tree if in_node.is_linked and out_node.is_linked: @@ -967,7 +833,7 @@ def disable_all_denoiser(): def set_world_background(color: List[float], strength: float = 1): - """Sets the color of blenders world background + """ Sets the color of blenders world background :param color: A three-dimensional list specifying the new color in floats. :param strength: The strength of the emitted background light. 
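# Hedged configuration sketch for the output-format and motion-blur helpers above;
# the values are illustrative assumptions. "TOP" is the Cycles rolling-shutter mode
# that scans scanlines from top to bottom, "NONE" disables the effect.
import blenderproc as bproc

bproc.init()
bproc.renderer.set_output_format(file_format="JPEG", jpg_quality=90,
                                 enable_transparency=False, gamma=1.0)
bproc.renderer.enable_motion_blur(motion_blur_length=0.5,
                                  rolling_shutter_type="TOP",
                                  rolling_shutter_length=0.1)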
@@ -978,24 +844,21 @@ def set_world_background(color: List[float], strength: float = 1): links = world.node_tree.links # Unlink any incoming link that would overwrite the default value - if len(nodes.get("Background").inputs["Color"].links) > 0: - links.remove(nodes.get("Background").inputs["Color"].links[0]) + if len(nodes.get("Background").inputs['Color'].links) > 0: + links.remove(nodes.get("Background").inputs['Color'].links[0]) - nodes.get("Background").inputs["Strength"].default_value = strength - nodes.get("Background").inputs["Color"].default_value = color + [1] + nodes.get("Background").inputs['Strength'].default_value = strength + nodes.get("Background").inputs['Color'].default_value = color + [1] def enable_experimental_features(): - """Enables experimental cycles features.""" - bpy.context.scene.cycles.feature_set = "EXPERIMENTAL" + """ Enables experimental cycles features. """ + bpy.context.scene.cycles.feature_set = 'EXPERIMENTAL' -def set_render_devices( - use_only_cpu: bool = False, - desired_gpu_device_type: Union[str, List[str]] = None, - desired_gpu_ids: Union[int, List[int]] = None, -): - """Configures the devices to use for rendering. +def set_render_devices(use_only_cpu: bool = False, desired_gpu_device_type: Union[str, List[str]] = None, + desired_gpu_ids: Union[int, List[int]] = None): + """ Configures the devices to use for rendering. :param use_only_cpu: If True, only the cpu is used for rendering. :param desired_gpu_device_type: One or multiple GPU device types to consider. If multiple are given, @@ -1013,9 +876,7 @@ def set_render_devices( mac_version = platform.mac_ver()[0] mac_version_numbers = [int(ele) for ele in mac_version.split(".")] # On recent macs, use METAL, otherwise use cpu only - if ( - mac_version_numbers[0] == 12 and mac_version_numbers[1] >= 3 - ) or mac_version_numbers[0] > 12: + if (mac_version_numbers[0] == 12 and mac_version_numbers[1] >= 3) or mac_version_numbers[0] > 12: desired_gpu_device_type = ["METAL"] else: desired_gpu_device_type = [] @@ -1034,14 +895,12 @@ def set_render_devices( if not desired_gpu_device_type or use_only_cpu: # Use only CPU bpy.context.scene.cycles.device = "CPU" - bpy.context.preferences.addons["cycles"].preferences.compute_device_type = ( - "NONE" - ) + bpy.context.preferences.addons['cycles'].preferences.compute_device_type = "NONE" print("Using only the CPU for rendering") else: # Use GPU bpy.context.scene.cycles.device = "GPU" - preferences = bpy.context.preferences.addons["cycles"].preferences + preferences = bpy.context.preferences.addons['cycles'].preferences # Go over all specified device types found = False @@ -1050,35 +909,27 @@ def set_render_devices( devices = preferences.get_devices_for_type(device_type) if devices: # Set device type - bpy.context.preferences.addons[ - "cycles" - ].preferences.compute_device_type = device_type + bpy.context.preferences.addons['cycles'].preferences.compute_device_type = device_type # Go over all devices with that type found = False for i, device in enumerate(devices): # Only use gpus with specified ids if desired_gpu_ids is None or i in desired_gpu_ids: - print( - f"Device {device.name} of type {device.type} found and used." - ) + print(f"Device {device.name} of type {device.type} found and used.") device.use = True found = True else: device.use = False if not found: - raise RuntimeError( - f"The specified gpu ids lead to no selected gpu at all. 
Valid gpu ids are " - f"{list(range(len(devices)))}" - ) + raise RuntimeError(f"The specified gpu ids lead to no selected gpu at all. Valid gpu ids are " + f"{list(range(len(devices)))}") break if not found: bpy.context.scene.cycles.device = "CPU" - bpy.context.preferences.addons["cycles"].preferences.compute_device_type = ( - "NONE" - ) + bpy.context.preferences.addons['cycles'].preferences.compute_device_type = "NONE" print("Using only the CPU for rendering") @@ -1108,12 +959,8 @@ def load_edge_render(temp_filepath: str) -> np.ndarray: return temp_img -def freestyle_config( - line_thickness: float, - crease_angle: float, - view_layer: bpy.types.ViewLayer, - scene: bpy.types.Scene, -) -> None: +def freestyle_config(line_thickness: float, crease_angle: float, view_layer: bpy.types.ViewLayer, + scene: bpy.types.Scene) -> None: """ Configures Blender Freestyle settings for stylized edge rendering. @@ -1198,12 +1045,8 @@ def freestyle_render_config(scene: bpy.types.Scene) -> None: tree.links.new(render_layers.outputs["Freestyle"], composite.inputs["Image"]) -def remap_target_objects_to_scene_by_geometry( - original_targets: List[MeshObject], - target_scene: bpy.types.Scene, - location_tol: float = 1e-4, - size_tol: float = 1e-4, -) -> List[MeshObject]: +def remap_target_objects_to_scene_by_geometry(original_targets: List[MeshObject], target_scene: bpy.types.Scene, + location_tol: float = 1e-4, size_tol: float = 1e-4) -> List[MeshObject]: """ Attempts to remap a list of mesh objects to equivalent objects in a different scene based on geometry. @@ -1263,9 +1106,8 @@ def get_mesh_stats(mesh: MeshObject) -> Tuple[str, int, int, int]: return mesh_stats -def reduce_object_complexity( - meshes: List[MeshObject], dissolve_angle: float, connect_non_planar_angle: float -) -> List[MeshObject]: +def reduce_object_complexity(meshes: List[MeshObject], dissolve_angle: float, + connect_non_planar_angle: float) -> List[MeshObject]: """ Reduces mesh complexity by dissolving small-angle geometry and splitting non-planar faces. @@ -1311,9 +1153,7 @@ def reduce_object_complexity( return reduced_meshes -def render_edges( - target_objects: List[bpy.types.Object], camera_poses: List[np.ndarray] -) -> List[np.ndarray]: +def render_edges(target_objects: List[bpy.types.Object], camera_poses: List[np.ndarray]) -> List[np.ndarray]: """ Renders only the Freestyle edge pass for the given target objects from multiple camera poses. 
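# Hedged end-to-end sketch of the edge pipeline this file introduces: render the
# regular passes, then the Freestyle-only edge images for the same registered poses,
# and hand both to the BOP writer. `target_objs` and `camera_poses` are assumed to
# come from the scene setup (see the example script at the end of this patch).
import os
import blenderproc as bproc

data = bproc.renderer.render()
data["edges"] = bproc.renderer.render_edges(target_objects=target_objs,
                                            camera_poses=camera_poses)  # list of 4x4 cam2world matrices
bproc.writer.write_bop(os.path.join("output", "bop_data"),
                       target_objects=target_objs,
                       dataset="tless",
                       depths=data["depth"],
                       colors=data["colors"],
                       edges=data["edges"],
                       color_file_format="JPEG")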
diff --git a/blenderproc/python/writer/BopWriterUtility.py b/blenderproc/python/writer/BopWriterUtility.py index 59739f9d4..93b5014b4 100644 --- a/blenderproc/python/writer/BopWriterUtility.py +++ b/blenderproc/python/writer/BopWriterUtility.py @@ -22,35 +22,20 @@ from blenderproc.python.writer.WriterUtility import _WriterUtility from blenderproc.python.types.LinkUtility import Link from blenderproc.python.utility.SetupUtility import SetupUtility -from blenderproc.python.utility.MathUtility import ( - change_target_coordinate_frame_of_transformation_matrix, -) +from blenderproc.python.utility.MathUtility import change_target_coordinate_frame_of_transformation_matrix # EGL is not available under windows if sys.platform in ["linux", "linux2"]: - os.environ["PYOPENGL_PLATFORM"] = "egl" - - -def write_bop( - output_dir: str, - target_objects: Optional[List[MeshObject]] = None, - depths: List[np.ndarray] = None, - colors: List[np.ndarray] = None, - edges: List[np.ndarray] = None, - color_file_format: str = "PNG", - dataset: str = "", - append_to_existing_output: bool = True, - depth_scale: float = 1.0, - jpg_quality: int = 95, - save_world2cam: bool = True, - ignore_dist_thres: float = 100.0, - m2mm: Optional[bool] = None, - annotation_unit: str = "mm", - frames_per_chunk: int = 1000, - calc_mask_info_coco: bool = True, - delta: float = 0.015, - num_worker: Optional[int] = 0, -): + os.environ['PYOPENGL_PLATFORM'] = 'egl' + + +def write_bop(output_dir: str, target_objects: Optional[List[MeshObject]] = None, + depths: List[np.ndarray] = None, colors: List[np.ndarray] = None, edges: List[np.ndarray] = None, + color_file_format: str = "PNG", dataset: str = "", append_to_existing_output: bool = True, + depth_scale: float = 1.0, jpg_quality: int = 95, save_world2cam: bool = True, + ignore_dist_thres: float = 100., m2mm: Optional[bool] = None, annotation_unit: str = 'mm', + frames_per_chunk: int = 1000, calc_mask_info_coco: bool = True, delta: float = 0.015, + num_worker: Optional[int] = 0): """Write the BOP data :param output_dir: Path to the output directory. @@ -82,8 +67,8 @@ def write_bop( # Output paths. dataset_dir = os.path.join(output_dir, dataset) - chunks_dir = os.path.join(dataset_dir, "train_pbr") - camera_path = os.path.join(dataset_dir, "camera.json") + chunks_dir = os.path.join(dataset_dir, 'train_pbr') + camera_path = os.path.join(dataset_dir, 'camera.json') # Create the output directory structure. if not os.path.exists(dataset_dir): @@ -97,10 +82,8 @@ def write_bop( dataset_objects = target_objects for obj in dataset_objects: if obj.is_hidden(): - print( - f"WARNING: The given object {obj.get_name()} is hidden. However, the bop writer will still add " - "coco annotations for it. If this is not desired, don't pass the object to the bop writer." - ) + print(f"WARNING: The given object {obj.get_name()} is hidden. However, the bop writer will still add " + "coco annotations for it. If this is not desired, don't pass the object to the bop writer.") elif dataset: dataset_objects = [] for obj in get_all_mesh_objects(): @@ -115,27 +98,23 @@ def write_bop( # Check if there is any object from the specified dataset. if not dataset_objects: - raise RuntimeError( - f"The scene does not contain any object from the specified dataset: {dataset}. " - f"Either remove the dataset parameter or assign custom property 'bop_dataset_name'" - f" to selected objects" - ) + raise RuntimeError(f"The scene does not contain any object from the specified dataset: {dataset}. 
" + f"Either remove the dataset parameter or assign custom property 'bop_dataset_name'" + f" to selected objects") if calc_mask_info_coco: # It might be that a chunk dir already exists where the writer appends frames. # If one (or multiple) more chunk dirs are created to save the rendered frames to, # mask/info/coco annotations need to be calculated for all of them - chunk_dirs = sorted(glob.glob(os.path.join(chunks_dir, "*"))) + chunk_dirs = sorted(glob.glob(os.path.join(chunks_dir, '*'))) chunk_dirs = [d for d in chunk_dirs if os.path.isdir(d)] last_chunk_dir = sorted(chunk_dirs)[-1] if chunk_dirs else None starting_chunk_id = 0 starting_frame_id = 0 if last_chunk_dir: - last_chunk_gt_fpath = os.path.join(last_chunk_dir, "scene_gt.json") - chunk_gt = _BopWriterUtility.load_json( - last_chunk_gt_fpath, keys_to_int=True - ) + last_chunk_gt_fpath = os.path.join(last_chunk_dir, 'scene_gt.json') + chunk_gt = _BopWriterUtility.load_json(last_chunk_gt_fpath, keys_to_int=True) # Current chunk and frame ID's. starting_chunk_id = int(os.path.basename(last_chunk_dir)) @@ -147,44 +126,30 @@ def write_bop( # Save the data. _BopWriterUtility.write_camera(camera_path, depth_scale=depth_scale) - assert annotation_unit in ["m", "dm", "cm", "mm"], ( - f"Invalid annotation unit: `{annotation_unit}`. Supported " - f"are 'm', 'dm', 'cm', 'mm'" - ) - annotation_scale = {"m": 1.0, "dm": 10.0, "cm": 100.0, "mm": 1000.0}[ - annotation_unit - ] + assert annotation_unit in ['m', 'dm', 'cm', 'mm'], (f"Invalid annotation unit: `{annotation_unit}`. Supported " + f"are 'm', 'dm', 'cm', 'mm'") + annotation_scale = {'m': 1., 'dm': 10., 'cm': 100., 'mm': 1000.}[annotation_unit] if m2mm is not None: - warnings.warn( - "WARNING: `m2mm` is deprecated, please use `annotation_scale='mm'` instead!" - ) - annotation_scale = 1000.0 - _BopWriterUtility.write_frames( - chunks_dir, - dataset_objects=dataset_objects, - depths=depths, - colors=colors, - color_file_format=color_file_format, - frames_per_chunk=frames_per_chunk, - annotation_scale=annotation_scale, - ignore_dist_thres=ignore_dist_thres, - save_world2cam=save_world2cam, - depth_scale=depth_scale, - jpg_quality=jpg_quality, - ) + warnings.warn("WARNING: `m2mm` is deprecated, please use `annotation_scale='mm'` instead!") + annotation_scale = 1000. 
+ _BopWriterUtility.write_frames(chunks_dir, dataset_objects=dataset_objects, depths=depths, colors=colors, + color_file_format=color_file_format, frames_per_chunk=frames_per_chunk, + annotation_scale=annotation_scale, ignore_dist_thres=ignore_dist_thres, + save_world2cam=save_world2cam, depth_scale=depth_scale, jpg_quality=jpg_quality) + # Determine for which directories mask_info_coco has to be calculated chunk_dirs = sorted(glob.glob(os.path.join(chunks_dir, "*"))) chunk_dirs = [d for d in chunk_dirs if os.path.isdir(d)] chunk_dir_ids = [os.path.basename(d) for d in chunk_dirs] chunk_dirs = chunk_dirs[chunk_dir_ids.index(f"{starting_chunk_id:06d}") :] + + # If any edge images are passed, write them to disk if edges: _BopWriterUtility.write_edges(chunk_dirs=chunk_dirs, edges=edges) if calc_mask_info_coco: # Set up the bop toolkit - SetupUtility.setup_pip( - ["git+https://github.com/thodan/bop_toolkit", "PyOpenGL==3.1.0"] - ) + SetupUtility.setup_pip(["git+https://github.com/thodan/bop_toolkit", "PyOpenGL==3.1.0"]) # determine which objects to add to the vsipy renderer # for numpy>=1.20, np.float is deprecated: https://numpy.org/doc/stable/release/1.20.0-notes.html#deprecations @@ -193,26 +158,20 @@ def write_bop( # convert all objects to trimesh objects trimesh_objects = {} for obj in dataset_objects: - if obj.get_cp("category_id") in trimesh_objects: + if obj.get_cp('category_id') in trimesh_objects: continue if isinstance(obj, Link): if not obj.visuals: continue if len(obj.visuals) > 1: - warnings.warn( - "BOP Writer only supports saving annotations of one visual mesh per Link" - ) + warnings.warn('BOP Writer only supports saving annotations of one visual mesh per Link') trimesh_obj = obj.mesh_as_trimesh() # here we also add the scale factor of the objects. the position of the pyrender camera will change based # on the initial scale factor of the objects and the saved annotation format - if not np.all( - np.isclose(np.array(obj.blender_obj.scale), obj.blender_obj.scale[0]) - ): - print( - "WARNING: the scale is not the same across all dimensions, writing bop_toolkit annotations with " - "the bop writer will fail!" 
- ) - trimesh_objects[obj.get_cp("category_id")] = trimesh_obj + if not np.all(np.isclose(np.array(obj.blender_obj.scale), obj.blender_obj.scale[0])): + print("WARNING: the scale is not the same across all dimensions, writing bop_toolkit annotations with " + "the bop writer will fail!") + trimesh_objects[obj.get_cp('category_id')] = trimesh_obj # Create pool and init each worker width = bpy.context.scene.render.resolution_x @@ -221,34 +180,17 @@ def write_bop( pool = None _BopWriterUtility._pyrender_init(width, height, trimesh_objects) else: - pool = Pool( - num_worker, - initializer=_BopWriterUtility._pyrender_init, - initargs=[width, height, trimesh_objects], - ) - - _BopWriterUtility.calc_gt_masks( - chunk_dirs=chunk_dirs, - starting_frame_id=starting_frame_id, - annotation_scale=annotation_scale, - delta=delta, - pool=pool, - ) - - _BopWriterUtility.calc_gt_info( - chunk_dirs=chunk_dirs, - starting_frame_id=starting_frame_id, - annotation_scale=annotation_scale, - delta=delta, - pool=pool, - ) - - _BopWriterUtility.calc_gt_coco( - chunk_dirs=chunk_dirs, - dataset_objects=dataset_objects, - starting_frame_id=starting_frame_id, - ) + pool = Pool(num_worker, initializer=_BopWriterUtility._pyrender_init, initargs=[width, height, trimesh_objects]) + _BopWriterUtility.calc_gt_masks(chunk_dirs=chunk_dirs, starting_frame_id=starting_frame_id, + annotation_scale=annotation_scale, delta=delta, pool=pool) + + _BopWriterUtility.calc_gt_info(chunk_dirs=chunk_dirs, starting_frame_id=starting_frame_id, + annotation_scale=annotation_scale, delta=delta, pool=pool) + + _BopWriterUtility.calc_gt_coco(chunk_dirs=chunk_dirs, dataset_objects=dataset_objects, + starting_frame_id=starting_frame_id) + if pool is not None: pool.close() pool.join() @@ -257,10 +199,8 @@ def write_bop( _BopWriterUtility._pyrender_cleanup() -def bop_pose_to_pyrender_coordinate_system( - cam_R_m2c: np.ndarray, cam_t_m2c: np.ndarray -) -> np.ndarray: - """Converts an object pose in bop format to pyrender camera coordinate system +def bop_pose_to_pyrender_coordinate_system(cam_R_m2c: np.ndarray, cam_t_m2c: np.ndarray) -> np.ndarray: + """ Converts an object pose in bop format to pyrender camera coordinate system (https://pyrender.readthedocs.io/en/latest/examples/cameras.html). :param cam_R_m2c: 3x3 Rotation matrix. @@ -272,16 +212,14 @@ def bop_pose_to_pyrender_coordinate_system( bop_pose[:3, :3] = cam_R_m2c bop_pose[:3, 3] = cam_t_m2c - return change_target_coordinate_frame_of_transformation_matrix( - bop_pose, ["X", "-Y", "-Z"] - ) + return change_target_coordinate_frame_of_transformation_matrix(bop_pose, ["X", "-Y", "-Z"]) class _BopWriterUtility: - """Saves the synthesized dataset in the BOP format. The dataset is split - into chunks which are saved as individual "scenes". For more details - about the BOP format, visit the BOP toolkit docs: - https://github.com/thodan/bop_toolkit/blob/master/docs/bop_datasets_format.md + """ Saves the synthesized dataset in the BOP format. The dataset is split + into chunks which are saved as individual "scenes". For more details + about the BOP format, visit the BOP toolkit docs: + https://github.com/thodan/bop_toolkit/blob/master/docs/bop_datasets_format.md """ @@ -297,9 +235,9 @@ def load_json(path, keys_to_int=False): # Keys to integers. 
def convert_keys_to_int(x): - return {int(k) if k.lstrip("-").isdigit() else k: v for k, v in x.items()} + return {int(k) if k.lstrip('-').isdigit() else k: v for k, v in x.items()} - with open(path, "r", encoding="utf-8") as f: + with open(path, 'r', encoding="utf-8") as f: if keys_to_int: content = json.load(f, object_hook=convert_keys_to_int) else: @@ -309,32 +247,32 @@ def convert_keys_to_int(x): @staticmethod def save_json(path, content): - """Saves the content to a JSON file in a human-friendly format. + """ Saves the content to a JSON file in a human-friendly format. From the BOP toolkit (https://github.com/thodan/bop_toolkit). :param path: Path to the output JSON file. :param content: Dictionary/list to save. """ text = "" - with open(path, "w", encoding="utf-8") as file: + with open(path, 'w', encoding="utf-8") as file: if isinstance(content, dict): - text += "{\n" + text += '{\n' content_sorted = sorted(content.items(), key=lambda x: x[0]) for elem_id, (k, v) in enumerate(content_sorted): text += f' "{k}": {json.dumps(v, sort_keys=True)}' if elem_id != len(content) - 1: - text += "," - text += "\n" - text += "}" + text += ',' + text += '\n' + text += '}' file.write(text) elif isinstance(content, list): - text += "[\n" + text += '[\n' for elem_id, elem in enumerate(content): - text += f" {json.dumps(elem, sort_keys=True)}" + text += f' {json.dumps(elem, sort_keys=True)}' if elem_id != len(content) - 1: - text += "," - text += "\n" - text += "]" + text += ',' + text += '\n' + text += ']' file.write(text) else: json.dump(content, file, sort_keys=True) @@ -348,46 +286,40 @@ def save_depth(path: str, im: np.ndarray): :param im: ndarray with the depth image to save. """ if not path.endswith(".png"): - raise ValueError("Only PNG format is currently supported.") + raise ValueError('Only PNG format is currently supported.') im[im > 65535] = 65535 im_uint16 = np.round(im).astype(np.uint16) # PyPNG library can save 16-bit PNG and is faster than imageio.imwrite(). w_depth = png.Writer(im.shape[1], im.shape[0], greyscale=True, bitdepth=16) - with open(path, "wb") as f: + with open(path, 'wb') as f: w_depth.write(f, np.reshape(im_uint16, (-1, im.shape[1]))) @staticmethod def write_camera(camera_path: str, depth_scale: float = 1.0): - """Writes camera.json into dataset_dir. + """ Writes camera.json into dataset_dir. :param camera_path: Path to camera.json :param depth_scale: Multiply the uint16 output depth image with this factor to get depth in mm. """ # Use second frame for reading intrinsics (due to backwards compatibility) bpy.context.scene.frame_set(1) - cam_K = _WriterUtility.get_cam_attribute(bpy.context.scene.camera, "cam_K") - camera = { - "cx": cam_K[0][2], - "cy": cam_K[1][2], - "depth_scale": depth_scale, - "fx": cam_K[0][0], - "fy": cam_K[1][1], - "height": bpy.context.scene.render.resolution_y, - "width": bpy.context.scene.render.resolution_x, - } + cam_K = _WriterUtility.get_cam_attribute(bpy.context.scene.camera, 'cam_K') + camera = {'cx': cam_K[0][2], + 'cy': cam_K[1][2], + 'depth_scale': depth_scale, + 'fx': cam_K[0][0], + 'fy': cam_K[1][1], + 'height': bpy.context.scene.render.resolution_y, + 'width': bpy.context.scene.render.resolution_x} _BopWriterUtility.save_json(camera_path, camera) @staticmethod - def get_frame_gt( - dataset_objects: List[bpy.types.Mesh], - unit_scaling: float, - ignore_dist_thres: float, - destination_frame: Optional[List[str]] = None, - ): - """Returns GT pose annotations between active camera and objects. 
- + def get_frame_gt(dataset_objects: List[bpy.types.Mesh], unit_scaling: float, ignore_dist_thres: float, + destination_frame: Optional[List[str]] = None): + """ Returns GT pose annotations between active camera and objects. + :param dataset_objects: Save annotations for these objects. :param unit_scaling: 1000. for outputting poses in mm :param ignore_dist_thres: Distance between camera and object after which object is ignored. @@ -398,13 +330,8 @@ def get_frame_gt( if destination_frame is None: destination_frame = ["X", "-Y", "-Z"] - H_c2w_opencv = Matrix( - _WriterUtility.get_cam_attribute( - bpy.context.scene.camera, - "cam2world_matrix", - local_frame_change=destination_frame, - ) - ) + H_c2w_opencv = Matrix(_WriterUtility.get_cam_attribute(bpy.context.scene.camera, 'cam2world_matrix', + local_frame_change=destination_frame)) frame_gt = [] for obj in dataset_objects: @@ -412,15 +339,11 @@ def get_frame_gt( if not obj.visuals: continue if len(obj.visuals) > 1: - warnings.warn( - "BOP Writer only supports saving poses of one visual mesh per Link" - ) + warnings.warn('BOP Writer only supports saving poses of one visual mesh per Link') H_m2w = Matrix(obj.get_visual_local2world_mats()[0]) else: H_m2w = Matrix(obj.get_local2world_mat()) - assert obj.has_cp( - "category_id" - ), f"{obj.get_name()} object has no custom property 'category_id'" + assert obj.has_cp("category_id"), f"{obj.get_name()} object has no custom property 'category_id'" cam_H_m2c = H_c2w_opencv.inverted() @ H_m2w cam_R_m2c = cam_H_m2c.to_quaternion().to_matrix() @@ -429,39 +352,25 @@ def get_frame_gt( # ignore examples that fell through the plane if not np.linalg.norm(list(cam_t_m2c)) > ignore_dist_thres: cam_t_m2c = list(cam_t_m2c * unit_scaling) - frame_gt.append( - { - "cam_R_m2c": list(cam_R_m2c[0]) - + list(cam_R_m2c[1]) - + list(cam_R_m2c[2]), - "cam_t_m2c": cam_t_m2c, - "obj_id": ( - obj.get_cp("category_id") - if not isinstance(obj, Link) - else obj.visuals[0].get_cp("category_id") - ), - } - ) + frame_gt.append({ + 'cam_R_m2c': list(cam_R_m2c[0]) + list(cam_R_m2c[1]) + list(cam_R_m2c[2]), + 'cam_t_m2c': cam_t_m2c, + 'obj_id': obj.get_cp("category_id") if not isinstance(obj, Link) else obj.visuals[0].get_cp( + 'category_id') + }) else: - print("ignored obj, ", obj.get_cp("category_id"), "because either ") - print( - '(1) it is further away than parameter "ignore_dist_thres: ",', - ignore_dist_thres, - ) - print("(e.g. because it fell through a plane during physics sim)") - print("or") - print("(2) the object pose has not been given in meters") + print('ignored obj, ', obj.get_cp("category_id"), 'because either ') + print('(1) it is further away than parameter "ignore_dist_thres: ",', ignore_dist_thres) + print('(e.g. because it fell through a plane during physics sim)') + print('or') + print('(2) the object pose has not been given in meters') return frame_gt @staticmethod - def get_frame_camera( - save_world2cam: bool, - depth_scale: float = 1.0, - unit_scaling: float = 1000.0, - destination_frame: Optional[List[str]] = None, - ): - """Returns camera parameters for the active camera. + def get_frame_camera(save_world2cam: bool, depth_scale: float = 1.0, unit_scaling: float = 1000., + destination_frame: Optional[List[str]] = None): + """ Returns camera parameters for the active camera. 
:param save_world2cam: If true, camera to world transformations "cam_R_w2c", "cam_t_w2c" are saved in scene_camera.json @@ -473,47 +382,31 @@ def get_frame_camera( if destination_frame is None: destination_frame = ["X", "-Y", "-Z"] - cam_K = _WriterUtility.get_cam_attribute(bpy.context.scene.camera, "cam_K") + cam_K = _WriterUtility.get_cam_attribute(bpy.context.scene.camera, 'cam_K') frame_camera_dict = { - "cam_K": cam_K[0] + cam_K[1] + cam_K[2], - "depth_scale": depth_scale, + 'cam_K': cam_K[0] + cam_K[1] + cam_K[2], + 'depth_scale': depth_scale } if save_world2cam: - H_c2w_opencv = Matrix( - _WriterUtility.get_cam_attribute( - bpy.context.scene.camera, - "cam2world_matrix", - local_frame_change=destination_frame, - ) - ) + H_c2w_opencv = Matrix(_WriterUtility.get_cam_attribute(bpy.context.scene.camera, 'cam2world_matrix', + local_frame_change=destination_frame)) H_w2c_opencv = H_c2w_opencv.inverted() R_w2c_opencv = H_w2c_opencv.to_quaternion().to_matrix() t_w2c_opencv = H_w2c_opencv.to_translation() * unit_scaling - frame_camera_dict["cam_R_w2c"] = ( - list(R_w2c_opencv[0]) + list(R_w2c_opencv[1]) + list(R_w2c_opencv[2]) - ) - frame_camera_dict["cam_t_w2c"] = list(t_w2c_opencv) + frame_camera_dict['cam_R_w2c'] = list(R_w2c_opencv[0]) + list(R_w2c_opencv[1]) + list(R_w2c_opencv[2]) + frame_camera_dict['cam_t_w2c'] = list(t_w2c_opencv) return frame_camera_dict @staticmethod - def write_frames( - chunks_dir: str, - dataset_objects: list, - depths: List[np.ndarray], - colors: List[np.ndarray], - color_file_format: str = "PNG", - depth_scale: float = 1.0, - frames_per_chunk: int = 1000, - annotation_scale: float = 1000.0, - ignore_dist_thres: float = 100.0, - save_world2cam: bool = True, - jpg_quality: int = 95, - ): + def write_frames(chunks_dir: str, dataset_objects: list, depths: List[np.ndarray], + colors: List[np.ndarray], color_file_format: str = "PNG", + depth_scale: float = 1.0, frames_per_chunk: int = 1000, annotation_scale: float = 1000., + ignore_dist_thres: float = 100., save_world2cam: bool = True, jpg_quality: int = 95): """Write each frame's ground truth into chunk directory in BOP format :param chunks_dir: Path to the output directory of the current chunk. @@ -531,27 +424,18 @@ def write_frames( specified format (see `annotation_format` in `write_bop` for further details). :param frames_per_chunk: Number of frames saved in each chunk (called scene in BOP) """ - if not depths and not colors: - print("No depth and color information in write_frames. Returning...") - return - - # Format of the depth and edge images. - depth_ext = ".png" - - rgb_tpath = os.path.join( - chunks_dir, "{chunk_id:06d}", "rgb", "{im_id:06d}" + "{im_type}" - ) - depth_tpath = os.path.join( - chunks_dir, "{chunk_id:06d}", "depth", "{im_id:06d}" + depth_ext - ) - chunk_camera_tpath = os.path.join( - chunks_dir, "{chunk_id:06d}", "scene_camera.json" - ) - chunk_gt_tpath = os.path.join(chunks_dir, "{chunk_id:06d}", "scene_gt.json") + + # Format of the depth images. + depth_ext = '.png' + + rgb_tpath = os.path.join(chunks_dir, '{chunk_id:06d}', 'rgb', '{im_id:06d}' + '{im_type}') + depth_tpath = os.path.join(chunks_dir, '{chunk_id:06d}', 'depth', '{im_id:06d}' + depth_ext) + chunk_camera_tpath = os.path.join(chunks_dir, '{chunk_id:06d}', 'scene_camera.json') + chunk_gt_tpath = os.path.join(chunks_dir, '{chunk_id:06d}', 'scene_gt.json') # Paths to the already existing chunk folders (such folders may exist # when appending to an existing dataset). 
-        chunk_dirs = sorted(glob.glob(os.path.join(chunks_dir, "*")))
+        chunk_dirs = sorted(glob.glob(os.path.join(chunks_dir, '*')))
         chunk_dirs = [d for d in chunk_dirs if os.path.isdir(d)]
 
         # Get ID's of the last already existing chunk and frame.
@@ -559,10 +443,8 @@ def write_frames(
         curr_frame_id = 0
         if len(chunk_dirs):
             last_chunk_dir = sorted(chunk_dirs)[-1]
-            last_chunk_gt_fpath = os.path.join(last_chunk_dir, "scene_gt.json")
-            chunk_gt = _BopWriterUtility.load_json(
-                last_chunk_gt_fpath, keys_to_int=True
-            )
+            last_chunk_gt_fpath = os.path.join(last_chunk_dir, 'scene_gt.json')
+            chunk_gt = _BopWriterUtility.load_json(last_chunk_gt_fpath, keys_to_int=True)
 
             # Last chunk and frame ID's.
             last_chunk_id = int(os.path.basename(last_chunk_dir))
@@ -581,24 +463,18 @@ def write_frames(
         if curr_frame_id != 0:
             # Load GT and camera info of the chunk we are appending to.
             chunk_gt = _BopWriterUtility.load_json(
-                chunk_gt_tpath.format(chunk_id=curr_chunk_id), keys_to_int=True
-            )
+                chunk_gt_tpath.format(chunk_id=curr_chunk_id), keys_to_int=True)
             chunk_camera = _BopWriterUtility.load_json(
-                chunk_camera_tpath.format(chunk_id=curr_chunk_id), keys_to_int=True
-            )
+                chunk_camera_tpath.format(chunk_id=curr_chunk_id), keys_to_int=True)
 
         # Go through all frames.
         num_new_frames = bpy.context.scene.frame_end - bpy.context.scene.frame_start
         if len(depths) != num_new_frames or len(colors) != num_new_frames:
-            raise Exception(
-                "The amount of images stored in the depths/colors does not correspond "
-                "to the amount of images specified by frame_start to frame_end."
-            )
-
-        for frame_id in range(
-            bpy.context.scene.frame_start, bpy.context.scene.frame_end
-        ):
+            raise Exception("The amount of images stored in the depths/colors does not correspond to the amount "
+                            "of images specified by frame_start to frame_end.")
+
+        for frame_id in range(bpy.context.scene.frame_start, bpy.context.scene.frame_end):
             # Activate frame.
             bpy.context.scene.frame_set(frame_id)
 
@@ -606,38 +482,26 @@ def write_frames(
             if curr_frame_id == 0:
                 chunk_gt = {}
                 chunk_camera = {}
-                os.makedirs(
-                    os.path.dirname(
-                        rgb_tpath.format(chunk_id=curr_chunk_id, im_id=0, im_type="PNG")
-                    )
-                )
-                os.makedirs(
-                    os.path.dirname(depth_tpath.format(chunk_id=curr_chunk_id, im_id=0))
-                )
+                os.makedirs(os.path.dirname(
+                    rgb_tpath.format(chunk_id=curr_chunk_id, im_id=0, im_type='PNG')))
+                os.makedirs(os.path.dirname(
+                    depth_tpath.format(chunk_id=curr_chunk_id, im_id=0)))
 
             # Get GT annotations and camera info for the current frame.
- chunk_gt[curr_frame_id] = _BopWriterUtility.get_frame_gt( - dataset_objects, annotation_scale, ignore_dist_thres - ) - chunk_camera[curr_frame_id] = _BopWriterUtility.get_frame_camera( - save_world2cam, depth_scale, annotation_scale - ) + chunk_gt[curr_frame_id] = _BopWriterUtility.get_frame_gt(dataset_objects, annotation_scale, + ignore_dist_thres) + chunk_camera[curr_frame_id] = _BopWriterUtility.get_frame_camera(save_world2cam, depth_scale, + annotation_scale) color_rgb = colors[frame_id] color_bgr = color_rgb.copy() color_bgr[..., :3] = color_bgr[..., :3][..., ::-1] - if color_file_format == "PNG": - rgb_fpath = rgb_tpath.format( - chunk_id=curr_chunk_id, im_id=curr_frame_id, im_type=".png" - ) + if color_file_format == 'PNG': + rgb_fpath = rgb_tpath.format(chunk_id=curr_chunk_id, im_id=curr_frame_id, im_type='.png') cv2.imwrite(rgb_fpath, color_bgr) - elif color_file_format == "JPEG": - rgb_fpath = rgb_tpath.format( - chunk_id=curr_chunk_id, im_id=curr_frame_id, im_type=".jpg" - ) - cv2.imwrite( - rgb_fpath, color_bgr, [int(cv2.IMWRITE_JPEG_QUALITY), jpg_quality] - ) + elif color_file_format == 'JPEG': + rgb_fpath = rgb_tpath.format(chunk_id=curr_chunk_id, im_id=curr_frame_id, im_type='.jpg') + cv2.imwrite(rgb_fpath, color_bgr, [int(cv2.IMWRITE_JPEG_QUALITY), jpg_quality]) depth = depths[frame_id] @@ -647,37 +511,29 @@ def write_frames( depth_mm_scaled = depth_mm / float(depth_scale) # Save the scaled depth image. - depth_fpath = depth_tpath.format( - chunk_id=curr_chunk_id, im_id=curr_frame_id - ) + depth_fpath = depth_tpath.format(chunk_id=curr_chunk_id, im_id=curr_frame_id) _BopWriterUtility.save_depth(depth_fpath, depth_mm_scaled) # Save the chunk info if we are at the end of a chunk or at the last new frame. - if ((curr_frame_id + 1) % frames_per_chunk == 0) or ( - frame_id == num_new_frames - 1 - ): + if ((curr_frame_id + 1) % frames_per_chunk == 0) or \ + (frame_id == num_new_frames - 1): # Save GT annotations. - _BopWriterUtility.save_json( - chunk_gt_tpath.format(chunk_id=curr_chunk_id), chunk_gt - ) + _BopWriterUtility.save_json(chunk_gt_tpath.format(chunk_id=curr_chunk_id), chunk_gt) # Save camera info. - _BopWriterUtility.save_json( - chunk_camera_tpath.format(chunk_id=curr_chunk_id), chunk_camera - ) + _BopWriterUtility.save_json(chunk_camera_tpath.format(chunk_id=curr_chunk_id), chunk_camera) # Update ID's. curr_chunk_id += 1 curr_frame_id = 0 else: curr_frame_id += 1 + @staticmethod - def _pyrender_init( - ren_width: int, ren_height: int, trimesh_objects: Dict[int, trimesh.Trimesh] - ): - """Initializes a worker process for calc_gt_masks and calc_gt_info + def _pyrender_init(ren_width: int, ren_height: int, trimesh_objects: Dict[int, trimesh.Trimesh]): + """ Initializes a worker process for calc_gt_masks and calc_gt_info :param ren_width: The width of the images to render. :param ren_height: The height of the images to render. 
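
Before moving on to the pyrender-based mask rendering, a note on the depth convention that write_frames applies above: Blender delivers depth in meters, the writer multiplies by 1000 and divides by depth_scale before saving a 16-bit PNG, so a consumer has to undo both steps. A minimal decoding sketch; the file path and the depth_scale value below are placeholders (depth_scale is stored per frame in scene_camera.json by get_frame_camera):

import cv2
import numpy as np

# Placeholder path; depth_scale comes from scene_camera.json for this frame.
depth_png = cv2.imread("000000/depth/000000.png", cv2.IMREAD_UNCHANGED)
depth_scale = 0.1

depth_mm = depth_png.astype(np.float32) * depth_scale  # undo the division by depth_scale
depth_m = depth_mm / 1000.0                            # millimeters back to meters
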
@@ -685,42 +541,30 @@ def _pyrender_init(
         """
         # pylint: disable=import-outside-toplevel
         # Import pyrender only inside the multiprocesses, otherwise this leads to an opengl error
-        # https://github.com/mmatl/pyrender/issues/200#issuecomment-1123713055
+        # https://github.com/mmatl/pyrender/issues/200#issuecomment-1123713055
         import pyrender
-
         # pylint: enable=import-outside-toplevel
 
         global renderer, renderer_large, dataset_objects
         dataset_objects = {}
 
         # Create renderer for calc_gt_masks
-        renderer = pyrender.OffscreenRenderer(
-            viewport_width=ren_width, viewport_height=ren_height
-        )
+        renderer = pyrender.OffscreenRenderer(viewport_width=ren_width, viewport_height=ren_height)
         # Create renderer for calc_gt_info
-        renderer_large = pyrender.OffscreenRenderer(
-            viewport_width=ren_width * 3, viewport_height=ren_height * 3
-        )
+        renderer_large = pyrender.OffscreenRenderer(viewport_width=ren_width * 3, viewport_height=ren_height * 3)
 
         # Create pyrender meshes
         for key in trimesh_objects.keys():
             # we need to create a double-sided material to be able to render non-watertight meshes
             # the other parameters are defaults, see
             # https://github.com/mmatl/pyrender/blob/master/pyrender/mesh.py#L216-L223
-            material = pyrender.MetallicRoughnessMaterial(
-                alphaMode="BLEND",
-                baseColorFactor=[0.3, 0.3, 0.3, 1.0],
-                metallicFactor=0.2,
-                roughnessFactor=0.8,
-                doubleSided=True,
-            )
-            dataset_objects[key] = pyrender.Mesh.from_trimesh(
-                mesh=trimesh_objects[key], material=material
-            )
+            material = pyrender.MetallicRoughnessMaterial(alphaMode='BLEND', baseColorFactor=[0.3, 0.3, 0.3, 1.0],
+                                                          metallicFactor=0.2, roughnessFactor=0.8, doubleSided=True)
+            dataset_objects[key] = pyrender.Mesh.from_trimesh(mesh=trimesh_objects[key], material=material)
 
     @staticmethod
     def _pyrender_cleanup():
-        """Cleans up global renderer
-
+        """ Cleans up global renderer
+
         This is only necessary when not using multiprocessing.
         """
         global renderer, renderer_large, dataset_objects
@@ -729,18 +573,10 @@ def _pyrender_cleanup():
         del dataset_objects
 
     @staticmethod
-    def _calc_gt_masks_iteration(
-        annotation_scale: float,
-        K: np.ndarray,
-        delta: float,
-        dist_im: np.ndarray,
-        chunk_dir: str,
-        im_id: int,
-        gt_data: Tuple[int, Dict[str, int]],
-    ):
-        """One iteration of calc_gt_masks(), executed inside a worker process.
-
+    def _calc_gt_masks_iteration(annotation_scale: float, K: np.ndarray, delta: float, dist_im: np.ndarray, chunk_dir: str, im_id: int, gt_data: Tuple[int, Dict[str, int]]):
+        """ One iteration of calc_gt_masks(), executed inside a worker process.
+
        :param annotation_scale: The scale factor applied to the calculated annotations (in [m]) to get them into the
            specified format (see `annotation_format` in `write_bop` for further details).
        :param K: The camera intrinsics to use.
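
One practical remark on the per-worker setup above: a pyrender OffscreenRenderer owns an OpenGL context that cannot be shared across processes, which is why both the import and the renderer construction happen inside the worker initializer. On a headless machine, the rendering backend usually has to be selected via an environment variable before pyrender is imported for the first time; a sketch, assuming an EGL-capable GPU driver is available:

import os

# Must be set before pyrender (and thus PyOpenGL) is first imported.
os.environ["PYOPENGL_PLATFORM"] = "egl"  # or "osmesa" for CPU-only rendering

import pyrender

# A short-lived renderer just to verify that offscreen rendering works headlessly.
renderer = pyrender.OffscreenRenderer(viewport_width=640, viewport_height=480)
renderer.delete()
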
@@ -752,12 +588,10 @@ def _calc_gt_masks_iteration( """ # pylint: disable=import-outside-toplevel # Import pyrender only inside the multiprocesses, otherwise this leads to an opengl error - # https://github.com/mmatl/pyrender/issues/200#issuecomment-1123713055 + # https://github.com/mmatl/pyrender/issues/200#issuecomment-1123713055 import pyrender - # This import is done inside to avoid having the requirement that BlenderProc depends on the bop_toolkit from bop_toolkit_lib import inout, misc, visibility - # pylint: enable=import-outside-toplevel global renderer, dataset_objects @@ -766,23 +600,20 @@ def _calc_gt_masks_iteration( # Init pyrender camera fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] - camera = pyrender.IntrinsicsCamera( - fx=fx, fy=fy, cx=cx, cy=cy, znear=0.1, zfar=100000 - ) - + camera = pyrender.IntrinsicsCamera(fx=fx, fy=fy, cx=cx, cy=cy, znear=0.1, zfar=100000) + # create a new scene scene = pyrender.Scene() # add camera and current object scene.add(camera) - t = np.array(gt["cam_t_m2c"]) + t = np.array(gt['cam_t_m2c']) # rescale translation depending on initial saving format t /= annotation_scale - pose = bop_pose_to_pyrender_coordinate_system( - cam_R_m2c=np.array(gt["cam_R_m2c"]).reshape(3, 3), cam_t_m2c=t - ) - scene.add(dataset_objects[gt["obj_id"]], pose=pose) + pose = bop_pose_to_pyrender_coordinate_system(cam_R_m2c=np.array(gt['cam_R_m2c']).reshape(3, 3), + cam_t_m2c=t) + scene.add(dataset_objects[gt['obj_id']], pose=pose) # Render the depth image. _, depth_gt = renderer.render(scene=scene) @@ -795,29 +626,23 @@ def _calc_gt_masks_iteration( # Mask of the visible part of the object silhouette. mask_visib = visibility.estimate_visib_mask_gt( - dist_im, dist_gt, delta, visib_mode="bop19" - ) + dist_im, dist_gt, delta, visib_mode='bop19') # Save the calculated masks. mask_path = os.path.join( - chunk_dir, "mask", "{im_id:06d}_{gt_id:06d}.png" - ).format(im_id=im_id, gt_id=gt_id) + chunk_dir, 'mask', '{im_id:06d}_{gt_id:06d}.png').format(im_id=im_id, gt_id=gt_id) inout.save_im(mask_path, 255 * mask.astype(np.uint8)) mask_visib_path = os.path.join( - chunk_dir, "mask_visib", "{im_id:06d}_{gt_id:06d}.png" - ).format(im_id=im_id, gt_id=gt_id) + chunk_dir, 'mask_visib', + '{im_id:06d}_{gt_id:06d}.png').format(im_id=im_id, gt_id=gt_id) inout.save_im(mask_visib_path, 255 * mask_visib.astype(np.uint8)) + @staticmethod - def calc_gt_masks( - pool: Pool, - chunk_dirs: List[str], - starting_frame_id: int = 0, - annotation_scale: float = 1000.0, - delta: float = 0.015, - ): - """Calculates the ground truth masks. + def calc_gt_masks(pool: Pool, chunk_dirs: List[str], starting_frame_id: int = 0, + annotation_scale: float = 1000., delta: float = 0.015): + """ Calculates the ground truth masks. From the BOP toolkit (https://github.com/thodan/bop_toolkit), with the difference of using pyrender for depth rendering. 
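
For readers without the bop_toolkit at hand: the visibility test in the iteration above compares the distance image of the full scene (dist_im) against the distance image of the object rendered in isolation (dist_gt), and counts an object pixel as visible if the scene surface there is not in front of the object surface by more than the tolerance delta. A simplified numpy rendition of the bop19 rule; the code above calls the toolkit's estimate_visib_mask_gt, which additionally handles further edge cases:

import numpy as np

def visib_mask_gt_sketch(dist_im, dist_gt, delta):
    # Pixels covered by the isolated rendering of the object.
    obj_mask = dist_gt > 0
    # Not occluded: the scene surface is at most delta closer than the object.
    not_occluded = dist_im > dist_gt - delta
    # bop19 mode: pixels without any scene depth reading count as visible.
    no_scene_depth = dist_im == 0
    return obj_mask & (not_occluded | no_scene_depth)
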
@@ -831,28 +656,19 @@ def calc_gt_masks( # This import is done inside to avoid having the requirement that BlenderProc depends on the bop_toolkit # pylint: disable=import-outside-toplevel from bop_toolkit_lib import inout, misc - # pylint: enable=import-outside-toplevel for dir_counter, chunk_dir in enumerate(chunk_dirs): - last_chunk_gt_fpath = os.path.join(chunk_dir, "scene_gt.json") - last_chunk_camera_fpath = os.path.join(chunk_dir, "scene_camera.json") - scene_gt = _BopWriterUtility.load_json( - last_chunk_gt_fpath, keys_to_int=True - ) - scene_camera = _BopWriterUtility.load_json( - last_chunk_camera_fpath, keys_to_int=True - ) + last_chunk_gt_fpath = os.path.join(chunk_dir, 'scene_gt.json') + last_chunk_camera_fpath = os.path.join(chunk_dir, 'scene_camera.json') + scene_gt = _BopWriterUtility.load_json(last_chunk_gt_fpath, keys_to_int=True) + scene_camera = _BopWriterUtility.load_json(last_chunk_camera_fpath, keys_to_int=True) # Create folders for the output masks (if they do not exist yet). - mask_dir_path = os.path.dirname( - os.path.join(chunk_dir, "mask", "000000_000000.png") - ) + mask_dir_path = os.path.dirname(os.path.join(chunk_dir, 'mask', '000000_000000.png')) misc.ensure_dir(mask_dir_path) - mask_visib_dir_path = os.path.dirname( - os.path.join(chunk_dir, "mask_visib", "000000_000000.png") - ) + mask_visib_dir_path = os.path.dirname(os.path.join(chunk_dir, 'mask_visib', '000000_000000.png')) misc.ensure_dir(mask_visib_dir_path) im_ids = sorted(scene_gt.keys()) @@ -863,49 +679,26 @@ def calc_gt_masks( for im_counter, im_id in enumerate(im_ids): if im_counter % 100 == 0: - misc.log(f"Calculating GT masks - {chunk_dir}, {im_counter}") + misc.log(f'Calculating GT masks - {chunk_dir}, {im_counter}') - K = np.array(scene_camera[im_id]["cam_K"]).reshape(3, 3) + K = np.array(scene_camera[im_id]['cam_K']).reshape(3, 3) # Load depth image. - depth_path = os.path.join(chunk_dir, "depth", "{im_id:06d}.png").format( - im_id=im_id - ) + depth_path = os.path.join( + chunk_dir, 'depth', '{im_id:06d}.png').format(im_id=im_id) depth_im = inout.load_depth(depth_path) - depth_im *= scene_camera[im_id]["depth_scale"] # to [mm] - depth_im /= 1000.0 # to [m] + depth_im *= scene_camera[im_id]['depth_scale'] # to [mm] + depth_im /= 1000. # to [m] dist_im = misc.depth_im_to_dist_im_fast(depth_im, K) map_fun = map if pool is None else pool.map - list( - map_fun( - partial( - _BopWriterUtility._calc_gt_masks_iteration, - annotation_scale, - K, - delta, - dist_im, - chunk_dir, - im_id, - ), - enumerate(scene_gt[im_id]), - ) - ) + list(map_fun(partial(_BopWriterUtility._calc_gt_masks_iteration, annotation_scale, K, delta, dist_im, chunk_dir, im_id), enumerate(scene_gt[im_id]))) + @staticmethod - def _calc_gt_info_iteration( - annotation_scale: float, - ren_cy_offset: int, - ren_cx_offset: int, - im_height: int, - im_width: int, - K: np.ndarray, - delta: float, - depth: np.ndarray, - gt: Dict[str, int], - ): - """One iteration of calc_gt_info(), executed inside a worker process. - + def _calc_gt_info_iteration(annotation_scale: float, ren_cy_offset: int, ren_cx_offset: int, im_height: int, im_width: int, K: np.ndarray, delta: float, depth: np.ndarray, gt: Dict[str, int]): + """ One iteration of calc_gt_info(), executed inside a worker process. + :param annotation_scale: The scale factor applied to the calculated annotations (in [m]) to get them into the specified format (see `annotation_format` in `write_bop` for further details). :param ren_cy_offset: The y offset for cropping the rendered image. 
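
A side note on misc.depth_im_to_dist_im_fast, which calc_gt_masks above relies on: it converts a z-buffer depth image (distance along the optical axis) into a Euclidean distance image (distance from the camera center), the representation the visibility test expects. A plain numpy equivalent, given here purely as a reference sketch of the geometry; the toolkit's optimized version is what actually runs:

import numpy as np

def depth_im_to_dist_im_sketch(depth, K):
    # Back-project every pixel to a ray of unit z-depth and scale by its depth.
    h, w = depth.shape
    fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]
    u, v = np.meshgrid(np.arange(w), np.arange(h))
    ray = np.stack(((u - cx) / fx, (v - cy) / fy, np.ones((h, w))), axis=-1)
    return depth * np.linalg.norm(ray, axis=-1)
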
@@ -916,13 +709,12 @@ def _calc_gt_info_iteration(
         :param delta: Tolerance used for estimation of the visibility masks.
         :param depth: The depth image of the frame.
         :param gt: Contains the id of the object whose mask the worker should render
-        """
+        """
         # Import pyrender only inside the multiprocesses, otherwise this leads to an opengl error
-        # https://github.com/mmatl/pyrender/issues/200#issuecomment-1123713055
+        # https://github.com/mmatl/pyrender/issues/200#issuecomment-1123713055
         # pylint: disable=import-outside-toplevel
         import pyrender
         from bop_toolkit_lib import misc, visibility
-
         # pylint: enable=import-outside-toplevel
 
         global renderer_large, dataset_objects, renderer
 
@@ -936,35 +728,27 @@ def _calc_gt_info_iteration(
         # Init pyrender camera
         fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]
         im_size = (depth.shape[1], depth.shape[0])
-        camera = pyrender.IntrinsicsCamera(
-            fx=fx,
-            fy=fy,
-            cx=cx + ren_cx_offset,
-            cy=cy + ren_cy_offset,
-            znear=0.1,
-            zfar=100000,
-        )
-
+        camera = pyrender.IntrinsicsCamera(fx=fx, fy=fy, cx=cx+ren_cx_offset, cy=cy+ren_cy_offset, znear=0.1,
+                                           zfar=100000)
+
         # create a new scene
         scene = pyrender.Scene()
 
         # add camera and current object
         scene.add(camera)
-        t = np.array(gt["cam_t_m2c"])
+        t = np.array(gt['cam_t_m2c'])
         # rescale translation depending on initial saving format
         t /= annotation_scale
-        pose = bop_pose_to_pyrender_coordinate_system(
-            cam_R_m2c=np.array(gt["cam_R_m2c"]).reshape(3, 3), cam_t_m2c=t
-        )
-        scene.add(dataset_objects[gt["obj_id"]], pose=pose)
+        pose = bop_pose_to_pyrender_coordinate_system(cam_R_m2c=np.array(gt['cam_R_m2c']).reshape(3, 3),
+                                                      cam_t_m2c=t)
+        scene.add(dataset_objects[gt['obj_id']], pose=pose)
 
         # render the depth image
         _, depth_gt_large = renderer_large.render(scene=scene)
         depth_gt = depth_gt_large[
-            ren_cy_offset : (ren_cy_offset + im_height),
-            ren_cx_offset : (ren_cx_offset + im_width),
-        ]
+            ren_cy_offset:(ren_cy_offset + im_height),
+            ren_cx_offset:(ren_cx_offset + im_width)]
 
         # Convert depth images to distance images.
         dist_gt = misc.depth_im_to_dist_im_fast(depth_gt, K)
@@ -972,8 +756,7 @@ def _calc_gt_info_iteration(
 
         # Estimation of the visibility mask.
         visib_gt = visibility.estimate_visib_mask_gt(
-            dist_im, dist_gt, delta, visib_mode="bop19"
-        )
+            dist_im, dist_gt, delta, visib_mode='bop19')
 
         # Mask of the object in the GT pose.
         obj_mask_gt_large = depth_gt_large > 0
@@ -1013,23 +796,18 @@ def _calc_gt_info_iteration(
 
         # Store the calculated info.
         return {
-            "px_count_all": int(px_count_all),
-            "px_count_valid": int(px_count_valid),
-            "px_count_visib": int(px_count_visib),
-            "visib_fract": float(visib_fract),
-            "bbox_obj": [int(e) for e in bbox],
-            "bbox_visib": [int(e) for e in bbox_visib],
+            'px_count_all': int(px_count_all),
+            'px_count_valid': int(px_count_valid),
+            'px_count_visib': int(px_count_visib),
+            'visib_fract': float(visib_fract),
+            'bbox_obj': [int(e) for e in bbox],
+            'bbox_visib': [int(e) for e in bbox_visib]
         }
 
     @staticmethod
-    def calc_gt_info(
-        pool,
-        chunk_dirs: List[str],
-        starting_frame_id: int = 0,
-        annotation_scale: float = 1000.0,
-        delta: float = 0.015,
-    ):
-        """Calculates the ground truth masks.
+    def calc_gt_info(pool, chunk_dirs: List[str], starting_frame_id: int = 0,
+                     annotation_scale: float = 1000., delta: float = 0.015):
+        """ Calculates the ground truth masks.
 
         From the BOP toolkit (https://github.com/thodan/bop_toolkit), with the difference of using pyrender
         for depth rendering.
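
Two details of _calc_gt_info_iteration above deserve a remark. The threefold viewport with the shifted principal point exists so that the amodal box bbox_obj can include object parts that project outside the image, while the center crop is used for the visible mask. The bounding-box and visibility-fraction computations themselves reduce to a few lines of numpy; a sketch of the principle (the code above relies on the bop_toolkit helpers, and the empty-mask return value below is an assumption for illustration):

import numpy as np

def bbox_from_mask_sketch(mask):
    # Tight axis-aligned [x, y, width, height] box around a binary mask.
    ys, xs = np.nonzero(mask)
    if xs.size == 0:
        return [-1, -1, -1, -1]  # assumed convention for an empty mask
    return [int(xs.min()), int(ys.min()),
            int(xs.max() - xs.min() + 1), int(ys.max() - ys.min() + 1)]

def visib_fraction_sketch(px_count_visib, px_count_all):
    # Fraction of the full object silhouette that remains visible.
    return px_count_visib / px_count_all if px_count_all > 0 else 0.0
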
@@ -1042,31 +820,22 @@ def calc_gt_info( # This import is done inside to avoid having the requirement that BlenderProc depends on the bop_toolkit # pylint: disable=import-outside-toplevel from bop_toolkit_lib import inout, misc - # pylint: enable=import-outside-toplevel - im_width, im_height = ( - bpy.context.scene.render.resolution_x, - bpy.context.scene.render.resolution_y, - ) + im_width, im_height = bpy.context.scene.render.resolution_x, bpy.context.scene.render.resolution_y ren_cx_offset, ren_cy_offset = im_width, im_height for dir_counter, chunk_dir in enumerate(chunk_dirs): - last_chunk_gt_fpath = os.path.join(chunk_dir, "scene_gt.json") - last_chunk_camera_fpath = os.path.join(chunk_dir, "scene_camera.json") - scene_gt = _BopWriterUtility.load_json( - last_chunk_gt_fpath, keys_to_int=True - ) - scene_camera = _BopWriterUtility.load_json( - last_chunk_camera_fpath, keys_to_int=True - ) + last_chunk_gt_fpath = os.path.join(chunk_dir, 'scene_gt.json') + last_chunk_camera_fpath = os.path.join(chunk_dir, 'scene_camera.json') + scene_gt = _BopWriterUtility.load_json(last_chunk_gt_fpath, keys_to_int=True) + scene_camera = _BopWriterUtility.load_json(last_chunk_camera_fpath, keys_to_int=True) # load existing gt info if dir_counter == 0 and starting_frame_id > 0: misc.log(f"Loading gt info from existing chunk dir - {chunk_dir}") - scene_gt_info = _BopWriterUtility.load_json( - os.path.join(chunk_dir, "scene_gt_info.json"), keys_to_int=True - ) + scene_gt_info = _BopWriterUtility.load_json(os.path.join(chunk_dir, 'scene_gt_info.json'), + keys_to_int=True) else: scene_gt_info = {} @@ -1078,49 +847,29 @@ def calc_gt_info( for im_counter, im_id in enumerate(im_ids): if im_counter % 100 == 0: - misc.log(f"Calculating GT info - {chunk_dir}, {im_counter}") + misc.log(f'Calculating GT info - {chunk_dir}, {im_counter}') # Load depth image. - depth_fpath = os.path.join( - chunk_dir, "depth", "{im_id:06d}.png" - ).format(im_id=im_id) + depth_fpath = os.path.join(chunk_dir, 'depth', '{im_id:06d}.png').format(im_id=im_id) assert os.path.isfile(depth_fpath) depth = inout.load_depth(depth_fpath) - depth *= scene_camera[im_id]["depth_scale"] # Convert to [mm]. - depth /= 1000.0 # to [m] + depth *= scene_camera[im_id]['depth_scale'] # Convert to [mm]. + depth /= 1000. # to [m] - K = np.array(scene_camera[im_id]["cam_K"]).reshape(3, 3) + K = np.array(scene_camera[im_id]['cam_K']).reshape(3, 3) map_fun = map if pool is None else pool.map - scene_gt_info[im_id] = list( - map_fun( - partial( - _BopWriterUtility._calc_gt_info_iteration, - annotation_scale, - ren_cy_offset, - ren_cx_offset, - im_height, - im_width, - K, - delta, - depth, - ), - scene_gt[im_id], - ) - ) + scene_gt_info[im_id] = list(map_fun(partial(_BopWriterUtility._calc_gt_info_iteration, annotation_scale, ren_cy_offset, ren_cx_offset, im_height, im_width, K, delta, depth), scene_gt[im_id])) + # Save the info for the current scene. - scene_gt_info_path = os.path.join(chunk_dir, "scene_gt_info.json") + scene_gt_info_path = os.path.join(chunk_dir, 'scene_gt_info.json') misc.ensure_dir(os.path.dirname(scene_gt_info_path)) inout.save_json(scene_gt_info_path, scene_gt_info) @staticmethod - def calc_gt_coco( - chunk_dirs: List[str], - dataset_objects: List[MeshObject], - starting_frame_id: int = 0, - ): - """Calculates the COCO annotations. + def calc_gt_coco(chunk_dirs: List[str], dataset_objects: List[MeshObject], starting_frame_id: int = 0): + """ Calculates the COCO annotations. From the BOP toolkit (https://github.com/thodan/bop_toolkit). 
:param chunk_dirs: List of directories to calculate the gt coco annotations for. @@ -1130,46 +879,33 @@ def calc_gt_coco( # This import is done inside to avoid having the requirement that BlenderProc depends on the bop_toolkit # pylint: disable=import-outside-toplevel from bop_toolkit_lib import inout, misc, pycoco_utils - # pylint: enable=import-outside-toplevel for dir_counter, chunk_dir in enumerate(chunk_dirs): dataset_name = Path(chunk_dir).parents[2].name - CATEGORIES = [ - { - "id": obj.get_cp("category_id"), - "name": str(obj.get_cp("category_id")), - "supercategory": dataset_name, - } - for obj in dataset_objects - ] + CATEGORIES = [{'id': obj.get_cp('category_id'), 'name': str(obj.get_cp('category_id')), 'supercategory': + dataset_name} for obj in dataset_objects] # Remove all duplicate dicts from list. # Ref: https://stackoverflow.com/questions/9427163/remove-duplicate-dict-in-list-in-python - CATEGORIES = list( - {frozenset(item.items()): item for item in CATEGORIES}.values() - ) + CATEGORIES = list({frozenset(item.items()):item for item in CATEGORIES}.values()) INFO = { - "description": dataset_name + "_train", + "description": dataset_name + '_train', "url": "https://github.com/thodan/bop_toolkit", "version": "0.1.0", "year": datetime.date.today().year, "contributor": "", - "date_created": datetime.datetime.utcnow().isoformat(" "), + "date_created": datetime.datetime.utcnow().isoformat(' ') } # load existing coco annotations if dir_counter == 0 and starting_frame_id > 0: - misc.log( - f"Loading coco annotations from existing chunk dir - {chunk_dir}" - ) - coco_scene_output = _BopWriterUtility.load_json( - os.path.join(chunk_dir, "scene_gt_coco.json") - ) + misc.log(f"Loading coco annotations from existing chunk dir - {chunk_dir}") + coco_scene_output = _BopWriterUtility.load_json(os.path.join(chunk_dir, 'scene_gt_coco.json')) if coco_scene_output["annotations"]: - segmentation_id = coco_scene_output["annotations"][-1]["id"] + 1 + segmentation_id = coco_scene_output["annotations"][-1]['id'] + 1 else: segmentation_id = 1 else: @@ -1178,20 +914,18 @@ def calc_gt_coco( "licenses": [], "categories": CATEGORIES, "images": [], - "annotations": [], + "annotations": [] } segmentation_id = 1 # Load info about the GT poses (e.g. visibility) for the current scene. 
- last_chunk_gt_fpath = os.path.join(chunk_dir, "scene_gt.json") - scene_gt = _BopWriterUtility.load_json( - last_chunk_gt_fpath, keys_to_int=True - ) - last_chunk_gt_info_fpath = os.path.join(chunk_dir, "scene_gt_info.json") + last_chunk_gt_fpath = os.path.join(chunk_dir, 'scene_gt.json') + scene_gt = _BopWriterUtility.load_json(last_chunk_gt_fpath, keys_to_int=True) + last_chunk_gt_info_fpath = os.path.join(chunk_dir, 'scene_gt_info.json') scene_gt_info = inout.load_json(last_chunk_gt_info_fpath, keys_to_int=True) # Output coco path - coco_gt_path = os.path.join(chunk_dir, "scene_gt_coco.json") - misc.log(f"Calculating COCO annotations - {chunk_dir}") + coco_gt_path = os.path.join(chunk_dir, 'scene_gt_coco.json') + misc.log(f'Calculating COCO annotations - {chunk_dir}') # Go through each view in scene_gt for scene_view, inst_list in scene_gt.items(): @@ -1201,34 +935,24 @@ def calc_gt_coco( if dir_counter == 0 and im_id < starting_frame_id: continue - img_path = os.path.join(chunk_dir, "rgb", "{im_id:06d}.jpg").format( - im_id=im_id - ) - relative_img_path = os.path.relpath( - img_path, os.path.dirname(coco_gt_path) - ) - im_size = ( - bpy.context.scene.render.resolution_x, - bpy.context.scene.render.resolution_y, - ) - image_info = pycoco_utils.create_image_info( - im_id, relative_img_path, im_size - ) + img_path = os.path.join(chunk_dir, 'rgb', '{im_id:06d}.jpg').format(im_id=im_id) + relative_img_path = os.path.relpath(img_path, os.path.dirname(coco_gt_path)) + im_size = (bpy.context.scene.render.resolution_x, bpy.context.scene.render.resolution_y) + image_info = pycoco_utils.create_image_info(im_id, relative_img_path, im_size) coco_scene_output["images"].append(image_info) gt_info = scene_gt_info[scene_view] # Go through each instance in view for idx, inst in enumerate(inst_list): - category_info = inst["obj_id"] - visibility = gt_info[idx]["visib_fract"] + category_info = inst['obj_id'] + visibility = gt_info[idx]['visib_fract'] # Add ignore flag for objects smaller than 10% visible ignore_gt = visibility < 0.1 mask_visib_p = os.path.join( - chunk_dir, "mask_visib", "{im_id:06d}_{gt_id:06d}.png" - ).format(im_id=im_id, gt_id=idx) + chunk_dir, 'mask_visib', + '{im_id:06d}_{gt_id:06d}.png').format(im_id=im_id, gt_id=idx) mask_full_p = os.path.join( - chunk_dir, "mask", "{im_id:06d}_{gt_id:06d}.png" - ).format(im_id=im_id, gt_id=idx) + chunk_dir, 'mask', '{im_id:06d}_{gt_id:06d}.png').format(im_id=im_id, gt_id=idx) binary_inst_mask_visib = inout.load_depth(mask_visib_p).astype(bool) if binary_inst_mask_visib.sum() < 1: @@ -1238,28 +962,20 @@ def calc_gt_coco( binary_inst_mask_full = inout.load_depth(mask_full_p).astype(bool) if binary_inst_mask_full.sum() < 1: continue - bounding_box = pycoco_utils.bbox_from_binary_mask( - binary_inst_mask_full - ) + bounding_box = pycoco_utils.bbox_from_binary_mask(binary_inst_mask_full) annotation_info = pycoco_utils.create_annotation_info( - segmentation_id, - im_id, - category_info, - binary_inst_mask_visib, - bounding_box, - tolerance=2, - ignore=ignore_gt, - ) + segmentation_id, im_id, category_info, binary_inst_mask_visib, bounding_box, tolerance=2, + ignore=ignore_gt) if annotation_info is not None: coco_scene_output["annotations"].append(annotation_info) segmentation_id += 1 - with open(coco_gt_path, "w", encoding="utf-8") as output_json_file: + with open(coco_gt_path, 'w', encoding='utf-8') as output_json_file: json.dump(coco_scene_output, output_json_file) - + @staticmethod def write_edges(chunk_dirs: List[str], edges: List[np.ndarray]) -> 
None:
+        """Writes rendered edge images to files
 
diff --git a/examples/datasets/bop_challenge/main_tless_random.py b/examples/datasets/bop_challenge/main_tless_random.py
index d596aa299..5061effd1 100644
--- a/examples/datasets/bop_challenge/main_tless_random.py
+++ b/examples/datasets/bop_challenge/main_tless_random.py
@@ -4,89 +4,49 @@
 import numpy as np
 from time import time
 
-
 start_time = time()
 
 parser = argparse.ArgumentParser()
-parser.add_argument("bop_parent_path", help="Path to the bop datasets parent directory")
-parser.add_argument(
-    "cc_textures_path",
-    default="resources/cctextures",
-    help="Path to downloaded cc textures",
-)
-parser.add_argument("output_dir", help="Path to where the final files will be saved ")
-parser.add_argument(
-    "--num_scenes",
-    type=int,
-    default=2000,
-    help="How many scenes with 25 images each to generate",
-)
+parser.add_argument('bop_parent_path', help="Path to the bop datasets parent directory")
+parser.add_argument('cc_textures_path', default="resources/cctextures", help="Path to downloaded cc textures")
+parser.add_argument('output_dir', help="Path to where the final files will be saved ")
+parser.add_argument('--num_scenes', type=int, default=2000, help="How many scenes with 25 images each to generate")
 args = parser.parse_args()
 
+for k,v in vars(args).items():
+    print(k, v)
+
 bproc.init()
 
 # load bop objects into the scene
-target_bop_objs = bproc.loader.load_bop_objs(
-    bop_dataset_path=os.path.join(args.bop_parent_path, "tless"),
-    model_type="cad",
-    object_model_unit="mm",
-)
+target_bop_objs = bproc.loader.load_bop_objs(bop_dataset_path = os.path.join(args.bop_parent_path, 'tless'), model_type = 'cad', object_model_unit='mm')
 
 # load distractor bop objects
-itodd_dist_bop_objs = bproc.loader.load_bop_objs(
-    bop_dataset_path=os.path.join(args.bop_parent_path, "itodd"), object_model_unit="mm"
-)
-ycbv_dist_bop_objs = bproc.loader.load_bop_objs(
-    bop_dataset_path=os.path.join(args.bop_parent_path, "ycbv"), object_model_unit="mm"
-)
-hb_dist_bop_objs = bproc.loader.load_bop_objs(
-    bop_dataset_path=os.path.join(args.bop_parent_path, "hb"), object_model_unit="mm"
-)
+itodd_dist_bop_objs = bproc.loader.load_bop_objs(bop_dataset_path = os.path.join(args.bop_parent_path, 'itodd'), object_model_unit='mm')
+ycbv_dist_bop_objs = bproc.loader.load_bop_objs(bop_dataset_path = os.path.join(args.bop_parent_path, 'ycbv'), object_model_unit='mm')
+hb_dist_bop_objs = bproc.loader.load_bop_objs(bop_dataset_path = os.path.join(args.bop_parent_path, 'hb'), object_model_unit='mm')
 
 # load BOP dataset intrinsics
-bproc.loader.load_bop_intrinsics(
-    bop_dataset_path=os.path.join(args.bop_parent_path, "tless")
-)
+bproc.loader.load_bop_intrinsics(bop_dataset_path = os.path.join(args.bop_parent_path, 'tless'))
 
 # set shading and hide objects
-for obj in (
-    target_bop_objs + itodd_dist_bop_objs + ycbv_dist_bop_objs + hb_dist_bop_objs
-):
-    obj.set_shading_mode("auto")
+for obj in (target_bop_objs + itodd_dist_bop_objs + ycbv_dist_bop_objs + hb_dist_bop_objs):
+    obj.set_shading_mode('auto')
     obj.hide(True)
-
+
 # create room
-room_planes = [
-    bproc.object.create_primitive("PLANE", scale=[2, 2, 1]),
-    bproc.object.create_primitive(
-        "PLANE", scale=[2, 2, 1], location=[0, -2, 2], rotation=[-1.570796, 0, 0]
-    ),
-    bproc.object.create_primitive(
-        "PLANE", scale=[2, 2, 1], location=[0, 2, 2], rotation=[1.570796, 0, 0]
-    ),
-    bproc.object.create_primitive(
-        "PLANE", scale=[2, 2, 1], location=[2, 0, 2], rotation=[0, -1.570796, 0]
-    ),
-    bproc.object.create_primitive(
"PLANE", scale=[2, 2, 1], location=[-2, 0, 2], rotation=[0, 1.570796, 0] - ), -] +room_planes = [bproc.object.create_primitive('PLANE', scale=[2, 2, 1]), + bproc.object.create_primitive('PLANE', scale=[2, 2, 1], location=[0, -2, 2], rotation=[-1.570796, 0, 0]), + bproc.object.create_primitive('PLANE', scale=[2, 2, 1], location=[0, 2, 2], rotation=[1.570796, 0, 0]), + bproc.object.create_primitive('PLANE', scale=[2, 2, 1], location=[2, 0, 2], rotation=[0, -1.570796, 0]), + bproc.object.create_primitive('PLANE', scale=[2, 2, 1], location=[-2, 0, 2], rotation=[0, 1.570796, 0])] for plane in room_planes: - plane.enable_rigidbody( - False, - collision_shape="BOX", - mass=1.0, - friction=100.0, - linear_damping=0.99, - angular_damping=0.99, - ) + plane.enable_rigidbody(False, collision_shape='BOX', mass=1.0, friction = 100.0, linear_damping = 0.99, angular_damping = 0.99) # sample light color and strenght from ceiling -light_plane = bproc.object.create_primitive( - "PLANE", scale=[3, 3, 1], location=[0, 0, 10] -) -light_plane.set_name("light_plane") -light_plane_material = bproc.material.create("light_material") +light_plane = bproc.object.create_primitive('PLANE', scale=[3, 3, 1], location=[0, 0, 10]) +light_plane.set_name('light_plane') +light_plane_material = bproc.material.create('light_material') # sample point light on shell light_point = bproc.types.Light() @@ -95,20 +55,13 @@ # load cc_textures cc_textures = bproc.loader.load_ccmaterials(args.cc_textures_path) - -def sample_pose_func(sample_obj: bproc.types.MeshObject): - """ - Randomly samples a 6-DoF pose for a given mesh object. - - :param sample_obj: The MeshObject to transform. Location is sampled from a - bounded 3D space,and rotation is uniformly sampled over SO(3). - """ - min_val = np.random.uniform([-0.3, -0.3, 0.0], [-0.2, -0.2, 0.0]) - max_val = np.random.uniform([0.2, 0.2, 0.4], [0.3, 0.3, 0.6]) - sample_obj.set_location(np.random.uniform(min_val, max_val)) - sample_obj.set_rotation_euler(bproc.sampler.uniformSO3()) - - +# Define a function that samples 6-DoF poses +def sample_pose_func(obj: bproc.types.MeshObject): + min = np.random.uniform([-0.3, -0.3, 0.0], [-0.2, -0.2, 0.0]) + max = np.random.uniform([0.2, 0.2, 0.4], [0.3, 0.3, 0.6]) + obj.set_location(np.random.uniform(min, max)) + obj.set_rotation_euler(bproc.sampler.uniformSO3()) + # activate depth rendering without antialiasing and set amount of samples for color rendering bproc.renderer.enable_depth_output(activate_antialiasing=False) bproc.renderer.set_max_amount_of_samples(50) @@ -116,54 +69,33 @@ def sample_pose_func(sample_obj: bproc.types.MeshObject): for i in range(args.num_scenes): # Sample bop objects for a scene - sampled_target_bop_objs = list( - np.random.choice(target_bop_objs, size=20, replace=False) - ) - sampled_distractor_bop_objs = list( - np.random.choice(itodd_dist_bop_objs, size=2, replace=False) - ) - sampled_distractor_bop_objs += list( - np.random.choice(ycbv_dist_bop_objs, size=2, replace=False) - ) - sampled_distractor_bop_objs += list( - np.random.choice(hb_dist_bop_objs, size=2, replace=False) - ) + sampled_target_bop_objs = list(np.random.choice(target_bop_objs, size=20, replace=False)) + sampled_distractor_bop_objs = list(np.random.choice(itodd_dist_bop_objs, size=2, replace=False)) + sampled_distractor_bop_objs += list(np.random.choice(ycbv_dist_bop_objs, size=2, replace=False)) + sampled_distractor_bop_objs += list(np.random.choice(hb_dist_bop_objs, size=2, replace=False)) # Randomize materials and set physics - for obj in 
sampled_target_bop_objs + sampled_distractor_bop_objs: + for obj in (sampled_target_bop_objs + sampled_distractor_bop_objs): mat = obj.get_materials()[0] - if obj.get_cp("bop_dataset_name") in ["itodd", "tless"]: - grey_col = np.random.uniform(0.1, 0.9) - mat.set_principled_shader_value( - "Base Color", [grey_col, grey_col, grey_col, 1] - ) + if obj.get_cp("bop_dataset_name") in ['itodd', 'tless']: + grey_col = np.random.uniform(0.1, 0.9) + mat.set_principled_shader_value("Base Color", [grey_col, grey_col, grey_col, 1]) mat.set_principled_shader_value("Roughness", np.random.uniform(0, 0.5)) - if obj.get_cp("bop_dataset_name") == "itodd": + if obj.get_cp("bop_dataset_name") == 'itodd': mat.set_principled_shader_value("Metallic", np.random.uniform(0.5, 1.0)) - if obj.get_cp("bop_dataset_name") == "tless": - mat.set_principled_shader_value( - "Specular IOR Level", np.random.uniform(0.3, 1.0) - ) + if obj.get_cp("bop_dataset_name") == 'tless': + mat.set_principled_shader_value("Specular IOR Level", np.random.uniform(0.3, 1.0)) mat.set_principled_shader_value("Metallic", np.random.uniform(0, 0.5)) - obj.enable_rigidbody( - True, mass=1.0, friction=100.0, linear_damping=0.99, angular_damping=0.99 - ) + obj.enable_rigidbody(True, mass=1.0, friction = 100.0, linear_damping = 0.99, angular_damping = 0.99) obj.hide(False) - + # Sample two light sources - light_plane_material.make_emissive( - emission_strength=np.random.uniform(3, 6), - emission_color=np.random.uniform([0.5, 0.5, 0.5, 1.0], [1.0, 1.0, 1.0, 1.0]), - ) + light_plane_material.make_emissive(emission_strength=np.random.uniform(3,6), + emission_color=np.random.uniform([0.5, 0.5, 0.5, 1.0], [1.0, 1.0, 1.0, 1.0])) light_plane.replace_materials(light_plane_material) - light_point.set_color(np.random.uniform([0.5, 0.5, 0.5], [1, 1, 1])) - location = bproc.sampler.shell( - center=[0, 0, 0], - radius_min=1, - radius_max=1.5, - elevation_min=5, - elevation_max=89, - ) + light_point.set_color(np.random.uniform([0.5,0.5,0.5],[1,1,1])) + location = bproc.sampler.shell(center = [0, 0, 0], radius_min = 1, radius_max = 1.5, + elevation_min = 5, elevation_max = 89) light_point.set_location(location) # sample CC Texture and assign to room planes @@ -171,89 +103,71 @@ def sample_pose_func(sample_obj: bproc.types.MeshObject): for plane in room_planes: plane.replace_materials(random_cc_texture) - # Sample object poses and check collisions - bproc.object.sample_poses( - objects_to_sample=sampled_target_bop_objs + sampled_distractor_bop_objs, - sample_pose_func=sample_pose_func, - max_tries=1000, - ) + # Sample object poses and check collisions + bproc.object.sample_poses(objects_to_sample = sampled_target_bop_objs + sampled_distractor_bop_objs, + sample_pose_func = sample_pose_func, + max_tries = 1000) + # Physics Positioning - bproc.object.simulate_physics_and_fix_final_poses( - min_simulation_time=3, - max_simulation_time=10, - check_object_interval=1, - substeps_per_frame=20, - solver_iters=25, - ) + bproc.object.simulate_physics_and_fix_final_poses(min_simulation_time=3, + max_simulation_time=10, + check_object_interval=1, + substeps_per_frame = 20, + solver_iters=25) # BVH tree used for camera obstacle checks - bop_bvh_tree = bproc.object.create_bvh_tree_multi_objects( - sampled_target_bop_objs + sampled_distractor_bop_objs - ) + bop_bvh_tree = bproc.object.create_bvh_tree_multi_objects(sampled_target_bop_objs + sampled_distractor_bop_objs) camera_poses = [] cam_poses = 0 - while cam_poses < 25: + while cam_poses < 1: # TODO: return to 25 # Sample 
location
-        location = bproc.sampler.shell(
-            center=[0, 0, 0],
-            radius_min=0.65,
-            radius_max=0.94,
-            elevation_min=5,
-            elevation_max=89,
-        )
+        location = bproc.sampler.shell(center = [0, 0, 0],
+                                radius_min = 0.65,
+                                radius_max = 0.94,
+                                elevation_min = 5,
+                                elevation_max = 89)
         # Determine point of interest in scene as the object closest to the mean of a subset of objects
-        poi = bproc.object.compute_poi(
-            np.random.choice(sampled_target_bop_objs, size=15, replace=False)
-        )
+        poi = bproc.object.compute_poi(np.random.choice(sampled_target_bop_objs, size=15, replace=False))
         # Compute rotation based on vector going from location towards poi
-        rotation_matrix = bproc.camera.rotation_from_forward_vec(
-            poi - location, inplane_rot=np.random.uniform(-3.14159, 3.14159)
-        )
+        rotation_matrix = bproc.camera.rotation_from_forward_vec(poi - location, inplane_rot=np.random.uniform(-3.14159, 3.14159))
         # Add homog cam pose based on location and rotation
-        cam2world_matrix = bproc.math.build_transformation_mat(
-            location, rotation_matrix
-        )
-
-        # Check that obstacles are at least 0.3 meter away from the camera
-        # Make sure the view interesting enough
-        if bproc.camera.perform_obstacle_in_view_check(
-            cam2world_matrix, {"min": 0.3}, bop_bvh_tree
-        ):
+        cam2world_matrix = bproc.math.build_transformation_mat(location, rotation_matrix)
+
+        # Check that obstacles are at least 0.3 meter away from the camera and make sure the view is interesting enough
+        if bproc.camera.perform_obstacle_in_view_check(cam2world_matrix, {"min": 0.3}, bop_bvh_tree):
             # Persist camera pose
             bproc.camera.add_camera_pose(cam2world_matrix, frame=cam_poses)
             camera_poses.append(cam2world_matrix)
             cam_poses += 1
 
-    # Render the whole pipeline
+    # render the whole pipeline
    data = bproc.renderer.render()
+
    # Render only the edges
    data["edges"] = bproc.renderer.render_edges(
        target_objects=sampled_target_bop_objs, camera_poses=camera_poses
    )
 
    # Write data in bop format
-    bproc.writer.write_bop(
-        os.path.join(args.output_dir, "bop_data"),
-        target_objects=sampled_target_bop_objs,
-        dataset="tless",
-        depth_scale=0.1,
-        depths=data["depth"],
-        colors=data["colors"],
-        edges=data["edges"],
-        color_file_format="JPEG",
-        ignore_dist_thres=10,
-    )
-
-    for obj in sampled_target_bop_objs + sampled_distractor_bop_objs:
+    bproc.writer.write_bop(os.path.join(args.output_dir, 'bop_data'),
+                           target_objects = sampled_target_bop_objs,
+                           dataset = 'tless',
+                           depth_scale = 0.1,
+                           depths = data["depth"],
+                           colors = data["colors"],
+                           edges=data["edges"],
+                           color_file_format = "JPEG",
+                           ignore_dist_thres = 10)
+
+    for obj in (sampled_target_bop_objs + sampled_distractor_bop_objs):
        obj.disable_rigidbody()
        obj.hide(True)
-
-
+
runtime = time() - start_time
# Convert runtime to minutes and seconds format
minutes = int(runtime // 60)
seconds = runtime % 60
# Print runtime in minutes and seconds format
-print(f"{'#'*50}\nTook {minutes} minutes and {seconds:.2f} seconds\n{'#'*50}")
+print(f"{'#'*50}\nTook {minutes} minutes and {seconds:.2f} seconds\n{'#'*50}")
\ No newline at end of file

From 43ed7d53989b4e35fdaec3f48d44d6cab82af25f Mon Sep 17 00:00:00 2001
From: Matvey Ivanov
Date: Thu, 24 Jul 2025 10:52:00 +0200
Subject: [PATCH 3/5] reset variable to default

---
 examples/datasets/bop_challenge/main_tless_random.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/datasets/bop_challenge/main_tless_random.py b/examples/datasets/bop_challenge/main_tless_random.py
index 5061effd1..c894d02d4 100644
--- 
a/examples/datasets/bop_challenge/main_tless_random.py +++ b/examples/datasets/bop_challenge/main_tless_random.py @@ -120,7 +120,7 @@ def sample_pose_func(obj: bproc.types.MeshObject): bop_bvh_tree = bproc.object.create_bvh_tree_multi_objects(sampled_target_bop_objs + sampled_distractor_bop_objs) camera_poses = [] cam_poses = 0 - while cam_poses < 1: # TODO: return to 25 + while cam_poses < 25: # Sample location location = bproc.sampler.shell(center = [0, 0, 0], radius_min = 0.65, From aa6f1d4361a488f94fe459302a907a0fde7db0b1 Mon Sep 17 00:00:00 2001 From: Matvey Ivanov Date: Thu, 24 Jul 2025 10:54:16 +0200 Subject: [PATCH 4/5] removed arguments print --- examples/datasets/bop_challenge/main_tless_random.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/examples/datasets/bop_challenge/main_tless_random.py b/examples/datasets/bop_challenge/main_tless_random.py index c894d02d4..1f66a556d 100644 --- a/examples/datasets/bop_challenge/main_tless_random.py +++ b/examples/datasets/bop_challenge/main_tless_random.py @@ -13,9 +13,6 @@ parser.add_argument('--num_scenes', type=int, default=2000, help="How many scenes with 25 images each to generate") args = parser.parse_args() -for k,v in vars(args).items(): - print(k, v) - bproc.init() # load bop objects into the scene From 0525d04ae86aa8dfa74928101eec931143654fdc Mon Sep 17 00:00:00 2001 From: Dominik Winkelbauer Date: Mon, 23 Jun 2025 18:48:31 +0200 Subject: [PATCH 5/5] fix(github): Switches to upload-artifactv4 --- .github/workflows/blenderprochelper.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/blenderprochelper.yml b/.github/workflows/blenderprochelper.yml index 6b7e91358..8cb45d47e 100644 --- a/.github/workflows/blenderprochelper.yml +++ b/.github/workflows/blenderprochelper.yml @@ -12,7 +12,7 @@ jobs: - name: Run BlenderProcHelper run: python /home/$USER/workspace/BlenderProcHelper/github_action.py - name: Archive logs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: logs - path: artifacts \ No newline at end of file + path: artifacts
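
Taken together, the series writes BOP-format chunks containing rgb/, depth/, mask/, mask_visib/, scene_camera.json, scene_gt.json, scene_gt_info.json and scene_gt_coco.json. As a closing illustration, a sketch of reading one frame back; the directory layout below is an assumption based on the path templates in write_frames, and JPEG color is assumed as requested in the example script:

import json
import os

import cv2

chunk_dir = "output/bop_data/tless/train_pbr/000000"  # assumed layout

with open(os.path.join(chunk_dir, "scene_camera.json"), encoding="utf-8") as f:
    scene_camera = json.load(f)

im_id = 0
cam = scene_camera[str(im_id)]   # json.load keeps the frame ids as strings
K = cam["cam_K"]                 # row-major list of the 9 intrinsics entries
depth_scale = cam["depth_scale"]

rgb = cv2.imread(os.path.join(chunk_dir, "rgb", f"{im_id:06d}.jpg"))
depth_png = cv2.imread(os.path.join(chunk_dir, "depth", f"{im_id:06d}.png"),
                       cv2.IMREAD_UNCHANGED)
depth_mm = depth_png.astype("float32") * depth_scale  # per the writer above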