|
12 | 12 | from tensordict import TensorDict
|
13 | 13 | from typing import TYPE_CHECKING, Any
|
14 | 14 |
|
| 15 | +import carb |
15 | 16 | import omni.usd
|
16 | 17 | import warp as wp
|
17 | 18 | from omni.isaac.core.prims import XFormPrimView
|
18 |
| -from pxr import UsdGeom |
| 19 | +from pxr import Usd, UsdGeom |
19 | 20 |
|
20 |
| -from omni.isaac.lab.utils.warp.kernels import reshape_tiled_image |
| 21 | +from omni.isaac.lab.utils.warp.kernels import reshape_tiled_image, reshape_tiled_image_old |
21 | 22 |
|
22 | 23 | from ..sensor_base import SensorBase
|
23 | 24 | from .camera import Camera
|
24 | 25 |
|
25 | 26 | if TYPE_CHECKING:
|
26 |
| - from .camera_cfg import TiledCameraCfg |
| 27 | + from .tiled_camera_cfg import TiledCameraCfg |
27 | 28 |
|
28 | 29 |
|
class TiledCamera(Camera):
    r"""The tiled rendering based camera sensor for acquiring RGBA and depth data.

    This class inherits from the :class:`Camera` class but uses the tiled-rendering API to acquire
    the visual data. Tiled-rendering concatenates the rendered images from multiple cameras into a single image.
    This allows for rendering multiple cameras in parallel and is useful for rendering large scenes with multiple
    cameras efficiently.

    The following sensor types are supported:

    - ``"rgb"``: A 3-channel rendered color image.
    - ``"rgba"``: A 4-channel rendered color image.
    - ``"depth"``: An image containing the distance to camera optical center.

    .. versionadded:: v1.0.0

        This feature is available starting from Isaac Sim 4.0. Before this version, the tiled rendering APIs
        were not available.

    """

    cfg: TiledCameraCfg
    """The configuration parameters."""

    SUPPORTED_TYPES: set[str] = {"rgb", "rgba", "depth"}
    """The set of sensor types that are supported."""

    def __init__(self, cfg: TiledCameraCfg):
        """Initializes the tiled camera sensor.

        Args:
            cfg: The configuration parameters.

        Raises:
            RuntimeError: If no camera prim is found at the given path.
            ValueError: If the provided data types are not supported by the camera.
        """
        super().__init__(cfg)

    def __del__(self):
        """Unsubscribes from callbacks and detaches from the replicator registry."""
        # unsubscribe from callbacks
        SensorBase.__del__(self)
        # detach every registered annotator from the render products.
        # NOTE: fixed from `self._annotator.detach(self.render_product_paths)` — the class only ever
        # assigns `self._annotators` (a dict, see `_initialize_impl`) and `self._render_product_paths`,
        # so the previous attribute names raised AttributeError and the annotators were never detached.
        for annotator in self._annotators.values():
            annotator.detach(self._render_product_paths)

    def __str__(self) -> str:
        """Returns: A string containing information about the instance."""
        # message for class
        return (
            f"Tiled Camera @ '{self.cfg.prim_path}': \n"
            f"\tdata types : {self.data.output.sorted_keys} \n"
            f"\tupdate period (s): {self.cfg.update_period}\n"
            f"\tshape : {self.image_shape}\n"
            f"\tnumber of sensors : {self._view.count}"
        )

    """
    Operations
    """

    def reset(self, env_ids: Sequence[int] | None = None):
        """Resets the sensor internals for the given environment indices.

        Args:
            env_ids: The environment indices to reset. Defaults to None, in which case all
                environments are reset.

        Raises:
            RuntimeError: If the sensor was never initialized (e.g. rendering is disabled).
        """
        if not self._is_initialized:
            raise RuntimeError(
                "TiledCamera could not be initialized. Please ensure --enable_cameras is used to enable rendering."
            )
        # reset the timestamps
        SensorBase.reset(self, env_ids)
        # resolve None
        if env_ids is None:
            env_ids = slice(None)
        # reset the frame count
        self._frame[env_ids] = 0

    """
    Implementation.
    """

    def _initialize_impl(self):
        """Initializes the sensor handles and internal buffers.

        This function creates handles and registers the provided data types with the replicator registry to
        be able to access the data from the sensor. It also initializes the internal buffers to store the data.

        Raises:
            RuntimeError: If the number of camera prims in the view does not match the number of environments.
            RuntimeError: If replicator was not found.
        """
        try:
            import omni.replicator.core as rep
        except ModuleNotFoundError:
            raise RuntimeError(
                "Replicator was not found for rendering. Please use --enable_cameras to enable rendering."
            )

        # Initialize parent class
        SensorBase._initialize_impl(self)
        # Create a view for the sensor
        self._view = XFormPrimView(self.cfg.prim_path, reset_xform_properties=False)
        self._view.initialize()
        # Check that sizes are correct
        if self._view.count != self._num_envs:
            raise RuntimeError(
                f"Number of camera prims in the view ({self._view.count}) does not match"
                f" the number of environments ({self._num_envs})."
            )

        # Create all env_ids buffer
        self._ALL_INDICES = torch.arange(self._view.count, device=self._device, dtype=torch.long)
        # Create frame count buffer
        self._frame = torch.zeros(self._view.count, device=self._device, dtype=torch.long)

        # Obtain current stage
        stage = omni.usd.get_context().get_stage()
        # Convert all encapsulated prims to Camera
        for cam_prim_path in self._view.prim_paths:
            # Get camera prim
            cam_prim = stage.GetPrimAtPath(cam_prim_path)
            # Check if prim is a camera
            if not cam_prim.IsA(UsdGeom.Camera):
                raise RuntimeError(f"Prim at path '{cam_prim_path}' is not a Camera.")
            # Add to list
            sensor_prim = UsdGeom.Camera(cam_prim)
            self._sensor_prims.append(sensor_prim)

        # start the orchestrator (if not already started)
        rep.orchestrator._orchestrator._is_started = True
        full_resolution = self._tiled_image_shape()

        # Set carb settings for tiled rendering so the renderer lays out one tile per camera
        carb_settings = carb.settings.get_settings()
        carb_settings.set("/rtx/viewTile/height", self.cfg.height)
        carb_settings.set("/rtx/viewTile/width", self.cfg.width)
        carb_settings.set("/rtx/viewTile/count", self._view.count)

        # Create a single render product covering the full tiled resolution
        rp = rep.create.render_product(self._view.prim_paths[0], full_resolution)

        # Attach all cameras to the one render product. The targets are set on the session layer
        # so the authored stage is left untouched.
        rp_prim = stage.GetPrimAtPath(rp.path)
        with Usd.EditContext(stage, stage.GetSessionLayer()):
            rp_prim.GetRelationship("camera").SetTargets(self._view.prim_paths)
        self._render_product_paths = [rp.path]
        # Attach the annotators for the requested data types.
        # NOTE: "rgb" is served from the "rgba" annotator output (first 3 channels).
        self._annotators = dict()
        if "rgba" in self.cfg.data_types or "rgb" in self.cfg.data_types:
            annotator = rep.AnnotatorRegistry.get_annotator("rgb", device=self.device, do_array_copy=False)
            self._annotators["rgba"] = annotator
        if "depth" in self.cfg.data_types:
            annotator = rep.AnnotatorRegistry.get_annotator(
                "distance_to_camera", device=self.device, do_array_copy=False
            )
            self._annotators["depth"] = annotator
        for annotator in self._annotators.values():
            annotator.attach(self._render_product_paths)

        # Create internal buffers
        self._create_buffers()

    def _update_buffers_impl(self, env_ids: Sequence[int]):
        """Fills the sensor data buffers from the tiled annotator outputs.

        Args:
            env_ids: The environment indices whose frame counters are incremented.
        """
        # Increment frame count
        self._frame[env_ids] += 1

        # Extract the flattened image buffer and un-tile it into per-camera images
        for data_type, annotator in self._annotators.items():
            tiled_data_buffer = annotator.get_data()
            if isinstance(tiled_data_buffer, np.ndarray):
                # annotator returned host memory; upload to the sim device for the warp kernel
                tiled_data_buffer = wp.array(tiled_data_buffer, device=self.device, dtype=wp.uint8)
            else:
                tiled_data_buffer = tiled_data_buffer.to(device=self.device)

            # one thread per (camera, row, column) pixel
            wp.launch(
                kernel=reshape_tiled_image,
                dim=(self._view.count, self.cfg.height, self.cfg.width),
                inputs=[
                    tiled_data_buffer.flatten(),
                    wp.from_torch(self._data.output[data_type]),  # zero-copy alias
                    *list(self._data.output[data_type].shape[1:]),  # height, width, num_channels
                    self._tiling_grid_shape()[0],  # num_tiles_x
                ],
                device=self.device,
            )

            # normalize color data from [0, 255] to [0, 1]
            if data_type == "rgba":
                self._data.output[data_type] /= 255.0
        # re-alias rgb as the first 3 channels of the rgba buffer
        if "rgb" in self.cfg.data_types:
            self._data.output["rgb"] = self._data.output["rgba"][..., :3]

    """
    Private Helpers
    """

    def _check_supported_data_types(self, cfg: TiledCameraCfg):
        """Checks if the data types are supported by the camera."""
        if not set(cfg.data_types).issubset(TiledCamera.SUPPORTED_TYPES):
            raise ValueError(
                f"The TiledCamera class only supports the following types {TiledCamera.SUPPORTED_TYPES} but the"
                f" following were provided: {cfg.data_types}"
            )

    def _create_buffers(self):
        """Create buffers for storing data."""
        # create the data object
        # -- pose of the cameras
        self._data.pos_w = torch.zeros((self._view.count, 3), device=self._device)
        self._data.quat_w_world = torch.zeros((self._view.count, 4), device=self._device)
        self._update_poses(self._ALL_INDICES)
        # -- intrinsic matrix
        self._data.intrinsic_matrices = torch.zeros((self._view.count, 3, 3), device=self._device)
        self._update_intrinsic_matrices(self._ALL_INDICES)
        self._data.image_shape = self.image_shape
        # -- output data
        data_dict = dict()
        if "rgba" in self.cfg.data_types or "rgb" in self.cfg.data_types:
            data_dict["rgba"] = torch.zeros(
                (self._view.count, self.cfg.height, self.cfg.width, 4), device=self.device
            ).contiguous()
            if "rgb" in self.cfg.data_types:
                # RGB is a view on the first 3 channels of RGBA (no extra memory)
                data_dict["rgb"] = data_dict["rgba"][..., :3]
        if "depth" in self.cfg.data_types:
            data_dict["depth"] = torch.zeros(
                (self._view.count, self.cfg.height, self.cfg.width, 1), device=self.device
            ).contiguous()
        self._data.output = TensorDict(data_dict, batch_size=self._view.count, device=self.device)

    def _tiled_image_shape(self) -> tuple[int, int]:
        """Returns a tuple containing the dimension of the tiled image."""
        cols, rows = self._tiling_grid_shape()
        return (self.cfg.width * cols, self.cfg.height * rows)

    def _tiling_grid_shape(self) -> tuple[int, int]:
        """Returns a tuple containing the tiling grid dimension (columns, rows).

        The grid is chosen close to square: columns is the rounded square root of the camera
        count and rows is whatever is needed to fit all cameras.
        """
        cols = round(math.sqrt(self._view.count))
        rows = math.ceil(self._view.count / cols)
        return (cols, rows)

    def _create_annotator_data(self):
        # we do not need to create annotator data for the tiled camera sensor
        raise RuntimeError("This function should not be called for the tiled camera sensor.")

    def _process_annotator_output(self, name: str, output: Any) -> tuple[torch.tensor, dict | None]:
        # we do not need to process annotator output for the tiled camera sensor
        raise RuntimeError("This function should not be called for the tiled camera sensor.")

    """
    Internal simulation callbacks.
    """

    def _invalidate_initialize_callback(self, event):
        """Invalidates the scene elements."""
        # call parent
        super()._invalidate_initialize_callback(event)
        # set all existing views to None to invalidate them
        self._view = None
| 286 | + |
| 287 | +class TiledCameraOld(Camera): |
30 | 288 | r"""The tiled rendering based camera sensor for acquiring RGB and depth data.
|
31 | 289 |
|
32 | 290 | This class inherits from the :class:`Camera` class but uses the tiled-rendering API from Replicator to acquire
|
@@ -191,7 +449,7 @@ def _update_buffers_impl(self, env_ids: Sequence[int]):
|
191 | 449 | offset = self._data.output["rgb"].numel() if "rgb" in self.cfg.data_types else 0
|
192 | 450 | for data_type in self.cfg.data_types:
|
193 | 451 | wp.launch(
|
194 |
| - kernel=reshape_tiled_image, |
| 452 | + kernel=reshape_tiled_image_old, |
195 | 453 | dim=(self._view.count, self.cfg.height, self.cfg.width),
|
196 | 454 | inputs=[
|
197 | 455 | tiled_data_buffer,
|
|
0 commit comments