Skip to content

Commit d850e8d

Browse files
committed
Merge remote-tracking branch 'isaaclab-internal/benchmarks' into benchmarks
2 parents 07aea21 + daa0766 commit d850e8d

File tree

5 files changed

+356
-8
lines changed

5 files changed

+356
-8
lines changed

source/extensions/humanoid.tasks/humanoid/tasks/data/h1/h1_with_hand.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
max_depenetration_velocity=1.0,
2222
),
2323
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
24-
enabled_self_collisions=False, solver_position_iteration_count=4, solver_velocity_iteration_count=4
24+
enabled_self_collisions=False, solver_position_iteration_count=16, solver_velocity_iteration_count=1
2525
),
2626
),
2727
init_state=ArticulationCfg.InitialStateCfg(

source/extensions/humanoid.tasks/humanoid/tasks/push_box/push_box_env.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ class PushBoxEnvCfg(DirectRLEnvCfg):
217217
height = height,
218218
offset=TiledCameraCfg.OffsetCfg(pos=(0.0, -0.27, 1.5), rot=(0.0, 0.0, 0.0, -1.0), convention="opengl"),
219219
# offset=TiledCameraCfg.OffsetCfg(pos=(-2.0, 0.0, 0.75), rot=(-0.5, -0.5, 0.5, 0.5), convention="opengl"),
220-
data_types=["rgba"],
220+
data_types=["rgb"],
221221
spawn=sim_utils.PinholeCameraCfg(
222222
focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 20.0)
223223
),
@@ -240,7 +240,7 @@ class PushBoxEnvCfg(DirectRLEnvCfg):
240240
height = height,
241241
offset=TiledCameraCfg.OffsetCfg(pos=(0.0, -0.27, 1.5), rot=(0.0, 0.0, 0.0, -1.0), convention="opengl"),
242242
# offset=TiledCameraCfg.OffsetCfg(pos=(-2.0, 0.0, 0.75), rot=(-0.5, -0.5, 0.5, 0.5), convention="opengl"),
243-
data_types=["rgba"],
243+
data_types=["rgb"],
244244
spawn=sim_utils.PinholeCameraCfg(
245245
focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 20.0)
246246
),

source/extensions/omni.isaac.lab/omni/isaac/lab/sensors/camera/tiled_camera.py

Lines changed: 262 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,21 +12,279 @@
1212
from tensordict import TensorDict
1313
from typing import TYPE_CHECKING, Any
1414

15+
import carb
1516
import omni.usd
1617
import warp as wp
1718
from omni.isaac.core.prims import XFormPrimView
18-
from pxr import UsdGeom
19+
from pxr import Usd, UsdGeom
1920

20-
from omni.isaac.lab.utils.warp.kernels import reshape_tiled_image
21+
from omni.isaac.lab.utils.warp.kernels import reshape_tiled_image, reshape_tiled_image_old
2122

2223
from ..sensor_base import SensorBase
2324
from .camera import Camera
2425

2526
if TYPE_CHECKING:
26-
from .camera_cfg import TiledCameraCfg
27+
from .tiled_camera_cfg import TiledCameraCfg
2728

2829

2930
class TiledCamera(Camera):
31+
r"""The tiled rendering based camera sensor for acquiring RGBA and depth data.
32+
33+
This class inherits from the :class:`Camera` class but uses the tiled-rendering API to acquire
34+
the visual data. Tiled-rendering concatenates the rendered images from multiple cameras into a single image.
35+
This allows for rendering multiple cameras in parallel and is useful for rendering large scenes with multiple
36+
cameras efficiently.
37+
38+
The following sensor types are supported:
39+
40+
- ``"rgb"``: A 3-channel rendered color image.
41+
- ``"rgba"``: A 4-channel rendered color image.
42+
- ``"depth"``: An image containing the distance to camera optical center.
43+
44+
.. versionadded:: v1.0.0
45+
46+
This feature is available starting from Isaac Sim 4.0. Before this version, the tiled rendering APIs
47+
were not available.
48+
49+
"""
50+
51+
cfg: TiledCameraCfg
52+
"""The configuration parameters."""
53+
54+
SUPPORTED_TYPES: set[str] = {"rgb", "rgba", "depth"}
55+
"""The set of sensor types that are supported."""
56+
57+
def __init__(self, cfg: TiledCameraCfg):
58+
"""Initializes the tiled camera sensor.
59+
60+
Args:
61+
cfg: The configuration parameters.
62+
63+
Raises:
64+
RuntimeError: If no camera prim is found at the given path.
65+
ValueError: If the provided data types are not supported by the camera.
66+
"""
67+
super().__init__(cfg)
68+
69+
def __del__(self):
70+
"""Unsubscribes from callbacks and detach from the replicator registry."""
71+
# unsubscribe from callbacks
72+
SensorBase.__del__(self)
73+
# detach from the replicator registry
74+
self._annotator.detach(self.render_product_paths)
75+
76+
def __str__(self) -> str:
77+
"""Returns: A string containing information about the instance."""
78+
# message for class
79+
return (
80+
f"Tiled Camera @ '{self.cfg.prim_path}': \n"
81+
f"\tdata types : {self.data.output.sorted_keys} \n"
82+
f"\tupdate period (s): {self.cfg.update_period}\n"
83+
f"\tshape : {self.image_shape}\n"
84+
f"\tnumber of sensors : {self._view.count}"
85+
)
86+
87+
"""
88+
Operations
89+
"""
90+
91+
def reset(self, env_ids: Sequence[int] | None = None):
92+
if not self._is_initialized:
93+
raise RuntimeError(
94+
"TiledCamera could not be initialized. Please ensure --enable_cameras is used to enable rendering."
95+
)
96+
# reset the timestamps
97+
SensorBase.reset(self, env_ids)
98+
# resolve None
99+
if env_ids is None:
100+
env_ids = slice(None)
101+
# reset the frame count
102+
self._frame[env_ids] = 0
103+
104+
"""
105+
Implementation.
106+
"""
107+
108+
def _initialize_impl(self):
109+
"""Initializes the sensor handles and internal buffers.
110+
111+
This function creates handles and registers the provided data types with the replicator registry to
112+
be able to access the data from the sensor. It also initializes the internal buffers to store the data.
113+
114+
Raises:
115+
RuntimeError: If the number of camera prims in the view does not match the number of environments.
116+
RuntimeError: If replicator was not found.
117+
"""
118+
try:
119+
import omni.replicator.core as rep
120+
except ModuleNotFoundError:
121+
raise RuntimeError(
122+
"Replicator was not found for rendering. Please use --enable_cameras to enable rendering."
123+
)
124+
125+
# Initialize parent class
126+
SensorBase._initialize_impl(self)
127+
# Create a view for the sensor
128+
self._view = XFormPrimView(self.cfg.prim_path, reset_xform_properties=False)
129+
self._view.initialize()
130+
# Check that sizes are correct
131+
if self._view.count != self._num_envs:
132+
raise RuntimeError(
133+
f"Number of camera prims in the view ({self._view.count}) does not match"
134+
f" the number of environments ({self._num_envs})."
135+
)
136+
137+
# Create all env_ids buffer
138+
self._ALL_INDICES = torch.arange(self._view.count, device=self._device, dtype=torch.long)
139+
# Create frame count buffer
140+
self._frame = torch.zeros(self._view.count, device=self._device, dtype=torch.long)
141+
142+
# Obtain current stage
143+
stage = omni.usd.get_context().get_stage()
144+
# Convert all encapsulated prims to Camera
145+
for cam_prim_path in self._view.prim_paths:
146+
# Get camera prim
147+
cam_prim = stage.GetPrimAtPath(cam_prim_path)
148+
# Check if prim is a camera
149+
if not cam_prim.IsA(UsdGeom.Camera):
150+
raise RuntimeError(f"Prim at path '{cam_prim_path}' is not a Camera.")
151+
# Add to list
152+
sensor_prim = UsdGeom.Camera(cam_prim)
153+
self._sensor_prims.append(sensor_prim)
154+
155+
# start the orchestrator (if not already started)
156+
rep.orchestrator._orchestrator._is_started = True
157+
full_resolution = self._tiled_image_shape()
158+
159+
# Set carb settings for tiled rendering
160+
carb_settings = carb.settings.get_settings()
161+
carb_settings.set("/rtx/viewTile/height", self.cfg.height)
162+
carb_settings.set("/rtx/viewTile/width", self.cfg.width)
163+
carb_settings.set("/rtx/viewTile/count", self._view.count)
164+
165+
# Create render product
166+
rp = rep.create.render_product(self._view.prim_paths[0], full_resolution)
167+
168+
# Attach all cameras to render product
169+
rp_prim = stage.GetPrimAtPath(rp.path)
170+
with Usd.EditContext(stage, stage.GetSessionLayer()):
171+
rp_prim.GetRelationship("camera").SetTargets(self._view.prim_paths)
172+
self._render_product_paths = [rp.path]
173+
# Attach the annotator
174+
self._annotators = dict()
175+
if "rgba" in self.cfg.data_types or "rgb" in self.cfg.data_types:
176+
annotator = rep.AnnotatorRegistry.get_annotator("rgb", device=self.device, do_array_copy=False)
177+
self._annotators["rgba"] = annotator
178+
if "depth" in self.cfg.data_types:
179+
annotator = rep.AnnotatorRegistry.get_annotator(
180+
"distance_to_camera", device=self.device, do_array_copy=False
181+
)
182+
self._annotators["depth"] = annotator
183+
for annotator in self._annotators.values():
184+
annotator.attach(self._render_product_paths)
185+
186+
# Create internal buffers
187+
self._create_buffers()
188+
189+
def _update_buffers_impl(self, env_ids: Sequence[int]):
190+
# Increment frame count
191+
self._frame[env_ids] += 1
192+
193+
# Extract the flattened image buffer
194+
for data_type, annotator in self._annotators.items():
195+
tiled_data_buffer = annotator.get_data()
196+
if isinstance(tiled_data_buffer, np.ndarray):
197+
tiled_data_buffer = wp.array(tiled_data_buffer, device=self.device, dtype=wp.uint8)
198+
else:
199+
tiled_data_buffer = tiled_data_buffer.to(device=self.device)
200+
201+
wp.launch(
202+
kernel=reshape_tiled_image,
203+
dim=(self._view.count, self.cfg.height, self.cfg.width),
204+
inputs=[
205+
tiled_data_buffer.flatten(),
206+
wp.from_torch(self._data.output[data_type]), # zero-copy alias
207+
*list(self._data.output[data_type].shape[1:]), # height, width, num_channels
208+
self._tiling_grid_shape()[0], # num_tiles_x
209+
],
210+
device=self.device,
211+
)
212+
213+
if data_type == "rgba":
214+
self._data.output[data_type] /= 255.0
215+
if "rgb" in self.cfg.data_types:
216+
self._data.output["rgb"] = self._data.output["rgba"][..., :3]
217+
218+
"""
219+
Private Helpers
220+
"""
221+
222+
def _check_supported_data_types(self, cfg: TiledCameraCfg):
223+
"""Checks if the data types are supported by the camera."""
224+
if not set(cfg.data_types).issubset(TiledCamera.SUPPORTED_TYPES):
225+
raise ValueError(
226+
f"The TiledCamera class only supports the following types {TiledCamera.SUPPORTED_TYPES} but the"
227+
f" following were provided: {cfg.data_types}"
228+
)
229+
230+
def _create_buffers(self):
231+
"""Create buffers for storing data."""
232+
# create the data object
233+
# -- pose of the cameras
234+
self._data.pos_w = torch.zeros((self._view.count, 3), device=self._device)
235+
self._data.quat_w_world = torch.zeros((self._view.count, 4), device=self._device)
236+
self._update_poses(self._ALL_INDICES)
237+
# -- intrinsic matrix
238+
self._data.intrinsic_matrices = torch.zeros((self._view.count, 3, 3), device=self._device)
239+
self._update_intrinsic_matrices(self._ALL_INDICES)
240+
self._data.image_shape = self.image_shape
241+
# -- output data
242+
data_dict = dict()
243+
if "rgba" in self.cfg.data_types or "rgb" in self.cfg.data_types:
244+
data_dict["rgba"] = torch.zeros(
245+
(self._view.count, self.cfg.height, self.cfg.width, 4), device=self.device
246+
).contiguous()
247+
if "rgb" in self.cfg.data_types:
248+
# RGB is the first 3 channels of RGBA
249+
data_dict["rgb"] = data_dict["rgba"][..., :3]
250+
if "depth" in self.cfg.data_types:
251+
data_dict["depth"] = torch.zeros(
252+
(self._view.count, self.cfg.height, self.cfg.width, 1), device=self.device
253+
).contiguous()
254+
self._data.output = TensorDict(data_dict, batch_size=self._view.count, device=self.device)
255+
256+
def _tiled_image_shape(self) -> tuple[int, int]:
257+
"""Returns a tuple containing the dimension of the tiled image."""
258+
cols, rows = self._tiling_grid_shape()
259+
return (self.cfg.width * cols, self.cfg.height * rows)
260+
261+
def _tiling_grid_shape(self) -> tuple[int, int]:
262+
"""Returns a tuple containing the tiling grid dimension."""
263+
cols = round(math.sqrt(self._view.count))
264+
rows = math.ceil(self._view.count / cols)
265+
return (cols, rows)
266+
267+
def _create_annotator_data(self):
268+
# we do not need to create annotator data for the tiled camera sensor
269+
raise RuntimeError("This function should not be called for the tiled camera sensor.")
270+
271+
def _process_annotator_output(self, name: str, output: Any) -> tuple[torch.tensor, dict | None]:
272+
# we do not need to process annotator output for the tiled camera sensor
273+
raise RuntimeError("This function should not be called for the tiled camera sensor.")
274+
275+
"""
276+
Internal simulation callbacks.
277+
"""
278+
279+
def _invalidate_initialize_callback(self, event):
280+
"""Invalidates the scene elements."""
281+
# call parent
282+
super()._invalidate_initialize_callback(event)
283+
# set all existing views to None to invalidate them
284+
self._view = None
285+
286+
287+
class TiledCameraOld(Camera):
30288
r"""The tiled rendering based camera sensor for acquiring RGB and depth data.
31289
32290
This class inherits from the :class:`Camera` class but uses the tiled-rendering API from Replicator to acquire
@@ -191,7 +449,7 @@ def _update_buffers_impl(self, env_ids: Sequence[int]):
191449
offset = self._data.output["rgb"].numel() if "rgb" in self.cfg.data_types else 0
192450
for data_type in self.cfg.data_types:
193451
wp.launch(
194-
kernel=reshape_tiled_image,
452+
kernel=reshape_tiled_image_old,
195453
dim=(self._view.count, self.cfg.height, self.cfg.width),
196454
inputs=[
197455
tiled_data_buffer,

source/extensions/omni.isaac.lab/omni/isaac/lab/sensors/camera/tiled_camera_cfg.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,49 @@ class OffsetCfg:
6464
height: int = MISSING
6565
"""Height of the image in pixels."""
6666

67+
68+
semantic_filter: str | list[str] = "*:*"
69+
"""A string or a list specifying a semantic filter predicate. Defaults to ``"*:*"``.
70+
71+
If a string, it should be a disjunctive normal form of (semantic type, labels). For examples:
72+
73+
* ``"typeA : labelA & !labelB | labelC , typeB: labelA ; typeC: labelE"``:
74+
All prims with semantic type "typeA" and label "labelA" but not "labelB" or with label "labelC".
75+
Also, all prims with semantic type "typeB" and label "labelA", or with semantic type "typeC" and label "labelE".
76+
* ``"typeA : * ; * : labelA"``: All prims with semantic type "typeA" or with label "labelA"
77+
78+
If a list of strings, each string should be a semantic type. The segmentation for prims with
79+
semantics of the specified types will be retrieved. For example, if the list is ["class"], only
80+
the segmentation for prims with semantics of type "class" will be retrieved.
81+
82+
.. seealso::
83+
84+
For more information on the semantics filter, see the documentation on `Replicator Semantics Schema Editor`_.
85+
86+
.. _Replicator Semantics Schema Editor: https://docs.omniverse.nvidia.com/extensions/latest/ext_replicator/semantics_schema_editor.html#semantics-filtering
87+
"""
88+
89+
# colorize_semantic_segmentation: bool = True
90+
# """Whether to colorize the semantic segmentation images. Defaults to True.
91+
92+
# If True, semantic segmentation is converted to an image where semantic IDs are mapped to colors
93+
# and returned as a ``uint8`` 4-channel array. If False, the output is returned as a ``int32`` array.
94+
# """
95+
96+
# colorize_instance_id_segmentation: bool = True
97+
# """Whether to colorize the instance ID segmentation images. Defaults to True.
98+
99+
# If True, instance id segmentation is converted to an image where instance IDs are mapped to colors
100+
# and returned as a ``uint8`` 4-channel array. If False, the output is returned as a ``int32`` array.
101+
# """
102+
103+
# colorize_instance_segmentation: bool = True
104+
# """Whether to colorize the instance segmentation images. Defaults to True.
105+
106+
# If True, instance segmentation is converted to an image where instance IDs are mapped to colors
107+
# and returned as a ``uint8`` 4-channel array. If False, the output is returned as a ``int32`` array.
108+
# """
109+
67110
return_latest_camera_pose: bool = False
68111
"""Whether to return the latest camera pose when fetching the camera's data. Defaults to False.
69112

0 commit comments

Comments
 (0)