diff --git a/scripts/reinforcement_learning/rsl_rl/cli_args.py b/scripts/reinforcement_learning/rsl_rl/cli_args.py index df7e5f0ff8b..c176f774515 100644 --- a/scripts/reinforcement_learning/rsl_rl/cli_args.py +++ b/scripts/reinforcement_learning/rsl_rl/cli_args.py @@ -10,7 +10,7 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg + from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg def add_rsl_rl_args(parser: argparse.ArgumentParser): @@ -39,7 +39,7 @@ def add_rsl_rl_args(parser: argparse.ArgumentParser): ) -def parse_rsl_rl_cfg(task_name: str, args_cli: argparse.Namespace) -> RslRlOnPolicyRunnerCfg: +def parse_rsl_rl_cfg(task_name: str, args_cli: argparse.Namespace) -> RslRlBaseRunnerCfg: """Parse configuration for RSL-RL agent based on inputs. Args: @@ -52,12 +52,12 @@ def parse_rsl_rl_cfg(task_name: str, args_cli: argparse.Namespace) -> RslRlOnPol from isaaclab_tasks.utils.parse_cfg import load_cfg_from_registry # load the default configuration - rslrl_cfg: RslRlOnPolicyRunnerCfg = load_cfg_from_registry(task_name, "rsl_rl_cfg_entry_point") + rslrl_cfg: RslRlBaseRunnerCfg = load_cfg_from_registry(task_name, "rsl_rl_cfg_entry_point") rslrl_cfg = update_rsl_rl_cfg(rslrl_cfg, args_cli) return rslrl_cfg -def update_rsl_rl_cfg(agent_cfg: RslRlOnPolicyRunnerCfg, args_cli: argparse.Namespace): +def update_rsl_rl_cfg(agent_cfg: RslRlBaseRunnerCfg, args_cli: argparse.Namespace): """Update configuration for RSL-RL agent based on inputs. Args: diff --git a/scripts/reinforcement_learning/rsl_rl/play.py b/scripts/reinforcement_learning/rsl_rl/play.py index bd186f37998..6723f1418b3 100644 --- a/scripts/reinforcement_learning/rsl_rl/play.py +++ b/scripts/reinforcement_learning/rsl_rl/play.py @@ -55,7 +55,7 @@ import time import torch -from rsl_rl.runners import OnPolicyRunner +from rsl_rl.runners import DistillationRunner, OnPolicyRunner from isaaclab.envs import ( DirectMARLEnv, @@ -68,7 +68,7 @@ from isaaclab.utils.dict import print_dict from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint -from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx +from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx import isaaclab_tasks # noqa: F401 from isaaclab_tasks.utils import get_checkpoint_path @@ -78,14 +78,14 @@ @hydra_task_config(args_cli.task, "rsl_rl_cfg_entry_point") -def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlOnPolicyRunnerCfg): +def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg): """Play with RSL-RL agent.""" # grab task name for checkpoint path task_name = args_cli.task.split(":")[-1] train_task_name = task_name.replace("-Play", "") # override configurations with non-hydra CLI arguments - agent_cfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) + agent_cfg: RslRlBaseRunnerCfg = cli_args.parse_rsl_rl_cfg(task_name, args_cli) env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs # set the environment seed @@ -133,32 +133,43 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen print(f"[INFO]: Loading model checkpoint from: {resume_path}") # load previously trained model - ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) - ppo_runner.load(resume_path) + if 
agent_cfg.class_name == "OnPolicyRunner": + runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) + elif agent_cfg.class_name == "DistillationRunner": + runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) + else: + raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}") + runner.load(resume_path) # obtain the trained policy for inference - policy = ppo_runner.get_inference_policy(device=env.unwrapped.device) + policy = runner.get_inference_policy(device=env.unwrapped.device) # extract the neural network module # we do this in a try-except to maintain backwards compatibility. try: # version 2.3 onwards - policy_nn = ppo_runner.alg.policy + policy_nn = runner.alg.policy except AttributeError: # version 2.2 and below - policy_nn = ppo_runner.alg.actor_critic + policy_nn = runner.alg.actor_critic + + # extract the normalizer + if hasattr(policy_nn, "actor_obs_normalizer"): + normalizer = policy_nn.actor_obs_normalizer + elif hasattr(policy_nn, "student_obs_normalizer"): + normalizer = policy_nn.student_obs_normalizer + else: + normalizer = None # export policy to onnx/jit export_model_dir = os.path.join(os.path.dirname(resume_path), "exported") - export_policy_as_jit(policy_nn, ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.pt") - export_policy_as_onnx( - policy_nn, normalizer=ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.onnx" - ) + export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt") + export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx") dt = env.unwrapped.step_dt # reset environment - obs, _ = env.get_observations() + obs = env.get_observations() timestep = 0 # simulate environment while simulation_app.is_running(): diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py index e534079d052..4df5c903a95 100644 --- a/scripts/reinforcement_learning/rsl_rl/train.py +++ b/scripts/reinforcement_learning/rsl_rl/train.py @@ -15,7 +15,6 @@ # local imports import cli_args # isort: skip - # add argparse arguments parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.") parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") @@ -53,9 +52,9 @@ from packaging import version # for distributed training, check minimum supported rsl-rl version -RSL_RL_VERSION = "2.3.1" +RSL_RL_VERSION = "3.0.0" installed_version = metadata.version("rsl-rl-lib") -if args_cli.distributed and version.parse(installed_version) < version.parse(RSL_RL_VERSION): +if version.parse(installed_version) < version.parse(RSL_RL_VERSION): if platform.system() == "Windows": cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"] else: @@ -74,7 +73,7 @@ import torch from datetime import datetime -from rsl_rl.runners import OnPolicyRunner +from rsl_rl.runners import DistillationRunner, OnPolicyRunner from isaaclab.envs import ( DirectMARLEnv, @@ -86,7 +85,7 @@ from isaaclab.utils.dict import print_dict from isaaclab.utils.io import dump_pickle, dump_yaml -from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper +from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper import isaaclab_tasks # noqa: F401 from isaaclab_tasks.utils import get_checkpoint_path @@ -101,7 +100,7 @@ @hydra_task_config(args_cli.task, "rsl_rl_cfg_entry_point") 
-def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlOnPolicyRunnerCfg): +def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg): """Train with RSL-RL agent.""" # override configurations with non-hydra CLI arguments agent_cfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) @@ -164,7 +163,12 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) # create runner from rsl-rl - runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) + if agent_cfg.class_name == "OnPolicyRunner": + runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) + elif agent_cfg.class_name == "DistillationRunner": + runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) + else: + raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}") # write git state to logs runner.add_git_repo_to_log(__file__) # load the checkpoint diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/distillation_cfg.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/distillation_cfg.py index 3571511c366..9be53e66295 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/distillation_cfg.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/distillation_cfg.py @@ -28,6 +28,12 @@ class RslRlDistillationStudentTeacherCfg: noise_std_type: Literal["scalar", "log"] = "scalar" """The type of noise standard deviation for the policy. Default is scalar.""" + student_obs_normalization: bool = False + """Whether to normalize the observation for the student network. Default is False.""" + + teacher_obs_normalization: bool = False + """Whether to normalize the observation for the teacher network. Default is False.""" + student_hidden_dims: list[int] = MISSING """The hidden dimensions of the student network.""" @@ -81,3 +87,9 @@ class RslRlDistillationAlgorithmCfg: max_grad_norm: None | float = None """The maximum norm the gradient is clipped to.""" + + optimizer: Literal["adam", "adamw", "sgd", "rmsprop"] = "adam" + """The optimizer to use for the student policy.""" + + loss_type: Literal["mse", "huber"] = "mse" + """The loss type to use for the student policy.""" diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py index 81a00b1e7a6..b3dc415cae4 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py @@ -32,6 +32,12 @@ class RslRlPpoActorCriticCfg: noise_std_type: Literal["scalar", "log"] = "scalar" """The type of noise standard deviation for the policy. Default is scalar.""" + actor_obs_normalization: bool = False + """Whether to normalize the observation for the actor network. Default is False.""" + + critic_obs_normalization: bool = False + """Whether to normalize the observation for the critic network. Default is False.""" + actor_hidden_dims: list[int] = MISSING """The hidden dimensions of the actor network.""" @@ -114,14 +120,12 @@ class RslRlPpoAlgorithmCfg: Otherwise, the advantage is normalized over the entire collected trajectories. """ + rnd_cfg: RslRlRndCfg | None = None + """The RND configuration. Default is None, in which case RND is not used.""" + symmetry_cfg: RslRlSymmetryCfg | None = None """The symmetry configuration. 
Default is None, in which case symmetry is not used.""" - rnd_cfg: RslRlRndCfg | None = None - """The configuration for the Random Network Distillation (RND) module. Default is None, - in which case RND is not used. - """ - ######################### # Runner configurations # @@ -129,8 +133,8 @@ class RslRlPpoAlgorithmCfg: @configclass -class RslRlOnPolicyRunnerCfg: - """Configuration of the runner for on-policy algorithms.""" +class RslRlBaseRunnerCfg: + """Base configuration of the runner.""" seed: int = 42 """The seed for the experiment. Default is 42.""" @@ -144,17 +148,36 @@ class RslRlOnPolicyRunnerCfg: max_iterations: int = MISSING """The maximum number of iterations.""" - empirical_normalization: bool = MISSING - """Whether to use empirical normalization.""" + empirical_normalization: bool | None = None + """This parameter is deprecated and will be removed in the future. - policy: RslRlPpoActorCriticCfg | RslRlDistillationStudentTeacherCfg = MISSING - """The policy configuration.""" + Use `actor_obs_normalization` and `critic_obs_normalization` instead. + """ - algorithm: RslRlPpoAlgorithmCfg | RslRlDistillationAlgorithmCfg = MISSING - """The algorithm configuration.""" + obs_groups: dict[str, list[str]] = MISSING + """A mapping from observation groups to observation sets. + + The keys of the dictionary are predefined observation sets used by the underlying algorithm + and values are lists of observation groups provided by the environment. + + For instance, if the environment provides a dictionary of observations with groups "policy", "images", + and "privileged", these can be mapped to algorithmic observation sets as follows: + + .. code-block:: python + + obs_groups = { + "policy": ["policy", "images"], + "critic": ["policy", "privileged"], + } + + This way, the policy will receive the "policy" and "images" observations, and the critic will + receive the "policy" and "privileged" observations. + + For more details, please check ``vec_env.py`` in the rsl_rl library. + """ clip_actions: float | None = None - """The clipping value for actions. If ``None``, then no clipping is done. + """The clipping value for actions. If None, then no clipping is done. Defaults to None. .. note:: This clipping is performed inside the :class:`RslRlVecEnvWrapper` wrapper. @@ -184,7 +207,10 @@ class RslRlOnPolicyRunnerCfg: """The wandb project name. Default is "isaaclab".""" resume: bool = False - """Whether to resume. Default is False.""" + """Whether to resume a previous training. Default is False. + + This flag will be ignored for distillation. + """ load_run: str = ".*" """The run directory to load. Default is ".*" (all). @@ -197,3 +223,31 @@ class RslRlOnPolicyRunnerCfg: If regex expression, the latest (alphabetical order) matching file will be loaded. """ + + +@configclass +class RslRlOnPolicyRunnerCfg(RslRlBaseRunnerCfg): + """Configuration of the runner for on-policy algorithms.""" + + class_name: str = "OnPolicyRunner" + """The runner class name. Default is OnPolicyRunner.""" + + policy: RslRlPpoActorCriticCfg = MISSING + """The policy configuration.""" + + algorithm: RslRlPpoAlgorithmCfg = MISSING + """The algorithm configuration.""" + + +@configclass +class RslRlDistillationRunnerCfg(RslRlBaseRunnerCfg): + """Configuration of the runner for distillation algorithms.""" + + class_name: str = "DistillationRunner" + """The runner class name. 
Default is DistillationRunner.""" + + policy: RslRlDistillationStudentTeacherCfg = MISSING + """The policy configuration.""" + + algorithm: RslRlDistillationAlgorithmCfg = MISSING + """The algorithm configuration.""" diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/symmetry_cfg.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/symmetry_cfg.py index bf0ecc9a829..0cd476e848d 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/symmetry_cfg.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/symmetry_cfg.py @@ -39,13 +39,11 @@ class RslRlSymmetryCfg: Args: env (VecEnv): The environment object. This is used to access the environment's properties. - obs (torch.Tensor | None): The observation tensor. If None, the observation is not used. + obs (tensordict.TensorDict | None): The observation tensor dictionary. If None, the observation is not used. action (torch.Tensor | None): The action tensor. If None, the action is not used. - obs_type (str): The name of the observation type. Defaults to "policy". - This is useful when handling augmentation for different observation groups. Returns: - A tuple containing the augmented observation and action tensors. The tensors can be None, + A tuple containing the augmented observation dictionary and action tensors. The tensors can be None, if their respective inputs are None. """ diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py index d909bf2d912..73ceae04693 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py @@ -5,6 +5,7 @@ import gymnasium as gym import torch +from tensordict import TensorDict from rsl_rl.env import VecEnv @@ -12,16 +13,9 @@ class RslRlVecEnvWrapper(VecEnv): - """Wraps around Isaac Lab environment for RSL-RL library - - To use asymmetric actor-critic, the environment instance must have the attributes :attr:`num_privileged_obs` (int). - This is used by the learning agent to allocate buffers in the trajectory memory. Additionally, the returned - observations should have the key "critic" which corresponds to the privileged observations. Since this is - optional for some environments, the wrapper checks if these attributes exist. If they don't then the wrapper - defaults to zero as number of privileged observations. + """Wraps around Isaac Lab environment for the RSL-RL library .. caution:: - This class must be the last wrapper in the wrapper chain. This is because the wrapper does not follow the :class:`gym.Wrapper` interface. Any subsequent wrappers will need to be modified to work with this wrapper. @@ -43,12 +37,14 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N Raises: ValueError: When the environment is not an instance of :class:`ManagerBasedRLEnv` or :class:`DirectRLEnv`. """ + # check that input is valid if not isinstance(env.unwrapped, ManagerBasedRLEnv) and not isinstance(env.unwrapped, DirectRLEnv): raise ValueError( "The environment must be inherited from ManagerBasedRLEnv or DirectRLEnv. 
Environment type:" f" {type(env)}" ) + # initialize the wrapper self.env = env self.clip_actions = clip_actions @@ -63,20 +59,6 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N self.num_actions = self.unwrapped.action_manager.total_action_dim else: self.num_actions = gym.spaces.flatdim(self.unwrapped.single_action_space) - if hasattr(self.unwrapped, "observation_manager"): - self.num_obs = self.unwrapped.observation_manager.group_obs_dim["policy"][0] - else: - self.num_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["policy"]) - # -- privileged observations - if ( - hasattr(self.unwrapped, "observation_manager") - and "critic" in self.unwrapped.observation_manager.group_obs_dim - ): - self.num_privileged_obs = self.unwrapped.observation_manager.group_obs_dim["critic"][0] - elif hasattr(self.unwrapped, "num_states") and "critic" in self.unwrapped.single_observation_space: - self.num_privileged_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["critic"]) - else: - self.num_privileged_obs = 0 # modify the action space to the clip range self._modify_action_space() @@ -133,14 +115,6 @@ def unwrapped(self) -> ManagerBasedRLEnv | DirectRLEnv: Properties """ - def get_observations(self) -> tuple[torch.Tensor, dict]: - """Returns the current observations of the environment.""" - if hasattr(self.unwrapped, "observation_manager"): - obs_dict = self.unwrapped.observation_manager.compute() - else: - obs_dict = self.unwrapped._get_observations() - return obs_dict["policy"], {"observations": obs_dict} - @property def episode_length_buf(self) -> torch.Tensor: """The episode length buffer.""" @@ -162,13 +136,20 @@ def episode_length_buf(self, value: torch.Tensor): def seed(self, seed: int = -1) -> int: # noqa: D102 return self.unwrapped.seed(seed) - def reset(self) -> tuple[torch.Tensor, dict]: # noqa: D102 + def reset(self) -> tuple[TensorDict, dict]: # noqa: D102 # reset the environment - obs_dict, _ = self.env.reset() - # return observations - return obs_dict["policy"], {"observations": obs_dict} + obs_dict, extras = self.env.reset() + return TensorDict(obs_dict, batch_size=[self.num_envs]), extras + + def get_observations(self) -> TensorDict: + """Returns the current observations of the environment.""" + if hasattr(self.unwrapped, "observation_manager"): + obs_dict = self.unwrapped.observation_manager.compute() + else: + obs_dict = self.unwrapped._get_observations() + return TensorDict(obs_dict, batch_size=[self.num_envs]) - def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]: + def step(self, actions: torch.Tensor) -> tuple[TensorDict, torch.Tensor, torch.Tensor, dict]: # clip actions if self.clip_actions is not None: actions = torch.clamp(actions, -self.clip_actions, self.clip_actions) @@ -176,16 +157,12 @@ def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch obs_dict, rew, terminated, truncated, extras = self.env.step(actions) # compute dones for compatibility with RSL-RL dones = (terminated | truncated).to(dtype=torch.long) - # move extra observations to the extras dict - obs = obs_dict["policy"] - extras["observations"] = obs_dict # move time out information to the extras dict # this is only needed for infinite horizon tasks if not self.unwrapped.cfg.is_finite_horizon: extras["time_outs"] = truncated - # return the step information - return obs, rew, dones, extras + return TensorDict(obs_dict, batch_size=[self.num_envs]), rew, dones, extras def close(self): # 
noqa: D102 return self.env.close() @@ -200,7 +177,8 @@ def _modify_action_space(self): return # modify the action space to the clip range - # note: this is only possible for the box action space. we need to change it in the future for other action spaces. + # note: this is only possible for the box action space. we need to change it in the future for other + # action spaces. self.env.unwrapped.single_action_space = gym.spaces.Box( low=-self.clip_actions, high=self.clip_actions, shape=(self.num_actions,) ) diff --git a/source/isaaclab_rl/setup.py b/source/isaaclab_rl/setup.py index 706f2b529cd..3d4c3c7378a 100644 --- a/source/isaaclab_rl/setup.py +++ b/source/isaaclab_rl/setup.py @@ -45,7 +45,7 @@ "rl-games @ git+https://github.com/isaac-sim/rl_games.git@python3.11", "gym", ], # rl-games still needs gym :( - "rsl-rl": ["rsl-rl-lib==2.3.3"], + "rsl-rl": ["rsl-rl-lib==3.0.0"], } # Add the names with hyphens as aliases for convenience EXTRAS_REQUIRE["rl_games"] = EXTRAS_REQUIRE["rl-games"] diff --git a/source/isaaclab_rl/test/test_rsl_rl_wrapper.py b/source/isaaclab_rl/test/test_rsl_rl_wrapper.py index a88d4864fb2..4eaf921be85 100644 --- a/source/isaaclab_rl/test/test_rsl_rl_wrapper.py +++ b/source/isaaclab_rl/test/test_rsl_rl_wrapper.py @@ -16,6 +16,7 @@ import gymnasium as gym import torch +from tensordict import TensorDict import carb import omni.usd @@ -161,6 +162,8 @@ def _check_valid_tensor(data: torch.Tensor | dict) -> bool: """ if isinstance(data, torch.Tensor): return not torch.any(torch.isnan(data)) + elif isinstance(data, TensorDict): + return not data.isnan().any() elif isinstance(data, dict): valid_tensor = True for value in data.values(): diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/allegro_hand/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/allegro_hand/agents/rsl_rl_ppo_cfg.py index 6dd5f3c99f2..8da27d1a7e0 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/allegro_hand/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/allegro_hand/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class AllegroHandPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 10000 save_interval = 250 experiment_name = "allegro_hand" - empirical_normalization = True policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=True, + critic_obs_normalization=True, actor_hidden_dims=[1024, 512, 256, 128], critic_hidden_dims=[1024, 512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/rsl_rl_ppo_cfg.py index 38b42ea08cb..5ea9520fec2 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class AntPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1000 save_interval = 50 experiment_name = "ant_direct" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[400, 200, 100], critic_hidden_dims=[400, 200, 100], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/agents/rsl_rl_ppo_cfg.py index 5c11cde53d2..efdf7d4f991 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/agents/rsl_rl_ppo_cfg.py +++ 
b/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class AnymalCFlatPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 500 save_interval = 50 experiment_name = "anymal_c_flat_direct" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[128, 128, 128], critic_hidden_dims=[128, 128, 128], activation="elu", @@ -43,9 +44,10 @@ class AnymalCRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "anymal_c_rough_direct" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/agents/rsl_rl_ppo_cfg.py index 81f77fcbd7a..1cadf22d48c 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class CartpolePPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 150 save_interval = 50 experiment_name = "cartpole_direct" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[32, 32], critic_hidden_dims=[32, 32], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_cabinet/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_cabinet/agents/rsl_rl_ppo_cfg.py index 797777f9005..74788e7b220 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_cabinet/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_cabinet/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class FrankaCabinetPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "franka_cabinet_direct" - empirical_normalization = True policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=True, + critic_obs_normalization=True, actor_hidden_dims=[256, 128, 64], critic_hidden_dims=[256, 128, 64], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid/agents/rsl_rl_ppo_cfg.py index ebbbdb6990c..02962922509 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class HumanoidPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1000 save_interval = 50 experiment_name = "humanoid_direct" - empirical_normalization = True policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=True, + critic_obs_normalization=True, actor_hidden_dims=[400, 200, 100], critic_hidden_dims=[400, 200, 100], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/quadcopter/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/quadcopter/agents/rsl_rl_ppo_cfg.py index dae0dee0bf5..86b2c550838 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/quadcopter/agents/rsl_rl_ppo_cfg.py +++ 
b/source/isaaclab_tasks/isaaclab_tasks/direct/quadcopter/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class QuadcopterPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 200 save_interval = 50 experiment_name = "quadcopter_direct" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[64, 64], critic_hidden_dims=[64, 64], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rsl_rl_ppo_cfg.py index 524a799bae3..665c997e635 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class ShadowHandPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 10000 save_interval = 250 experiment_name = "shadow_hand" - empirical_normalization = True policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=True, + critic_obs_normalization=True, actor_hidden_dims=[512, 512, 256, 128], critic_hidden_dims=[512, 512, 256, 128], activation="elu", @@ -43,9 +44,10 @@ class ShadowHandAsymFFPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 10000 save_interval = 250 experiment_name = "shadow_hand_openai_ff" - empirical_normalization = True policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=True, + critic_obs_normalization=True, actor_hidden_dims=[400, 400, 200, 100], critic_hidden_dims=[512, 512, 256, 128], activation="elu", @@ -72,9 +74,10 @@ class ShadowHandVisionFFPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 50000 save_interval = 250 experiment_name = "shadow_hand_vision" - empirical_normalization = True policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=True, + critic_obs_normalization=True, actor_hidden_dims=[1024, 512, 512, 256, 128], critic_hidden_dims=[1024, 512, 512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/ant/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/ant/agents/rsl_rl_ppo_cfg.py index 7d729795f16..5257b050868 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/ant/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/ant/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class AntPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1000 save_interval = 50 experiment_name = "ant" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[400, 200, 100], critic_hidden_dims=[400, 200, 100], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/agents/rsl_rl_ppo_cfg.py index 96064394507..4ed257870de 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class CartpolePPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 150 save_interval = 50 experiment_name = "cartpole" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + 
actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[32, 32], critic_hidden_dims=[32, 32], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py index ae44b8085a1..663012f94f0 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class HumanoidPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1000 save_interval = 50 experiment_name = "humanoid" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[400, 200, 100], critic_hidden_dims=[400, 200, 100], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/digit/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/digit/agents/rsl_rl_ppo_cfg.py index cb898b1e89c..942a5230f1d 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/digit/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/digit/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class DigitLocoManipPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 2000 save_interval = 50 experiment_name = "digit_loco_manip" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[256, 128, 128], critic_hidden_dims=[256, 128, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/a1/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/a1/agents/rsl_rl_ppo_cfg.py index 99c53ce9d7a..db162f1228f 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/a1/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/a1/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class UnitreeA1RoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "unitree_a1_rough" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/rsl_rl_ppo_cfg.py index b3b2eaba3e5..2cc6dfeffa4 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class AnymalBRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "anymal_b_rough" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + 
actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rsl_rl_ppo_cfg.py index effbde9d9f9..466e847f43a 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class AnymalCRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "anymal_c_rough" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/rsl_rl_ppo_cfg.py index baacb1e2345..625585deaf6 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class AnymalDRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "anymal_d_rough" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/cassie/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/cassie/agents/rsl_rl_ppo_cfg.py index 9c57f001af1..719f8a24105 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/cassie/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/cassie/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class CassieRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "cassie_rough" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/agents/rsl_rl_ppo_cfg.py index ab23e2c7b71..00be11a490f 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class DigitRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 3000 save_interval = 50 experiment_name = "digit_rough" - empirical_normalization = False policy = 
RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1/agents/rsl_rl_ppo_cfg.py index 39e93c7dd9e..94649016538 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class G1RoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 3000 save_interval = 50 experiment_name = "g1_rough" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go1/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go1/agents/rsl_rl_ppo_cfg.py index 47301907c39..5be515ccc0d 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go1/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go1/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class UnitreeGo1RoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "unitree_go1_rough" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go2/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go2/agents/rsl_rl_ppo_cfg.py index caeafe6bc4a..e0c6afab9ea 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go2/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go2/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class UnitreeGo2RoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "unitree_go2_rough" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/h1/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/h1/agents/rsl_rl_ppo_cfg.py index 39d80f892f2..1163ac744c4 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/h1/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/h1/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class H1RoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 3000 save_interval = 50 experiment_name = "h1_rough" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( 
init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/spot/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/spot/agents/rsl_rl_ppo_cfg.py index 155864175c2..951fb421cfc 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/spot/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/spot/agents/rsl_rl_ppo_cfg.py @@ -14,10 +14,11 @@ class SpotFlatPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 20000 save_interval = 50 experiment_name = "spot_flat" - empirical_normalization = False store_code_state = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/franka/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/franka/agents/rsl_rl_ppo_cfg.py index 99a4730f835..ee642fb07aa 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/franka/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/franka/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class CabinetPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 400 save_interval = 50 experiment_name = "franka_open_drawer" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[256, 128, 64], critic_hidden_dims=[256, 128, 64], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/inhand/config/allegro_hand/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/inhand/config/allegro_hand/agents/rsl_rl_ppo_cfg.py index c3471f19203..4cbe6266f24 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/inhand/config/allegro_hand/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/inhand/config/allegro_hand/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class AllegroCubePPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 5000 save_interval = 50 experiment_name = "allegro_cube" - empirical_normalization = True policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=True, + critic_obs_normalization=True, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/franka/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/franka/agents/rsl_rl_ppo_cfg.py index 3d519e926b4..067425a74d4 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/franka/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/franka/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class LiftCubePPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "franka_lift" - 
empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[256, 128, 64], critic_hidden_dims=[256, 128, 64], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/agents/rsl_rl_ppo_cfg.py index 1b51d812d96..24bea7c5ac1 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/agents/rsl_rl_ppo_cfg.py @@ -15,10 +15,10 @@ class FrankaReachPPORunnerCfg(RslRlOnPolicyRunnerCfg): save_interval = 50 experiment_name = "franka_reach" run_name = "" - resume = False - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[64, 64], critic_hidden_dims=[64, 64], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/ur_10/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/ur_10/agents/rsl_rl_ppo_cfg.py index 287b4ec95f8..1b55830a64e 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/ur_10/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/ur_10/agents/rsl_rl_ppo_cfg.py @@ -15,10 +15,10 @@ class UR10ReachPPORunnerCfg(RslRlOnPolicyRunnerCfg): save_interval = 50 experiment_name = "reach_ur10" run_name = "" - resume = False - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[64, 64], critic_hidden_dims=[64, 64], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/navigation/config/anymal_c/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/navigation/config/anymal_c/agents/rsl_rl_ppo_cfg.py index 1ea1a61dba0..4b23def89b2 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/navigation/config/anymal_c/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/navigation/config/anymal_c/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class NavigationEnvPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "anymal_c_navigation" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=0.5, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[128, 128], critic_hidden_dims=[128, 128], activation="elu", diff --git a/tools/template/templates/agents/rsl_rl_ppo_cfg b/tools/template/templates/agents/rsl_rl_ppo_cfg index eaeaf78bfc0..85970dfc2ce 100644 --- a/tools/template/templates/agents/rsl_rl_ppo_cfg +++ b/tools/template/templates/agents/rsl_rl_ppo_cfg @@ -14,9 +14,10 @@ class PPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 150 save_interval = 50 experiment_name = "cartpole_direct" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[32, 32], critic_hidden_dims=[32, 32], activation="elu",
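
Usage note: a minimal sketch of a PPO runner configuration under the updated API, mirroring the task configurations touched above. Per-network normalization replaces the removed runner-level empirical_normalization flag, and obs_groups maps the algorithm's observation sets to the environment's observation groups. The class name ExamplePPORunnerCfg and the group names ("policy", "privileged") are illustrative only, and the required algorithm block (RslRlPpoAlgorithmCfg) is omitted for brevity.

    from isaaclab.utils import configclass

    from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg


    @configclass
    class ExamplePPORunnerCfg(RslRlOnPolicyRunnerCfg):
        # class_name defaults to "OnPolicyRunner", so the training script instantiates OnPolicyRunner.
        num_steps_per_env = 24
        max_iterations = 1500
        save_interval = 50
        experiment_name = "example_task"
        # Map algorithm observation sets to the environment's observation groups (names are task-specific).
        obs_groups = {"policy": ["policy"], "critic": ["policy", "privileged"]}
        policy = RslRlPpoActorCriticCfg(
            init_noise_std=1.0,
            # These flags replace the deprecated runner-level empirical_normalization setting.
            actor_obs_normalization=False,
            critic_obs_normalization=True,
            actor_hidden_dims=[512, 256, 128],
            critic_hidden_dims=[512, 256, 128],
            activation="elu",
        )
        # algorithm = RslRlPpoAlgorithmCfg(...)  # required in a complete config; omitted in this sketch.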
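
train.py and play.py now branch on agent_cfg.class_name to choose the runner. The same dispatch, factored into a helper for custom scripts; the helper name make_runner is ours, and only the two runner classes handled in this patch are supported.

    from rsl_rl.runners import DistillationRunner, OnPolicyRunner


    def make_runner(env, agent_cfg, log_dir=None):
        """Instantiate the RSL-RL runner selected by ``agent_cfg.class_name``."""
        if agent_cfg.class_name == "OnPolicyRunner":
            return OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
        elif agent_cfg.class_name == "DistillationRunner":
            return DistillationRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
        else:
            raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")

A call such as runner = make_runner(env, agent_cfg, log_dir=log_dir) can then replace the duplicated branch in downstream scripts.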
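
RslRlVecEnvWrapper now returns TensorDict observations keyed by the environment's observation groups instead of a (policy_obs, extras) tuple, so custom evaluation loops need a small adjustment. A sketch under stated assumptions: AppLauncher and parse_env_cfg are used as in the standard Isaac Lab scripts, and "Isaac-Cartpole-v0" stands in for whatever registered task is actually used.

    # Launch Isaac Sim first (required before importing any isaaclab environment modules).
    from isaaclab.app import AppLauncher

    simulation_app = AppLauncher(headless=True).app

    import gymnasium as gym
    import torch
    from tensordict import TensorDict

    import isaaclab_tasks  # noqa: F401  (registers the tasks with gymnasium)
    from isaaclab_tasks.utils.parse_cfg import parse_env_cfg

    from isaaclab_rl.rsl_rl import RslRlVecEnvWrapper

    # "Isaac-Cartpole-v0" is only an example task id.
    env_cfg = parse_env_cfg("Isaac-Cartpole-v0", num_envs=16)
    env = RslRlVecEnvWrapper(gym.make("Isaac-Cartpole-v0", cfg=env_cfg), clip_actions=100.0)

    obs, extras = env.reset()                    # obs is a TensorDict batched over num_envs
    assert isinstance(obs, TensorDict)
    policy_obs = obs["policy"]                   # individual groups are plain tensors

    actions = torch.zeros(env.num_envs, env.num_actions, device=policy_obs.device)
    obs, rew, dones, extras = env.step(actions)  # again a TensorDict; extras no longer carries "observations"

    env.close()
    simulation_app.close()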