From c2fbc67b88febdb1391b8a11013904fc6ddf6499 Mon Sep 17 00:00:00 2001 From: Mayank Mittal Date: Wed, 30 Jul 2025 23:32:32 +0200 Subject: [PATCH 1/7] adds agent arg to the scripts for rl --- scripts/reinforcement_learning/rl_games/play.py | 5 ++++- scripts/reinforcement_learning/rl_games/train.py | 5 ++++- scripts/reinforcement_learning/rsl_rl/play.py | 5 ++++- scripts/reinforcement_learning/rsl_rl/train.py | 5 ++++- scripts/reinforcement_learning/sb3/play.py | 5 ++++- scripts/reinforcement_learning/sb3/train.py | 5 ++++- 6 files changed, 24 insertions(+), 6 deletions(-) diff --git a/scripts/reinforcement_learning/rl_games/play.py b/scripts/reinforcement_learning/rl_games/play.py index d7e4164ea85..c107cc34e19 100644 --- a/scripts/reinforcement_learning/rl_games/play.py +++ b/scripts/reinforcement_learning/rl_games/play.py @@ -21,6 +21,9 @@ ) parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") parser.add_argument("--task", type=str, default=None, help="Name of the task.") +parser.add_argument( + "--agent", type=str, default="rl_games_cfg_entry_point", help="Name of the RL agent configuration entry point." +) parser.add_argument("--checkpoint", type=str, default=None, help="Path to model checkpoint.") parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") parser.add_argument( @@ -82,7 +85,7 @@ # PLACEHOLDER: Extension template (do not remove this comment) -@hydra_task_config(args_cli.task, "rl_games_cfg_entry_point") +@hydra_task_config(args_cli.task, args_cli.agent) def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: dict): """Play with RL-Games agent.""" task_name = args_cli.task.split(":")[-1] diff --git a/scripts/reinforcement_learning/rl_games/train.py b/scripts/reinforcement_learning/rl_games/train.py index eb350382979..ba874000e33 100644 --- a/scripts/reinforcement_learning/rl_games/train.py +++ b/scripts/reinforcement_learning/rl_games/train.py @@ -20,6 +20,9 @@ parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") parser.add_argument("--task", type=str, default=None, help="Name of the task.") +parser.add_argument( + "--agent", type=str, default="rl_games_cfg_entry_point", help="Name of the RL agent configuration entry point." +) parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") parser.add_argument( "--distributed", action="store_true", default=False, help="Run training with multiple GPUs or nodes." 
@@ -84,7 +87,7 @@ # PLACEHOLDER: Extension template (do not remove this comment) -@hydra_task_config(args_cli.task, "rl_games_cfg_entry_point") +@hydra_task_config(args_cli.task, args_cli.agent) def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: dict): """Train with RL-Games agent.""" # override configurations with non-hydra CLI arguments diff --git a/scripts/reinforcement_learning/rsl_rl/play.py b/scripts/reinforcement_learning/rsl_rl/play.py index f3c69f6b267..d81d7d520a8 100644 --- a/scripts/reinforcement_learning/rsl_rl/play.py +++ b/scripts/reinforcement_learning/rsl_rl/play.py @@ -24,6 +24,9 @@ ) parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") parser.add_argument("--task", type=str, default=None, help="Name of the task.") +parser.add_argument( + "--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point." +) parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") parser.add_argument( "--use_pretrained_checkpoint", @@ -77,7 +80,7 @@ # PLACEHOLDER: Extension template (do not remove this comment) -@hydra_task_config(args_cli.task, "rsl_rl_cfg_entry_point") +@hydra_task_config(args_cli.task, args_cli.agent) def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlOnPolicyRunnerCfg): """Play with RSL-RL agent.""" task_name = args_cli.task.split(":")[-1] diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py index e534079d052..5e623fa8dcf 100644 --- a/scripts/reinforcement_learning/rsl_rl/train.py +++ b/scripts/reinforcement_learning/rsl_rl/train.py @@ -23,6 +23,9 @@ parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") parser.add_argument("--task", type=str, default=None, help="Name of the task.") +parser.add_argument( + "--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point." +) parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") parser.add_argument("--max_iterations", type=int, default=None, help="RL Policy training iterations.") parser.add_argument( @@ -100,7 +103,7 @@ torch.backends.cudnn.benchmark = False -@hydra_task_config(args_cli.task, "rsl_rl_cfg_entry_point") +@hydra_task_config(args_cli.task, args_cli.agent) def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlOnPolicyRunnerCfg): """Train with RSL-RL agent.""" # override configurations with non-hydra CLI arguments diff --git a/scripts/reinforcement_learning/sb3/play.py b/scripts/reinforcement_learning/sb3/play.py index 51ada837efe..a63c9080095 100644 --- a/scripts/reinforcement_learning/sb3/play.py +++ b/scripts/reinforcement_learning/sb3/play.py @@ -22,6 +22,9 @@ ) parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") parser.add_argument("--task", type=str, default=None, help="Name of the task.") +parser.add_argument( + "--agent", type=str, default="sb3_cfg_entry_point", help="Name of the RL agent configuration entry point." 
+) parser.add_argument("--checkpoint", type=str, default=None, help="Path to model checkpoint.") parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") parser.add_argument( @@ -86,7 +89,7 @@ # PLACEHOLDER: Extension template (do not remove this comment) -@hydra_task_config(args_cli.task, "sb3_cfg_entry_point") +@hydra_task_config(args_cli.task, args_cli.agent) def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: dict): """Play with stable-baselines agent.""" # randomly sample a seed if seed = -1 diff --git a/scripts/reinforcement_learning/sb3/train.py b/scripts/reinforcement_learning/sb3/train.py index e12907d6260..f12b9a4e0fc 100644 --- a/scripts/reinforcement_learning/sb3/train.py +++ b/scripts/reinforcement_learning/sb3/train.py @@ -23,6 +23,9 @@ parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") parser.add_argument("--task", type=str, default=None, help="Name of the task.") +parser.add_argument( + "--agent", type=str, default="sb3_cfg_entry_point", help="Name of the RL agent configuration entry point." +) parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") parser.add_argument("--log_interval", type=int, default=100_000, help="Log data every n timesteps.") parser.add_argument("--checkpoint", type=str, default=None, help="Continue the training from checkpoint.") @@ -96,7 +99,7 @@ def cleanup_pbar(*args): # PLACEHOLDER: Extension template (do not remove this comment) -@hydra_task_config(args_cli.task, "sb3_cfg_entry_point") +@hydra_task_config(args_cli.task, args_cli.agent) def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: dict): """Train with stable-baselines agent.""" # randomly sample a seed if seed = -1 From 0f139e04f3bf39d87e0fec19ef898f4d04646642 Mon Sep 17 00:00:00 2001 From: Mayank Mittal Date: Wed, 30 Jul 2025 23:32:48 +0200 Subject: [PATCH 2/7] adds symmetry function for anymal robot --- .../velocity/config/anymal_b/__init__.py | 4 + .../config/anymal_b/agents/rsl_rl_ppo_cfg.py | 24 +- .../velocity/config/anymal_c/__init__.py | 10 +- .../config/anymal_c/agents/rsl_rl_ppo_cfg.py | 24 +- .../velocity/config/anymal_d/__init__.py | 4 + .../config/anymal_d/agents/rsl_rl_ppo_cfg.py | 22 +- .../velocity/mdp/symmetry/__init__.py | 5 + .../velocity/mdp/symmetry/anymal.py | 270 ++++++++++++++++++ 8 files changed, 357 insertions(+), 6 deletions(-) create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/symmetry/__init__.py create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/symmetry/anymal.py diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/__init__.py index 1915d9518f3..f243e01cb5a 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/__init__.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/__init__.py @@ -17,6 +17,7 @@ kwargs={ "env_cfg_entry_point": f"{__name__}.flat_env_cfg:AnymalBFlatEnvCfg", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalBFlatPPORunnerCfg", + "rsl_rl_with_symmetry_cfg_entry_point": 
f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalBFlatPPORunnerWithSymmetryCfg", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", }, ) @@ -28,6 +29,7 @@ kwargs={ "env_cfg_entry_point": f"{__name__}.flat_env_cfg:AnymalBFlatEnvCfg_PLAY", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalBFlatPPORunnerCfg", + "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalBFlatPPORunnerWithSymmetryCfg", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", }, ) @@ -39,6 +41,7 @@ kwargs={ "env_cfg_entry_point": f"{__name__}.rough_env_cfg:AnymalBRoughEnvCfg", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalBRoughPPORunnerCfg", + "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalBRoughPPORunnerWithSymmetryCfg", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", }, ) @@ -50,6 +53,7 @@ kwargs={ "env_cfg_entry_point": f"{__name__}.rough_env_cfg:AnymalBRoughEnvCfg_PLAY", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalBRoughPPORunnerCfg", + "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalBRoughPPORunnerWithSymmetryCfg", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", }, ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/rsl_rl_ppo_cfg.py index b3b2eaba3e5..1acfecc9f4d 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/rsl_rl_ppo_cfg.py @@ -5,7 +5,9 @@ from isaaclab.utils import configclass -from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg, RslRlSymmetryCfg + +from isaaclab_tasks.manager_based.locomotion.velocity.mdp.symmetry import anymal @configclass @@ -46,3 +48,23 @@ def __post_init__(self): self.experiment_name = "anymal_b_flat" self.policy.actor_hidden_dims = [128, 128, 128] self.policy.critic_hidden_dims = [128, 128, 128] + + +@configclass +class AnymalBFlatPPORunnerWithSymmetryCfg(AnymalBFlatPPORunnerCfg): + def __post_init__(self): + super().__post_init__() + + self.algorithm.symmetry_cfg = RslRlSymmetryCfg( + use_data_augmentation=True, data_augmentation_func=anymal.compute_symmetric_states + ) + + +@configclass +class AnymalBRoughPPORunnerWithSymmetryCfg(AnymalBRoughPPORunnerCfg): + def __post_init__(self): + super().__post_init__() + + self.algorithm.symmetry_cfg = RslRlSymmetryCfg( + use_data_augmentation=True, data_augmentation_func=anymal.compute_symmetric_states + ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/__init__.py index d32c76869d4..61a634920eb 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/__init__.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/__init__.py @@ -18,6 +18,7 @@ kwargs={ "env_cfg_entry_point": f"{__name__}.flat_env_cfg:AnymalCFlatEnvCfg", "rsl_rl_cfg_entry_point": 
f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalCFlatPPORunnerCfg", + "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalCFlatPPORunnerWithSymmetryCfg", "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_flat_ppo_cfg.yaml", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", }, @@ -29,8 +30,9 @@ disable_env_checker=True, kwargs={ "env_cfg_entry_point": f"{__name__}.flat_env_cfg:AnymalCFlatEnvCfg_PLAY", - "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_flat_ppo_cfg.yaml", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalCFlatPPORunnerCfg", + "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalCFlatPPORunnerWithSymmetryCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_flat_ppo_cfg.yaml", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", }, ) @@ -41,8 +43,9 @@ disable_env_checker=True, kwargs={ "env_cfg_entry_point": f"{__name__}.rough_env_cfg:AnymalCRoughEnvCfg", - "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_rough_ppo_cfg.yaml", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalCRoughPPORunnerCfg", + "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalCRoughPPORunnerWithSymmetryCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_rough_ppo_cfg.yaml", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", }, ) @@ -53,8 +56,9 @@ disable_env_checker=True, kwargs={ "env_cfg_entry_point": f"{__name__}.rough_env_cfg:AnymalCRoughEnvCfg_PLAY", - "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_rough_ppo_cfg.yaml", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalCRoughPPORunnerCfg", + "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalCRoughPPORunnerWithSymmetryCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_rough_ppo_cfg.yaml", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", }, ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rsl_rl_ppo_cfg.py index effbde9d9f9..9bf8b995dc0 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rsl_rl_ppo_cfg.py @@ -5,8 +5,9 @@ from isaaclab.utils import configclass -from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg, RslRlSymmetryCfg +from isaaclab_tasks.manager_based.locomotion.velocity.mdp.symmetry import anymal @configclass class AnymalCRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): @@ -46,3 +47,24 @@ def __post_init__(self): self.experiment_name = "anymal_c_flat" self.policy.actor_hidden_dims = [128, 128, 128] self.policy.critic_hidden_dims = [128, 128, 128] + + + +@configclass +class AnymalCFlatPPORunnerWithSymmetryCfg(AnymalCFlatPPORunnerCfg): + def __post_init__(self): + super().__post_init__() + + self.algorithm.symmetry_cfg = RslRlSymmetryCfg( + use_data_augmentation=True, data_augmentation_func=anymal.compute_symmetric_states + ) + + +@configclass +class AnymalCRoughPPORunnerWithSymmetryCfg(AnymalCRoughPPORunnerCfg): + def __post_init__(self): + 
super().__post_init__() + + self.algorithm.symmetry_cfg = RslRlSymmetryCfg( + use_data_augmentation=True, data_augmentation_func=anymal.compute_symmetric_states + ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/__init__.py index 20110f631e8..ce991ea773e 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/__init__.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/__init__.py @@ -18,6 +18,7 @@ kwargs={ "env_cfg_entry_point": f"{__name__}.flat_env_cfg:AnymalDFlatEnvCfg", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDFlatPPORunnerCfg", + "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDFlatPPORunnerWithSymmetryCfg", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", }, ) @@ -29,6 +30,7 @@ kwargs={ "env_cfg_entry_point": f"{__name__}.flat_env_cfg:AnymalDFlatEnvCfg_PLAY", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDFlatPPORunnerCfg", + "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDFlatPPORunnerWithSymmetryCfg", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", }, ) @@ -40,6 +42,7 @@ kwargs={ "env_cfg_entry_point": f"{__name__}.rough_env_cfg:AnymalDRoughEnvCfg", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDRoughPPORunnerCfg", + "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDRoughPPORunnerWithSymmetryCfg", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", }, ) @@ -51,6 +54,7 @@ kwargs={ "env_cfg_entry_point": f"{__name__}.rough_env_cfg:AnymalDRoughEnvCfg_PLAY", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDRoughPPORunnerCfg", + "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDRoughPPORunnerWithSymmetryCfg", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", }, ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/rsl_rl_ppo_cfg.py index baacb1e2345..4b343842b83 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/rsl_rl_ppo_cfg.py @@ -5,7 +5,9 @@ from isaaclab.utils import configclass -from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg, RslRlSymmetryCfg + +from isaaclab_tasks.manager_based.locomotion.velocity.mdp.symmetry import anymal @configclass @@ -46,3 +48,21 @@ def __post_init__(self): self.experiment_name = "anymal_d_flat" self.policy.actor_hidden_dims = [128, 128, 128] self.policy.critic_hidden_dims = [128, 128, 128] + +@configclass +class AnymalDFlatPPORunnerWithSymmetryCfg(AnymalDFlatPPORunnerCfg): + def __post_init__(self): + super().__post_init__() + + self.algorithm.symmetry_cfg = RslRlSymmetryCfg( + use_data_augmentation=True, data_augmentation_func=anymal.compute_symmetric_states + ) + +@configclass +class 
AnymalDRoughPPORunnerWithSymmetryCfg(AnymalDRoughPPORunnerCfg): + def __post_init__(self): + super().__post_init__() + + self.algorithm.symmetry_cfg = RslRlSymmetryCfg( + use_data_augmentation=True, data_augmentation_func=anymal.compute_symmetric_states + ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/symmetry/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/symmetry/__init__.py new file mode 100644 index 00000000000..4309c312057 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/symmetry/__init__.py @@ -0,0 +1,5 @@ +"""Symmetry functions for the velocity tasks. + +These functions are used to augment the observations and actions of the environment. +They are specific to the velocity task and the choice of the robot. +""" diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/symmetry/anymal.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/symmetry/anymal.py new file mode 100644 index 00000000000..75f1cb2d8f6 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/symmetry/anymal.py @@ -0,0 +1,270 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Functions to specify the symmetry in the observation and action space for ANYmal.""" + +from __future__ import annotations + +import torch +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from omni.isaac.lab.envs import ManagerBasedRLEnv + +# specify the functions that are available for import +__all__ = ["compute_symmetric_states"] + + +@torch.no_grad() +def compute_symmetric_states( + env: ManagerBasedRLEnv, + obs: torch.Tensor | None = None, + actions: torch.Tensor | None = None, + obs_type: str = "policy", +): + """Augments the given observations and actions by applying symmetry transformations. + + This function creates augmented versions of the provided observations and actions by applying + four symmetrical transformations: original, left-right, front-back, and diagonal. The symmetry + transformations are beneficial for reinforcement learning tasks by providing additional + diverse data without requiring additional data collection. + + Args: + env: The environment instance. + obs: The original observation tensor. Defaults to None. + actions: The original actions tensor. Defaults to None. + obs_type: The type of observation to augment. Defaults to "policy". + + Returns: + Augmented observations and actions tensors, or None if the respective input was None. 
+ """ + + # observations + if obs is not None: + num_envs = obs.shape[0] + # since we have 4 different symmetries, we need to augment the batch size by 4 + obs_aug = torch.zeros(num_envs * 4, obs.shape[1], device=obs.device) + # -- original + obs_aug[:num_envs] = obs[:] + # -- left-right + obs_aug[num_envs : 2 * num_envs] = _transform_obs_left_right(env.unwrapped, obs, obs_type) + # -- front-back + obs_aug[2 * num_envs : 3 * num_envs] = _transform_obs_front_back(env.unwrapped, obs, obs_type) + # -- diagonal + obs_aug[3 * num_envs :] = _transform_obs_front_back(env.unwrapped, obs_aug[num_envs : 2 * num_envs]) + else: + obs_aug = None + + # actions + if actions is not None: + num_envs = actions.shape[0] + # since we have 4 different symmetries, we need to augment the batch size by 4 + actions_aug = torch.zeros(num_envs * 4, actions.shape[1], device=actions.device) + # -- original + actions_aug[:num_envs] = actions[:] + # -- left-right + actions_aug[num_envs : 2 * num_envs] = _transform_actions_left_right(actions) + # -- front-back + actions_aug[2 * num_envs : 3 * num_envs] = _transform_actions_front_back(actions) + # -- diagonal + actions_aug[3 * num_envs :] = _transform_actions_front_back(actions_aug[num_envs : 2 * num_envs]) + else: + actions_aug = None + + return obs_aug, actions_aug + + +""" +Symmetry functions for observations. +""" + + +def _transform_obs_left_right(env: ManagerBasedRLEnv, obs: torch.Tensor, obs_type: str = "policy") -> torch.Tensor: + """Apply a left-right symmetry transformation to the observation tensor. + + This function modifies the given observation tensor by applying transformations + that represent a symmetry with respect to the left-right axis. This includes + negating certain components of the linear and angular velocities, projected gravity, + velocity commands, and flipping the joint positions, joint velocities, and last actions + for the ANYmal robot. Additionally, if height-scan data is present, it is flipped + along the relevant dimension. + + Args: + env: The environment instance from which the observation is obtained. + obs: The observation tensor to be transformed. + obs_type: The type of observation to augment. Defaults to "policy". + + Returns: + The transformed observation tensor with left-right symmetry applied. 
+ """ + # copy observation tensor + obs = obs.clone() + device = obs.device + # lin vel + obs[:, :3] = obs[:, :3] * torch.tensor([1, -1, 1], device=device) + # ang vel + obs[:, 3:6] = obs[:, 3:6] * torch.tensor([-1, 1, -1], device=device) + # projected gravity + obs[:, 6:9] = obs[:, 6:9] * torch.tensor([1, -1, 1], device=device) + # velocity command + obs[:, 9:12] = obs[:, 9:12] * torch.tensor([1, -1, -1], device=device) + # joint pos + obs[:, 12:24] = _switch_anymal_joints_left_right(obs[:, 12:24]) + # joint vel + obs[:, 24:36] = _switch_anymal_joints_left_right(obs[:, 24:36]) + # last actions + obs[:, 36:48] = _switch_anymal_joints_left_right(obs[:, 36:48]) + + # height-scan + if obs_type == "critic": + # handle asymmetric actor-critic formulation + group_name = "critic" if "critic" in env.observation_manager.active_terms else "policy" + else: + group_name = "policy" + + # note: this is hard-coded for grid-pattern of ordering "xy" and size (1.6, 1.0) + if "height_scan" in env.observation_manager.active_terms[group_name]: + obs[:, 48:235] = obs[:, 48:235].view(-1, 11, 17).flip(dims=[1]).view(-1, 11 * 17) + + return obs + + +def _transform_obs_front_back(env: ManagerBasedRLEnv, obs: torch.Tensor, obs_type: str = "policy") -> torch.Tensor: + """Applies a front-back symmetry transformation to the observation tensor. + + This function modifies the given observation tensor by applying transformations + that represent a symmetry with respect to the front-back axis. This includes negating + certain components of the linear and angular velocities, projected gravity, velocity commands, + and flipping the joint positions, joint velocities, and last actions for the ANYmal robot. + Additionally, if height-scan data is present, it is flipped along the relevant dimension. + + Args: + env: The environment instance from which the observation is obtained. + obs: The observation tensor to be transformed. + obs_type: The type of observation to augment. Defaults to "policy". + + Returns: + The transformed observation tensor with front-back symmetry applied. + """ + # copy observation tensor + obs = obs.clone() + device = obs.device + # lin vel + obs[:, :3] = obs[:, :3] * torch.tensor([-1, 1, 1], device=device) + # ang vel + obs[:, 3:6] = obs[:, 3:6] * torch.tensor([1, -1, -1], device=device) + # projected gravity + obs[:, 6:9] = obs[:, 6:9] * torch.tensor([-1, 1, 1], device=device) + # velocity command + obs[:, 9:12] = obs[:, 9:12] * torch.tensor([-1, 1, -1], device=device) + # joint pos + obs[:, 12:24] = _switch_anymal_joints_front_back(obs[:, 12:24]) + # joint vel + obs[:, 24:36] = _switch_anymal_joints_front_back(obs[:, 24:36]) + # last actions + obs[:, 36:48] = _switch_anymal_joints_front_back(obs[:, 36:48]) + + # height-scan + if obs_type == "critic": + # handle asymmetric actor-critic formulation + group_name = "critic" if "critic" in env.observation_manager.active_terms else "policy" + else: + group_name = "policy" + + # note: this is hard-coded for grid-pattern of ordering "xy" and size (1.6, 1.0) + if "height_scan" in env.observation_manager.active_terms[group_name]: + obs[:, 48:235] = obs[:, 48:235].view(-1, 11, 17).flip(dims=[2]).view(-1, 11 * 17) + + return obs + + +""" +Symmetry functions for actions. +""" + + +def _transform_actions_left_right(actions: torch.Tensor) -> torch.Tensor: + """Applies a left-right symmetry transformation to the actions tensor. 
+ + This function modifies the given actions tensor by applying transformations + that represent a symmetry with respect to the left-right axis. This includes + flipping the joint positions, joint velocities, and last actions for the + ANYmal robot. + + Args: + actions: The actions tensor to be transformed. + + Returns: + The transformed actions tensor with left-right symmetry applied. + """ + actions = actions.clone() + actions[:] = _switch_anymal_joints_left_right(actions[:]) + return actions + + +def _transform_actions_front_back(actions: torch.Tensor) -> torch.Tensor: + """Applies a front-back symmetry transformation to the actions tensor. + + This function modifies the given actions tensor by applying transformations + that represent a symmetry with respect to the front-back axis. This includes + flipping the joint positions, joint velocities, and last actions for the + ANYmal robot. + + Args: + actions: The actions tensor to be transformed. + + Returns: + The transformed actions tensor with front-back symmetry applied. + """ + actions = actions.clone() + actions[:] = _switch_anymal_joints_front_back(actions[:]) + return actions + + +""" +Helper functions for symmetry. + +In Isaac Sim, the joint ordering is as follows: +[ + 'LF_HAA', 'LH_HAA', 'RF_HAA', 'RH_HAA', + 'LF_HFE', 'LH_HFE', 'RF_HFE', 'RH_HFE', + 'LF_KFE', 'LH_KFE', 'RF_KFE', 'RH_KFE' +] + +Correspondingly, the joint ordering for the ANYmal robot is: + +* LF = left front --> [0, 4, 8] +* LH = left hind --> [1, 5, 9] +* RF = right front --> [2, 6, 10] +* RH = right hind --> [3, 7, 11] +""" + + +def _switch_anymal_joints_left_right(joint_data: torch.Tensor) -> torch.Tensor: + """Applies a left-right symmetry transformation to the joint data tensor.""" + joint_data_switched = torch.zeros_like(joint_data) + # left <-- right + joint_data_switched[..., [0, 4, 8, 1, 5, 9]] = joint_data[..., [2, 6, 10, 3, 7, 11]] + # right <-- left + joint_data_switched[..., [2, 6, 10, 3, 7, 11]] = joint_data[..., [0, 4, 8, 1, 5, 9]] + + # Flip the sign of the HAA joints + joint_data_switched[..., [0, 1, 2, 3]] *= -1.0 + + return joint_data_switched + + +def _switch_anymal_joints_front_back(joint_data: torch.Tensor) -> torch.Tensor: + """Applies a front-back symmetry transformation to the joint data tensor.""" + joint_data_switched = torch.zeros_like(joint_data) + # front <-- hind + joint_data_switched[..., [0, 4, 8, 2, 6, 10]] = joint_data[..., [1, 5, 9, 3, 7, 11]] + # hind <-- front + joint_data_switched[..., [1, 5, 9, 3, 7, 11]] = joint_data[..., [0, 4, 8, 2, 6, 10]] + + # Flip the sign of the HFE and KFE joints + joint_data_switched[..., 4:] *= -1 + + return joint_data_switched From c43f1d705d3d3aa1cc6cd04da040aa0c9ad7785a Mon Sep 17 00:00:00 2001 From: Mayank Mittal Date: Tue, 5 Aug 2025 15:03:06 +0200 Subject: [PATCH 3/7] adds tutorial on specifying agent config --- docs/source/api/lab_rl/isaaclab_rl.rst | 4 +- docs/source/how-to/add_own_library.rst | 13 +- .../setup/walkthrough/project_setup.rst | 2 +- .../03_envs/configuring_rl_training.rst | 139 ++++++++++++++++++ docs/source/tutorials/index.rst | 1 + .../classic/cartpole/__init__.py | 1 + .../classic/cartpole/agents/rsl_rl_ppo_cfg.py | 28 +++- .../classic/cartpole/mdp/symmetry.py | 68 +++++++++ 8 files changed, 251 insertions(+), 5 deletions(-) create mode 100644 docs/source/tutorials/03_envs/configuring_rl_training.rst create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/mdp/symmetry.py diff --git a/docs/source/api/lab_rl/isaaclab_rl.rst 
b/docs/source/api/lab_rl/isaaclab_rl.rst index 77a4ca1bd96..32b4b4c6246 100644 --- a/docs/source/api/lab_rl/isaaclab_rl.rst +++ b/docs/source/api/lab_rl/isaaclab_rl.rst @@ -1,4 +1,6 @@ -isaaclab_rl +.. _api-isaaclab-rl: + +isaaclab_rl =========== .. automodule:: isaaclab_rl diff --git a/docs/source/how-to/add_own_library.rst b/docs/source/how-to/add_own_library.rst index 2606abe9a13..39fd840226d 100644 --- a/docs/source/how-to/add_own_library.rst +++ b/docs/source/how-to/add_own_library.rst @@ -68,7 +68,7 @@ Isaac Lab, you will first need to make a wrapper for the library, as explained i The following steps can be followed to integrate a new library with Isaac Lab: -1. Add your library as an extra-dependency in the ``setup.py`` for the extension ``isaaclab_tasks``. +1. Add your library as an extra-dependency in the ``setup.py`` for the extension ``isaaclab_rl``. This will ensure that the library is installed when you install Isaac Lab or it will complain if the library is not installed or available. 2. Install your library in the Python environment used by Isaac Lab. You can do this by following the steps mentioned @@ -86,6 +86,15 @@ works as expected and can guide users on how to use the wrapper. * Add some tests to ensure that the wrapper works as expected and remains compatible with the library. These tests can be added to the ``source/isaaclab_rl/test`` directory. * Add some documentation for the wrapper. You can add the API documentation to the - ``docs/source/api/lab_tasks/isaaclab_rl.rst`` file. + :ref:`API documentation <api-isaaclab-rl>` for the ``isaaclab_rl`` module. + + +Configuring an RL Agent +----------------------- + +Once you have integrated a new library with Isaac Lab, you can configure the example environment to use the new library. +You can check the :ref:`tutorial-configure-rl-training` for an example of how to configure the training process to use a +different library. + .. _rsl-rl: https://github.com/leggedrobotics/rsl_rl diff --git a/docs/source/setup/walkthrough/project_setup.rst b/docs/source/setup/walkthrough/project_setup.rst index 396f54f666e..f8cf950b150 100644 --- a/docs/source/setup/walkthrough/project_setup.rst +++ b/docs/source/setup/walkthrough/project_setup.rst @@ -69,7 +69,7 @@ used as the default output directories for tasks run by this project. Project Structure ------------------------------- +----------------- There are four nested structures you need to be aware of when working in the direct workflow with an Isaac Lab template project: the **Project**, the **Extension**, the **Modules**, and the **Task**. diff --git a/docs/source/tutorials/03_envs/configuring_rl_training.rst b/docs/source/tutorials/03_envs/configuring_rl_training.rst new file mode 100644 index 00000000000..2d5fd4bc765 --- /dev/null +++ b/docs/source/tutorials/03_envs/configuring_rl_training.rst @@ -0,0 +1,139 @@ +.. _tutorial-configure-rl-training: + +Configuring an RL Agent +======================= + +.. currentmodule:: isaaclab + +In the previous tutorial, we saw how to train an RL agent to solve the cartpole balancing task using the `Stable-Baselines3`_ library. In this tutorial, we will see how to configure the training process to use different RL libraries and different training algorithms. + +In the directory ``scripts/reinforcement_learning``, you will find the scripts for different RL libraries. These are organized into subdirectories named after the library. Each subdirectory contains the training and playing scripts for the library.
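+For example, to train the same cartpole task with the RL-Games library instead of Stable-Baselines3,
+you would simply call that library's training script. The command below is only an illustration and
+mirrors the flags used in the previous tutorial:
+
+.. code-block:: bash
+
+   # train the cartpole task with the RL-Games workflow script
+   ./isaaclab.sh -p scripts/reinforcement_learning/rl_games/train.py --task Isaac-Cartpole-v0 --headless
+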
+ +To configure a learning library with a specific task, you need to create a configuration for the learning agent. This configuration is used to create an instance of the learning agent and to configure the training process. Similar to the environment registration shown in the :ref:`tutorial-register-rl-env-gym` tutorial, you can register the learning agent with the ``gymnasium.register`` method. + +The Code +-------- + +As an example, we will look at the configuration included for the task ``Isaac-Cartpole-v0`` in the ``isaaclab_tasks`` package. This is the same task that we used in the :ref:`tutorial-run-rl-training` tutorial. + +.. literalinclude:: ../../../../source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/__init__.py + :language: python + :lines: 18-29 + +The Code Explained +------------------ + +Under the attribute ``kwargs``, we can see the configuration for the different learning libraries. The key is the name of the library and the value is the path to the configuration instance. This configuration instance can be a string, a class, or an instance of the class. For example, the value of the key ``"rl_games_cfg_entry_point"`` is a string that points to the configuration YAML file for the RL-Games library. Meanwhile, the value of the key ``"rsl_rl_cfg_entry_point"`` points to the configuration class for the RSL-RL library. + +The pattern used for specifying an agent configuration class closely follows the one used for specifying the environment configuration entry point. This means that the following two registrations are equivalent: + + +.. dropdown:: Specifying the configuration entry point as a string :icon: code .. code-block:: python from . import agents gym.register( id="Isaac-Cartpole-v0", entry_point="isaaclab.envs:ManagerBasedRLEnv", disable_env_checker=True, kwargs={ "env_cfg_entry_point": f"{__name__}.cartpole_env_cfg:CartpoleEnvCfg", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:CartpolePPORunnerCfg", }, ) .. dropdown:: Specifying the configuration entry point as a class :icon: code .. code-block:: python from . import agents gym.register( id="Isaac-Cartpole-v0", entry_point="isaaclab.envs:ManagerBasedRLEnv", disable_env_checker=True, kwargs={ "env_cfg_entry_point": f"{__name__}.cartpole_env_cfg:CartpoleEnvCfg", "rsl_rl_cfg_entry_point": agents.rsl_rl_ppo_cfg.CartpolePPORunnerCfg, }, ) The first code block is the preferred way to specify the configuration entry point. The second code block is equivalent to the first one, but it leads to importing the configuration class, which slows down the import time. This is why we recommend using strings for the configuration entry point. The Code Execution ------------------ All the scripts in the ``scripts/reinforcement_learning`` directory are configured by default to read the ``_cfg_entry_point`` from the ``kwargs`` dictionary to retrieve the configuration instance. For instance, the following code block shows how the ``train.py`` script reads the configuration instance for the Stable-Baselines3 library: .. dropdown:: Code for train.py with SB3 :icon: code .. literalinclude:: ../../../../scripts/reinforcement_learning/sb3/train.py :language: python :emphasize-lines: 26-28, 102-103 :linenos: The argument ``--agent`` is used to specify which agent configuration entry point to load. This name is looked up in the ``kwargs`` dictionary to retrieve the configuration instance.
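+Under the hood, the entry point is simply looked up in the task's registration ``kwargs``. The
+snippet below is a minimal sketch of what this lookup amounts to for a string entry point. It uses
+plain ``gymnasium`` and ``importlib`` calls rather than the exact helpers invoked through the
+``hydra_task_config`` decorator, and it assumes the Isaac Lab tasks have already been registered:
+
+.. code-block:: python
+
+   import importlib
+
+   import gymnasium as gym
+
+   # read the entry point string registered for the task (see the registration shown above)
+   entry_point = gym.spec("Isaac-Cartpole-v0").kwargs["rsl_rl_cfg_entry_point"]
+
+   # resolve the "module:attribute" string into the configuration class and instantiate it
+   module_name, attr_name = entry_point.split(":")
+   agent_cfg = getattr(importlib.import_module(module_name), attr_name)()
+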
You can manually specify +alternate configuration instances by passing the ``--agent`` argument. + +This also holds true for the RSL-RL library, for which we have two configuration instances. + +* Training with the standard PPO configuration: + + .. code-block:: bash + + # standard PPO training + ./isaaclab.sh -p scripts/reinforcement_learning/rsl_rl/train.py --task Isaac-Cartpole-v0 --headless \ + --run_name ppo + +* Training with the PPO configuration with symmetry augmentation: + + .. code-block:: bash + + # PPO training with symmetry augmentation + ./isaaclab.sh -p scripts/reinforcement_learning/rsl_rl/train.py --task Isaac-Cartpole-v0 --headless \ + --agent rsl_rl_with_symmetry_cfg_entry_point \ + --run_name ppo_with_symmetry_data_augmentation + + # you can use hydra to disable symmetry augmentation but enable mirror loss computation + ./isaaclab.sh -p scripts/reinforcement_learning/rsl_rl/train.py --task Isaac-Cartpole-v0 --headless \ + --agent rsl_rl_with_symmetry_cfg_entry_point \ + --run_name ppo_without_symmetry_data_augmentation \ + agent.algorithm.symmetry_cfg.use_data_augmentation=false + +The ``--run_name`` argument is used to specify the name of the run. This is used to +create a directory for the run in the ``logs/rsl_rl/cartpole`` directory. + +.. _Stable-Baselines3: https://stable-baselines3.readthedocs.io/en/master/ +.. _RL-Games: https://github.com/Denys88/rl_games +.. _RSL-RL: https://github.com/leggedrobotics/rsl_rl +.. _SKRL: https://skrl.readthedocs.io diff --git a/docs/source/tutorials/index.rst b/docs/source/tutorials/index.rst index ec4f091fefe..e7e0fdc651b 100644 --- a/docs/source/tutorials/index.rst +++ b/docs/source/tutorials/index.rst @@ -78,6 +78,7 @@ different aspects of the framework to create a simulation environment for agent 03_envs/create_direct_rl_env 03_envs/register_rl_env_gym 03_envs/run_rl_training + 03_envs/configuring_rl_training 03_envs/modify_direct_rl_env 03_envs/policy_inference_in_usd diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/__init__.py index 391545d9189..e5038c6e8ae 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/__init__.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/__init__.py @@ -23,6 +23,7 @@ "env_cfg_entry_point": f"{__name__}.cartpole_env_cfg:CartpoleEnvCfg", "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:CartpolePPORunnerCfg", + "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:CartpolePPORunnerWithSymmetryCfg", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml", }, diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/agents/rsl_rl_ppo_cfg.py index 96064394507..f80815b97e3 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/agents/rsl_rl_ppo_cfg.py @@ -5,7 +5,9 @@ from isaaclab.utils import configclass -from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg, RslRlSymmetryCfg + 
+import isaaclab_tasks.manager_based.classic.cartpole.mdp.symmetry as symmetry @configclass @@ -35,3 +37,27 @@ class CartpolePPORunnerCfg(RslRlOnPolicyRunnerCfg): desired_kl=0.01, max_grad_norm=1.0, ) + + +@configclass +class CartpolePPORunnerWithSymmetryCfg(CartpolePPORunnerCfg): + """Configuration for the PPO agent with symmetry augmentation.""" + + # all the other settings are inherited from the parent class + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.005, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-3, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + symmetry_cfg=RslRlSymmetryCfg( + use_data_augmentation=True, data_augmentation_func=symmetry.compute_symmetric_states + ), + ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/mdp/symmetry.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/mdp/symmetry.py new file mode 100644 index 00000000000..ca8e657b78c --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/mdp/symmetry.py @@ -0,0 +1,68 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Functions to specify the symmetry in the observation and action space for cartpole.""" + +from __future__ import annotations + +import torch +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from omni.isaac.lab.envs import ManagerBasedRLEnv + +# specify the functions that are available for import +__all__ = ["compute_symmetric_states"] + + +@torch.no_grad() +def compute_symmetric_states( + env: ManagerBasedRLEnv, + obs: torch.Tensor | None = None, + actions: torch.Tensor | None = None, + obs_type: str = "policy", +): + """Augments the given observations and actions by applying symmetry transformations. + + This function creates augmented versions of the provided observations and actions by applying + two symmetrical transformations: original, left-right. The symmetry + transformations are beneficial for reinforcement learning tasks by providing additional + diverse data without requiring additional data collection. + + Args: + env: The environment instance. + obs: The original observation tensor. Defaults to None. + actions: The original actions tensor. Defaults to None. + obs_type: The type of observation to augment. Defaults to "policy". + + Returns: + Augmented observations and actions tensors, or None if the respective input was None. 
+ """ + + # observations + if obs is not None: + num_envs = obs.shape[0] + # since we have 2 different symmetries, we need to augment the batch size by 2 + obs_aug = torch.zeros(num_envs * 2, obs.shape[1], device=obs.device) + # -- original + obs_aug[:num_envs] = obs[:] + # -- left-right + obs_aug[num_envs : 2 * num_envs] = -obs + else: + obs_aug = None + + # actions + if actions is not None: + num_envs = actions.shape[0] + # since we have 2 different symmetries, we need to augment the batch size by 2 + actions_aug = torch.zeros(num_envs * 2, actions.shape[1], device=actions.device) + # -- original + actions_aug[:num_envs] = actions[:] + # -- left-right + actions_aug[num_envs : 2 * num_envs] = -actions + else: + actions_aug = None + + return obs_aug, actions_aug From b396176f5260749c31421441d6a6ddd69d8e9aeb Mon Sep 17 00:00:00 2001 From: Mayank Mittal Date: Tue, 5 Aug 2025 15:07:34 +0200 Subject: [PATCH 4/7] adds full algorithm configs for symmetry in anymal --- .../classic/cartpole/mdp/symmetry.py | 2 +- .../velocity/config/anymal_b/__init__.py | 8 +++- .../config/anymal_b/agents/rsl_rl_ppo_cfg.py | 46 ++++++++++++++---- .../velocity/config/anymal_c/__init__.py | 8 +++- .../config/anymal_c/agents/rsl_rl_ppo_cfg.py | 48 +++++++++++++++---- .../velocity/config/anymal_d/__init__.py | 8 +++- .../config/anymal_d/agents/rsl_rl_ppo_cfg.py | 45 +++++++++++++---- .../velocity/mdp/symmetry/__init__.py | 5 ++ .../velocity/mdp/symmetry/anymal.py | 3 +- 9 files changed, 138 insertions(+), 35 deletions(-) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/mdp/symmetry.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/mdp/symmetry.py index ca8e657b78c..8b13bf7c017 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/mdp/symmetry.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/mdp/symmetry.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). # All rights reserved.
# # SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/__init__.py index f243e01cb5a..facb0aaf950 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/__init__.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/__init__.py @@ -41,7 +41,9 @@ kwargs={ "env_cfg_entry_point": f"{__name__}.rough_env_cfg:AnymalBRoughEnvCfg", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalBRoughPPORunnerCfg", - "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalBRoughPPORunnerWithSymmetryCfg", + "rsl_rl_with_symmetry_cfg_entry_point": ( + f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalBRoughPPORunnerWithSymmetryCfg" + ), "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", }, ) @@ -53,7 +55,9 @@ kwargs={ "env_cfg_entry_point": f"{__name__}.rough_env_cfg:AnymalBRoughEnvCfg_PLAY", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalBRoughPPORunnerCfg", - "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalBRoughPPORunnerWithSymmetryCfg", + "rsl_rl_with_symmetry_cfg_entry_point": ( + f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalBRoughPPORunnerWithSymmetryCfg" + ), "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", }, ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/rsl_rl_ppo_cfg.py index 1acfecc9f4d..7e89bf7acd4 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/rsl_rl_ppo_cfg.py @@ -51,20 +51,48 @@ def __post_init__(self): @configclass -class AnymalBFlatPPORunnerWithSymmetryCfg(AnymalBFlatPPORunnerCfg): - def __post_init__(self): - super().__post_init__() +class AnymalBFlatPPORunnerWithSymmetryCfg(AnymalBFlatPPORunnerCfg): + """Configuration for the PPO agent with symmetry augmentation.""" - self.algorithm.symmetry_cfg = RslRlSymmetryCfg( + # all the other settings are inherited from the parent class + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.005, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-3, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + symmetry_cfg=RslRlSymmetryCfg( use_data_augmentation=True, data_augmentation_func=anymal.compute_symmetric_states - ) + ), + ) @configclass class AnymalBRoughPPORunnerWithSymmetryCfg(AnymalBRoughPPORunnerCfg): - def __post_init__(self): - super().__post_init__() + """Configuration for the PPO agent with symmetry augmentation.""" - self.algorithm.symmetry_cfg = RslRlSymmetryCfg( + # all the other settings are inherited from the parent class + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.005, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-3, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + symmetry_cfg=RslRlSymmetryCfg( use_data_augmentation=True, 
data_augmentation_func=anymal.compute_symmetric_states
-        )
+        ),
+    )
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/__init__.py
index 61a634920eb..efcbbe7901d 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/__init__.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/__init__.py
@@ -44,7 +44,9 @@
     kwargs={
         "env_cfg_entry_point": f"{__name__}.rough_env_cfg:AnymalCRoughEnvCfg",
         "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalCRoughPPORunnerCfg",
-        "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalCRoughPPORunnerWithSymmetryCfg",
+        "rsl_rl_with_symmetry_cfg_entry_point": (
+            f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalCRoughPPORunnerWithSymmetryCfg"
+        ),
         "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_rough_ppo_cfg.yaml",
         "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml",
     },
@@ -57,7 +59,9 @@
     kwargs={
         "env_cfg_entry_point": f"{__name__}.rough_env_cfg:AnymalCRoughEnvCfg_PLAY",
         "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalCRoughPPORunnerCfg",
-        "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalCRoughPPORunnerWithSymmetryCfg",
+        "rsl_rl_with_symmetry_cfg_entry_point": (
+            f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalCRoughPPORunnerWithSymmetryCfg"
+        ),
         "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_rough_ppo_cfg.yaml",
         "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml",
     },
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rsl_rl_ppo_cfg.py
index 9bf8b995dc0..aa620d94030 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rsl_rl_ppo_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rsl_rl_ppo_cfg.py
@@ -9,6 +9,7 @@
 from isaaclab_tasks.manager_based.locomotion.velocity.mdp.symmetry import anymal
 
+
 @configclass
 class AnymalCRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg):
     num_steps_per_env = 24
@@ -49,22 +50,49 @@ def __post_init__(self):
         self.policy.critic_hidden_dims = [128, 128, 128]
-
 @configclass
-class AnymalCFlatPPORunnerWithSymmetryCfg(AnymalCFlatPPORunnerCfg):
-    def __post_init__(self):
-        super().__post_init__()
+class AnymalCFlatPPORunnerWithSymmetryCfg(AnymalCFlatPPORunnerCfg):
+    """Configuration for the PPO agent with symmetry augmentation."""
 
-        self.algorithm.symmetry_cfg = RslRlSymmetryCfg(
+    # all the other settings are inherited from the parent class
+    algorithm = RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.005,
+        num_learning_epochs=5,
+        num_mini_batches=4,
+        learning_rate=1.0e-3,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.01,
+        max_grad_norm=1.0,
+        symmetry_cfg=RslRlSymmetryCfg(
             use_data_augmentation=True, data_augmentation_func=anymal.compute_symmetric_states
-        )
+        ),
+    )
 
 
 @configclass
 class AnymalCRoughPPORunnerWithSymmetryCfg(AnymalCRoughPPORunnerCfg):
-    def __post_init__(self):
-        super().__post_init__()
+    """Configuration for the PPO agent with symmetry augmentation."""
 
-        self.algorithm.symmetry_cfg = RslRlSymmetryCfg(
+    # all the other settings are inherited from the parent class
+    algorithm = RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.005,
+        num_learning_epochs=5,
+        num_mini_batches=4,
+        learning_rate=1.0e-3,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.01,
+        max_grad_norm=1.0,
+        symmetry_cfg=RslRlSymmetryCfg(
             use_data_augmentation=True, data_augmentation_func=anymal.compute_symmetric_states
-        )
+        ),
+    )
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/__init__.py
index ce991ea773e..5a93627006d 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/__init__.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/__init__.py
@@ -42,7 +42,9 @@
     kwargs={
         "env_cfg_entry_point": f"{__name__}.rough_env_cfg:AnymalDRoughEnvCfg",
         "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDRoughPPORunnerCfg",
-        "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDRoughPPORunnerWithSymmetryCfg",
+        "rsl_rl_with_symmetry_cfg_entry_point": (
+            f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDRoughPPORunnerWithSymmetryCfg"
+        ),
         "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml",
     },
 )
@@ -54,7 +56,9 @@
     kwargs={
         "env_cfg_entry_point": f"{__name__}.rough_env_cfg:AnymalDRoughEnvCfg_PLAY",
         "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDRoughPPORunnerCfg",
-        "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDRoughPPORunnerWithSymmetryCfg",
+        "rsl_rl_with_symmetry_cfg_entry_point": (
+            f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDRoughPPORunnerWithSymmetryCfg"
+        ),
         "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml",
     },
 )
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/rsl_rl_ppo_cfg.py
index 4b343842b83..b1db4f60f8a 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/rsl_rl_ppo_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/rsl_rl_ppo_cfg.py
@@ -49,20 +49,49 @@ def __post_init__(self):
         self.policy.actor_hidden_dims = [128, 128, 128]
         self.policy.critic_hidden_dims = [128, 128, 128]
+
 @configclass
 class AnymalDFlatPPORunnerWithSymmetryCfg(AnymalDFlatPPORunnerCfg):
-    def __post_init__(self):
-        super().__post_init__()
+    """Configuration for the PPO agent with symmetry augmentation."""
 
-        self.algorithm.symmetry_cfg = RslRlSymmetryCfg(
+    algorithm = RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.005,
+        num_learning_epochs=5,
+        num_mini_batches=4,
+        learning_rate=1.0e-3,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.01,
+        max_grad_norm=1.0,
+        symmetry_cfg=RslRlSymmetryCfg(
             use_data_augmentation=True, data_augmentation_func=anymal.compute_symmetric_states
-        )
+        ),
+    )
+
 @configclass
 class AnymalDRoughPPORunnerWithSymmetryCfg(AnymalDRoughPPORunnerCfg):
-    def __post_init__(self):
-        super().__post_init__()
+    """Configuration for the PPO agent with symmetry augmentation."""
 
-        self.algorithm.symmetry_cfg = RslRlSymmetryCfg(
+    # all the other settings are inherited from the parent class
+    algorithm = RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.005,
+        num_learning_epochs=5,
+        num_mini_batches=4,
+        learning_rate=1.0e-3,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.01,
+        max_grad_norm=1.0,
+        symmetry_cfg=RslRlSymmetryCfg(
             use_data_augmentation=True, data_augmentation_func=anymal.compute_symmetric_states
-        )
+        ),
+    )
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/symmetry/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/symmetry/__init__.py
index 4309c312057..027c9900a95 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/symmetry/__init__.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/symmetry/__init__.py
@@ -1,3 +1,8 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
 """Symmetry functions for the velocity tasks.
 
 These functions are used to augment the observations and actions of the environment.
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/symmetry/anymal.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/symmetry/anymal.py
index 75f1cb2d8f6..2a3f4564fb8 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/symmetry/anymal.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/symmetry/anymal.py
@@ -1,8 +1,9 @@
-# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
 # All rights reserved.
 #
 # SPDX-License-Identifier: BSD-3-Clause
+
 """Functions to specify the symmetry in the observation and action space for ANYmal."""
 
 from __future__ import annotations

From 347b9d0b49d60732dc60130d88d882aa9acc773c Mon Sep 17 00:00:00 2001
From: Mayank Mittal
Date: Tue, 5 Aug 2025 15:11:21 +0200
Subject: [PATCH 5/7] adds agent to skrl in backwards compat way

---
 scripts/reinforcement_learning/skrl/play.py  | 16 ++++++++++++++--
 scripts/reinforcement_learning/skrl/train.py | 16 ++++++++++++++--
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/scripts/reinforcement_learning/skrl/play.py b/scripts/reinforcement_learning/skrl/play.py
index 2bc8dc5a1b1..2fa0eb57d9b 100644
--- a/scripts/reinforcement_learning/skrl/play.py
+++ b/scripts/reinforcement_learning/skrl/play.py
@@ -26,6 +26,15 @@
 )
 parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
 parser.add_argument("--task", type=str, default=None, help="Name of the task.")
+parser.add_argument(
+    "--agent",
+    type=str,
+    default=None,
+    help=(
+        "Name of the RL agent configuration entry point. Defaults to None, in which case the argument "
+        "--algorithm is used to determine the default agent configuration entry point."
+    ),
+)
 parser.add_argument("--checkpoint", type=str, default=None, help="Path to model checkpoint.")
 parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
 parser.add_argument(
@@ -107,8 +116,11 @@
 # PLACEHOLDER: Extension template (do not remove this comment)
 
 # config shortcuts
-algorithm = args_cli.algorithm.lower()
-agent_cfg_entry_point = "skrl_cfg_entry_point" if algorithm in ["ppo"] else f"skrl_{algorithm}_cfg_entry_point"
+if args_cli.agent is None:
+    algorithm = args_cli.algorithm.lower()
+    agent_cfg_entry_point = "skrl_cfg_entry_point" if algorithm in ["ppo"] else f"skrl_{algorithm}_cfg_entry_point"
+else:
+    agent_cfg_entry_point = args_cli.agent
 
 
 @hydra_task_config(args_cli.task, agent_cfg_entry_point)
diff --git a/scripts/reinforcement_learning/skrl/train.py b/scripts/reinforcement_learning/skrl/train.py
index b76eb80132c..06a57fb0065 100644
--- a/scripts/reinforcement_learning/skrl/train.py
+++ b/scripts/reinforcement_learning/skrl/train.py
@@ -24,6 +24,15 @@
 parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).")
 parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
 parser.add_argument("--task", type=str, default=None, help="Name of the task.")
+parser.add_argument(
+    "--agent",
+    type=str,
+    default=None,
+    help=(
+        "Name of the RL agent configuration entry point. Defaults to None, in which case the argument "
+        "--algorithm is used to determine the default agent configuration entry point."
+    ),
+)
 parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
 parser.add_argument(
     "--distributed", action="store_true", default=False, help="Run training with multiple GPUs or nodes."
@@ -103,8 +112,11 @@
 # PLACEHOLDER: Extension template (do not remove this comment)
 
 # config shortcuts
-algorithm = args_cli.algorithm.lower()
-agent_cfg_entry_point = "skrl_cfg_entry_point" if algorithm in ["ppo"] else f"skrl_{algorithm}_cfg_entry_point"
+if args_cli.agent is None:
+    algorithm = args_cli.algorithm.lower()
+    agent_cfg_entry_point = "skrl_cfg_entry_point" if algorithm in ["ppo"] else f"skrl_{algorithm}_cfg_entry_point"
+else:
+    agent_cfg_entry_point = args_cli.agent
 
 
 @hydra_task_config(args_cli.task, agent_cfg_entry_point)

From 77e2f30971bc190226e85cf4498fcf7abdeb65ee Mon Sep 17 00:00:00 2001
From: Mayank Mittal
Date: Tue, 5 Aug 2025 15:22:05 +0200
Subject: [PATCH 6/7] fix docs build issues

---
 docs/source/refs/release_notes.rst                | 12 ++++++------
 .../tutorials/03_envs/configuring_rl_training.rst |  9 +++++----
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/docs/source/refs/release_notes.rst b/docs/source/refs/release_notes.rst
index ec0dffa63f5..4a3dd10b579 100644
--- a/docs/source/refs/release_notes.rst
+++ b/docs/source/refs/release_notes.rst
@@ -46,7 +46,7 @@ Improvements
 ------------
 
 Core API
-^^^^^^^^
+~~~~~~~~
 
 * **Actuator Interfaces**
   * Fixes implicit actuator limits configs for assets by @ooctipus
@@ -90,7 +90,7 @@
 * Allows slicing from list values in dicts by @LinghengMeng @kellyguo11
 
 Tasks API
-^^^^^^^^^
+~~~~~~~~~
 
 * Adds support for ``module:task`` and gymnasium >=1.0 by @kellyguo11
 * Adds RL library error hints by @Toni-SM
@@ -104,7 +104,7 @@
 * Pre-processes SB3 env image obs-space for CNN pipeline by @ooctipus
 
 Infrastructure
-^^^^^^^^^^^^^^^
+~~~~~~~~~~~~~~
 
 * **Dependencies**
   * Updates torch to 2.7.0 with CUDA 12.8 by @kellyguo11
@@ -131,7 +131,7 @@
 Bug Fixes
 ---------
 
 Core API
-^^^^^^^^
+~~~~~~~~
 
 * **Actuator Interfaces**
   * Fixes DCMotor clipping for negative power by @jtigue-bdai
@@ -159,12 +159,12 @@
 * Fixes ``quat_inv()`` implementation by @ozhanozen
 
 Tasks API
-^^^^^^^^^
+~~~~~~~~~
 
 * Fixes LSTM to ONNX export by @jtigue-bdai
 
 Example Tasks
-^^^^^^^^^^^^^
+~~~~~~~~~~~~~
 
 * Removes contact termination redundancy by @louislelay
 * Fixes memory leak in SDF by @leondavi
diff --git a/docs/source/tutorials/03_envs/configuring_rl_training.rst b/docs/source/tutorials/03_envs/configuring_rl_training.rst
index 2d5fd4bc765..2eb2b0b5e76 100644
--- a/docs/source/tutorials/03_envs/configuring_rl_training.rst
+++ b/docs/source/tutorials/03_envs/configuring_rl_training.rst
@@ -84,9 +84,6 @@ The second code block is equivalent to the first one, but it leads to import of
 class which slows down the import time. This is why we recommend using strings for the
 configuration entry point.
 
-The Code Execution
-------------------
-
 All the scripts in the ``scripts/reinforcement_learning`` directory are configured by default to read the
 ``_cfg_entry_point`` from the ``kwargs`` dictionary to retrieve the configuration instance.
@@ -105,7 +102,11 @@ The argument ``--agent`` is used to specify the learning library to use. This is
 retrieve the configuration instance from the ``kwargs`` dictionary. You can manually specify alternate
 configuration instances by passing the ``--agent`` argument.
 
-This also holds true for the RSL-RL library, for which we have two configuration instances.
+The Code Execution
+------------------
+
+Since the RSL-RL library provides two configuration instances for the cartpole balancing task,
+we can use the ``--agent`` argument to select which one to use.
 * Training with the standard PPO configuration:

From e3955a1d87fe5683ddacda5ae86959bbe38cb96b Mon Sep 17 00:00:00 2001
From: Mayank Mittal <12863862+Mayankm96@users.noreply.github.com>
Date: Sat, 16 Aug 2025 04:25:23 +0000
Subject: [PATCH 7/7] updates changelog

---
 source/isaaclab_tasks/config/extension.toml |  2 +-
 source/isaaclab_tasks/docs/CHANGELOG.rst    | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/source/isaaclab_tasks/config/extension.toml b/source/isaaclab_tasks/config/extension.toml
index 4873bbdc100..289d07ed5b7 100644
--- a/source/isaaclab_tasks/config/extension.toml
+++ b/source/isaaclab_tasks/config/extension.toml
@@ -1,7 +1,7 @@
 [package]
 
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.10.45"
+version = "0.10.46"
 
 # Description
 title = "Isaac Lab Environments"
diff --git a/source/isaaclab_tasks/docs/CHANGELOG.rst b/source/isaaclab_tasks/docs/CHANGELOG.rst
index f70737ddd8e..98e23db6c81 100644
--- a/source/isaaclab_tasks/docs/CHANGELOG.rst
+++ b/source/isaaclab_tasks/docs/CHANGELOG.rst
@@ -1,6 +1,16 @@
 Changelog
 ---------
 
+0.10.46 (2025-08-16)
+~~~~~~~~~~~~~~~~~~~~
+
+Added
+^^^^^
+
+* Added symmetry data augmentation example with RSL-RL for cartpole and anymal locomotion environments.
+* Added :attr:`--agent` to RL workflow scripts to allow switching between different configurations.
+
+
 0.10.45 (2025-07-16)
 ~~~~~~~~~~~~~~~~~~~~
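
For readers following this series, a minimal sketch of how a workflow script ends up resolving the agent
configuration entry point after these patches. The argparse setup is abbreviated, and the fallback shown is
the skrl behaviour; the rl_games, rsl_rl, and sb3 scripts simply default ``--agent`` to their library-specific
entry point instead.

    # Sketch only: abbreviated version of the entry-point resolution introduced in this series.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--task", type=str, default=None, help="Name of the task.")
    parser.add_argument("--algorithm", type=str, default="PPO", help="RL algorithm (skrl scripts only).")
    parser.add_argument("--agent", type=str, default=None, help="Agent configuration entry point.")
    args_cli, _ = parser.parse_known_args()

    if args_cli.agent is None:
        # backwards-compatible skrl default: derive the entry point from --algorithm
        algorithm = args_cli.algorithm.lower()
        agent_cfg_entry_point = "skrl_cfg_entry_point" if algorithm in ["ppo"] else f"skrl_{algorithm}_cfg_entry_point"
    else:
        # explicit override, e.g. the "rsl_rl_with_symmetry_cfg_entry_point" registered above
        agent_cfg_entry_point = args_cli.agent

    # The scripts then decorate main() with @hydra_task_config(args_cli.task, agent_cfg_entry_point),
    # which looks up the selected entry point in the task's registered kwargs.

Selecting the symmetry-augmented ANYmal runners registered above then amounts to passing
``--agent rsl_rl_with_symmetry_cfg_entry_point`` to the rsl_rl train or play script for the corresponding task.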
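On the symmetry side, the configurations above only wire ``anymal.compute_symmetric_states`` into RSL-RL via
``RslRlSymmetryCfg``. As a purely illustrative sketch (the tensor layout, helper names, and the exact callback
signature expected by RSL-RL are assumptions here, not the actual API), left-right mirroring for data
augmentation boils down to permuting and sign-flipping channels and stacking the mirrored samples onto the batch:

    # Illustrative only: not the actual compute_symmetric_states implementation.
    import torch


    def mirror_batch(x: torch.Tensor, perm: torch.Tensor, sign: torch.Tensor) -> torch.Tensor:
        """Mirror a (N, D) batch across the robot's sagittal plane.

        ``perm`` swaps left/right channels and ``sign`` flips channels that change sign under mirroring.
        """
        return x[:, perm] * sign


    def augment_with_symmetry(obs, actions, obs_perm, obs_sign, act_perm, act_sign):
        """Return the original samples stacked with their mirrored counterparts (2N rows)."""
        aug_obs = torch.cat([obs, mirror_batch(obs, obs_perm, obs_sign)], dim=0)
        aug_actions = torch.cat([actions, mirror_batch(actions, act_perm, act_sign)], dim=0)
        return aug_obs, aug_actions

The real function is also expected to receive the environment instance from RSL-RL; see the ``mdp/symmetry``
module touched above for the actual interface.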