diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py index 46519048ae..4550d77b16 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py @@ -58,6 +58,7 @@ def __init__(self, env: DirectMARLEnv) -> None: self.cfg = self.env.cfg self.sim = self.env.sim self.scene = self.env.scene + self.render_mode = self.env.render_mode self.single_observation_space = gym.spaces.Dict() if self._state_as_observation: @@ -126,7 +127,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn: return obs, rewards, terminated, time_outs, extras def render(self, recompute: bool = False) -> np.ndarray | None: - self.env.render(recompute) + return self.env.render(recompute) def close(self) -> None: self.env.close() diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/__init__.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/__init__.py index 90d70311d1..58517b7043 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/__init__.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/__init__.py @@ -22,8 +22,12 @@ kwargs={ "env_cfg_entry_point": f"{__name__}.cart_double_pendulum_env:CartDoublePendulumEnvCfg", "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": ( + f"{agents.__name__}.rsl_rl_ppo_cfg:CartDoublePendulumPPORunnerCfg" + ), # FIXME: Raises errors "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", "skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml", "skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml", + "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml", # FIXME: Raises errors }, ) diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/rsl_rl_ppo_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 0000000000..a927580c3e --- /dev/null +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,41 @@ +# Copyright (c) 2022-2024, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from omni.isaac.lab.utils import configclass + +from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import ( + RslRlOnPolicyRunnerCfg, + RslRlPpoActorCriticCfg, + RslRlPpoAlgorithmCfg, +) + + +@configclass +class CartDoublePendulumPPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 16 + max_iterations = 150 + save_interval = 50 + experiment_name = "cart_double_pendulum_direct" + empirical_normalization = False + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[32, 32], + critic_hidden_dims=[32, 32], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.005, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-3, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/sb3_ppo_cfg.yaml b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/sb3_ppo_cfg.yaml new file mode 100644 index 0000000000..5856f35f8e --- /dev/null +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/sb3_ppo_cfg.yaml @@ -0,0 +1,21 @@ +# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32 +seed: 42 + +n_timesteps: !!float 1e6 +policy: 'MlpPolicy' +n_steps: 16 +batch_size: 4096 +gae_lambda: 0.95 +gamma: 0.99 +n_epochs: 20 +ent_coef: 0.01 +learning_rate: !!float 3e-4 +clip_range: !!float 0.2 +policy_kwargs: "dict( + activation_fn=nn.ELU, + net_arch=[32, 32], + squash_output=False, + )" +vf_coef: 1.0 +max_grad_norm: 1.0 +device: "cuda:0" diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml index b795d9d081..bc0c518217 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml @@ -76,5 +76,5 @@ agent: # https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html trainer: class: SequentialTrainer - timesteps: 1600 + timesteps: 4800 environment_info: log diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml index fc2f07de55..dcd794f57a 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml @@ -78,5 +78,5 @@ agent: # https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html trainer: class: SequentialTrainer - timesteps: 1600 + timesteps: 4800 environment_info: log diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ppo_cfg.yaml b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ppo_cfg.yaml index 160ebcde60..7c1fd452d7 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ppo_cfg.yaml +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ppo_cfg.yaml @@ -76,5 +76,5 @@ agent: # https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html trainer: class: SequentialTrainer - timesteps: 1600 + timesteps: 4800 environment_info: log diff --git a/source/standalone/workflows/rl_games/play.py b/source/standalone/workflows/rl_games/play.py index 7aa1456fe0..f5cb42a6fc 100644 --- a/source/standalone/workflows/rl_games/play.py +++ b/source/standalone/workflows/rl_games/play.py @@ -94,6 +94,11 @@ def main(): # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + + # convert to single-agent instance if required by the RL algorithm + if isinstance(env.unwrapped, DirectMARLEnv): + env = multi_agent_to_single_agent(env) + # wrap for video recording if args_cli.video: video_kwargs = { @@ -106,10 +111,6 @@ def main(): print_dict(video_kwargs, nesting=4) env = gym.wrappers.RecordVideo(env, **video_kwargs) - # convert to single-agent instance if required by the RL algorithm - if isinstance(env.unwrapped, DirectMARLEnv): - env = multi_agent_to_single_agent(env) - # wrap around environment for rl-games env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions) diff --git a/source/standalone/workflows/rl_games/train.py b/source/standalone/workflows/rl_games/train.py index a925be8575..f2fa76e6ad 100644 --- a/source/standalone/workflows/rl_games/train.py +++ b/source/standalone/workflows/rl_games/train.py @@ -129,6 +129,11 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + + # convert to single-agent instance if required by the RL algorithm + if isinstance(env.unwrapped, DirectMARLEnv): + env = multi_agent_to_single_agent(env) + # wrap for video recording if args_cli.video: video_kwargs = { @@ -141,10 +146,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen print_dict(video_kwargs, nesting=4) env = gym.wrappers.RecordVideo(env, **video_kwargs) - # convert to single-agent instance if required by the RL algorithm - if isinstance(env.unwrapped, DirectMARLEnv): - env = multi_agent_to_single_agent(env) - # wrap around environment for rl-games env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions) diff --git a/source/standalone/workflows/rsl_rl/play.py b/source/standalone/workflows/rsl_rl/play.py index 067c38165b..ba618b4405 100644 --- a/source/standalone/workflows/rsl_rl/play.py +++ b/source/standalone/workflows/rsl_rl/play.py @@ -74,6 +74,11 @@ def main(): # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + + # convert to single-agent instance if required by the RL algorithm + if isinstance(env.unwrapped, DirectMARLEnv): + env = multi_agent_to_single_agent(env) + # wrap for video recording if args_cli.video: video_kwargs = { @@ -86,10 +91,6 @@ def main(): print_dict(video_kwargs, nesting=4) env = gym.wrappers.RecordVideo(env, **video_kwargs) - # convert to single-agent instance if required by the RL algorithm - if isinstance(env.unwrapped, DirectMARLEnv): - env = multi_agent_to_single_agent(env) - # wrap around environment for rsl-rl env = RslRlVecEnvWrapper(env) diff --git a/source/standalone/workflows/rsl_rl/train.py b/source/standalone/workflows/rsl_rl/train.py index 6c73798315..9e07bf09f9 100644 --- a/source/standalone/workflows/rsl_rl/train.py +++ b/source/standalone/workflows/rsl_rl/train.py @@ -100,6 +100,10 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + # convert to single-agent instance if required by the RL algorithm + if isinstance(env.unwrapped, DirectMARLEnv): + env = multi_agent_to_single_agent(env) + # save resume path before creating a new log_dir if agent_cfg.resume: resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint) @@ -116,10 +120,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen print_dict(video_kwargs, nesting=4) env = gym.wrappers.RecordVideo(env, **video_kwargs) - # convert to single-agent instance if required by the RL algorithm - if isinstance(env.unwrapped, DirectMARLEnv): - env = multi_agent_to_single_agent(env) - # wrap around environment for rsl-rl env = RslRlVecEnvWrapper(env) diff --git a/source/standalone/workflows/sb3/play.py b/source/standalone/workflows/sb3/play.py index e26908911b..95d5b96abc 100644 --- a/source/standalone/workflows/sb3/play.py +++ b/source/standalone/workflows/sb3/play.py @@ -48,6 +48,7 @@ from stable_baselines3 import PPO from stable_baselines3.common.vec_env import VecNormalize +from omni.isaac.lab.envs import DirectMARLEnv, multi_agent_to_single_agent from omni.isaac.lab.utils.dict import print_dict import omni.isaac.lab_tasks # noqa: F401 @@ -82,6 +83,11 @@ def main(): # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + + # convert to single-agent instance if required by the RL algorithm + if isinstance(env.unwrapped, DirectMARLEnv): + env = multi_agent_to_single_agent(env) + # wrap for video recording if args_cli.video: video_kwargs = { diff --git a/source/standalone/workflows/sb3/train.py b/source/standalone/workflows/sb3/train.py index 1ce8062961..bf60fda825 100644 --- a/source/standalone/workflows/sb3/train.py +++ b/source/standalone/workflows/sb3/train.py @@ -104,6 +104,11 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + + # convert to single-agent instance if required by the RL algorithm + if isinstance(env.unwrapped, DirectMARLEnv): + env = multi_agent_to_single_agent(env) + # wrap for video recording if args_cli.video: video_kwargs = { @@ -116,10 +121,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen print_dict(video_kwargs, nesting=4) env = gym.wrappers.RecordVideo(env, **video_kwargs) - # convert to single-agent instance if required by the RL algorithm - if isinstance(env.unwrapped, DirectMARLEnv): - env = multi_agent_to_single_agent(env) - # wrap around environment for stable baselines env = Sb3VecEnvWrapper(env) diff --git a/source/standalone/workflows/skrl/play.py b/source/standalone/workflows/skrl/play.py index d523a48930..2280c32d61 100644 --- a/source/standalone/workflows/skrl/play.py +++ b/source/standalone/workflows/skrl/play.py @@ -116,6 +116,11 @@ def main(): # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + + # convert to single-agent instance if required by the RL algorithm + if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]: + env = multi_agent_to_single_agent(env) + # wrap for video recording if args_cli.video: video_kwargs = { @@ -128,10 +133,6 @@ def main(): print_dict(video_kwargs, nesting=4) env = gym.wrappers.RecordVideo(env, **video_kwargs) - # convert to single-agent instance if required by the RL algorithm - if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]: - env = multi_agent_to_single_agent(env) - # wrap around environment for skrl env = SkrlVecEnvWrapper(env, ml_framework=args_cli.ml_framework) # same as: `wrap_env(env, wrapper="auto")` diff --git a/source/standalone/workflows/skrl/train.py b/source/standalone/workflows/skrl/train.py index bbbdabf6a1..a42fece9ef 100644 --- a/source/standalone/workflows/skrl/train.py +++ b/source/standalone/workflows/skrl/train.py @@ -151,6 +151,11 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen # create isaac environment env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) + + # convert to single-agent instance if required by the RL algorithm + if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]: + env = multi_agent_to_single_agent(env) + # wrap for video recording if args_cli.video: video_kwargs = { @@ -163,10 +168,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen print_dict(video_kwargs, nesting=4) env = gym.wrappers.RecordVideo(env, **video_kwargs) - # convert to single-agent instance if required by the RL algorithm - if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]: - env = multi_agent_to_single_agent(env) - # wrap around environment for skrl env = SkrlVecEnvWrapper(env, ml_framework=args_cli.ml_framework) # same as: `wrap_env(env, wrapper="auto")`