diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py
index 46519048aee..4550d77b167 100644
--- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py
@@ -58,6 +58,7 @@ def __init__(self, env: DirectMARLEnv) -> None:
         self.cfg = self.env.cfg
         self.sim = self.env.sim
         self.scene = self.env.scene
+        self.render_mode = self.env.render_mode
 
         self.single_observation_space = gym.spaces.Dict()
         if self._state_as_observation:
@@ -126,7 +127,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
         return obs, rewards, terminated, time_outs, extras
 
     def render(self, recompute: bool = False) -> np.ndarray | None:
-        self.env.render(recompute)
+        return self.env.render(recompute)
 
     def close(self) -> None:
         self.env.close()
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml
index b795d9d0816..bc0c5182179 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml
@@ -76,5 +76,5 @@ agent:
 # https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
 trainer:
   class: SequentialTrainer
-  timesteps: 1600
+  timesteps: 4800
   environment_info: log
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml
index fc2f07de553..dcd794f57a5 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml
@@ -78,5 +78,5 @@ agent:
 # https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
 trainer:
   class: SequentialTrainer
-  timesteps: 1600
+  timesteps: 4800
   environment_info: log
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ppo_cfg.yaml b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ppo_cfg.yaml
index 160ebcde604..7c1fd452d70 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ppo_cfg.yaml
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ppo_cfg.yaml
@@ -76,5 +76,5 @@ agent:
 # https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
 trainer:
   class: SequentialTrainer
-  timesteps: 1600
+  timesteps: 4800
   environment_info: log
diff --git a/source/standalone/workflows/rl_games/play.py b/source/standalone/workflows/rl_games/play.py
index 7aa1456fe0e..f5cb42a6fcf 100644
--- a/source/standalone/workflows/rl_games/play.py
+++ b/source/standalone/workflows/rl_games/play.py
@@ -94,6 +94,11 @@ def main():
 
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv):
+        env = multi_agent_to_single_agent(env)
+
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
@@ -106,10 +111,6 @@ def main():
         print_dict(video_kwargs, nesting=4)
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv):
-        env = multi_agent_to_single_agent(env)
-
     # wrap around environment for rl-games
     env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions)
 
diff --git a/source/standalone/workflows/rl_games/train.py b/source/standalone/workflows/rl_games/train.py
index a925be8575b..f2fa76e6ad3 100644
--- a/source/standalone/workflows/rl_games/train.py
+++ b/source/standalone/workflows/rl_games/train.py
@@ -129,6 +129,11 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
 
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv):
+        env = multi_agent_to_single_agent(env)
+
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
@@ -141,10 +146,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
         print_dict(video_kwargs, nesting=4)
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv):
-        env = multi_agent_to_single_agent(env)
-
     # wrap around environment for rl-games
     env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions)
 
diff --git a/source/standalone/workflows/rsl_rl/play.py b/source/standalone/workflows/rsl_rl/play.py
index 067c38165bb..ba618b44056 100644
--- a/source/standalone/workflows/rsl_rl/play.py
+++ b/source/standalone/workflows/rsl_rl/play.py
@@ -74,6 +74,11 @@ def main():
 
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv):
+        env = multi_agent_to_single_agent(env)
+
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
@@ -86,10 +91,6 @@ def main():
         print_dict(video_kwargs, nesting=4)
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv):
-        env = multi_agent_to_single_agent(env)
-
     # wrap around environment for rsl-rl
     env = RslRlVecEnvWrapper(env)
 
diff --git a/source/standalone/workflows/rsl_rl/train.py b/source/standalone/workflows/rsl_rl/train.py
index 6c737983156..9e07bf09f98 100644
--- a/source/standalone/workflows/rsl_rl/train.py
+++ b/source/standalone/workflows/rsl_rl/train.py
@@ -100,6 +100,10 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
 
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv):
+        env = multi_agent_to_single_agent(env)
+
     # save resume path before creating a new log_dir
     if agent_cfg.resume:
         resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
@@ -116,10 +120,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
         print_dict(video_kwargs, nesting=4)
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv):
-        env = multi_agent_to_single_agent(env)
-
     # wrap around environment for rsl-rl
     env = RslRlVecEnvWrapper(env)
 
diff --git a/source/standalone/workflows/sb3/play.py b/source/standalone/workflows/sb3/play.py
index e26908911b1..95d5b96abcf 100644
--- a/source/standalone/workflows/sb3/play.py
+++ b/source/standalone/workflows/sb3/play.py
@@ -48,6 +48,7 @@
 from stable_baselines3 import PPO
 from stable_baselines3.common.vec_env import VecNormalize
 
+from omni.isaac.lab.envs import DirectMARLEnv, multi_agent_to_single_agent
 from omni.isaac.lab.utils.dict import print_dict
 
 import omni.isaac.lab_tasks  # noqa: F401
@@ -82,6 +83,11 @@ def main():
 
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv):
+        env = multi_agent_to_single_agent(env)
+
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
diff --git a/source/standalone/workflows/sb3/train.py b/source/standalone/workflows/sb3/train.py
index 1ce8062961c..bf60fda825d 100644
--- a/source/standalone/workflows/sb3/train.py
+++ b/source/standalone/workflows/sb3/train.py
@@ -104,6 +104,11 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
 
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv):
+        env = multi_agent_to_single_agent(env)
+
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
@@ -116,10 +121,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
         print_dict(video_kwargs, nesting=4)
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv):
-        env = multi_agent_to_single_agent(env)
-
     # wrap around environment for stable baselines
     env = Sb3VecEnvWrapper(env)
 
diff --git a/source/standalone/workflows/skrl/play.py b/source/standalone/workflows/skrl/play.py
index d523a48930e..2280c32d61d 100644
--- a/source/standalone/workflows/skrl/play.py
+++ b/source/standalone/workflows/skrl/play.py
@@ -116,6 +116,11 @@ def main():
 
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]:
+        env = multi_agent_to_single_agent(env)
+
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
@@ -128,10 +133,6 @@ def main():
         print_dict(video_kwargs, nesting=4)
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]:
-        env = multi_agent_to_single_agent(env)
-
     # wrap around environment for skrl
     env = SkrlVecEnvWrapper(env, ml_framework=args_cli.ml_framework)  # same as: `wrap_env(env, wrapper="auto")`
 
diff --git a/source/standalone/workflows/skrl/train.py b/source/standalone/workflows/skrl/train.py
index bbbdabf6a16..a42fece9ef0 100644
--- a/source/standalone/workflows/skrl/train.py
+++ b/source/standalone/workflows/skrl/train.py
@@ -151,6 +151,11 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
 
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]:
+        env = multi_agent_to_single_agent(env)
+
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
@@ -163,10 +168,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
         print_dict(video_kwargs, nesting=4)
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]:
-        env = multi_agent_to_single_agent(env)
-
     # wrap around environment for skrl
     env = SkrlVecEnvWrapper(env, ml_framework=args_cli.ml_framework)  # same as: `wrap_env(env, wrapper="auto")`
 
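Note: every workflow script above applies the same reordering, so that a DirectMARLEnv is collapsed to a single-agent instance before gym.wrappers.RecordVideo is attached; the video wrapper then sees the render_mode forwarded by the multi-agent wrapper and a render() call that actually returns frames. A minimal sketch of the resulting launch order follows. It assumes the Isaac Sim app has already been started (AppLauncher boilerplate omitted), and the task id, environment count, and video settings are illustrative placeholders, not part of this patch.

    import gymnasium as gym

    from omni.isaac.lab.envs import DirectMARLEnv, multi_agent_to_single_agent

    import omni.isaac.lab_tasks  # noqa: F401  # registers Isaac Lab tasks with gymnasium
    from omni.isaac.lab_tasks.utils import parse_env_cfg

    # hypothetical task id and settings, for illustration only
    task = "Isaac-Cart-Double-Pendulum-Direct-v0"
    env_cfg = parse_env_cfg(task, num_envs=16)
    env = gym.make(task, cfg=env_cfg, render_mode="rgb_array")

    # 1. convert the multi-agent env to a single-agent instance first ...
    if isinstance(env.unwrapped, DirectMARLEnv):
        env = multi_agent_to_single_agent(env)

    # 2. ... and only then attach the video recorder, which relies on env.render()
    env = gym.wrappers.RecordVideo(env, video_folder="videos", step_trigger=lambda step: step == 0, video_length=200)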