From 76a1cb9afc6ff4c7927f0f131a74c810e1a3d5e6 Mon Sep 17 00:00:00 2001
From: Rishi-V
Date: Tue, 24 Dec 2024 18:13:00 -0500
Subject: [PATCH 1/4] Updated workflow train.py (and play.py) so that videos successfully show up in videos/train when running on a DirectMARLEnv.

---
 source/standalone/workflows/rl_games/play.py  | 9 +++++----
 source/standalone/workflows/rl_games/train.py | 9 +++++----
 source/standalone/workflows/rsl_rl/play.py    | 9 +++++----
 source/standalone/workflows/rsl_rl/train.py   | 8 ++++----
 source/standalone/workflows/sb3/play.py       | 6 ++++++
 source/standalone/workflows/sb3/train.py      | 9 +++++----
 source/standalone/workflows/skrl/play.py      | 9 +++++----
 source/standalone/workflows/skrl/train.py     | 9 +++++----
 8 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/source/standalone/workflows/rl_games/play.py b/source/standalone/workflows/rl_games/play.py
index 7aa1456fe0..ac71aebf45 100644
--- a/source/standalone/workflows/rl_games/play.py
+++ b/source/standalone/workflows/rl_games/play.py
@@ -94,6 +94,11 @@ def main():
 
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv):
+        env = multi_agent_to_single_agent(env)
+    
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
@@ -106,10 +111,6 @@ def main():
         print_dict(video_kwargs, nesting=4)
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv):
-        env = multi_agent_to_single_agent(env)
-
     # wrap around environment for rl-games
     env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions)
 
diff --git a/source/standalone/workflows/rl_games/train.py b/source/standalone/workflows/rl_games/train.py
index a925be8575..b8269d76be 100644
--- a/source/standalone/workflows/rl_games/train.py
+++ b/source/standalone/workflows/rl_games/train.py
@@ -129,6 +129,11 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
 
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv):
+        env = multi_agent_to_single_agent(env)
+    
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
@@ -141,10 +146,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
         print_dict(video_kwargs, nesting=4)
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv):
-        env = multi_agent_to_single_agent(env)
-
     # wrap around environment for rl-games
     env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions)
 
diff --git a/source/standalone/workflows/rsl_rl/play.py b/source/standalone/workflows/rsl_rl/play.py
index 067c38165b..617ccc7529 100644
--- a/source/standalone/workflows/rsl_rl/play.py
+++ b/source/standalone/workflows/rsl_rl/play.py
@@ -74,6 +74,11 @@ def main():
 
     # create isaac environment
    env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv):
+        env = multi_agent_to_single_agent(env)
+    
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
@@ -86,10 +91,6 @@ def main():
         print_dict(video_kwargs, nesting=4)
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv):
-        env = multi_agent_to_single_agent(env)
-
     # wrap around environment for rsl-rl
     env = RslRlVecEnvWrapper(env)
 
diff --git a/source/standalone/workflows/rsl_rl/train.py b/source/standalone/workflows/rsl_rl/train.py
index 6c73798315..9e07bf09f9 100644
--- a/source/standalone/workflows/rsl_rl/train.py
+++ b/source/standalone/workflows/rsl_rl/train.py
@@ -100,6 +100,10 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
 
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv):
+        env = multi_agent_to_single_agent(env)
+
     # save resume path before creating a new log_dir
     if agent_cfg.resume:
         resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
@@ -116,10 +120,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
         print_dict(video_kwargs, nesting=4)
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv):
-        env = multi_agent_to_single_agent(env)
-
     # wrap around environment for rsl-rl
     env = RslRlVecEnvWrapper(env)
 
diff --git a/source/standalone/workflows/sb3/play.py b/source/standalone/workflows/sb3/play.py
index e26908911b..95d5b96abc 100644
--- a/source/standalone/workflows/sb3/play.py
+++ b/source/standalone/workflows/sb3/play.py
@@ -48,6 +48,7 @@
 from stable_baselines3 import PPO
 from stable_baselines3.common.vec_env import VecNormalize
 
+from omni.isaac.lab.envs import DirectMARLEnv, multi_agent_to_single_agent
 from omni.isaac.lab.utils.dict import print_dict
 
 import omni.isaac.lab_tasks  # noqa: F401
@@ -82,6 +83,11 @@ def main():
 
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv):
+        env = multi_agent_to_single_agent(env)
+
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
diff --git a/source/standalone/workflows/sb3/train.py b/source/standalone/workflows/sb3/train.py
index 1ce8062961..96147c60a8 100644
--- a/source/standalone/workflows/sb3/train.py
+++ b/source/standalone/workflows/sb3/train.py
@@ -104,6 +104,11 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
 
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv):
+        env = multi_agent_to_single_agent(env)
+    
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
@@ -116,10 +121,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
         print_dict(video_kwargs, nesting=4)
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv):
-        env = multi_agent_to_single_agent(env)
-
     # wrap around environment for stable baselines
     env = Sb3VecEnvWrapper(env)
 
diff --git a/source/standalone/workflows/skrl/play.py b/source/standalone/workflows/skrl/play.py
index d523a48930..65a732bfbb 100644
--- a/source/standalone/workflows/skrl/play.py
+++ b/source/standalone/workflows/skrl/play.py
@@ -116,6 +116,11 @@ def main():
 
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]:
+        env = multi_agent_to_single_agent(env)
+    
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
@@ -128,10 +133,6 @@ def main():
         print_dict(video_kwargs, nesting=4)
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]:
-        env = multi_agent_to_single_agent(env)
-
     # wrap around environment for skrl
     env = SkrlVecEnvWrapper(env, ml_framework=args_cli.ml_framework)  # same as: `wrap_env(env, wrapper="auto")`
 
diff --git a/source/standalone/workflows/skrl/train.py b/source/standalone/workflows/skrl/train.py
index bbbdabf6a1..d6a3d3322a 100644
--- a/source/standalone/workflows/skrl/train.py
+++ b/source/standalone/workflows/skrl/train.py
@@ -151,6 +151,11 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
 
     # create isaac environment
     env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
+
+    # convert to single-agent instance if required by the RL algorithm
+    if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]:
+        env = multi_agent_to_single_agent(env)
+    
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
@@ -163,10 +168,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
         print_dict(video_kwargs, nesting=4)
         env = gym.wrappers.RecordVideo(env, **video_kwargs)
 
-    # convert to single-agent instance if required by the RL algorithm
-    if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]:
-        env = multi_agent_to_single_agent(env)
-
     # wrap around environment for skrl
     env = SkrlVecEnvWrapper(env, ml_framework=args_cli.ml_framework)  # same as: `wrap_env(env, wrapper="auto")`
 

From 5a0c6ea7c0980efbf5822d7f121aa88e98a87860 Mon Sep 17 00:00:00 2001
From: Rishi-V
Date: Tue, 24 Dec 2024 18:39:17 -0500
Subject: [PATCH 2/4] Formatting updates

---
 source/standalone/workflows/rl_games/play.py  | 2 +-
 source/standalone/workflows/rl_games/train.py | 2 +-
 source/standalone/workflows/rsl_rl/play.py    | 2 +-
 source/standalone/workflows/sb3/train.py      | 2 +-
 source/standalone/workflows/skrl/play.py      | 2 +-
 source/standalone/workflows/skrl/train.py     | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/source/standalone/workflows/rl_games/play.py b/source/standalone/workflows/rl_games/play.py
index ac71aebf45..f5cb42a6fc 100644
--- a/source/standalone/workflows/rl_games/play.py
+++ b/source/standalone/workflows/rl_games/play.py
@@ -98,7 +98,7 @@ def main():
     # convert to single-agent instance if required by the RL algorithm
     if isinstance(env.unwrapped, DirectMARLEnv):
         env = multi_agent_to_single_agent(env)
-    
+
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
diff --git a/source/standalone/workflows/rl_games/train.py b/source/standalone/workflows/rl_games/train.py
index b8269d76be..f2fa76e6ad 100644
--- a/source/standalone/workflows/rl_games/train.py
+++ b/source/standalone/workflows/rl_games/train.py
@@ -133,7 +133,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     # convert to single-agent instance if required by the RL algorithm
     if isinstance(env.unwrapped, DirectMARLEnv):
         env = multi_agent_to_single_agent(env)
-    
+
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
diff --git a/source/standalone/workflows/rsl_rl/play.py b/source/standalone/workflows/rsl_rl/play.py
index 617ccc7529..ba618b4405 100644
--- a/source/standalone/workflows/rsl_rl/play.py
+++ b/source/standalone/workflows/rsl_rl/play.py
@@ -78,7 +78,7 @@ def main():
     # convert to single-agent instance if required by the RL algorithm
     if isinstance(env.unwrapped, DirectMARLEnv):
         env = multi_agent_to_single_agent(env)
-    
+
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
diff --git a/source/standalone/workflows/sb3/train.py b/source/standalone/workflows/sb3/train.py
index 96147c60a8..bf60fda825 100644
--- a/source/standalone/workflows/sb3/train.py
+++ b/source/standalone/workflows/sb3/train.py
@@ -108,7 +108,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     # convert to single-agent instance if required by the RL algorithm
     if isinstance(env.unwrapped, DirectMARLEnv):
         env = multi_agent_to_single_agent(env)
-    
+
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
diff --git a/source/standalone/workflows/skrl/play.py b/source/standalone/workflows/skrl/play.py
index 65a732bfbb..2280c32d61 100644
--- a/source/standalone/workflows/skrl/play.py
+++ b/source/standalone/workflows/skrl/play.py
@@ -120,7 +120,7 @@ def main():
     # convert to single-agent instance if required by the RL algorithm
     if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]:
         env = multi_agent_to_single_agent(env)
-    
+
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {
diff --git a/source/standalone/workflows/skrl/train.py b/source/standalone/workflows/skrl/train.py
index d6a3d3322a..a42fece9ef 100644
--- a/source/standalone/workflows/skrl/train.py
+++ b/source/standalone/workflows/skrl/train.py
@@ -155,7 +155,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     # convert to single-agent instance if required by the RL algorithm
     if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]:
         env = multi_agent_to_single_agent(env)
-    
+
     # wrap for video recording
     if args_cli.video:
         video_kwargs = {

From 754bcb17fc8aca29134dc7cae95c19c57c24bb00 Mon Sep 17 00:00:00 2001
From: Rishi-V
Date: Sun, 29 Dec 2024 04:03:32 -0500
Subject: [PATCH 3/4] Updated marl.py render and render_mode so that videos show up

---
 .../omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py
index 46519048ae..4550d77b16 100644
--- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py
@@ -58,6 +58,7 @@ def __init__(self, env: DirectMARLEnv) -> None:
         self.cfg = self.env.cfg
         self.sim = self.env.sim
         self.scene = self.env.scene
+        self.render_mode = self.env.render_mode
 
         self.single_observation_space = gym.spaces.Dict()
         if self._state_as_observation:
@@ -126,7 +127,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
         return obs, rewards, terminated, time_outs, extras
 
     def render(self, recompute: bool = False) -> np.ndarray | None:
-        self.env.render(recompute)
+        return self.env.render(recompute)
 
     def close(self) -> None:
         self.env.close()

From 1228a4e65bfd49c52950ef9092bf1d972ea512b2 Mon Sep 17 00:00:00 2001
From: Rishi-V
Date: Sun, 29 Dec 2024 04:04:13 -0500
Subject: [PATCH 4/4] Added sb3, rsl_rl and changed skrl from 1600 iterations to 4800. sb3 and rsl_rl currently fail.

---
 .../direct/cart_double_pendulum/__init__.py   |  4 ++
 .../agents/rsl_rl_ppo_cfg.py                  | 41 +++++++++++++++++++
 .../agents/sb3_ppo_cfg.yaml                   | 21 ++++++++++
 .../agents/skrl_ippo_cfg.yaml                 |  2 +-
 .../agents/skrl_mappo_cfg.yaml                |  2 +-
 .../agents/skrl_ppo_cfg.yaml                  |  2 +-
 6 files changed, 69 insertions(+), 3 deletions(-)
 create mode 100644 source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/rsl_rl_ppo_cfg.py
 create mode 100644 source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/sb3_ppo_cfg.yaml

diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/__init__.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/__init__.py
index 90d70311d1..58517b7043 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/__init__.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/__init__.py
@@ -22,8 +22,12 @@
     kwargs={
         "env_cfg_entry_point": f"{__name__}.cart_double_pendulum_env:CartDoublePendulumEnvCfg",
         "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
+        "rsl_rl_cfg_entry_point": (
+            f"{agents.__name__}.rsl_rl_ppo_cfg:CartDoublePendulumPPORunnerCfg"
+        ),  # FIXME: Raises errors
         "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
         "skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
         "skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
+        "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",  # FIXME: Raises errors
     },
 )
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/rsl_rl_ppo_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/rsl_rl_ppo_cfg.py
new file mode 100644
index 0000000000..a927580c3e
--- /dev/null
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/rsl_rl_ppo_cfg.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from omni.isaac.lab.utils import configclass
+
+from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import (
+    RslRlOnPolicyRunnerCfg,
+    RslRlPpoActorCriticCfg,
+    RslRlPpoAlgorithmCfg,
+)
+
+
+@configclass
+class CartDoublePendulumPPORunnerCfg(RslRlOnPolicyRunnerCfg):
+    num_steps_per_env = 16
+    max_iterations = 150
+    save_interval = 50
+    experiment_name = "cart_double_pendulum_direct"
+    empirical_normalization = False
+    policy = RslRlPpoActorCriticCfg(
+        init_noise_std=1.0,
+        actor_hidden_dims=[32, 32],
+        critic_hidden_dims=[32, 32],
+        activation="elu",
+    )
+    algorithm = RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.005,
+        num_learning_epochs=5,
+        num_mini_batches=4,
+        learning_rate=1.0e-3,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.01,
+        max_grad_norm=1.0,
+    )
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/sb3_ppo_cfg.yaml b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/sb3_ppo_cfg.yaml
new file mode 100644
index 0000000000..5856f35f8e
--- /dev/null
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/sb3_ppo_cfg.yaml
@@ -0,0 +1,21 @@
+# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
+seed: 42
+
+n_timesteps: !!float 1e6
+policy: 'MlpPolicy'
+n_steps: 16
+batch_size: 4096
+gae_lambda: 0.95
+gamma: 0.99
+n_epochs: 20
+ent_coef: 0.01
+learning_rate: !!float 3e-4
+clip_range: !!float 0.2
+policy_kwargs: "dict(
+                  activation_fn=nn.ELU,
+                  net_arch=[32, 32],
+                  squash_output=False,
+                )"
+vf_coef: 1.0
+max_grad_norm: 1.0
+device: "cuda:0"
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml
index b795d9d081..bc0c518217 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml
@@ -76,5 +76,5 @@ agent:
 # https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
 trainer:
   class: SequentialTrainer
-  timesteps: 1600
+  timesteps: 4800
   environment_info: log
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml
index fc2f07de55..dcd794f57a 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml
@@ -78,5 +78,5 @@ agent:
 # https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
 trainer:
   class: SequentialTrainer
-  timesteps: 1600
+  timesteps: 4800
   environment_info: log
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ppo_cfg.yaml b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ppo_cfg.yaml
index 160ebcde60..7c1fd452d7 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ppo_cfg.yaml
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/agents/skrl_ppo_cfg.yaml
@@ -76,5 +76,5 @@ agent:
 # https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
 trainer:
   class: SequentialTrainer
-  timesteps: 1600
+  timesteps: 4800
   environment_info: log
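For reference, a minimal sketch of the environment construction order this series converges on: the gym.make call, the DirectMARLEnv check, and the RecordVideo keyword arguments mirror the diffs above, while the helper name make_video_env and the fixed step_trigger/video_length values are illustrative assumptions rather than code from the patches.

# Sketch of the wrapping order established by these patches: convert a
# DirectMARLEnv to a single-agent view *before* adding RecordVideo, so the
# video wrapper sees an env whose render_mode is "rgb_array" and whose
# render() returns a frame (the marl.py change in PATCH 3/4).
import gymnasium as gym

from omni.isaac.lab.envs import DirectMARLEnv, multi_agent_to_single_agent


def make_video_env(task: str, env_cfg, video_dir: str, video_length: int = 200):
    # create the Isaac environment with offscreen rendering enabled
    env = gym.make(task, cfg=env_cfg, render_mode="rgb_array")

    # single-agent conversion first, as in the relocated blocks above
    if isinstance(env.unwrapped, DirectMARLEnv):
        env = multi_agent_to_single_agent(env)

    # video recording second, so it wraps the (possibly converted) env;
    # this trigger records one clip starting at step 0, as in the play.py scripts
    env = gym.wrappers.RecordVideo(
        env,
        video_folder=video_dir,
        step_trigger=lambda step: step == 0,
        video_length=video_length,
        disable_logger=True,
    )
    return env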