From 3dab3d6c0b659218139a3e2b1d69bf585f187f1f Mon Sep 17 00:00:00 2001
From: amrmousa144 <63200956+amrmousa144@users.noreply.github.com>
Date: Tue, 24 Sep 2024 08:33:38 +0100
Subject: [PATCH] Fixes setting the device from CLI in the RL training scripts (#1013)

This pull request fixes the issue where the device (CPU or CUDA) is not set correctly when using the `--device` argument in Hydra-configured scripts such as `rsl_rl/train.py` and `skrl/train.py`. The bug caused the scripts to always default to `cuda:0`, even when `cpu` or a specific CUDA device (e.g., `cuda:1`) was selected.

The fix sets the selected device in `env_cfg` before the environment is initialized with `gym.make()`, falling back to the existing config value when `--device` is not passed:

```python
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
```

Fixes #1012

- Bug fix (non-breaking change which fixes an issue)

Before:

- `skrl/train.py`: when run with `--device cpu`, the script still defaults to `cuda:0`.
- `rsl_rl/train.py`: the script freezes at `[INFO]: Starting the simulation. This may take a few seconds. Please wait....`

After:

- Both scripts run on the specified device (e.g., `cpu` or `cuda:1`) without defaulting to `cuda:0` or freezing.
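For reference, here is a minimal sketch of how the override sits in the training scripts just before environment creation, and one way to confirm the active device. It is illustrative only: `args_cli` and `env_cfg` are the objects the training scripts already build, and reading `env.unwrapped.device` is assumed to report the simulation device of the created environment.

```python
import gymnasium as gym

# apply the CLI override; keep the config default when --device is not passed
env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device

# create the environment only after the device has been written into the config
env = gym.make(args_cli.task, cfg=env_cfg)
print(f"[INFO] Environment device: {env.unwrapped.device}")  # e.g. "cpu" or "cuda:1"
```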
- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format`
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already exists there
---
 CONTRIBUTORS.md                                 | 3 ++-
 .../omni/isaac/lab/envs/direct_marl_env.py      | 7 +++++++
 .../omni/isaac/lab/envs/direct_marl_env_cfg.py  | 8 ++++++++
 source/standalone/workflows/rl_games/train.py   | 2 ++
 source/standalone/workflows/rsl_rl/train.py     | 1 +
 source/standalone/workflows/sb3/train.py        | 1 +
 source/standalone/workflows/skrl/train.py       | 9 +++------
 7 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 74c3e2488b..8a96a25c18 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -31,9 +31,10 @@ Guidelines for modifications:
 
 ## Contributors
 
-* Anton Bjørndahl Mortensen
 * Alice Zhou
+* Amr Mousa
 * Andrej Orsula
+* Anton Bjørndahl Mortensen
 * Antonio Serrano-Muñoz
 * Arjun Bhardwaj
 * Brayden Zhang
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env.py
index 7273ec9234..c467225d49 100644
--- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env.py
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env.py
@@ -79,6 +79,12 @@ def __init__(self, cfg: DirectMARLEnvCfg, render_mode: str | None = None, **kwar
         # initialize internal variables
         self._is_closed = False
 
+        # set the seed for the environment
+        if self.cfg.seed is not None:
+            self.seed(self.cfg.seed)
+        else:
+            carb.log_warn("Seed not set for the environment. The environment creation may not be deterministic.")
+
         # create a simulation context to control the simulator
         if SimulationContext.instance() is None:
             self.sim: SimulationContext = SimulationContext(self.cfg.sim)
@@ -88,6 +94,7 @@ def __init__(self, cfg: DirectMARLEnvCfg, render_mode: str | None = None, **kwar
         # print useful information
         print("[INFO]: Base environment:")
         print(f"\tEnvironment device : {self.device}")
+        print(f"\tEnvironment seed : {self.cfg.seed}")
         print(f"\tPhysics step-size : {self.physics_dt}")
         print(f"\tRendering step-size : {self.physics_dt * self.cfg.sim.render_interval}")
         print(f"\tEnvironment step-size : {self.step_dt}")
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env_cfg.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env_cfg.py
index 8cfcdcfa93..3dcf364f5c 100644
--- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env_cfg.py
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env_cfg.py
@@ -41,6 +41,14 @@ class DirectMARLEnvCfg:
     """
 
     # general settings
+    seed: int | None = None
+    """The seed for the random number generator. Defaults to None, in which case the seed is not set.
+
+    Note:
+        The seed is set at the beginning of the environment initialization. This ensures that the environment
+        creation is deterministic and behaves similarly across different runs.
+    """
+
     decimation: int = MISSING
     """Number of control action updates @ sim dt per policy dt.
 
diff --git a/source/standalone/workflows/rl_games/train.py b/source/standalone/workflows/rl_games/train.py
index 82b64ae588..79241df823 100644
--- a/source/standalone/workflows/rl_games/train.py
+++ b/source/standalone/workflows/rl_games/train.py
@@ -74,6 +74,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     """Train with RL-Games agent."""
     # override configurations with non-hydra CLI arguments
     env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
+    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
+
     agent_cfg["params"]["seed"] = args_cli.seed if args_cli.seed is not None else agent_cfg["params"]["seed"]
     agent_cfg["params"]["config"]["max_epochs"] = (
         args_cli.max_iterations if args_cli.max_iterations is not None else agent_cfg["params"]["config"]["max_epochs"]
diff --git a/source/standalone/workflows/rsl_rl/train.py b/source/standalone/workflows/rsl_rl/train.py
index d9167be56e..f02e0a3c0f 100644
--- a/source/standalone/workflows/rsl_rl/train.py
+++ b/source/standalone/workflows/rsl_rl/train.py
@@ -85,6 +85,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     # set the environment seed
     # note: certain randomizations occur in the environment initialization so we set the seed here
     env_cfg.seed = agent_cfg.seed
+    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
 
     # specify directory for logging experiments
     log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
diff --git a/source/standalone/workflows/sb3/train.py b/source/standalone/workflows/sb3/train.py
index 57f567f3d3..4c894a7355 100644
--- a/source/standalone/workflows/sb3/train.py
+++ b/source/standalone/workflows/sb3/train.py
@@ -81,6 +81,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     # set the environment seed
     # note: certain randomizations occur in the environment initialization so we set the seed here
     env_cfg.seed = agent_cfg["seed"]
+    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
 
     # directory for logging into
     log_dir = os.path.join("logs", "sb3", args_cli.task, datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
diff --git a/source/standalone/workflows/skrl/train.py b/source/standalone/workflows/skrl/train.py
index 2e964bf1b7..4b7f6b9942 100644
--- a/source/standalone/workflows/skrl/train.py
+++ b/source/standalone/workflows/skrl/train.py
@@ -106,6 +106,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     """Train with skrl agent."""
     # override configurations with non-hydra CLI arguments
     env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
+    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
+
     # multi-gpu training config
     if args_cli.distributed:
         env_cfg.sim.device = f"cuda:{app_launcher.local_rank}"
@@ -118,7 +120,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     skrl.config.jax.backend = "jax" if args_cli.ml_framework == "jax" else "numpy"
 
     # set the environment seed
-    # note: certain randomizations occur in the environment initialization so we set the seed here
+    # note: certain randomization occur in the environment initialization so we set the seed here
     env_cfg.seed = args_cli.seed if args_cli.seed is not None else agent_cfg["seed"]
 
     # specify directory for logging experiments
@@ -135,11 +137,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     # update log_dir
     log_dir = os.path.join(log_root_path, log_dir)
 
-    # multi-gpu training config
-    if args_cli.distributed:
-        # update env config device
-        env_cfg.sim.device = f"cuda:{app_launcher.local_rank}"
-
     # dump the configuration into log-directory
     dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg)
     dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg)
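As a note on the `skrl/train.py` changes above, the device-resolution order the patched script ends up with can be summarized by the following self-contained sketch. `resolve_device` is a hypothetical helper written only for illustration and is not part of the patch: the config default is used unless `--device` is passed, and distributed training still overrides both with the local rank's GPU.

```python
def resolve_device(cfg_device: str, cli_device: str | None, distributed: bool, local_rank: int) -> str:
    """Mirrors the precedence in the patched script: config default < --device < distributed rank."""
    device = cli_device if cli_device is not None else cfg_device
    if distributed:
        device = f"cuda:{local_rank}"
    return device

# examples of the resulting behaviour
assert resolve_device("cuda:0", None, distributed=False, local_rank=0) == "cuda:0"
assert resolve_device("cuda:0", "cpu", distributed=False, local_rank=0) == "cpu"
assert resolve_device("cuda:0", "cpu", distributed=True, local_rank=1) == "cuda:1"
```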