From f325257b6ff9eecf03063cf610d07c6147b59a92 Mon Sep 17 00:00:00 2001 From: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com> Date: Thu, 18 Jan 2024 15:23:28 -0500 Subject: [PATCH 1/4] Fix SLURM runner comment handling --- deepspeed/launcher/multinode_runner.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/deepspeed/launcher/multinode_runner.py b/deepspeed/launcher/multinode_runner.py index 1442fff4aa7b..ec8b1b367347 100644 --- a/deepspeed/launcher/multinode_runner.py +++ b/deepspeed/launcher/multinode_runner.py @@ -424,12 +424,11 @@ def get_cmd(self, environment, active_resources): ] + split(self.args.launcher_args) if getattr(self.args, 'comment', ''): - srun_cmd += ['--comment', self.args.comment] + srun_cmd += ['--account', self.args.comment] if self.args.include != "": srun_cmd.append('--nodelist') srun_cmd.append(self._pdsh_include_to_nodelist(self.args.include)) - srun_cmd += ['--comment', self.args.slurm_comment] if self.args.num_nodes > 0: srun_cmd.append('--nodes') From 86bc6800b64931b5145adcce1a5ae8859fef092c Mon Sep 17 00:00:00 2001 From: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com> Date: Fri, 19 Jan 2024 07:31:29 -0500 Subject: [PATCH 2/4] Add --account in Slurm Launcher --- deepspeed/launcher/multinode_runner.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/deepspeed/launcher/multinode_runner.py b/deepspeed/launcher/multinode_runner.py index ec8b1b367347..ebde5b4553ce 100644 --- a/deepspeed/launcher/multinode_runner.py +++ b/deepspeed/launcher/multinode_runner.py @@ -426,6 +426,9 @@ def get_cmd(self, environment, active_resources): if getattr(self.args, 'comment', ''): srun_cmd += ['--account', self.args.comment] + if getattr(self.args, 'account', ''): + srun_cmd += ['--comment', self.args.comment] + if self.args.include != "": srun_cmd.append('--nodelist') srun_cmd.append(self._pdsh_include_to_nodelist(self.args.include)) From d10a6513ed5c2d870d0dff6bfa524b71aa514ab4 Mon Sep 17 00:00:00 2001 From: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com> Date: Fri, 19 Jan 2024 07:32:36 -0500 Subject: [PATCH 3/4] fix switch of --account + --comment --- deepspeed/launcher/multinode_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deepspeed/launcher/multinode_runner.py b/deepspeed/launcher/multinode_runner.py index ebde5b4553ce..40df5d001dd3 100644 --- a/deepspeed/launcher/multinode_runner.py +++ b/deepspeed/launcher/multinode_runner.py @@ -424,10 +424,10 @@ def get_cmd(self, environment, active_resources): ] + split(self.args.launcher_args) if getattr(self.args, 'comment', ''): - srun_cmd += ['--account', self.args.comment] + srun_cmd += ['--comment', self.args.comment] if getattr(self.args, 'account', ''): - srun_cmd += ['--comment', self.args.comment] + srun_cmd += ['--account', self.args.account] if self.args.include != "": srun_cmd.append('--nodelist') From 6d480ea52743f6592f94988c1919b1c9b7c6fb5a Mon Sep 17 00:00:00 2001 From: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com> Date: Fri, 19 Jan 2024 07:34:20 -0500 Subject: [PATCH 4/4] Add `--account` runner argument --- deepspeed/launcher/runner.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/deepspeed/launcher/runner.py b/deepspeed/launcher/runner.py index a9cfa15411f6..2c53e2d59ef4 100755 --- a/deepspeed/launcher/runner.py +++ b/deepspeed/launcher/runner.py @@ -177,7 +177,14 @@ def parse_args(args=None): "--comment", default="", type=str, - help="A comment that can be used for metadata." + help="A comment that can be used for metadata. Used to pass --comment argument to srun in Slurm launcher" + ) + + parser.add_argument( + "--account", + default="", + type=str, + help="Used to pass --account argument to srun in Slurm launcher" ) parser.add_argument("--elastic_training",