Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feat] Allow ReFrame to pass the access options in command line instead of the script for Slurm #3156

Merged
merged 15 commits into from
Jun 14, 2024
Merged
14 changes: 14 additions & 0 deletions docs/config_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,20 @@ System Partition Configuration
.. warning::
This option is broken in 4.0.


.. py:attribute:: systems.partitions.sched_options.sched_access_in_submit

:required: No
:default: ``false``

Normally, ReFrame will pass the :attr:`~config.systems.partitions.access` options to the job script only.
When this attribute is ``true``, the options are passed verbatim in the submission command instead of the job script.
ekouts marked this conversation as resolved.
Show resolved Hide resolved

This option is relevant for the LSF, OAR, PBS and Slurm backends.

.. versionadded:: 4.6.0
vkarak marked this conversation as resolved.
Show resolved Hide resolved


.. py:attribute:: systems.partitions.sched_options.ssh_hosts

:required: No
Expand Down
15 changes: 15 additions & 0 deletions docs/manpage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1214,6 +1214,21 @@ Here is an alphabetical list of the environment variables recognized by ReFrame.
Whenever an environment variable is associated with a configuration option, its default value is omitted as it is the same.


.. envvar:: RFM_SCHED_ACCESS_IN_SUBMIT

Pass access options in the submission command (relevant for LSF, OAR, PBS and Slurm).

.. table::
:align: left

================================== ==================
Associated command line option N/A
Associated configuration parameter :attr:`~config.systems.partitions.sched_options.sched_access_in_submit`
================================== ==================

.. versionadded:: 4.6.0
vkarak marked this conversation as resolved.
Show resolved Hide resolved


.. envvar:: RFM_AUTODETECT_FQDN

Use the fully qualified domain name as the hostname.
Expand Down
14 changes: 11 additions & 3 deletions reframe/core/schedulers/lsf.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ class LsfJobScheduler(PbsJobScheduler):
def __init__(self):
self._prefix = '#BSUB'
self._submit_timeout = self.get_option('job_submit_timeout')
self._sched_access_in_submit = self.get_option(
'sched_access_in_submit'
)

def _format_option(self, var, option):
if var is not None:
Expand Down Expand Up @@ -57,8 +60,9 @@ def emit_preamble(self, job):
f'{self._prefix} -W {int(job.time_limit // 60)}'
)

for opt in job.sched_access:
preamble.append(f'{self._prefix} {opt}')
if not self._sched_access_in_submit:
for opt in job.sched_access:
preamble.append(f'{self._prefix} {opt}')

# emit the rest of the options
options = job.options + job.cli_options
Expand All @@ -76,7 +80,11 @@ def emit_preamble(self, job):

def submit(self, job):
with open(job.script_filename, 'r') as fp:
completed = _run_strict('bsub', stdin=fp)
cmd_opts = (
' '.join(job.sched_access) if self._sched_access_in_submit
else ''
)
completed = _run_strict(f'bsub {cmd_opts}', stdin=fp)
ekouts marked this conversation as resolved.
Show resolved Hide resolved
jobid_match = re.search(r'^Job <(?P<jobid>\S+)> is submitted',
completed.stdout)
if not jobid_match:
Expand Down
15 changes: 12 additions & 3 deletions reframe/core/schedulers/oar.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ class OarJobScheduler(PbsJobScheduler):
def __init__(self):
self._prefix = '#OAR'
self._submit_timeout = self.get_option('job_submit_timeout')
self._sched_access_in_submit = self.get_option(
'sched_access_in_submit'
)

def emit_preamble(self, job):
# host is de-facto nodes and core is number of cores requested per node
Expand Down Expand Up @@ -88,8 +91,11 @@ def emit_preamble(self, job):
num_nodes=num_nodes, num_tasks_per_node=num_tasks_per_node,
)]

if not self._sched_access_in_submit:
options += job.sched_access

# Emit the rest of the options
options += job.sched_access + job.options + job.cli_options
options += job.options + job.cli_options
for opt in options:
if opt.startswith('#'):
preamble.append(opt)
Expand All @@ -101,9 +107,12 @@ def emit_preamble(self, job):
def submit(self, job):
# OAR batch submission mode needs full path to the job script
job_script_fullpath = os.path.join(job.workdir, job.script_filename)

cmd_opts = (
' '.join(job.sched_access) if self._sched_access_in_submit
else ''
)
# OAR needs -S to submit job in batch mode
cmd = f'oarsub -S {job_script_fullpath}'
cmd = f'oarsub {cmd_opts} -S {job_script_fullpath}'
vkarak marked this conversation as resolved.
Show resolved Hide resolved
completed = _run_strict(cmd, timeout=self._submit_timeout)
jobid_match = re.search(r'.*OAR_JOB_ID=(?P<jobid>\S+)',
completed.stdout)
Expand Down
17 changes: 15 additions & 2 deletions reframe/core/schedulers/pbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ class PbsJobScheduler(sched.JobScheduler):
def __init__(self):
self._prefix = '#PBS'
self._submit_timeout = self.get_option('job_submit_timeout')
self._sched_access_in_submit = self.get_option(
'sched_access_in_submit'
)

def _emit_lselect_option(self, job):
num_tasks = job.num_tasks or 1
Expand All @@ -92,7 +95,12 @@ def _emit_lselect_option(self, job):
# Options starting with `-` are emitted in separate lines
rem_opts = []
verb_opts = []
for opt in (*job.sched_access, *job.options, *job.cli_options):
if self._sched_access_in_submit:
all_opts = (*job.options, *job.cli_options)
else:
all_opts = (*job.sched_access, *job.options, *job.cli_options)

for opt in all_opts:
if opt.startswith('-'):
rem_opts.append(opt)
elif opt.startswith('#'):
Expand Down Expand Up @@ -139,9 +147,14 @@ def filternodes(self, job, nodes):
'node filtering')

def submit(self, job):
cmd_opts = (
' '.join(job.sched_access) if self._sched_access_in_submit
else ''
)
# `-o` and `-e` options are only recognized in command line by the PBS
# Slurm wrappers.
cmd = f'qsub -o {job.stdout} -e {job.stderr} {job.script_filename}'
cmd = (f'qsub {cmd_opts} -o {job.stdout} -e {job.stderr} '
f'{job.script_filename}')
vkarak marked this conversation as resolved.
Show resolved Hide resolved
completed = _run_strict(cmd, timeout=self._submit_timeout)
jobid_match = re.search(r'^(?P<jobid>\S+)', completed.stdout)
if not jobid_match:
Expand Down
34 changes: 21 additions & 13 deletions reframe/core/schedulers/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@ def __init__(self):
self._submit_timeout = self.get_option('job_submit_timeout')
self._use_nodes_opt = self.get_option('use_nodes_option')
self._resubmit_on_errors = self.get_option('resubmit_on_errors')
self._sched_access_in_submit = self.get_option(
'sched_access_in_submit'
)

def make_job(self, *args, **kwargs):
return _SlurmJob(*args, **kwargs)
Expand Down Expand Up @@ -209,21 +212,22 @@ def emit_preamble(self, job):
)
)

for opt in job.sched_access:
if not opt.strip().startswith(('-C', '--constraint')):
preamble.append('%s %s' % (self._prefix, opt))

# To avoid overriding a constraint that's passed into `sched_access`,
# we AND it with the `--constraint` option passed either in `options`
# or in `cli_options`
constraints = []
constraint_parser = ArgumentParser()
constraint_parser.add_argument('-C', '--constraint')
parsed_options, _ = constraint_parser.parse_known_args(
job.sched_access
)
if parsed_options.constraint:
constraints.append(parsed_options.constraint.strip())
if not self._sched_access_in_submit:
for opt in job.sched_access:
if not opt.strip().startswith(('-C', '--constraint')):
preamble.append('%s %s' % (self._prefix, opt))

# To avoid overriding a constraint that's passed into
# `sched_access`, we AND it with the `--constraint` option
# passed either in `options` or in `cli_options`
parsed_options, _ = constraint_parser.parse_known_args(
job.sched_access
)
if parsed_options.constraint:
constraints.append(parsed_options.constraint.strip())
vkarak marked this conversation as resolved.
Show resolved Hide resolved

# NOTE: Here last of the passed --constraint job options is taken
# into account in order to respect the behavior of slurm.
Expand Down Expand Up @@ -259,7 +263,11 @@ def emit_preamble(self, job):
return list(filter(None, preamble))

def submit(self, job):
cmd = f'sbatch {job.script_filename}'
cmd_opts = (
' '.join(job.sched_access) if self._sched_access_in_submit
else ''
)
cmd = f'sbatch {cmd_opts} {job.script_filename}'
vkarak marked this conversation as resolved.
Show resolved Hide resolved
intervals = itertools.cycle([1, 2, 3])
while True:
try:
Expand Down
7 changes: 7 additions & 0 deletions reframe/frontend/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,13 @@ def main():
)

# Options not associated with command-line arguments
argparser.add_argument(
dest='sched_access_in_submit',
envvar='RFM_SCHED_ACCESS_IN_SUBMIT',
configvar='systems*/sched_options/sched_access_in_submit',
action='store_true',
help=('Pass access options in the submission command '
'(relevant for LSF, OAR, PBS and Slurm)')
)
argparser.add_argument(
dest='autodetect_fqdn',
envvar='RFM_AUTODETECT_FQDN',
Expand Down
2 changes: 2 additions & 0 deletions reframe/schemas/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@
"sched_options": {
"type": "object",
"properties": {
"sched_access_in_submit": {"type": "boolean"},
"hosts": {
"type": "array",
"items": {"type": "string"}
Expand Down Expand Up @@ -625,6 +626,7 @@
"systems/partitions/time_limit": null,
"systems/partitions/devices": [],
"systems/partitions/extras": {},
"systems*/sched_options/sched_access_in_submit": false,
"systems*/sched_options/ssh_hosts": [],
"systems*/sched_options/ignore_reqnodenotavail": false,
"systems*/sched_options/job_submit_timeout": 60,
Expand Down
Loading