Skip to content

Commit

Permalink
only create multi-slurm-task path if needed
Browse files: browse the repository at this point in the history
  • Loading branch information
NeoLegends committed Oct 31, 2024
1 parent 6d29c5e commit 909ae4c
Showing 1 changed file with 22 additions and 40 deletions.
62 changes: 22 additions & 40 deletions sisyphus/simple_linux_utility_for_resource_management_engine.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -233,7 +233,9 @@ def submit_helper(self, call, logpath, rqmt, name, task_name, start_id, end_id,
:param int step_size:
"""
name = self.process_task_name(name)
out_log_file = logpath + "/%x.%A.%t.%a"
out_log_file = logpath + "/%x.%A.%a"
if rqmt.get("multi_node_slots", 1) > 1:
out_log_file += ".%t"
sbatch_call = ["sbatch", "-J", name, "--mail-type=None"]
sbatch_call += self.options(rqmt)
sbatch_call += ["-o", f"{out_log_file}.batch"]
Expand Down Expand Up @@ -405,8 +407,8 @@ def init_worker(self, task):
next(filter(None, (os.getenv(var, None) for var in ["SLURM_NTASKS", "SLURM_NPROCS"])), "1")
)
slurm_task_id = int(os.getenv("SLURM_PROCID", "0"))

array_task_id = self.get_task_id(None)

# keep backwards compatibility: only change output file name for multi-SLURM-task jobs
log_suffix = array_task_id if slurm_num_tasks <= 1 else f"{array_task_id}.{slurm_task_id}"
logpath = os.path.relpath(task.path(gs.JOB_LOG, log_suffix))
Expand All @@ -416,46 +418,26 @@ def init_worker(self, task):
job_id = next(
filter(None, (os.getenv(name, None) for name in ["SLURM_JOB_ID", "SLURM_JOBID", "SLURM_ARRAY_JOB_ID"])), "0"
)
has_linked_logfile = False
engine_logpath_candidates = [
(
os.path.dirname(logpath)
+ "/engine/"
+ os.getenv("SLURM_JOB_NAME")
+ "."
+ job_id
+ "."
+ str(slurm_task_id)
+ "."
+ os.getenv("SLURM_ARRAY_TASK_ID", "1")
),
(
os.path.dirname(logpath)
+ "/engine/"
+ os.getenv("SLURM_JOB_NAME")
+ "."
+ job_id
+ "."
+ os.getenv("SLURM_ARRAY_TASK_ID", "1")
),
]
for engine_logpath in engine_logpath_candidates:
if not os.path.isfile(engine_logpath):
continue
try:
engine_logpath = (
os.path.dirname(logpath)
+ "/engine/"
+ os.getenv("SLURM_JOB_NAME")
+ "."
+ job_id
+ "."
+ os.getenv("SLURM_ARRAY_TASK_ID", "1")
)
if slurm_num_tasks > 1:
engine_logpath += f".{slurm_task_id}"

try:
if os.path.isfile(engine_logpath):
os.link(engine_logpath, logpath)
has_linked_logfile = True
break
except FileExistsError:
pass

if not has_linked_logfile:
engine_logpath = engine_logpath_candidates[0]
logging.warning("Could not find engine logfile: %s Create soft link anyway." % engine_logpath)
try:
else:
logging.warning("Could not find engine logfile: %s Create soft link anyway." % engine_logpath)
os.symlink(os.path.relpath(engine_logpath, os.path.dirname(logpath)), logpath)
except FileExistsError:
pass
except FileExistsError:
pass

def get_logpath(self, logpath_base, task_name, task_id):
"""Returns log file for the currently running task"""
Expand Down

0 comments on commit 909ae4c

Please sign in to comment.