Skip to content
This repository was archived by the owner on Feb 26, 2025. It is now read-only.

Commit f97b90d

Browse files
Add wait_for_slurm
1 parent a21e55c commit f97b90d

File tree

2 files changed

+20
-6
lines changed

2 files changed

+20
-6
lines changed

custom_analyses/src/a02/run.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from bluepysnap.frame_report import FrameReport
77
from bluepysnap.simulation import Simulation
88
from bluepysnap.spike_report import SpikeReport
9-
from common.utils import L, run_analysis
9+
from common.utils import L, clean_slurm_env, run_analysis, wait_for_slurm
1010

1111
from blueetl.campaign.config import SimulationCampaign
1212

@@ -79,13 +79,14 @@ def _plot(index: int, path: str, conditions: dict, analysis_config: dict) -> tup
7979
def main(analysis_config: dict) -> dict:
8080
campaign = SimulationCampaign.load(analysis_config["simulation_campaign"])
8181
slurm_args = {**DEFAULT_SLURM_ARGS, **analysis_config.get("slurm_args", {})}
82+
clean_slurm_env()
8283

8384
log_folder = "log_test/%j"
8485
executor = submitit.AutoExecutor(folder=log_folder)
8586
executor.update_parameters(**slurm_args)
86-
L.info("Using %s executor.", executor.cluster)
87+
L.info("Using %s executor", executor.cluster)
8788

88-
# submit all jobs at once in a Slurm job array
89+
# submit all the jobs at once in a Slurm job array
8990
with executor.batch():
9091
jobs = [
9192
executor.submit(
@@ -97,9 +98,10 @@ def main(analysis_config: dict) -> dict:
9798
)
9899
for sim in campaign
99100
]
100-
L.info("Number of jobs %s", len(jobs))
101+
L.info("Waiting for slurm to be ready...")
102+
wait_for_slurm()
101103

102-
# process the results
104+
L.info("Waiting for %s jobs to complete...", len(jobs))
103105
outputs = []
104106
for job in jobs:
105107
sim_index, output_path = job.result()

custom_analyses/src/common/utils.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import logging
44
import os
55
import sys
6+
import time
67
from collections.abc import Callable
78
from pathlib import Path
89

@@ -49,7 +50,6 @@ def wrapper(
4950
log_level: str | int = logging.INFO,
5051
) -> dict:
5152
"""Call the wrapped function, and write the result to file."""
52-
clean_slurm_env()
5353
setup_logging(log_format=log_format, log_level=log_level)
5454
result = func(analysis_config)
5555
if analysis_output:
@@ -78,3 +78,15 @@ def clean_slurm_env():
7878
if key.startswith(("PMI_", "SLURM_")) and not key.endswith(("_ACCOUNT", "_PARTITION")):
7979
L.debug("Deleting env variable %s", key)
8080
del os.environ[key]
81+
82+
83+
def wait_for_slurm():
    """Wait for some time to allow sacct to return the correct status of the submitted jobs.

    This may be needed when the slurm ids have been reset, and re-used.

    See https://github.com/facebookincubator/submitit/issues/1660.

    The delay, in seconds, is read from the SUBMIT_JOBS_INITIAL_SLEEP environment
    variable (default: 10). A value that cannot be parsed as a finite, non-negative
    number is ignored with a warning and the default delay is used instead, so that
    a misconfigured environment does not raise from this function.
    """
    default_sleep = 10.0
    raw_value = os.getenv("SUBMIT_JOBS_INITIAL_SLEEP", "10")
    try:
        initial_sleep = float(raw_value)
    except ValueError:
        initial_sleep = None
    # float() accepts "nan", "inf" and negative numbers, all of which time.sleep()
    # rejects; the chained comparison is False for nan, so it is caught here too.
    if initial_sleep is None or not 0 <= initial_sleep < float("inf"):
        L.warning(
            "Ignoring invalid SUBMIT_JOBS_INITIAL_SLEEP=%r, using default %s",
            raw_value,
            default_sleep,
        )
        initial_sleep = default_sleep
    L.debug("SUBMIT_JOBS_INITIAL_SLEEP=%s", initial_sleep)
    time.sleep(initial_sleep)

0 commit comments

Comments
 (0)