Skip to content

Commit

Permalink
fix test_tune_resume
Browse files Browse the repository at this point in the history
Signed-off-by: Jack Luar <[email protected]>
  • Loading branch information
luarss committed Jan 18, 2025
1 parent 5061e8d commit bc01553
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 3 deletions.
5 changes: 4 additions & 1 deletion tools/AutoTuner/src/autotuner/distributed.py
Original file line number Diff line number Diff line change
Expand Up @@ -941,7 +941,10 @@ def parse_arguments():
args.experiment = f"{args.mode}-{id}"
else:
args.experiment += f"-{args.mode}"
args.experiment += f"-{args.mode}-{DATE}"

# Append a date for non-resume mode to ensure unique experiment dirs.
if not args.resume:
args.experiment += f"-{DATE}"

# Convert time to seconds
if args.timeout_per_trial is not None:
Expand Down
5 changes: 3 additions & 2 deletions tools/AutoTuner/test/resume_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class ResumeCheck(unittest.TestCase):
design = "gcd"
samples = 5
iterations = 2
timeout = 120

def setUp(self):
self.config = os.path.join(
Expand All @@ -38,7 +39,7 @@ def setUp(self):
self.jobs = self.samples
self.num_cpus = os.cpu_count()

# How it works: Say we have 5 samples and 5 iterations.
# How it works: Say we have 5 samples and 5 iterations and 16 cores.
# If we want to limit to only 5 trials (and avoid any parallelism magic by Ray),
# we can set resources_per_trial = NUM_CORES / 5 = 3.2 (fractional resources_per_trial are allowed!).

Expand Down Expand Up @@ -66,7 +67,7 @@ def test_tune_resume(self):
# Run the first config asynchronously.
print("Running the first config")
with managed_process(self.commands[0], shell=True) as proc:
time.sleep(120)
time.sleep(self.timeout)

# Keep trying to stop the Ray cluster until it has actually stopped
while 1:
Expand Down

0 comments on commit bc01553

Please sign in to comment.