From 5061e8dfea59cc368f86bdb84c5519ce82949dd5 Mon Sep 17 00:00:00 2001 From: Jack Luar Date: Sat, 18 Jan 2025 06:59:50 +0000 Subject: [PATCH] Fixes to smoke_test_timeout - subprocess bug: check must be False so that a nonzero return code can be inspected (check=True raises CalledProcessError instead). - METRIC key is missing from best_result if the timeout fires before any trial completes (true for the smoke test) Signed-off-by: Jack Luar --- tools/AutoTuner/src/autotuner/distributed.py | 3 +++ tools/AutoTuner/test/smoke_test_timeout.py | 21 ++++++++++---------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/tools/AutoTuner/src/autotuner/distributed.py b/tools/AutoTuner/src/autotuner/distributed.py index 5521d70a0c..9a3a441a03 100644 --- a/tools/AutoTuner/src/autotuner/distributed.py +++ b/tools/AutoTuner/src/autotuner/distributed.py @@ -1059,6 +1059,9 @@ def save_best(results): Save best configuration of parameters found. """ best_config = results.best_config + if METRIC not in results.best_result: + print("[ERROR TUN-0023] Metric not found in results.") + sys.exit(1) best_config["best_result"] = results.best_result[METRIC] trial_id = results.best_trial.trial_id new_best_path = f"{LOCAL_DIR}/{args.experiment}/" diff --git a/tools/AutoTuner/test/smoke_test_timeout.py b/tools/AutoTuner/test/smoke_test_timeout.py index 6b46ac95c5..07347207a6 100644 --- a/tools/AutoTuner/test/smoke_test_timeout.py +++ b/tools/AutoTuner/test/smoke_test_timeout.py @@ -20,14 +20,13 @@ def setUp(self): # 0.001 hour translates to 3.6 seconds, which will definitely cause failure. 
timeout_flags = ["--timeout 0.001", "--timeout_per_trial 0.001"] - self.timeout_limit = 15 # 15 second upper limit + self.timeout_limit = 60 # 60 second upper limit (Ray needs time to shutdown) self.commands = [ "python3 distributed.py" f" --design {self.design}" f" --platform {self.platform}" f" --experiment {self.experiment}" f" --config {self.config}" - f" --cpu_budget 1" f" --yes" f" {flag}" f" tune --samples 1" @@ -47,10 +46,10 @@ class asap7TimeoutSmokeTest(BaseTimeoutSmokeTest): def test_timeout(self): for command in self.commands: out = subprocess.run( - command, shell=True, check=True, timeout=self.timeout_limit + command, shell=True, check=False, timeout=self.timeout_limit ) - successful = out.returncode == 0 - self.assertTrue(successful) + failed = out.returncode == 1 + self.assertTrue(failed) class sky130hdTimeoutSmokeTest(BaseTimeoutSmokeTest): @@ -60,10 +59,10 @@ class sky130hdTimeoutSmokeTest(BaseTimeoutSmokeTest): def test_timeout(self): for command in self.commands: out = subprocess.run( - command, shell=True, check=True, timeout=self.timeout_limit + command, shell=True, check=False, timeout=self.timeout_limit ) - successful = out.returncode == 0 - self.assertTrue(successful) + failed = out.returncode == 1 + self.assertTrue(failed) class ihpsg13g2TimeoutSmokeTest(BaseTimeoutSmokeTest): @@ -73,10 +72,10 @@ class ihpsg13g2TimeoutSmokeTest(BaseTimeoutSmokeTest): def test_timeout(self): for command in self.commands: out = subprocess.run( - command, shell=True, check=True, timeout=self.timeout_limit + command, shell=True, check=False, timeout=self.timeout_limit ) - successful = out.returncode == 0 - self.assertTrue(successful) + failed = out.returncode == 1 + self.assertTrue(failed) if __name__ == "__main__":