From 6a49817e7c1248e6b4f740c252a47e0e29b82ceb Mon Sep 17 00:00:00 2001
From: Jack Luar <39641663+luarss@users.noreply.github.com>
Date: Wed, 19 Jun 2024 16:18:51 +0000
Subject: [PATCH 01/13] scaffold for resume smoke test

Signed-off-by: Jack Luar <39641663+luarss@users.noreply.github.com>
---
 flow/test/at_test_helper.sh          | 23 +++++++++++
 tools/AutoTuner/test/resume_check.py | 61 ++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+)
 create mode 100755 flow/test/at_test_helper.sh
 create mode 100644 tools/AutoTuner/test/resume_check.py

diff --git a/flow/test/at_test_helper.sh b/flow/test/at_test_helper.sh
new file mode 100755
index 0000000000..bc1f86cdfe
--- /dev/null
+++ b/flow/test/at_test_helper.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+set -eoux pipefail
+
+cd "$(dirname $(readlink -f $0))/../"
+
+# Setting args (and setting default values for testing)
+AT_TEST_SLUG=${2:-resume_check}
+
+echo "Now running $AT_TEST_SLUG"
+
+# change directory to ../
+# cd ..
+# echo "Install dependencies in Venv"
+# python3 -m venv .venv
+# source .venv/bin/activate
+# pip install -r tools/AutoTuner/requirements.txt
+
+
+if [ $AT_TEST_SLUG == "resume_check" ]; then
+    echo "Now running args.resume check"
+    python3 -m unittest tools.AutoTuner.test.resume_check
+fi
diff --git a/tools/AutoTuner/test/resume_check.py b/tools/AutoTuner/test/resume_check.py
new file mode 100644
index 0000000000..092f5a41be
--- /dev/null
+++ b/tools/AutoTuner/test/resume_check.py
@@ -0,0 +1,61 @@
+import unittest
+import subprocess
+import os
+import time
+
+from contextlib import contextmanager
+
+cur_dir = os.path.dirname(os.path.abspath(__file__))
+src_dir = os.path.join(cur_dir, "../src/autotuner")
+orfs_dir = os.path.join(cur_dir, "../../../flow")
+os.chdir(src_dir)
+
+@contextmanager
+def managed_process(*args, **kwargs):
+    proc = subprocess.Popen(*args, **kwargs)
+    try:
+        yield proc
+    finally:
+        if proc.poll() is None:  # If the process is still running
+            proc.kill()  # Forcefully kill it
+
+class ResumeCheck(unittest.TestCase):
+    # only test 1 platform/design.
+    platform = 'asap7'
+    design = 'gcd'
+    samples = 5
+    iterations = 2
+
+    def setUp(self):
+        self.config = os.path.join(orfs_dir, "designs", self.platform, self.design, "autotuner.json")
+        self.jobs = self.samples
+        options = ["", "--resume"]
+        self.commands = [
+            f"python3 distributed.py"
+            f" --design {self.design}"
+            f" --platform {self.platform}"
+            f" --config {self.config}"
+            f" --jobs {self.jobs}"
+            f" tune --iterations {self.iterations} --samples {self.samples}"
+            f" {c}"
+            for c in options
+        ]
+
+    def test_tune_resume(self):
+        # Goal is to first run the first config (without resume) and then run the second config (with resume)
+        # and check if the run is able to complete.
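The strategy stated in the comment above (start a run, interrupt it, then expect a follow-up `--resume` run to finish cleanly) reduces to the following standalone pattern; the `sleep` commands here are illustrative stand-ins for real tuning runs:

    import subprocess
    import time

    # Stand-in for a long tuning run that gets interrupted part-way.
    first_run = subprocess.Popen("sleep 30", shell=True)
    time.sleep(2)                 # let it make some progress
    if first_run.poll() is None:  # still running, as expected
        first_run.kill()          # simulate the interruption
        first_run.wait()

    # Stand-in for the follow-up invocation with --resume appended;
    # the test's success criterion is simply a clean exit code.
    second_run = subprocess.run("sleep 1", shell=True)
    assert second_run.returncode == 0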
+
+        # Run the first config
+        print("Running the first config")
+        with managed_process(self.commands[0], shell=True) as proc:
+            time.sleep(60)
+
+        # Run the second config to completion
+        print("Running the second config")
+        proc = subprocess.run(self.commands[1], shell=True)
+        successful = proc.returncode == 0
+        self.assertTrue(successful)
+
+
+if __name__ == '__main__':
+    unittest.main()

From bd83523a47b41642911b420996337dc4a33ed27c Mon Sep 17 00:00:00 2001
From: Jack Luar <39641663+luarss@users.noreply.github.com>
Date: Sat, 29 Jun 2024 07:46:20 +0000
Subject: [PATCH 02/13] some helpers

Signed-off-by: Jack Luar <39641663+luarss@users.noreply.github.com>
---
 flow/test/autotuner_report.py                | 40 ++++++++++++++++++++
 tools/AutoTuner/src/autotuner/distributed.py |  4 +-
 tools/AutoTuner/test/resume_check.py         | 30 ++++++++++-----
 3 files changed, 63 insertions(+), 11 deletions(-)
 create mode 100644 flow/test/autotuner_report.py

diff --git a/flow/test/autotuner_report.py b/flow/test/autotuner_report.py
new file mode 100644
index 0000000000..2d192cdd41
--- /dev/null
+++ b/flow/test/autotuner_report.py
@@ -0,0 +1,40 @@
+import csv
+import os
+
+def read_csv(file_path):
+    out = []
+    with open(file_path, 'r') as file:
+        reader = csv.reader(file)
+        for idx, row in enumerate(reader):
+            if idx == 0: continue
+            out.append(row)
+    return out
+
+def write_csv(lst, file_path):
+    with open(file_path, 'w') as file:
+        writer = csv.writer(file)
+        writer.writerows(lst)
+
+if __name__ == "__main__":
+    cur_dir = os.path.dirname(__file__)
+    log_dir = os.path.join(cur_dir, "../../flow/logs/asap7/gcd")
+    os.chdir(log_dir)
+
+    # Get the latest updated dir in the data directory.
+    latest_dir = max(os.listdir("."), key=os.path.getmtime)
+    folder_name = os.path.basename(latest_dir)
+
+    # Get all the progress.csv file recursively
+    progress_files = []
+    for root, _, files in os.walk(latest_dir):
+        for file in files:
+            if file == "progress.csv":
+                progress_files.append(os.path.join(root, file))
+
+    # Read the progress.csv file
+    progress = []
+    for file in progress_files:
+        progress += read_csv(file)
+
+    # Write the progress.csv file
+    write_csv(progress, f"{folder_name}_progress.csv")
diff --git a/tools/AutoTuner/src/autotuner/distributed.py b/tools/AutoTuner/src/autotuner/distributed.py
index 5543f0002a..c096b391f0 100644
--- a/tools/AutoTuner/src/autotuner/distributed.py
+++ b/tools/AutoTuner/src/autotuner/distributed.py
@@ -797,8 +797,8 @@ def parse_arguments():
     )
     tune_parser.add_argument(
         "--resources_per_trial",
-        type=int,
-        metavar="<int>",
+        type=float,
+        metavar="<float>",
         default=1,
         help="Number of CPUs to request for each tuning job.",
     )
diff --git a/tools/AutoTuner/test/resume_check.py b/tools/AutoTuner/test/resume_check.py
index 092f5a41be..1389184d93 100644
--- a/tools/AutoTuner/test/resume_check.py
+++ b/tools/AutoTuner/test/resume_check.py
@@ -24,11 +24,21 @@ class ResumeCheck(unittest.TestCase):
     platform = 'asap7'
     design = 'gcd'
     samples = 5
-    iterations = 2
+    iterations = 20

     def setUp(self):
         self.config = os.path.join(orfs_dir, "designs", self.platform, self.design, "autotuner.json")
         self.jobs = self.samples
+        self.num_cpus = os.cpu_count()
+
+        # How it works: Say we have 5 samples and 5 iterations.
+        # This means at any one time, there will be 5 trials.
+        # If we want to only run 5 trials:
+        # We can set resources_per_trial = NUM_CORES/5 = 3.2
+        # Yes fractional resources_per_trial is allowed.
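The arithmetic in the comment above implies a 16-core machine (NUM_CORES/5 = 3.2). A standalone sketch of the intended budgeting, with 16 cores assumed for concreteness:

    import os

    samples = 5
    num_cores = os.cpu_count() or 16  # e.g. 16 on the machine the comment assumes
    res_per_trial = round(num_cores / samples, 1)  # 16/5 -> 3.2; fractions are fine

    # Ray packs trials into the available CPU budget, so requesting
    # num_cores/samples CPUs per trial caps concurrency at exactly
    # `samples` trials in flight at any one time.
    print(f"{res_per_trial} CPUs per trial, {samples} trials in flight")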
+
+        # Cast to 1 decimal place
+        res_per_trial = float("{:.1f}".format(self.num_cpus/self.samples))
         options = ["", "--resume"]
         self.commands = [
             f"python3 distributed.py"
             f" --design {self.design}"
             f" --platform {self.platform}"
             f" --config {self.config}"
             f" --jobs {self.jobs}"
             f" tune --iterations {self.iterations} --samples {self.samples}"
+            f" --resources_per_trial {res_per_trial}"
             f" {c}"
             for c in options
         ]

     def test_tune_resume(self):
         # Goal is to first run the first config (without resume) and then run the second config (with resume)
         # and check if the run is able to complete.

         # Run the first config
         print("Running the first config")
-        with managed_process(self.commands[0], shell=True) as proc:
-            time.sleep(60)
-
-        # Run the second config to completion
-        print("Running the second config")
-        proc = subprocess.run(self.commands[1], shell=True)
-        successful = proc.returncode == 0
-        self.assertTrue(successful)
+        subprocess.run(self.commands[0], shell=True)
+        # with managed_process(self.commands[0], shell=True) as proc:
+        #     time.sleep(100)
+
+        # # Run the second config to completion
+        # print("Running the second config")
+        # proc = subprocess.run(self.commands[1], shell=True)
+        # successful = proc.returncode == 0
+        # self.assertTrue(successful)


 if __name__ == '__main__':

From 43913d03bf467951108acd451e6e3f2c3db107f4 Mon Sep 17 00:00:00 2001
From: Jack Luar <39641663+luarss@users.noreply.github.com>
Date: Sat, 29 Jun 2024 08:23:43 +0000
Subject: [PATCH 03/13] fix bug for multiple iterations

Signed-off-by: Jack Luar <39641663+luarss@users.noreply.github.com>
---
 tools/AutoTuner/src/autotuner/distributed.py | 3 ++-
 tools/AutoTuner/test/resume_check.py         | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/AutoTuner/src/autotuner/distributed.py b/tools/AutoTuner/src/autotuner/distributed.py
index c096b391f0..5904b8b62d 100644
--- a/tools/AutoTuner/src/autotuner/distributed.py
+++ b/tools/AutoTuner/src/autotuner/distributed.py
@@ -88,7 +88,8 @@ def step(self):
         """
         Run step experiment and compute its score.
""" - metrics_file = openroad(self.repo_dir, self.parameters, self.variant) + self._variant = f"{self.variant}-{self.step_}" + metrics_file = openroad(self.repo_dir, self.parameters, self._variant) self.step_ += 1 (score, effective_clk_period, num_drc) = self.evaluate( self.read_metrics(metrics_file) diff --git a/tools/AutoTuner/test/resume_check.py b/tools/AutoTuner/test/resume_check.py index 1389184d93..fd4b4178e8 100644 --- a/tools/AutoTuner/test/resume_check.py +++ b/tools/AutoTuner/test/resume_check.py @@ -24,7 +24,7 @@ class ResumeCheck(unittest.TestCase): platform = 'asap7' design = 'gcd' samples = 5 - iterations = 20 + iterations = 2 def setUp(self): self.config = os.path.join(orfs_dir, "designs", self.platform, self.design, "autotuner.json") From bbf85d3eebef4f24d68eafbbff4e507a68398d90 Mon Sep 17 00:00:00 2001 From: Jack Luar <39641663+luarss@users.noreply.github.com> Date: Sun, 30 Jun 2024 08:11:58 +0000 Subject: [PATCH 04/13] remove date to ensure args.resume work Signed-off-by: Jack Luar <39641663+luarss@users.noreply.github.com> --- tools/AutoTuner/src/autotuner/distributed.py | 4 +- tools/AutoTuner/test/resume_check.py | 63 +++++++++++--------- 2 files changed, 38 insertions(+), 29 deletions(-) diff --git a/tools/AutoTuner/src/autotuner/distributed.py b/tools/AutoTuner/src/autotuner/distributed.py index 5904b8b62d..98c2efecda 100644 --- a/tools/AutoTuner/src/autotuner/distributed.py +++ b/tools/AutoTuner/src/autotuner/distributed.py @@ -77,7 +77,7 @@ def setup(self, config): """ # We create the following directory structure: # 1/ 2/ 3/ 4/ 5/ 6/ - # ////-DATE// + # ////// repo_dir = os.getcwd() + "/../" * 6 self.repo_dir = os.path.abspath(repo_dir) self.parameters = parse_config(config, path=os.getcwd()) @@ -875,7 +875,7 @@ def parse_arguments(): ) sys.exit(7) - arguments.experiment += f"-{arguments.mode}-{DATE}" + arguments.experiment += f"-{arguments.mode}" if arguments.timeout is not None: arguments.timeout = round(arguments.timeout * 3600) diff --git a/tools/AutoTuner/test/resume_check.py b/tools/AutoTuner/test/resume_check.py index fd4b4178e8..c9c3b64fbb 100644 --- a/tools/AutoTuner/test/resume_check.py +++ b/tools/AutoTuner/test/resume_check.py @@ -2,6 +2,7 @@ import subprocess import os import time +import uuid from contextlib import contextmanager @@ -10,6 +11,7 @@ orfs_dir = os.path.join(cur_dir, "../../../flow") os.chdir(src_dir) + @contextmanager def managed_process(*args, **kwargs): proc = subprocess.Popen(*args, **kwargs) @@ -19,55 +21,62 @@ def managed_process(*args, **kwargs): if proc.poll() is None: # If the process is still running proc.kill() # Forcefully kill it + class ResumeCheck(unittest.TestCase): # only test 1 platform/design. - platform = 'asap7' - design = 'gcd' + platform = "asap7" + design = "gcd" samples = 5 iterations = 2 def setUp(self): - self.config = os.path.join(orfs_dir, "designs", self.platform, self.design, "autotuner.json") + self.config = os.path.join( + orfs_dir, "designs", self.platform, self.design, "autotuner.json" + ) self.jobs = self.samples self.num_cpus = os.cpu_count() # How it works: Say we have 5 samples and 5 iterations. # This means at any one time, there will be 5 trials. - # If we want to only run 5 trials: - # We can set resources_per_trial = NUM_CORES/5 = 3.2 + # If we want to only run 5 trials: + # We can set resources_per_trial = NUM_CORES/5 = 3.2 # Yes fractional resources_per_trial is allowed. 
# Cast to 1 decimal place - res_per_trial = float("{:.1f}".format(self.num_cpus/self.samples)) + res_per_trial = float("{:.1f}".format(self.num_cpus / self.samples)) + self.uuid = str(uuid.uuid4())[:8] options = ["", "--resume"] self.commands = [ - f"python3 distributed.py" - f" --design {self.design}" - f" --platform {self.platform}" - f" --config {self.config}" - f" --jobs {self.jobs}" - f" tune --iterations {self.iterations} --samples {self.samples}" - f" --resources_per_trial {res_per_trial}" - f" {c}" - for c in options - ] + f"python3 distributed.py" + f" --design {self.design}" + f" --platform {self.platform}" + f" --config {self.config}" + f" --experiment {self.uuid}" + f" --jobs {self.jobs}" + f" tune --iterations {self.iterations} --samples {self.samples}" + f" --resources_per_trial {res_per_trial}" + f" {c}" + for c in options + ] def test_tune_resume(self): # Goal is to first run the first config (without resume) and then run the second config (with resume) - # and check if the run is able to complete. + # and check if the run is able to complete. - # Run the first config + # Run the first config asynchronously. print("Running the first config") - subprocess.run(self.commands[0], shell=True) - # with managed_process(self.commands[0], shell=True) as proc: - # time.sleep(100) + with managed_process(self.commands[0], shell=True) as proc: + time.sleep(120) + + # Close all ray instances + subprocess.run("ray stop", shell=True) - # # Run the second config to completion - # print("Running the second config") - # proc = subprocess.run(self.commands[1], shell=True) - # successful = proc.returncode == 0 - # self.assertTrue(successful) + # Run the second config to completion + print("Running the second config") + proc = subprocess.run(self.commands[1], shell=True) + successful = proc.returncode == 0 + self.assertTrue(successful) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() From 9441f69d0c0072f88f4571308fd713aa26cfae39 Mon Sep 17 00:00:00 2001 From: Jack Luar <39641663+luarss@users.noreply.github.com> Date: Sun, 30 Jun 2024 09:08:31 +0000 Subject: [PATCH 05/13] working test for args resume Signed-off-by: Jack Luar <39641663+luarss@users.noreply.github.com> --- tools/AutoTuner/src/autotuner/distributed.py | 20 ++++++++++++++++++-- tools/AutoTuner/test/resume_check.py | 14 +++++++------- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/tools/AutoTuner/src/autotuner/distributed.py b/tools/AutoTuner/src/autotuner/distributed.py index 98c2efecda..a1adcfd4ad 100644 --- a/tools/AutoTuner/src/autotuner/distributed.py +++ b/tools/AutoTuner/src/autotuner/distributed.py @@ -710,7 +710,10 @@ def parse_arguments(): help="Time limit (in hours) for each trial run. Default is no limit.", ) tune_parser.add_argument( - "--resume", action="store_true", help="Resume previous run." + "--resume", + action="store_true", + help="Resume previous run. Note that you must also set a unique experiment\ + name identifier via `--experiment NAME` to be able to resume.", ) # Setup @@ -875,7 +878,20 @@ def parse_arguments(): ) sys.exit(7) - arguments.experiment += f"-{arguments.mode}" + # Check for experiment name and resume flag. + if arguments.resume and arguments.experiment == "test": + print( + '[ERROR TUN-0031] The flag "--resume"' + ' requires that "--experiment NAME" is also given.' + ) + sys.exit(1) + + # If the experiment name is the default, add a UUID to the end. 
+    if arguments.experiment == "test":
+        id = str(uuid())[:8]
+        arguments.experiment = f"{arguments.mode}-{id}"
+    else:
+        arguments.experiment += f"-{arguments.mode}"

     if arguments.timeout is not None:
         arguments.timeout = round(arguments.timeout * 3600)
diff --git a/tools/AutoTuner/test/resume_check.py b/tools/AutoTuner/test/resume_check.py
index c9c3b64fbb..671de15aa3 100644
--- a/tools/AutoTuner/test/resume_check.py
+++ b/tools/AutoTuner/test/resume_check.py
@@ -2,7 +2,6 @@
 import subprocess
 import os
 import time
-import uuid

 from contextlib import contextmanager

@@ -14,6 +13,9 @@

 @contextmanager
 def managed_process(*args, **kwargs):
+    """
+    Runs process and ensures it is killed when the context is exited.
+    """
     proc = subprocess.Popen(*args, **kwargs)
     try:
         yield proc
@@ -37,27 +39,25 @@ def setUp(self):
         self.num_cpus = os.cpu_count()

         # How it works: Say we have 5 samples and 5 iterations.
-        # This means at any one time, there will be 5 trials.
-        # If we want to only run 5 trials:
-        # We can set resources_per_trial = NUM_CORES/5 = 3.2
-        # Yes fractional resources_per_trial is allowed.
+        # If we want to limit to only 5 trials (and avoid any parallelism magic by Ray)
+        # We can set resources_per_trial = NUM_CORES/5 = 3.2 (fractional resources_per_trial are allowed!)

         # Cast to 1 decimal place
         res_per_trial = float("{:.1f}".format(self.num_cpus / self.samples))
-        self.uuid = str(uuid.uuid4())[:8]
         options = ["", "--resume"]
         self.commands = [
             f"python3 distributed.py"
             f" --design {self.design}"
             f" --platform {self.platform}"
             f" --config {self.config}"
-            f" --experiment {self.uuid}"
             f" --jobs {self.jobs}"
+            f" --experiment test_resume"
             f" tune --iterations {self.iterations} --samples {self.samples}"
             f" --resources_per_trial {res_per_trial}"
             f" {c}"
             for c in options
         ]
+        self.failCommands = []  # TODO

From 6f3dd19ec73d96c9bbd58ff30684f4fe7455c8d5 Mon Sep 17 00:00:00 2001
From: Jack Luar <39641663+luarss@users.noreply.github.com>
Date: Wed, 3 Jul 2024 15:12:58 +0000
Subject: [PATCH 06/13] check in robustify test

Signed-off-by: Jack Luar <39641663+luarss@users.noreply.github.com>
---
 tools/AutoTuner/test/resume_check.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tools/AutoTuner/test/resume_check.py b/tools/AutoTuner/test/resume_check.py
index 671de15aa3..021f6fa6b3 100644
--- a/tools/AutoTuner/test/resume_check.py
+++ b/tools/AutoTuner/test/resume_check.py
@@ -68,8 +68,16 @@ def test_tune_resume(self):
         with managed_process(self.commands[0], shell=True) as proc:
             time.sleep(120)

-        # Close all ray instances
-        subprocess.run("ray stop", shell=True)
+        # Keep trying to stop the ray cluster until it is stopped
+        while 1:
+            proc = subprocess.run("ray status", shell=True)
+            no_nodes = proc.returncode != 0
+            proc = subprocess.run("ray stop", shell=True)
+            successful = proc.returncode == 0
+
+            if no_nodes and successful:
+              break
+            time.sleep(10)

         # Run the second config to completion
         print("Running the second config")

From 1bead2cd351bf10a1957d44ca64d9a9d3f5ea61b Mon Sep 17 00:00:00 2001
From: Jack Luar <39641663+luarss@users.noreply.github.com>
Date: Thu, 12 Sep 2024 12:59:29 +0000
Subject: [PATCH 07/13] remove unneeded files

Signed-off-by: Jack Luar <39641663+luarss@users.noreply.github.com>
---
 flow/test/at_test_helper.sh   | 23 --------------------
 flow/test/autotuner_report.py | 40 -----------------------------------
 2 files changed, 63 deletions(-)
 delete mode 100755 flow/test/at_test_helper.sh
 delete mode 100644 flow/test/autotuner_report.py
diff --git a/flow/test/at_test_helper.sh b/flow/test/at_test_helper.sh
deleted file mode 100755
index bc1f86cdfe..0000000000
--- a/flow/test/at_test_helper.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/env bash
-
-set -eoux pipefail
-
-cd "$(dirname $(readlink -f $0))/../"
-
-# Setting args (and setting default values for testing)
-AT_TEST_SLUG=${2:-resume_check}
-
-echo "Now running $AT_TEST_SLUG"
-
-# change directory to ../
-# cd ..
-# echo "Install dependencies in Venv"
-# python3 -m venv .venv
-# source .venv/bin/activate
-# pip install -r tools/AutoTuner/requirements.txt
-
-
-if [ $AT_TEST_SLUG == "resume_check" ]; then
-    echo "Now running args.resume check"
-    python3 -m unittest tools.AutoTuner.test.resume_check
-fi
diff --git a/flow/test/autotuner_report.py b/flow/test/autotuner_report.py
deleted file mode 100644
index 2d192cdd41..0000000000
--- a/flow/test/autotuner_report.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import csv
-import os
-
-def read_csv(file_path):
-    out = []
-    with open(file_path, 'r') as file:
-        reader = csv.reader(file)
-        for idx, row in enumerate(reader):
-            if idx == 0: continue
-            out.append(row)
-    return out
-
-def write_csv(lst, file_path):
-    with open(file_path, 'w') as file:
-        writer = csv.writer(file)
-        writer.writerows(lst)
-
-if __name__ == "__main__":
-    cur_dir = os.path.dirname(__file__)
-    log_dir = os.path.join(cur_dir, "../../flow/logs/asap7/gcd")
-    os.chdir(log_dir)
-
-    # Get the latest updated dir in the data directory.
-    latest_dir = max(os.listdir("."), key=os.path.getmtime)
-    folder_name = os.path.basename(latest_dir)
-
-    # Get all the progress.csv file recursively
-    progress_files = []
-    for root, _, files in os.walk(latest_dir):
-        for file in files:
-            if file == "progress.csv":
-                progress_files.append(os.path.join(root, file))
-
-    # Read the progress.csv file
-    progress = []
-    for file in progress_files:
-        progress += read_csv(file)
-
-    # Write the progress.csv file
-    write_csv(progress, f"{folder_name}_progress.csv")

From e59097ad1d76534efb83a710439150231f1c840f Mon Sep 17 00:00:00 2001
From: Jack Luar <39641663+luarss@users.noreply.github.com>
Date: Thu, 12 Sep 2024 13:01:52 +0000
Subject: [PATCH 08/13] fix lint

Signed-off-by: Jack Luar <39641663+luarss@users.noreply.github.com>
---
 tools/AutoTuner/test/resume_check.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/AutoTuner/test/resume_check.py b/tools/AutoTuner/test/resume_check.py
index 021f6fa6b3..5b0d2f49e6 100644
--- a/tools/AutoTuner/test/resume_check.py
+++ b/tools/AutoTuner/test/resume_check.py
@@ -76,7 +76,7 @@ def test_tune_resume(self):
             successful = proc.returncode == 0

             if no_nodes and successful:
-              break
+                break
             time.sleep(10)

         # Run the second config to completion

From 55d50188bdf58652bf3c9bc66cbef149633ce537 Mon Sep 17 00:00:00 2001
From: Jack Luar <39641663+luarss@users.noreply.github.com>
Date: Thu, 12 Sep 2024 13:15:39 +0000
Subject: [PATCH 09/13] remove resources per trial

Signed-off-by: Jack Luar <39641663+luarss@users.noreply.github.com>
---
 tools/AutoTuner/src/autotuner/distributed.py | 2 +-
 tools/AutoTuner/test/resume_check.py         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/AutoTuner/src/autotuner/distributed.py b/tools/AutoTuner/src/autotuner/distributed.py
index a1adcfd4ad..67226d7762 100644
--- a/tools/AutoTuner/src/autotuner/distributed.py
+++ b/tools/AutoTuner/src/autotuner/distributed.py
@@ -1092,7 +1092,7 @@ def sweep():
         local_dir=LOCAL_DIR,
         resume=args.resume,
         stop={"training_iteration": args.iterations},
-        resources_per_trial={"cpu": args.resources_per_trial},
+        resources_per_trial={"cpu": os.cpu_count()/args.jobs},
         log_to_file=["trail-out.log", "trail-err.log"],
         trial_name_creator=lambda x: f"variant-{x.trainable_name}-{x.trial_id}-ray",
         trial_dirname_creator=lambda x: f"variant-{x.trainable_name}-{x.trial_id}-ray",
diff --git a/tools/AutoTuner/test/resume_check.py b/tools/AutoTuner/test/resume_check.py
index 5b0d2f49e6..c2c037e7bb 100644
--- a/tools/AutoTuner/test/resume_check.py
+++ b/tools/AutoTuner/test/resume_check.py
@@ -57,7 +57,7 @@ def setUp(self):
             f" {c}"
             for c in options
         ]
-        self.failCommands = []  # TODO
+

     def test_tune_resume(self):
         # Goal is to first run the first config (without resume) and then run the second config (with resume)
         # and check if the run is able to complete.
From bb0e6d4a4222e696a119cdd620c6bc7ddde3c036 Mon Sep 17 00:00:00 2001
From: Jack Luar <39641663+luarss@users.noreply.github.com>
Date: Thu, 12 Sep 2024 13:19:16 +0000
Subject: [PATCH 10/13] fix black

Signed-off-by: Jack Luar <39641663+luarss@users.noreply.github.com>
---
 tools/AutoTuner/src/autotuner/distributed.py | 2 +-
 tools/AutoTuner/test/resume_check.py         | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/AutoTuner/src/autotuner/distributed.py b/tools/AutoTuner/src/autotuner/distributed.py
index 67226d7762..d9c8946891 100644
--- a/tools/AutoTuner/src/autotuner/distributed.py
+++ b/tools/AutoTuner/src/autotuner/distributed.py
@@ -1092,7 +1092,7 @@ def sweep():
         local_dir=LOCAL_DIR,
         resume=args.resume,
         stop={"training_iteration": args.iterations},
-        resources_per_trial={"cpu": os.cpu_count()/args.jobs},
+        resources_per_trial={"cpu": os.cpu_count() / args.jobs},
         log_to_file=["trail-out.log", "trail-err.log"],
         trial_name_creator=lambda x: f"variant-{x.trainable_name}-{x.trial_id}-ray",
         trial_dirname_creator=lambda x: f"variant-{x.trainable_name}-{x.trial_id}-ray",
diff --git a/tools/AutoTuner/test/resume_check.py b/tools/AutoTuner/test/resume_check.py
index c2c037e7bb..92219eed22 100644
--- a/tools/AutoTuner/test/resume_check.py
+++ b/tools/AutoTuner/test/resume_check.py
@@ -58,7 +58,6 @@ def setUp(self):
             for c in options
         ]
-

     def test_tune_resume(self):
         # Goal is to first run the first config (without resume) and then run the second config (with resume)
         # and check if the run is able to complete.

From 8b81f80a7831fb0d13cd800d1ba4462c67d85304 Mon Sep 17 00:00:00 2001
From: Jack Luar <39641663+luarss@users.noreply.github.com>
Date: Sat, 11 Jan 2025 06:33:39 +0000
Subject: [PATCH 11/13] update test_autotuner.sh

Signed-off-by: Jack Luar <39641663+luarss@users.noreply.github.com>
---
 flow/test/test_autotuner.sh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/flow/test/test_autotuner.sh b/flow/test/test_autotuner.sh
index ae009fee1d..2b6e967b44 100755
--- a/flow/test/test_autotuner.sh
+++ b/flow/test/test_autotuner.sh
@@ -23,6 +23,9 @@ python3 -m unittest tools.AutoTuner.test.smoke_test_sample_iteration.${PLATFORM}
 if [ "$PLATFORM" == "asap7" ] && [ "$DESIGN" == "gcd" ]; then
     echo "Running Autotuner ref file test (only once)"
     python3 -m unittest tools.AutoTuner.test.ref_file_check.RefFileCheck.test_files
+
+    echo "Running AutoTuner resume test (only once)"
+    python3 -m unittest tools.AutoTuner.test.resume_check.ResumeCheck.test_tune_resume
 fi

 echo "Running Autotuner smoke algorithm & evaluation test"

From b7058eafd3a8d6592104c12d5210c42df0ad39ba Mon Sep 17 00:00:00 2001
From: Jack Luar <39641663+luarss@users.noreply.github.com>
Date: Sat, 11 Jan 2025 13:10:53 +0000
Subject: [PATCH 12/13] fix test helper scripts

Signed-off-by: Jack Luar <39641663+luarss@users.noreply.github.com>
---
 flow/test/test_autotuner.sh | 40 +++++++++++++++++++------------------
 flow/test/test_helper.sh    |  2 +-
 2 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/flow/test/test_autotuner.sh b/flow/test/test_autotuner.sh
index 2b6e967b44..f64ab7e92f 100755
--- a/flow/test/test_autotuner.sh
+++ b/flow/test/test_autotuner.sh
@@ -1,4 +1,6 @@
 #!/usr/bin/env bash
+DESIGN_NAME=${1:-gcd}
+PLATFORM=${2:-nangate45}

 # run the commands in ORFS root dir
 echo "[INFO FLW-0029] Installing dependencies in virtual environment."
@@ -20,7 +22,7 @@ python3 -m unittest tools.AutoTuner.test.smoke_test_sweep.${PLATFORM}SweepSmokeT
 echo "Running Autotuner smoke tests for --sample and --iteration."
 python3 -m unittest tools.AutoTuner.test.smoke_test_sample_iteration.${PLATFORM}SampleIterationSmokeTest.test_sample_iteration

-if [ "$PLATFORM" == "asap7" ] && [ "$DESIGN" == "gcd" ]; then
+if [ "$PLATFORM" == "asap7" ] && [ "$DESIGN_NAME" == "gcd" ]; then
     echo "Running Autotuner ref file test (only once)"
     python3 -m unittest tools.AutoTuner.test.ref_file_check.RefFileCheck.test_files

@@ -28,23 +30,23 @@ if [ "$PLATFORM" == "asap7" ] && [ "$DESIGN_NAME" == "gcd" ]; then
     python3 -m unittest tools.AutoTuner.test.resume_check.ResumeCheck.test_tune_resume
 fi

-echo "Running Autotuner smoke algorithm & evaluation test"
-python3 -m unittest tools.AutoTuner.test.smoke_test_algo_eval.${PLATFORM}AlgoEvalSmokeTest.test_algo_eval
-
-# run this test last (because it modifies current path)
-echo "Running Autotuner remote test"
-if [ "$PLATFORM" == "asap7" ] && [ "$DESIGN" == "gcd" ]; then
-    # Get the directory of the current script
-    script_dir="$(dirname "${BASH_SOURCE[0]}")"
-    cd "$script_dir"/../../
-    latest_image=$(./etc/DockerTag.sh -dev)
-    echo "ORFS_VERSION=$latest_image" > ./tools/AutoTuner/.env
-    cd ./tools/AutoTuner
-    docker compose up --wait
-    docker compose exec ray-worker bash -c "cd /OpenROAD-flow-scripts/tools/AutoTuner/src/autotuner && \
-        python3 distributed.py --design gcd --platform asap7 --server 127.0.0.1 --port 10001 \
-        --config ../../../../flow/designs/asap7/gcd/autotuner.json tune --samples 1"
-    docker compose down -v --remove-orphans
-fi
+# echo "Running Autotuner smoke algorithm & evaluation test"
+# python3 -m unittest tools.AutoTuner.test.smoke_test_algo_eval.${PLATFORM}AlgoEvalSmokeTest.test_algo_eval
+
+# # run this test last (because it modifies current path)
+# echo "Running Autotuner remote test"
+# if [ "$PLATFORM" == "asap7" ] && [ "$DESIGN_NAME" == "gcd" ]; then
+#     # Get the directory of the current script
+#     script_dir="$(dirname "${BASH_SOURCE[0]}")"
+#     cd "$script_dir"/../../
+#     latest_image=$(./etc/DockerTag.sh -dev)
+#     echo "ORFS_VERSION=$latest_image" > ./tools/AutoTuner/.env
+#     cd ./tools/AutoTuner
+#     docker compose up --wait
+#     docker compose exec ray-worker bash -c "cd /OpenROAD-flow-scripts/tools/AutoTuner/src/autotuner && \
+#         python3 distributed.py --design gcd --platform asap7 --server 127.0.0.1 --port 10001 \
+#         --config ../../../../flow/designs/asap7/gcd/autotuner.json tune --samples 1"
+#     docker compose down -v --remove-orphans
+# fi

 exit $ret
diff --git a/flow/test/test_helper.sh b/flow/test/test_helper.sh
index 4507dba8f2..123119cddb 100755
--- a/flow/test/test_helper.sh
+++ b/flow/test/test_helper.sh
@@ -108,7 +108,7 @@ fi
 if [ "${RUN_AUTOTUNER}" == "true" ]; then
     set +x
     echo "Start AutoTuner test."
-    ./test/test_autotuner.sh
+    ./test/test_autotuner.sh $DESIGN_NAME $PLATFORM
     set -x
 fi

From ae488ebae605b5b46d7da54a58a97ef61785ec83 Mon Sep 17 00:00:00 2001
From: Jack Luar <39641663+luarss@users.noreply.github.com>
Date: Sun, 12 Jan 2025 02:56:48 +0000
Subject: [PATCH 13/13] remove unstable tests

Signed-off-by: Jack Luar <39641663+luarss@users.noreply.github.com>
---
 flow/test/test_autotuner.sh | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/flow/test/test_autotuner.sh b/flow/test/test_autotuner.sh
index f64ab7e92f..7f8c258753 100755
--- a/flow/test/test_autotuner.sh
+++ b/flow/test/test_autotuner.sh
@@ -30,23 +30,4 @@ if [ "$PLATFORM" == "asap7" ] && [ "$DESIGN_NAME" == "gcd" ]; then
     python3 -m unittest tools.AutoTuner.test.resume_check.ResumeCheck.test_tune_resume
 fi

-# echo "Running Autotuner smoke algorithm & evaluation test"
-# python3 -m unittest tools.AutoTuner.test.smoke_test_algo_eval.${PLATFORM}AlgoEvalSmokeTest.test_algo_eval
-
-# # run this test last (because it modifies current path)
-# echo "Running Autotuner remote test"
-# if [ "$PLATFORM" == "asap7" ] && [ "$DESIGN_NAME" == "gcd" ]; then
-#     # Get the directory of the current script
-#     script_dir="$(dirname "${BASH_SOURCE[0]}")"
-#     cd "$script_dir"/../../
-#     latest_image=$(./etc/DockerTag.sh -dev)
-#     echo "ORFS_VERSION=$latest_image" > ./tools/AutoTuner/.env
-#     cd ./tools/AutoTuner
-#     docker compose up --wait
-#     docker compose exec ray-worker bash -c "cd /OpenROAD-flow-scripts/tools/AutoTuner/src/autotuner && \
-#         python3 distributed.py --design gcd --platform asap7 --server 127.0.0.1 --port 10001 \
-#         --config ../../../../flow/designs/asap7/gcd/autotuner.json tune --samples 1"
-#     docker compose down -v --remove-orphans
-# fi

 exit $ret
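Taken together, the shutdown logic that patches 06 and 08 settle on can be read in isolation as follows; this sketch mirrors the test's loop (including the 10-second poll), while the function wrapper itself is added purely for illustration:

    import subprocess
    import time

    def stop_ray_cluster(poll_seconds=10):
        """Keep issuing `ray stop` until `ray status` reports no live cluster."""
        while True:
            status = subprocess.run("ray status", shell=True)
            no_nodes = status.returncode != 0  # non-zero: nothing is answering
            stopped = subprocess.run("ray stop", shell=True)
            if no_nodes and stopped.returncode == 0:
                break
            time.sleep(poll_seconds)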