|
1 |
| -import argparse |
2 |
| -import itertools |
3 | 1 | import os
|
4 |
| -import shutil |
5 | 2 | import subprocess
|
6 |
| -import time |
7 |
| -from datetime import datetime |
8 |
| -from pathlib import Path |
9 |
| -from typing import List |
10 |
| - |
11 |
| -import yaml |
12 |
| -from git import Repo |
13 |
| - |
14 |
| -from ..utils import dump_output, get_output_dir, get_output_json |
15 |
| -from .result_analyzer import analyze |
16 | 3 |
|
17 |
| -# Expected WORK_DIR structure |
18 |
| -# WORK_DIR/ |
19 |
| -# |---examples/ |
20 |
| -# |---pytorch-<ver1>-cuda<ver1>/ |
21 |
| -# |---run.sh |
22 |
| -# |---mnist/ |
23 |
| -# |---mnist-hogwild/ |
24 |
| -# |---<other-benchmarks> |
25 |
| -# |---pytorch-<ver2>-cuda<ver2>/ |
26 |
| -# |---summary.csv |
| 4 | +from typing import List |
27 | 5 |
|
28 | 6 | BM_NAME = "release-test"
|
29 | 7 | EXAMPLE_URL = "https://github.com/pytorch/examples.git"
|
30 | 8 | CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
31 |
| -DEFAULT_CONFIG_PATH = os.path.join( |
32 |
| - os.path.dirname(os.path.abspath(__file__)), "configs" |
33 |
| -) |
34 |
| -RUN_TEMPLATE = """ |
35 |
| -# GENERATED BY userbenchmark/release-test/__init__.py. DO NOT EDIT! |
36 |
| -bash {RELEASE_TEST_ROOT}/setup_env.sh '{CUDA_VERSION}' '{MAGMA_VERSION}' '{PYTORCH_VERSION}' '{PYTORCH_CHANNEL}' '{WORK_DIR}' |
37 |
| -bash {RELEASE_TEST_ROOT}/run_release_test.sh '{CUDA_VERSION}' '{RESULT_DIR}' |
38 |
| -""" |
39 |
| - |
40 |
| - |
41 |
| -def get_timestamp(): |
42 |
| - return datetime.fromtimestamp(time.time()).strftime("%Y%m%d%H%M%S") |
43 |
| - |
44 |
| - |
45 |
| -def get_work_dir(output_dir): |
46 |
| - work_dir = output_dir.joinpath(f"run-{get_timestamp()}") |
47 |
| - work_dir.mkdir(exist_ok=True, parents=True) |
48 |
| - return work_dir |
49 |
| - |
50 |
| - |
51 |
| -def generate_test_scripts(config, work_dir): |
52 |
| - assert "cuda" in config and isinstance( |
53 |
| - config["cuda"], list |
54 |
| - ), f"Expected CUDA config list, but not found." |
55 |
| - assert "pytorch" in config and isinstance( |
56 |
| - config["pytorch"], list |
57 |
| - ), f"Exptected pytorch version list, but not found." |
58 |
| - bm_matrix = [config["cuda"], config["pytorch"]] |
59 |
| - run_scripts = {} |
60 |
| - for cuda, pytorch in itertools.product(*bm_matrix): |
61 |
| - run_key = f"pytorch-{pytorch['version']}-cuda-{cuda['version']}" |
62 |
| - run_script = RUN_TEMPLATE.format( |
63 |
| - RELEASE_TEST_ROOT=CURRENT_DIR, |
64 |
| - CUDA_VERSION=cuda["version"], |
65 |
| - MAGMA_VERSION=cuda["magma_version"], |
66 |
| - PYTORCH_VERSION=pytorch["version"], |
67 |
| - PYTORCH_CHANNEL=pytorch["conda_channel"], |
68 |
| - WORK_DIR=work_dir, |
69 |
| - RESULT_DIR=work_dir.joinpath(run_key), |
70 |
| - ) |
71 |
| - run_scripts[run_key] = run_script |
72 |
| - return run_scripts |
73 |
| - |
74 |
| - |
75 |
| -def dump_test_scripts(run_scripts, work_dir): |
76 |
| - for run_key, run_script in run_scripts.items(): |
77 |
| - run_script_loc = work_dir.joinpath(run_key) |
78 |
| - run_script_loc.mkdir(exist_ok=True) |
79 |
| - with open(run_script_loc.joinpath("run.sh"), "w") as rs: |
80 |
| - rs.write(run_script) |
81 |
| - |
82 |
| - |
83 |
| -def dump_result_to_json(metrics): |
84 |
| - result = get_output_json(BM_NAME, metrics) |
85 |
| - dump_output(BM_NAME, result) |
86 |
| - |
87 |
| - |
88 |
| -def run_benchmark(run_scripts, work_dir): |
89 |
| - for run_key, _rscript in run_scripts.items(): |
90 |
| - run_script_path = work_dir.joinpath(run_key, "run.sh") |
91 |
| - # run the benchmark |
92 |
| - print(f"Running benchmark {run_key} ...") |
93 |
| - subprocess.check_call(["bash", str(run_script_path)]) |
94 |
| - |
95 |
| - |
96 |
| -def get_config(config_name: str): |
97 |
| - if os.path.exists(os.path.join(DEFAULT_CONFIG_PATH, config_name)): |
98 |
| - config_name = os.path.join(DEFAULT_CONFIG_PATH, config_name) |
99 |
| - elif os.path.exists(os.path.join(DEFAULT_CONFIG_PATH, f"{config_name}.yaml")): |
100 |
| - config_name = os.path.join(DEFAULT_CONFIG_PATH, f"{config_name}.yaml") |
101 |
| - else: |
102 |
| - raise ValueError( |
103 |
| - f"Can't find config name {config_name} in config path {DEFAULT_CONFIG_PATH}." |
104 |
| - ) |
105 |
| - with open(config_name, "r") as yfile: |
106 |
| - config = yaml.safe_load(yfile) |
107 |
| - return config |
108 |
| - |
109 |
| - |
110 |
| -def parse_args(args): |
111 |
| - parser = argparse.ArgumentParser() |
112 |
| - parser.add_argument( |
113 |
| - "--config", "-c", default="1.12.1", type=str, help="Config for release testing" |
114 |
| - ) |
115 |
| - parser.add_argument( |
116 |
| - "--dry-run", |
117 |
| - action="store_true", |
118 |
| - help="Only generate the test scripts. Do not run the benchmark.", |
119 |
| - ) |
120 |
| - parser.add_argument( |
121 |
| - "--analyze", |
122 |
| - type=str, |
123 |
| - help="Only analyze the result of the specified work directory.", |
124 |
| - ) |
125 |
| - args = parser.parse_args(args) |
126 |
| - return args |
127 |
| - |
128 |
| - |
129 |
| -def prepare_release_tests(args: argparse.Namespace, work_dir: Path): |
130 |
| - config = get_config(args.config) |
131 |
| - run_scripts = generate_test_scripts(config, work_dir) |
132 |
| - dump_test_scripts(run_scripts, work_dir) |
133 |
| - # clone the examples repo |
134 |
| - Repo.clone_from(EXAMPLE_URL, work_dir.joinpath("examples")) |
135 |
| - return run_scripts |
136 |
| - |
137 |
| - |
138 |
| -def cleanup_release_tests(work_dir): |
139 |
| - examples_path = work_dir.joinpath("examples") |
140 |
| - if examples_path.exists(): |
141 |
| - shutil.rmtree(examples_path) |
142 | 9 |
|
143 | 10 |
|
144 | 11 | def run(args: List[str]):
|
145 |
| - args = parse_args(args) |
146 |
| - if args.analyze: |
147 |
| - analyze(args.analyze) |
148 |
| - return |
149 |
| - work_dir = get_work_dir(get_output_dir(BM_NAME)) |
150 |
| - run_scripts = prepare_release_tests(args=args, work_dir=work_dir) |
151 |
| - if not args.dry_run: |
152 |
| - run_benchmark(run_scripts, work_dir) |
153 |
| - metrics = analyze(work_dir) |
154 |
| - dump_result_to_json(metrics) |
155 |
| - cleanup_release_tests(work_dir) |
| 12 | + subprocess.check_call(["bash", f"{CURRENT_DIR}/run_release_test.sh"]) |
0 commit comments