diff --git a/build_tools/benchmarks/common/benchmark_definition.py b/build_tools/benchmarks/common/benchmark_definition.py
index 9772ffd8b59c..16bf41f245a8 100644
--- a/build_tools/benchmarks/common/benchmark_definition.py
+++ b/build_tools/benchmarks/common/benchmark_definition.py
@@ -11,15 +11,18 @@
 """
 
 import json
+import os
+import re
 import subprocess
 
 from dataclasses import dataclass
 from enum import Enum
-from typing import Any, Dict, Sequence
+from typing import Any, Dict, Optional, Sequence
 
 # A map from CPU ABI to IREE's benchmark target architecture.
 CPU_ABI_TO_TARGET_ARCH_MAP = {
     "arm64-v8a": "cpu-arm64-v8a",
+    "x86_64": "cpu-x86-64",
 }
 
 # A map from GPU name to IREE's benchmark target architecture.
@@ -30,6 +33,7 @@
     "adreno-730": "gpu-adreno",
     "mali-g77": "gpu-mali-valhall",
     "mali-g78": "gpu-mali-valhall",
+    "unknown": "gpu-unknown",
 }
 
 
@@ -99,6 +103,58 @@ def execute_cmd_and_get_output(args: Sequence[str],
                      **kwargs).stdout.strip()
 
 
+def get_git_commit_hash(commit: str) -> str:
+  return execute_cmd_and_get_output(['git', 'rev-parse', commit],
+                                    cwd=os.path.dirname(
+                                        os.path.realpath(__file__)))
+
+
+def get_iree_benchmark_module_arguments(
+    results_filename: str,
+    driver: str,
+    benchmark_min_time: Optional[float] = None):
+  """Returns the common arguments to run iree-benchmark-module."""
+
+  if driver == "iree-vmvx":
+    # VMVX is very unoptimized for now and can take a long time to run.
+    # Decrease the repetition for it until it's reasonably fast.
+    repetitions = 3
+  else:
+    repetitions = 10
+
+  cmd = [
+      "--benchmark_format=json",
+      "--benchmark_out_format=json",
+      f"--benchmark_out={results_filename}",
+  ]
+  if benchmark_min_time:
+    cmd.extend([
+        f"--benchmark_min_time={benchmark_min_time}",
+    ])
+  else:
+    cmd.extend([
+        f"--benchmark_repetitions={repetitions}",
+    ])
+
+  return cmd
+
+
+def wait_for_iree_benchmark_module_start(process: subprocess.Popen,
+                                         verbose: bool = False) -> None:
+  """Waits for iree-benchmark-module to start; otherwise we may see a
+  connection failure when opening the capture tool."""
+
+  while True:
+    line = process.stdout.readline()  # pytype: disable=attribute-error
+    if line == "" and process.poll() is not None:  # Process completed
+      raise ValueError("Cannot find benchmark result line in the log!")
+    if verbose:
+      print(line.strip())
+    # Result available
+    if re.match(r"^BM_.+/real_time", line) is not None:
+      break
+
+
 class PlatformType(Enum):
   ANDROID = "Android"
   LINUX = "Linux"
@@ -150,6 +206,8 @@ def get_iree_gpu_arch_name(self) -> str:
   def get_cpu_arch_revision(self) -> str:
     if self.cpu_abi == "arm64-v8a":
       return self.__get_arm_cpu_arch_revision()
+    if self.cpu_abi == "x86_64":
+      return "x86_64"
     raise ValueError("Unrecognized CPU ABI; need to update the list")
 
   def to_json_object(self) -> Dict[str, Any]:
diff --git a/build_tools/benchmarks/common/benchmark_suite.py b/build_tools/benchmarks/common/benchmark_suite.py
index 72753cd32a1c..c18675f76455 100644
--- a/build_tools/benchmarks/common/benchmark_suite.py
+++ b/build_tools/benchmarks/common/benchmark_suite.py
@@ -39,6 +39,7 @@
 
 # All benchmarks' relative path against root build directory.
 BENCHMARK_SUITE_REL_PATH = "benchmark_suites"
+MODEL_FLAGFILE_NAME = "flagfile"
 MODEL_TOOLFILE_NAME = "tool"
 
 
diff --git a/build_tools/benchmarks/run_benchmarks_on_android.py b/build_tools/benchmarks/run_benchmarks_on_android.py
index 6c67a49b1de1..887656176629 100755
--- a/build_tools/benchmarks/run_benchmarks_on_android.py
+++ b/build_tools/benchmarks/run_benchmarks_on_android.py
@@ -41,17 +41,17 @@
 from common.benchmark_config import BenchmarkConfig
 from common.benchmark_driver import BenchmarkDriver
 from common.benchmark_definition import (execute_cmd,
-                                         execute_cmd_and_get_output)
-from common.benchmark_suite import (BenchmarkCase, BenchmarkSuite)
+                                         execute_cmd_and_get_output,
+                                         get_git_commit_hash,
+                                         get_iree_benchmark_module_arguments,
+                                         wait_for_iree_benchmark_module_start)
+from common.benchmark_suite import (MODEL_FLAGFILE_NAME, BenchmarkCase,
+                                    BenchmarkSuite)
 from common.android_device_utils import (get_android_device_model,
                                          get_android_device_info,
                                          get_android_gpu_name)
 from common.common_arguments import build_common_argument_parser
 
-# The flagfile/toolfile's filename for compiled benchmark artifacts.
-MODEL_FLAGFILE_NAME = "flagfile"
-MODEL_TOOLFILE_NAME = "tool"
-
 # Root directory to perform benchmarks in on the Android device.
 ANDROID_TMP_DIR = "/data/local/tmp/iree-benchmarks"
 
@@ -59,21 +59,6 @@
 TRACED_TOOL_REL_DIR = "traced-tools"
 
 
-def get_benchmark_repetition_count(runner: str) -> int:
-  """Returns the benchmark repetition count for the given runner."""
-  if runner == "iree-vmvx":
-    # VMVX is very unoptimized for now and can take a long time to run.
-    # Decrease the repetition for it until it's reasonably fast.
-    return 3
-  return 10
-
-
-def get_git_commit_hash(commit: str) -> str:
-  return execute_cmd_and_get_output(['git', 'rev-parse', commit],
-                                    cwd=os.path.dirname(
-                                        os.path.realpath(__file__)))
-
-
 def adb_push_to_tmp_dir(content: str,
                         relative_dir: str = "",
                         verbose: bool = False) -> str:
@@ -239,20 +224,11 @@ def __run_benchmark(self, android_case_dir: str, tool_name: str, driver: str,
         "taskset", taskset, android_tool, f"--flagfile={MODEL_FLAGFILE_NAME}"
     ]
     if tool_name == "iree-benchmark-module":
-      cmd.extend([
-          "--benchmark_format=json",
-          "--benchmark_out_format=json",
-          f"--benchmark_out='{os.path.basename(results_filename)}'",
-      ])
-      if self.config.benchmark_min_time:
-        cmd.extend([
-            f"--benchmark_min_time={self.config.benchmark_min_time}",
-        ])
-      else:
-        repetitions = get_benchmark_repetition_count(driver)
-        cmd.extend([
-            f"--benchmark_repetitions={repetitions}",
-        ])
+      cmd.extend(
+          get_iree_benchmark_module_arguments(
+              results_filename=f"'{os.path.basename(results_filename)}'",
+              driver=driver,
+              benchmark_min_time=self.config.benchmark_min_time))
 
     result_json = adb_execute_and_get_output(cmd,
                                              android_case_dir,
@@ -285,20 +261,8 @@ def __run_capture(self, android_case_dir: str, tool_name: str,
     # Just launch the traced benchmark tool with TRACY_NO_EXIT=1 without
     # waiting for the adb command to complete as that won't happen.
     process = adb_start_cmd(run_cmd, android_case_dir, verbose=self.verbose)
-    # But we do need to wait for its start; otherwise will see connection
-    # failure when opening the catpure tool. Here we cannot just sleep a
-    # certain amount of seconds---Pixel 4 seems to have an issue that will
-    # make the trace collection step get stuck. Instead wait for the
-    # benchmark result to be available.
-    while True:
-      line = process.stdout.readline()  # pytype: disable=attribute-error
-      if line == "" and process.poll() is not None:  # Process completed
-        raise ValueError("Cannot find benchmark result line in the log!")
-      if self.verbose:
-        print(line.strip())
-      # Result available
-      if re.match(r"^BM_.+/real_time", line) is not None:
-        break
+
+    wait_for_iree_benchmark_module_start(process, self.verbose)
 
     # Now it's okay to collect the trace via the capture tool. This will
     # send the signal to let the previously waiting benchmark tool to
diff --git a/build_tools/benchmarks/run_benchmarks_on_linux.py b/build_tools/benchmarks/run_benchmarks_on_linux.py
index 291a8037f25a..25811a81a358 100755
--- a/build_tools/benchmarks/run_benchmarks_on_linux.py
+++ b/build_tools/benchmarks/run_benchmarks_on_linux.py
@@ -6,16 +6,134 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 """Runs all matched benchmark suites on a Linux device."""
 
+import subprocess
+import atexit
+import os
+import re
+import shutil
+import sys
+import tarfile
+
+from typing import Optional
+
+from common.benchmark_driver import BenchmarkDriver
+from common.benchmark_suite import MODEL_FLAGFILE_NAME, BenchmarkCase, BenchmarkSuite
+from common.benchmark_config import BenchmarkConfig
+from common.benchmark_definition import execute_cmd, execute_cmd_and_get_output, get_git_commit_hash, get_iree_benchmark_module_arguments, wait_for_iree_benchmark_module_start
 from common.common_arguments import build_common_argument_parser
 from common.linux_device_utils import get_linux_device_info
 
 
+class LinuxBenchmarkDriver(BenchmarkDriver):
+  """Linux benchmark driver."""
+
+  def __init__(self, *args, **kwargs):
+    super().__init__(*args, **kwargs)
+
+  def run_benchmark_case(self, benchmark_case: BenchmarkCase,
+                         benchmark_results_filename: Optional[str],
+                         capture_filename: Optional[str]) -> None:
+
+    # TODO(pzread): Taskset should be derived from CPU topology.
+    # Only use the low 8 cores.
+    taskset = "0xFF"
+
+    if benchmark_results_filename:
+      self.__run_benchmark(case_dir=benchmark_case.benchmark_case_dir,
+                           tool_name=benchmark_case.benchmark_tool_name,
+                           results_filename=benchmark_results_filename,
+                           driver=benchmark_case.driver,
+                           taskset=taskset)
+
+    if capture_filename:
+      self.__run_capture(case_dir=benchmark_case.benchmark_case_dir,
+                         tool_name=benchmark_case.benchmark_tool_name,
+                         capture_filename=capture_filename,
+                         taskset=taskset)
+
+  def __run_benchmark(self, case_dir, tool_name: str, results_filename: str,
+                      driver: str, taskset: str):
+    tool_path = os.path.join(self.config.normal_benchmark_tool_dir, tool_name)
+    cmd = ["taskset", taskset, tool_path, f"--flagfile={MODEL_FLAGFILE_NAME}"]
+    if tool_name == "iree-benchmark-module":
+      cmd.extend(
+          get_iree_benchmark_module_arguments(
+              results_filename=results_filename,
+              driver=driver,
+              benchmark_min_time=self.config.benchmark_min_time))
+
+    result_json = execute_cmd_and_get_output(cmd,
+                                             cwd=case_dir,
+                                             verbose=self.verbose)
+    if self.verbose:
+      print(result_json)
+
+  def __run_capture(self, case_dir, tool_name: str, capture_filename: str,
+                    taskset: str):
+    capture_config = self.config.trace_capture_config
+
+    tool_path = os.path.join(capture_config.traced_benchmark_tool_dir,
+                             tool_name)
+    cmd = ["taskset", taskset, tool_path, f"--flagfile={MODEL_FLAGFILE_NAME}"]
+    process = subprocess.Popen(cmd,
+                               env={"TRACY_NO_EXIT": "1"},
+                               cwd=case_dir,
+                               stdout=subprocess.PIPE,
+                               universal_newlines=True)
+
+    wait_for_iree_benchmark_module_start(process, self.verbose)
+
+    capture_cmd = [
+        capture_config.trace_capture_tool, "-f", "-o", capture_filename
+    ]
+    stdout_redirect = None if self.verbose else subprocess.DEVNULL
+    execute_cmd(capture_cmd, verbose=self.verbose, stdout=stdout_redirect)
+
+
 def main(args):
   device_info = get_linux_device_info(args.device_model, args.verbose)
   if args.verbose:
     print(device_info)
 
-  raise NotImplementedError()
+  commit = get_git_commit_hash("HEAD")
+  benchmark_config = BenchmarkConfig.build_from_args(args, commit)
+  benchmark_suite = BenchmarkSuite.load_from_benchmark_suite_dir(
+      benchmark_config.root_benchmark_dir)
+  benchmark_driver = LinuxBenchmarkDriver(device_info=device_info,
+                                          benchmark_config=benchmark_config,
+                                          benchmark_suite=benchmark_suite,
+                                          benchmark_grace_time=1.0,
+                                          verbose=args.verbose)
+
+  if args.pin_cpu_freq:
+    raise NotImplementedError("CPU freq pinning is not supported yet.")
+  if args.pin_gpu_freq:
+    raise NotImplementedError("GPU freq pinning is not supported yet.")
+  if not args.no_clean:
+    atexit.register(shutil.rmtree, args.tmp_dir)
+
+  benchmark_driver.run()
+
+  benchmark_results = benchmark_driver.get_benchmark_results()
+  if args.output is not None:
+    with open(args.output, "w") as f:
+      f.write(benchmark_results.to_json_str())
+
+  if args.verbose:
+    print(benchmark_results.commit)
+    print(benchmark_results.benchmarks)
+
+  trace_capture_config = benchmark_config.trace_capture_config
+  if trace_capture_config:
+    # Put all captures in a tarball and remove the original files.
+    with tarfile.open(trace_capture_config.capture_tarball, "w:gz") as tar:
+      for capture_filename in benchmark_driver.get_capture_filenames():
+        tar.add(capture_filename)
+
+  benchmark_errors = benchmark_driver.get_benchmark_errors()
+  if benchmark_errors:
+    print("Benchmarking completed with errors", file=sys.stderr)
+    raise RuntimeError(benchmark_errors)
 
 
 def parse_argument():