From 2327e14429562152be1511d5b051f74674052986 Mon Sep 17 00:00:00 2001 From: LeiWang1999 Date: Fri, 9 Aug 2024 17:53:36 +0000 Subject: [PATCH] tqdm --- .../benchmark_matmul_scope_compare.py | 225 +++++++++--------- .../operators/benchmark_matmul_strategies.py | 102 ++------ 2 files changed, 128 insertions(+), 199 deletions(-) diff --git a/benchmark/operators/benchmark_matmul_scope_compare.py b/benchmark/operators/benchmark_matmul_scope_compare.py index 5235a85f6..0fb3c5ba6 100644 --- a/benchmark/operators/benchmark_matmul_scope_compare.py +++ b/benchmark/operators/benchmark_matmul_scope_compare.py @@ -9,123 +9,118 @@ from bitblas.gpu.matmul_analysis import get_tensorized_func_and_tags from bitblas.base.utils import apply_and_build - -# Initialize the parser -parser = argparse.ArgumentParser( - description="Benchmark BitBLAS int4 on a specific target." -) - -# Add arguments to the parser -parser.add_argument( - "--target", - type=str, - default=auto_detect_nvidia_target(), - help="Specify the target device for benchmarking." -) -parser.add_argument( - "--group_size", - type=int, - default=None, - help="Group size for grouped quantization." -) -parser.add_argument( - "--A_dtype", - type=str, - default="float16", - choices=["float16", "float32", "float64", "int32", "int8"], # Assuming these are the valid choices - help="Data type of activation A." -) -parser.add_argument( - "--W_dtype", - type=str, - default="int4", - choices=["float16", "float32", "float64", "int32", "int8", "int4", "int2", "int1", "nf4", "fp4_e2m1"], # Assuming these are the valid choices - help="Data type of weight W." -) -parser.add_argument( - "--accum_dtype", - type=str, - default="float16", +# Initialize the parser +parser = argparse.ArgumentParser(description="Benchmark BitBLAS int4 on a specific target.") + +# Add arguments to the parser +parser.add_argument( + "--target", + type=str, + default=auto_detect_nvidia_target(), + help="Specify the target device for benchmarking.") +parser.add_argument( + "--group_size", type=int, default=None, help="Group size for grouped quantization.") +parser.add_argument( + "--A_dtype", + type=str, + default="float16", + choices=["float16", "float32", "float64", "int32", + "int8"], # Assuming these are the valid choices + help="Data type of activation A.") +parser.add_argument( + "--W_dtype", + type=str, + default="int4", + choices=[ + "float16", "float32", "float64", "int32", "int8", "int4", "int2", "int1", "nf4", "fp4_e2m1" + ], # Assuming these are the valid choices + help="Data type of weight W.") +parser.add_argument( + "--accum_dtype", + type=str, + default="float16", choices=["float16", "int32"], # Assuming these are the valid choices - help="Data type for accumulation." -) -parser.add_argument( - "--out_dtype", - type=str, - default="float16", + help="Data type for accumulation.") +parser.add_argument( + "--out_dtype", + type=str, + default="float16", choices=["float16", "float32", "int32", "int8"], # Assuming these are the valid choices - help="Data type for output." -) -parser.add_argument( - "--layout", - type=str, - default="nt", + help="Data type for output.") +parser.add_argument( + "--layout", + type=str, + default="nt", choices=["nt", "nn"], # Assuming these are the valid choices - help="Matrix layout, 'nt' for non-transpose A and transpose W." -) -parser.add_argument( - "--with_bias", - action="store_true", - help="Include bias in the benchmark." -) -parser.add_argument( - "--with_scaling", - action="store_true", - help="Include scaling factor in the quantization." 
-) -parser.add_argument( - "--with_zeros", - action="store_true", - help="Include zeros in the quantization." -) -parser.add_argument( - "--zeros_mode", - type=str, - default=None, + help="Matrix layout, 'nt' for non-transpose A and transpose W.") +parser.add_argument("--with_bias", action="store_true", help="Include bias in the benchmark.") +parser.add_argument( + "--with_scaling", action="store_true", help="Include scaling factor in the quantization.") +parser.add_argument("--with_zeros", action="store_true", help="Include zeros in the quantization.") +parser.add_argument( + "--zeros_mode", + type=str, + default=None, choices=["original", "rescale", "quantized"], # Replace with actual modes if applicable - help="Specify the mode for calculating zeros." -) - -# Parse the arguments -args = parser.parse_args() - -# Assign arguments to variables -target = args.target -group_size = args.group_size -A_dtype = args.A_dtype -W_dtype = args.W_dtype -accum_dtype = args.accum_dtype -out_dtype = args.out_dtype -layout = args.layout -with_bias = args.with_bias -group_size = args.group_size -with_scaling = args.with_scaling -with_zeros = args.with_zeros -zeros_mode = args.zeros_mode + help="Specify the mode for calculating zeros.") -test_shapes = [ - # square test - (MatmulConfig, Matmul, (1, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (16, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (32, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (64, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (128, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (256, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (1024, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (16, 43008, 14336, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (32, 14336, 14336, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (64, 57344, 14336, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (128, 14336, 57344, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), +# Parse the arguments +args = parser.parse_args() - (MatmulConfig, Matmul, (256, 9216, 9216, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (128, 36864, 9216, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (64, 9216, 36864, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (32, 
22016, 8192, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), +# Assign arguments to variables +target = args.target +group_size = args.group_size +A_dtype = args.A_dtype +W_dtype = args.W_dtype +accum_dtype = args.accum_dtype +out_dtype = args.out_dtype +layout = args.layout +with_bias = args.with_bias +group_size = args.group_size +with_scaling = args.with_scaling +with_zeros = args.with_zeros +zeros_mode = args.zeros_mode - (MatmulConfig, Matmul, (16, 8192, 22016, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (32, 8192, 8192, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (64, 28672, 8192, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (128, 8192, 28672, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), +test_shapes = [ + # square test + (MatmulConfig, Matmul, (1, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (16, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (32, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (64, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (128, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (256, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (1024, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (16, 43008, 14336, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (32, 14336, 14336, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (64, 57344, 14336, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (128, 14336, 57344, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (256, 9216, 9216, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (128, 36864, 9216, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (64, 9216, 36864, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (32, 22016, 8192, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, 
Matmul, (16, 8192, 22016, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (32, 8192, 8192, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (64, 28672, 8192, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (128, 8192, 28672, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), ] benchmark_sets = [] @@ -152,7 +147,7 @@ for config in configs: static_config = config static_config.shared_scope = "shared" - static_configs.append(static_config) + static_configs.append(static_config) dynamic_configs = [] for config in configs: dynamic_config = config @@ -162,8 +157,10 @@ _, best_static = apply_and_build(func, static_configs, arch, parallel_build=True) _, best_dynamic = apply_and_build(func, dynamic_configs, arch, parallel_build=True) - benchmark_results[input_args] = (best_static.latency, best_dynamic.latency, best_static.latency - best_dynamic.latency) + benchmark_results[input_args] = (best_static.latency, best_dynamic.latency, + best_static.latency - best_dynamic.latency) for key, value in benchmark_results.items(): - print(f"Input arguments: {key}, Static latency: {value[0]}, Dynamic latency: {value[1]}, Difference: {value[2]}") - \ No newline at end of file + print( + f"Input arguments: {key}, Static latency: {value[0]}, Dynamic latency: {value[1]}, Difference: {value[2]}" + ) diff --git a/benchmark/operators/benchmark_matmul_strategies.py b/benchmark/operators/benchmark_matmul_strategies.py index c9e6f0881..a28c6a11c 100644 --- a/benchmark/operators/benchmark_matmul_strategies.py +++ b/benchmark/operators/benchmark_matmul_strategies.py @@ -7,10 +7,10 @@ from bitblas.utils import get_commit_id from bitblas import set_log_level from tabulate import tabulate -import json from os import path, makedirs -from typing import Dict, List, Union +from typing import List import argparse +from tqdm import tqdm set_log_level("DEBUG") @@ -105,82 +105,6 @@ def generate_operator_config(self, name: str, M, N, K) -> MatmulConfig: **self.config_map[name], ) - def serialize_results(self) -> None: - """Serialize benchmark results into JSON files.""" - commit_id_path = f"CommitID_{self.CURRENT_COMMIT_ID}" - log_commit_path = path.join(self.log_path, commit_id_path) - - if not path.exists(log_commit_path): - makedirs(log_commit_path) - - # Save benchmark results into JSON - self._save_json( - self.benchmark_results, - path.join(log_commit_path, self.BENCHMARK_RESULTS_FILE), - ) - - # Save benchmark shapes into JSON - shapes: Dict[List[List[Union[List, int], int, int]]] = {} - - # Iterate through the benchmark results to extract the shapes - for name, results in self.benchmark_results.items(): - shapes[name] = [] - for i, _ in enumerate(results): - config = self.benchmark_sets[name][i][1] - dyn_prof_shape = self.benchmark_sets[name][i][2] - shapes[name].append([config.M, config.N, config.K, dyn_prof_shape]) - - self._save_json(shapes, path.join(log_commit_path, self.BENCHMARK_SHAPES_FILE)) - - # Save device info into JSON - self._save_json( - {"device": self.benchmark_target}, - path.join(log_commit_path, self.BENCHMARK_DEVICE_FILE), - ) - - def _save_json(self, data, file_path): - """Helper function to save JSON data to a file.""" - with open(file_path, "w") as f: - json.dump(data, 
f) - - @classmethod - def deserialize_from_logs(cls, commit_id: str) -> None: - """Deserialize benchmark results from JSON files.""" - benchmark = cls() - commit_id_path = f"CommitID_{commit_id}" - log_commit_path = path.join(benchmark.log_path, commit_id_path) - - benchmark.benchmark_results = cls._load_json( - path.join(log_commit_path, cls.BENCHMARK_RESULTS_FILE)) - - shapes_file = path.join(log_commit_path, cls.BENCHMARK_SHAPES_FILE) - - with open(shapes_file, "r") as f: - shapes = json.load(f) - for name, shape_list in shapes.items(): - for shape in shape_list: - M, N, K, dyn_prof_shape = shape - benchmark.add_benchmark_set( - name, - [ - benchmark.generate_op_unit( - benchmark.generate_operator_config(name, M, N, K), - dynamic_profiling_shape=dyn_prof_shape, - ) - ], - ) - - benchmark.benchmark_target = cls._load_json( - path.join(log_commit_path, cls.BENCHMARK_DEVICE_FILE))["device"] - - return benchmark - - @staticmethod - def _load_json(file_path): - """Helper function to load JSON data from a file.""" - with open(file_path, "r") as f: - return json.load(f) - def report(self): """Generate and print a report of the benchmark results.""" results4compare = {} @@ -271,13 +195,21 @@ def make_operator(self, operator: Matmul, config: MatmulConfig) -> Matmul: def benchmark(self): """Run benchmarks on all benchmark sets.""" - for name, benchmark_set in self.benchmark_sets.items(): - self.benchmark_results[name] = [] - for op, config, _ in benchmark_set: - for opt in self.OPT_SHAPES: - print(f"Running benchmark for {name} with shape {opt}") - self.benchmark_results[name].extend( - [self.run_benchmark(op, config, {"m": opt})]) + # Calculate the total number of benchmark runs for the progress bar + total_runs = sum( + len(benchmark_set) * len(self.OPT_SHAPES) + for benchmark_set in self.benchmark_sets.values()) + + with tqdm(total=total_runs, desc="Total Progress", unit="benchmark") as pbar: + for name, benchmark_set in self.benchmark_sets.items(): + self.benchmark_results[name] = [] + for op, config, _ in benchmark_set: + for opt in self.OPT_SHAPES: + print(f"Running benchmark for {name} with shape {opt}") + self.benchmark_results[name].extend( + [self.run_benchmark(op, config, {"m": opt})]) + # Update the progress bar after each run + pbar.update(1) def run_compare_strategy(self, report=True, serialize=True, enable_tuning: bool = False): """Run the benchmark process."""
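
Reviewer note on the tqdm hunk in benchmark_matmul_strategies.py: the patch precomputes the total number of runs (operators per benchmark set times len(OPT_SHAPES)) so the bar can show overall progress and an ETA, then ticks once per (operator, shape) run. A minimal, self-contained sketch of the same pattern is below; the benchmark sets, OPT_SHAPES values, and run_benchmark stub are placeholders standing in for the real BitBLAS objects, only the progress-bar bookkeeping mirrors the patch.

    from tqdm import tqdm

    # Hypothetical stand-ins for the real benchmark sets and per-run work.
    benchmark_sets = {"FP16xINT4": [("op_a", "cfg_a"), ("op_b", "cfg_b")]}
    OPT_SHAPES = [1, 16, 32, 64, 128]

    def run_benchmark(op, config, m):
        return 0.0  # placeholder latency

    # Pre-compute the total so tqdm can render a percentage and ETA
    # instead of an open-ended counter.
    total_runs = sum(len(s) * len(OPT_SHAPES) for s in benchmark_sets.values())

    results = {}
    with tqdm(total=total_runs, desc="Total Progress", unit="benchmark") as pbar:
        for name, benchmark_set in benchmark_sets.items():
            results[name] = []
            for op, config in benchmark_set:
                for m in OPT_SHAPES:
                    results[name].append(run_benchmark(op, config, m))
                    pbar.update(1)  # one tick per (operator, shape) run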
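
Reviewer note on benchmark_matmul_scope_compare.py: the script builds two candidate lists per shape, one with shared_scope = "shared" (statically allocated shared memory) and one with "shared.dyn", compiles both via apply_and_build, and reports the latency gap. The existing loops assign each config directly before mutating shared_scope, so the static and dynamic lists end up holding references to the same objects. A defensive variant that copies each config first could look like the sketch below; the use of copy.deepcopy (and the assumption that MatmulConfig copies cleanly) is not part of the patch.

    import copy

    def build_scope_variants(configs):
        """Clone each candidate config into a static ("shared") and a
        dynamic ("shared.dyn") shared-memory variant without aliasing."""
        static_configs = [copy.deepcopy(c) for c in configs]
        for c in static_configs:
            c.shared_scope = "shared"       # statically allocated shared memory

        dynamic_configs = [copy.deepcopy(c) for c in configs]
        for c in dynamic_configs:
            c.shared_scope = "shared.dyn"   # dynamically allocated shared memory

        return static_configs, dynamic_configs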