From 2327e14429562152be1511d5b051f74674052986 Mon Sep 17 00:00:00 2001 From: LeiWang1999 Date: Fri, 9 Aug 2024 17:53:36 +0000 Subject: [PATCH] tqdm --- .../benchmark_matmul_scope_compare.py | 225 +++++++++--------- .../operators/benchmark_matmul_strategies.py | 102 ++------ 2 files changed, 128 insertions(+), 199 deletions(-) diff --git a/benchmark/operators/benchmark_matmul_scope_compare.py b/benchmark/operators/benchmark_matmul_scope_compare.py index 5235a85f6..0fb3c5ba6 100644 --- a/benchmark/operators/benchmark_matmul_scope_compare.py +++ b/benchmark/operators/benchmark_matmul_scope_compare.py @@ -9,123 +9,118 @@ from bitblas.gpu.matmul_analysis import get_tensorized_func_and_tags from bitblas.base.utils import apply_and_build - -# Initialize the parser -parser = argparse.ArgumentParser( - description="Benchmark BitBLAS int4 on a specific target." -) - -# Add arguments to the parser -parser.add_argument( - "--target", - type=str, - default=auto_detect_nvidia_target(), - help="Specify the target device for benchmarking." -) -parser.add_argument( - "--group_size", - type=int, - default=None, - help="Group size for grouped quantization." -) -parser.add_argument( - "--A_dtype", - type=str, - default="float16", - choices=["float16", "float32", "float64", "int32", "int8"], # Assuming these are the valid choices - help="Data type of activation A." -) -parser.add_argument( - "--W_dtype", - type=str, - default="int4", - choices=["float16", "float32", "float64", "int32", "int8", "int4", "int2", "int1", "nf4", "fp4_e2m1"], # Assuming these are the valid choices - help="Data type of weight W." -) -parser.add_argument( - "--accum_dtype", - type=str, - default="float16", +# Initialize the parser +parser = argparse.ArgumentParser(description="Benchmark BitBLAS int4 on a specific target.") + +# Add arguments to the parser +parser.add_argument( + "--target", + type=str, + default=auto_detect_nvidia_target(), + help="Specify the target device for benchmarking.") +parser.add_argument( + "--group_size", type=int, default=None, help="Group size for grouped quantization.") +parser.add_argument( + "--A_dtype", + type=str, + default="float16", + choices=["float16", "float32", "float64", "int32", + "int8"], # Assuming these are the valid choices + help="Data type of activation A.") +parser.add_argument( + "--W_dtype", + type=str, + default="int4", + choices=[ + "float16", "float32", "float64", "int32", "int8", "int4", "int2", "int1", "nf4", "fp4_e2m1" + ], # Assuming these are the valid choices + help="Data type of weight W.") +parser.add_argument( + "--accum_dtype", + type=str, + default="float16", choices=["float16", "int32"], # Assuming these are the valid choices - help="Data type for accumulation." -) -parser.add_argument( - "--out_dtype", - type=str, - default="float16", + help="Data type for accumulation.") +parser.add_argument( + "--out_dtype", + type=str, + default="float16", choices=["float16", "float32", "int32", "int8"], # Assuming these are the valid choices - help="Data type for output." -) -parser.add_argument( - "--layout", - type=str, - default="nt", + help="Data type for output.") +parser.add_argument( + "--layout", + type=str, + default="nt", choices=["nt", "nn"], # Assuming these are the valid choices - help="Matrix layout, 'nt' for non-transpose A and transpose W." -) -parser.add_argument( - "--with_bias", - action="store_true", - help="Include bias in the benchmark." -) -parser.add_argument( - "--with_scaling", - action="store_true", - help="Include scaling factor in the quantization." 
-) -parser.add_argument( - "--with_zeros", - action="store_true", - help="Include zeros in the quantization." -) -parser.add_argument( - "--zeros_mode", - type=str, - default=None, + help="Matrix layout, 'nt' for non-transpose A and transpose W.") +parser.add_argument("--with_bias", action="store_true", help="Include bias in the benchmark.") +parser.add_argument( + "--with_scaling", action="store_true", help="Include scaling factor in the quantization.") +parser.add_argument("--with_zeros", action="store_true", help="Include zeros in the quantization.") +parser.add_argument( + "--zeros_mode", + type=str, + default=None, choices=["original", "rescale", "quantized"], # Replace with actual modes if applicable - help="Specify the mode for calculating zeros." -) - -# Parse the arguments -args = parser.parse_args() - -# Assign arguments to variables -target = args.target -group_size = args.group_size -A_dtype = args.A_dtype -W_dtype = args.W_dtype -accum_dtype = args.accum_dtype -out_dtype = args.out_dtype -layout = args.layout -with_bias = args.with_bias -group_size = args.group_size -with_scaling = args.with_scaling -with_zeros = args.with_zeros -zeros_mode = args.zeros_mode + help="Specify the mode for calculating zeros.") -test_shapes = [ - # square test - (MatmulConfig, Matmul, (1, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (16, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (32, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (64, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (128, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (256, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (1024, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (16, 43008, 14336, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (32, 14336, 14336, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (64, 57344, 14336, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (128, 14336, 57344, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), +# Parse the arguments +args = parser.parse_args() - (MatmulConfig, Matmul, (256, 9216, 9216, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (128, 36864, 9216, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (64, 9216, 36864, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (32, 
22016, 8192, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), +# Assign arguments to variables +target = args.target +group_size = args.group_size +A_dtype = args.A_dtype +W_dtype = args.W_dtype +accum_dtype = args.accum_dtype +out_dtype = args.out_dtype +layout = args.layout +with_bias = args.with_bias +group_size = args.group_size +with_scaling = args.with_scaling +with_zeros = args.with_zeros +zeros_mode = args.zeros_mode - (MatmulConfig, Matmul, (16, 8192, 22016, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (32, 8192, 8192, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (64, 28672, 8192, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), - (MatmulConfig, Matmul, (128, 8192, 28672, A_dtype, W_dtype, out_dtype, accum_dtype, layout, with_bias, group_size, with_scaling, with_zeros, zeros_mode)), +test_shapes = [ + # square test + (MatmulConfig, Matmul, (1, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (16, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (32, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (64, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (128, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (256, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (1024, 16384, 16384, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (16, 43008, 14336, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (32, 14336, 14336, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (64, 57344, 14336, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (128, 14336, 57344, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (256, 9216, 9216, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (128, 36864, 9216, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (64, 9216, 36864, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (32, 22016, 8192, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, 
Matmul, (16, 8192, 22016, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (32, 8192, 8192, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (64, 28672, 8192, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), + (MatmulConfig, Matmul, (128, 8192, 28672, A_dtype, W_dtype, out_dtype, accum_dtype, layout, + with_bias, group_size, with_scaling, with_zeros, zeros_mode)), ] benchmark_sets = [] @@ -152,7 +147,7 @@ for config in configs: static_config = config static_config.shared_scope = "shared" - static_configs.append(static_config) + static_configs.append(static_config) dynamic_configs = [] for config in configs: dynamic_config = config @@ -162,8 +157,10 @@ _, best_static = apply_and_build(func, static_configs, arch, parallel_build=True) _, best_dynamic = apply_and_build(func, dynamic_configs, arch, parallel_build=True) - benchmark_results[input_args] = (best_static.latency, best_dynamic.latency, best_static.latency - best_dynamic.latency) + benchmark_results[input_args] = (best_static.latency, best_dynamic.latency, + best_static.latency - best_dynamic.latency) for key, value in benchmark_results.items(): - print(f"Input arguments: {key}, Static latency: {value[0]}, Dynamic latency: {value[1]}, Difference: {value[2]}") - \ No newline at end of file + print( + f"Input arguments: {key}, Static latency: {value[0]}, Dynamic latency: {value[1]}, Difference: {value[2]}" + ) diff --git a/benchmark/operators/benchmark_matmul_strategies.py b/benchmark/operators/benchmark_matmul_strategies.py index c9e6f0881..a28c6a11c 100644 --- a/benchmark/operators/benchmark_matmul_strategies.py +++ b/benchmark/operators/benchmark_matmul_strategies.py @@ -7,10 +7,10 @@ from bitblas.utils import get_commit_id from bitblas import set_log_level from tabulate import tabulate -import json from os import path, makedirs -from typing import Dict, List, Union +from typing import List import argparse +from tqdm import tqdm set_log_level("DEBUG") @@ -105,82 +105,6 @@ def generate_operator_config(self, name: str, M, N, K) -> MatmulConfig: **self.config_map[name], ) - def serialize_results(self) -> None: - """Serialize benchmark results into JSON files.""" - commit_id_path = f"CommitID_{self.CURRENT_COMMIT_ID}" - log_commit_path = path.join(self.log_path, commit_id_path) - - if not path.exists(log_commit_path): - makedirs(log_commit_path) - - # Save benchmark results into JSON - self._save_json( - self.benchmark_results, - path.join(log_commit_path, self.BENCHMARK_RESULTS_FILE), - ) - - # Save benchmark shapes into JSON - shapes: Dict[List[List[Union[List, int], int, int]]] = {} - - # Iterate through the benchmark results to extract the shapes - for name, results in self.benchmark_results.items(): - shapes[name] = [] - for i, _ in enumerate(results): - config = self.benchmark_sets[name][i][1] - dyn_prof_shape = self.benchmark_sets[name][i][2] - shapes[name].append([config.M, config.N, config.K, dyn_prof_shape]) - - self._save_json(shapes, path.join(log_commit_path, self.BENCHMARK_SHAPES_FILE)) - - # Save device info into JSON - self._save_json( - {"device": self.benchmark_target}, - path.join(log_commit_path, self.BENCHMARK_DEVICE_FILE), - ) - - def _save_json(self, data, file_path): - """Helper function to save JSON data to a file.""" - with open(file_path, "w") as f: - json.dump(data, 
f) - - @classmethod - def deserialize_from_logs(cls, commit_id: str) -> None: - """Deserialize benchmark results from JSON files.""" - benchmark = cls() - commit_id_path = f"CommitID_{commit_id}" - log_commit_path = path.join(benchmark.log_path, commit_id_path) - - benchmark.benchmark_results = cls._load_json( - path.join(log_commit_path, cls.BENCHMARK_RESULTS_FILE)) - - shapes_file = path.join(log_commit_path, cls.BENCHMARK_SHAPES_FILE) - - with open(shapes_file, "r") as f: - shapes = json.load(f) - for name, shape_list in shapes.items(): - for shape in shape_list: - M, N, K, dyn_prof_shape = shape - benchmark.add_benchmark_set( - name, - [ - benchmark.generate_op_unit( - benchmark.generate_operator_config(name, M, N, K), - dynamic_profiling_shape=dyn_prof_shape, - ) - ], - ) - - benchmark.benchmark_target = cls._load_json( - path.join(log_commit_path, cls.BENCHMARK_DEVICE_FILE))["device"] - - return benchmark - - @staticmethod - def _load_json(file_path): - """Helper function to load JSON data from a file.""" - with open(file_path, "r") as f: - return json.load(f) - def report(self): """Generate and print a report of the benchmark results.""" results4compare = {} @@ -271,13 +195,21 @@ def make_operator(self, operator: Matmul, config: MatmulConfig) -> Matmul: def benchmark(self): """Run benchmarks on all benchmark sets.""" - for name, benchmark_set in self.benchmark_sets.items(): - self.benchmark_results[name] = [] - for op, config, _ in benchmark_set: - for opt in self.OPT_SHAPES: - print(f"Running benchmark for {name} with shape {opt}") - self.benchmark_results[name].extend( - [self.run_benchmark(op, config, {"m": opt})]) + # Calculate the total number of benchmark runs for the progress bar + total_runs = sum( + len(benchmark_set) * len(self.OPT_SHAPES) + for benchmark_set in self.benchmark_sets.values()) + + with tqdm(total=total_runs, desc="Total Progress", unit="benchmark") as pbar: + for name, benchmark_set in self.benchmark_sets.items(): + self.benchmark_results[name] = [] + for op, config, _ in benchmark_set: + for opt in self.OPT_SHAPES: + print(f"Running benchmark for {name} with shape {opt}") + self.benchmark_results[name].extend( + [self.run_benchmark(op, config, {"m": opt})]) + # Update the progress bar after each run + pbar.update(1) def run_compare_strategy(self, report=True, serialize=True, enable_tuning: bool = False): """Run the benchmark process."""
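
Reviewer note on the tqdm hunk in benchmark_matmul_strategies.py: the patch precomputes the total number of runs (operators per benchmark set times len(OPT_SHAPES)) so the bar can show overall progress and an ETA, then ticks once per (operator, shape) run. A minimal, self-contained sketch of the same pattern is below; the benchmark sets, OPT_SHAPES values, and run_benchmark stub are placeholders standing in for the real BitBLAS objects, only the progress-bar bookkeeping mirrors the patch.

    from tqdm import tqdm

    # Hypothetical stand-ins for the real benchmark sets and per-run work.
    benchmark_sets = {"FP16xINT4": [("op_a", "cfg_a"), ("op_b", "cfg_b")]}
    OPT_SHAPES = [1, 16, 32, 64, 128]

    def run_benchmark(op, config, m):
        return 0.0  # placeholder latency

    # Pre-compute the total so tqdm can render a percentage and ETA
    # instead of an open-ended counter.
    total_runs = sum(len(s) * len(OPT_SHAPES) for s in benchmark_sets.values())

    results = {}
    with tqdm(total=total_runs, desc="Total Progress", unit="benchmark") as pbar:
        for name, benchmark_set in benchmark_sets.items():
            results[name] = []
            for op, config in benchmark_set:
                for m in OPT_SHAPES:
                    results[name].append(run_benchmark(op, config, m))
                    pbar.update(1)  # one tick per (operator, shape) run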
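
Reviewer note on benchmark_matmul_scope_compare.py: the script builds two candidate lists per shape, one with shared_scope = "shared" (statically allocated shared memory) and one with "shared.dyn", compiles both via apply_and_build, and reports the latency gap. The existing loops assign each config directly before mutating shared_scope, so the static and dynamic lists end up holding references to the same objects. A defensive variant that copies each config first could look like the sketch below; the use of copy.deepcopy (and the assumption that MatmulConfig copies cleanly) is not part of the patch.

    import copy

    def build_scope_variants(configs):
        """Clone each candidate config into a static ("shared") and a
        dynamic ("shared.dyn") shared-memory variant without aliasing."""
        static_configs = [copy.deepcopy(c) for c in configs]
        for c in static_configs:
            c.shared_scope = "shared"       # statically allocated shared memory

        dynamic_configs = [copy.deepcopy(c) for c in configs]
        for c in dynamic_configs:
            c.shared_scope = "shared.dyn"   # dynamically allocated shared memory

        return static_configs, dynamic_configs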