fix config field name
xurui1995 committed Oct 8, 2024
1 parent 662cb3b commit 7635044
Showing 2 changed files with 105 additions and 105 deletions.
64 changes: 32 additions & 32 deletions test/benchgc/src/benchgc/tuner/README.md
@@ -80,15 +80,15 @@ Tuning ends in 26.26677966117859 s
Best cost: 0.025292858481407166 ms
Best config: [{
"MatMulConfig": {
"M_threads": 1,
"K_threads": 1,
"N_threads": 1,
"M_block": 64,
"K_block": 32,
"N_block": 64,
"innermostM_block": 16,
"innermostK_block": 16,
"innermostN_block": 16
"MThreads": 1,
"KThreads": 1,
"NThreads": 1,
"MBlock": 128,
"KBlock": 64,
"NBlock": 16,
"innerMostMBlock": 32,
"innerMostKBlock": 16,
"innerMostNBlock": 16
}
}]
mlir:
@@ -97,7 +97,7 @@ mlir:
    %cst = arith.constant 0.000000e+00 : f32
    %0 = tensor.empty() : tensor<128x128xf32>
    %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<128x128xf32>) -> tensor<128x128xf32>
-    %2 = linalg.matmul {KBlock = 32 : i32, Kthreads = 1 : i32, MBlock = 64 : i32, Mthreads = 1 : i32, NBlock = 64 : i32, Nthreads = 1 : i32, cast = #linalg.type_fn<cast_signed>, innermostKBlock = 16 : i32, innermostMBlock = 16 : i32, innermostNBlock = 16 : i32} ins(%arg0, %arg1 : tensor<128x128xf32>, tensor<128x128xf32>) outs(%1 : tensor<128x128xf32>) -> tensor<128x128xf32>
+    %2 = linalg.matmul {KBlock = 64 : i32, KThreads = 1 : i32, MBlock = 128 : i32, MThreads = 1 : i32, NBlock = 16 : i32, NThreads = 1 : i32, cast = #linalg.type_fn<cast_signed>, innerMostKBlock = 16 : i32, innerMostMBlock = 32 : i32, innerMostNBlock = 16 : i32} ins(%arg0, %arg1 : tensor<128x128xf32>, tensor<128x128xf32>) outs(%1 : tensor<128x128xf32>) -> tensor<128x128xf32>
    return %2 : tensor<128x128xf32>
  }
}
@@ -117,31 +117,31 @@ OMP_NUM_THREADS=1 python -m benchgc --mode T --driver pattern --case mlp --batch
[ 400 / 1536 ] skipped: 1131 best: 0.006834045052528381 ms
[ 405 / 1536 ] skipped: 1131 best: 0.006834045052528381 ms
Tuner returns empty batch, early stop now
-Tuning ends in 80.21396946907043 s
-Best cost: 0.006834045052528381 ms
+Tuning ends in 80.10290145874023 s
+Best cost: 0.006632879376411438 ms
Best config: [{
    "MatMulConfig": {
-        "M_threads": 1,
-        "K_threads": 1,
-        "N_threads": 1,
-        "M_block": 32,
-        "K_block": 16,
-        "N_block": 32,
-        "innermostM_block": 16,
-        "innermostK_block": 16,
-        "innermostN_block": 32
+        "MThreads": 1,
+        "KThreads": 1,
+        "NThreads": 1,
+        "MBlock": 32,
+        "KBlock": 16,
+        "NBlock": 32,
+        "innerMostMBlock": 32,
+        "innerMostKBlock": 16,
+        "innerMostNBlock": 16
    }
}, {
    "MatMulConfig": {
-        "M_threads": 1,
-        "K_threads": 1,
-        "N_threads": 1,
-        "M_block": 32,
-        "K_block": 32,
-        "N_block": 64,
-        "innermostM_block": 16,
-        "innermostK_block": 16,
-        "innermostN_block": 32
+        "MThreads": 1,
+        "KThreads": 1,
+        "NThreads": 1,
+        "MBlock": 32,
+        "KBlock": 32,
+        "NBlock": 16,
+        "innerMostMBlock": 16,
+        "innerMostKBlock": 32,
+        "innerMostNBlock": 16
    }
}]
mlir:
@@ -150,7 +150,7 @@ mlir:
    %cst = arith.constant 0.000000e+00 : f32
    %0 = tensor.empty() : tensor<32x32xf32>
    %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<32x32xf32>) -> tensor<32x32xf32>
-    %2 = linalg.matmul {KBlock = 16 : i32, Kthreads = 1 : i32, MBlock = 32 : i32, Mthreads = 1 : i32, NBlock = 32 : i32, Nthreads = 1 : i32, cast = #linalg.type_fn<cast_signed>, innermostKBlock = 16 : i32, innermostMBlock = 16 : i32, innermostNBlock = 32 : i32} ins(%arg0, %arg1 : tensor<32x16xf32>, tensor<16x32xf32>) outs(%1 : tensor<32x32xf32>) -> tensor<32x32xf32>
+    %2 = linalg.matmul {KBlock = 16 : i32, KThreads = 1 : i32, MBlock = 32 : i32, MThreads = 1 : i32, NBlock = 32 : i32, NThreads = 1 : i32, cast = #linalg.type_fn<cast_signed>, innerMostKBlock = 16 : i32, innerMostMBlock = 32 : i32, innerMostNBlock = 16 : i32} ins(%arg0, %arg1 : tensor<32x16xf32>, tensor<16x32xf32>) outs(%1 : tensor<32x32xf32>) -> tensor<32x32xf32>
    %3 = tensor.empty() : tensor<32x32xf32>
    %broadcasted = linalg.broadcast ins(%arg3 : tensor<32xf32>) outs(%3 : tensor<32x32xf32>) dimensions = [0]
    %4 = tensor.empty() : tensor<32x32xf32>
@@ -160,7 +160,7 @@ mlir:
    %7 = linalg.max ins(%5, %cst_0 : tensor<32x32xf32>, tensor<32x32xf32>) outs(%6 : tensor<32x32xf32>) -> tensor<32x32xf32>
    %8 = tensor.empty() : tensor<32x64xf32>
    %9 = linalg.fill ins(%cst : f32) outs(%8 : tensor<32x64xf32>) -> tensor<32x64xf32>
-    %10 = linalg.matmul {KBlock = 32 : i32, Kthreads = 1 : i32, MBlock = 32 : i32, Mthreads = 1 : i32, NBlock = 64 : i32, Nthreads = 1 : i32, cast = #linalg.type_fn<cast_signed>, innermostKBlock = 16 : i32, innermostMBlock = 16 : i32, innermostNBlock = 32 : i32} ins(%7, %arg2 : tensor<32x32xf32>, tensor<32x64xf32>) outs(%9 : tensor<32x64xf32>) -> tensor<32x64xf32>
+    %10 = linalg.matmul {KBlock = 32 : i32, KThreads = 1 : i32, MBlock = 32 : i32, MThreads = 1 : i32, NBlock = 16 : i32, NThreads = 1 : i32, cast = #linalg.type_fn<cast_signed>, innerMostKBlock = 32 : i32, innerMostMBlock = 16 : i32, innerMostNBlock = 16 : i32} ins(%7, %arg2 : tensor<32x32xf32>, tensor<32x64xf32>) outs(%9 : tensor<32x64xf32>) -> tensor<32x64xf32>
    %11 = tensor.empty() : tensor<32x64xf32>
    %broadcasted_1 = linalg.broadcast ins(%arg4 : tensor<64xf32>) outs(%11 : tensor<32x64xf32>) dimensions = [0]
    %12 = tensor.empty() : tensor<32x64xf32>
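Aside: the "Best config" blocks above are plain JSON, so they are easy to consume programmatically. A minimal sketch in Python — parse_best_config is a hypothetical helper, not part of benchgc, and the divisibility checks simply mirror the constraints defined in op_config.py below:

    import json

    # Hypothetical helper (not part of benchgc): parse a "Best config" list as
    # printed by the tuner and sanity-check the block-divisibility invariant
    # that the tuner's constraints enforce.
    def parse_best_config(text: str) -> list:
        configs = json.loads(text)
        for entry in configs:
            cfg = entry["MatMulConfig"]
            # each block size must be a multiple of its innermost block size
            assert cfg["MBlock"] % cfg["innerMostMBlock"] == 0
            assert cfg["KBlock"] % cfg["innerMostKBlock"] == 0
            assert cfg["NBlock"] % cfg["innerMostNBlock"] == 0
        return configs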
146 changes: 73 additions & 73 deletions test/benchgc/src/benchgc/tuner/op_config.py
@@ -53,101 +53,101 @@ class MatMulConfig(Config):
    def __init__(
        self,
        op: OpView,
-        M_threads: int = 1,
-        K_threads: int = 1,
-        N_threads: int = 1,
-        M_block: int = 1,
-        K_block: int = 1,
-        N_block: int = 1,
-        innermostM_block: int = 1,
-        innermostK_block: int = 1,
-        innermostN_block: int = 1,
+        MThreads: int = 1,
+        KThreads: int = 1,
+        NThreads: int = 1,
+        MBlock: int = 1,
+        KBlock: int = 1,
+        NBlock: int = 1,
+        innerMostMBlock: int = 1,
+        innerMostKBlock: int = 1,
+        innerMostNBlock: int = 1,
    ):
        # you can set the default value and candidates by info from matmul_op
-        self.M = op.inputs[0].type.shape[0]
-        self.K = op.inputs[0].type.shape[1]
-        self.N = op.inputs[1].type.shape[1]
+        self.m = op.inputs[0].type.shape[0]
+        self.k = op.inputs[0].type.shape[1]
+        self.n = op.inputs[1].type.shape[1]
        # self.input_a_dtype = str(op.inputs[0].type.element_type)
        self.num_threads = int(os.environ.get("OMP_NUM_THREADS", 1))
-        self.M_threads = M_threads
-        self.K_threads = K_threads
-        self.N_threads = N_threads
-        self.M_block = M_block
-        self.K_block = K_block
-        self.N_block = N_block
-        self.innermostM_block = innermostM_block
-        self.innermostK_block = innermostK_block
-        self.innermostN_block = innermostN_block
+        self.m_threads = MThreads
+        self.k_threads = KThreads
+        self.n_threads = NThreads
+        self.m_block = MBlock
+        self.k_block = KBlock
+        self.n_block = NBlock
+        self.innermost_m_block = innerMostMBlock
+        self.innermost_k_block = innerMostKBlock
+        self.innermost_n_block = innerMostNBlock
        super().__init__()

    def init_candidates(self):
        default_blocks = [16, 32, 64, 128, 256, 512]
        default_innermost_blocks = [16, 32]
-        self.field_candidates["M_threads"] = find_factors(self.num_threads)
-        self.field_candidates["K_threads"] = find_factors(self.num_threads)
-        self.field_candidates["N_threads"] = find_factors(self.num_threads)
-        self.field_candidates["M_block"] = [
-            block for block in default_blocks if self.M >= block
+        self.field_candidates["m_threads"] = find_factors(self.num_threads)
+        self.field_candidates["k_threads"] = find_factors(self.num_threads)
+        self.field_candidates["n_threads"] = find_factors(self.num_threads)
+        self.field_candidates["m_block"] = [
+            block for block in default_blocks if self.m >= block
        ]
-        self.field_candidates["K_block"] = [
-            block for block in default_blocks if self.K >= block
+        self.field_candidates["k_block"] = [
+            block for block in default_blocks if self.k >= block
        ]
-        self.field_candidates["N_block"] = [
-            block for block in default_blocks if self.N >= block
+        self.field_candidates["n_block"] = [
+            block for block in default_blocks if self.n >= block
        ]
-        self.field_candidates["innermostM_block"] = [
-            block for block in default_innermost_blocks if self.M >= block
+        self.field_candidates["innermost_m_block"] = [
+            block for block in default_innermost_blocks if self.m >= block
        ]
-        self.field_candidates["innermostK_block"] = [
-            block for block in default_innermost_blocks if self.K >= block
+        self.field_candidates["innermost_k_block"] = [
+            block for block in default_innermost_blocks if self.k >= block
        ]
-        self.field_candidates["innermostN_block"] = [
-            block for block in default_innermost_blocks if self.N >= block
+        self.field_candidates["innermost_n_block"] = [
+            block for block in default_innermost_blocks if self.n >= block
        ]

    def init_constraints(self):
        # example: using lambda to add constraints, adding constraints by the order of the fields
-        self.field_constraints["M_threads"] = None
-        self.field_constraints["K_threads"] = (
-            lambda MatMulConfig, K_threads: self.num_threads
-            % (MatMulConfig.M_threads * K_threads)
+        self.field_constraints["m_threads"] = None
+        self.field_constraints["k_threads"] = (
+            lambda MatMulConfig, k_threads: self.num_threads
+            % (MatMulConfig.m_threads * k_threads)
            == 0
        )
-        self.field_constraints["N_threads"] = (
-            lambda MatMulConfig, N_threads: self.num_threads
-            % (MatMulConfig.M_threads * MatMulConfig.K_threads * N_threads)
+        self.field_constraints["n_threads"] = (
+            lambda MatMulConfig, n_threads: self.num_threads
+            % (MatMulConfig.m_threads * MatMulConfig.k_threads * n_threads)
            == 0
        )
-        self.field_constraints["M_block"] = None
-        self.field_constraints["K_block"] = None
-        self.field_constraints["N_block"] = None
-        self.field_constraints["innermostM_block"] = (
-            lambda MatMulConfig, innermostM_block: MatMulConfig.M_block
-            % innermostM_block
+        self.field_constraints["m_block"] = None
+        self.field_constraints["k_block"] = None
+        self.field_constraints["n_block"] = None
+        self.field_constraints["innermost_m_block"] = (
+            lambda MatMulConfig, innermost_m_block: MatMulConfig.m_block
+            % innermost_m_block
            == 0
        )
-        self.field_constraints["innermostK_block"] = (
-            lambda MatMulConfig, innermostK_block: MatMulConfig.K_block
-            % innermostK_block
+        self.field_constraints["innermost_k_block"] = (
+            lambda MatMulConfig, innermost_k_block: MatMulConfig.k_block
+            % innermost_k_block
            == 0
        )
-        self.field_constraints["innermostN_block"] = (
-            lambda MatMulConfig, innermostN_block: MatMulConfig.N_block
-            % innermostN_block
+        self.field_constraints["innermost_n_block"] = (
+            lambda MatMulConfig, innermost_n_block: MatMulConfig.n_block
+            % innermost_n_block
            == 0
        )

    def attach_to_ir(self, op: OpView):
        attr_to_field = {
-            "Mthreads": self.M_threads,
-            "Kthreads": self.K_threads,
-            "Nthreads": self.N_threads,
-            "MBlock": self.M_block,
-            "KBlock": self.K_block,
-            "NBlock": self.N_block,
-            "innermostMBlock": self.innermostM_block,
-            "innermostKBlock": self.innermostK_block,
-            "innermostNBlock": self.innermostN_block,
+            "MThreads": self.m_threads,
+            "KThreads": self.k_threads,
+            "NThreads": self.n_threads,
+            "MBlock": self.m_block,
+            "KBlock": self.k_block,
+            "NBlock": self.n_block,
+            "innerMostMBlock": self.innermost_m_block,
+            "innerMostKBlock": self.innermost_k_block,
+            "innerMostNBlock": self.innermost_n_block,
        }
        for name, value in attr_to_field.items():
            op.attributes[name] = IntegerAttr.get(T.i32(), value)
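Note: init_candidates above relies on find_factors, which this diff does not show. A plausible minimal sketch of such a helper — an assumption for illustration, not the repository's actual implementation:

    def find_factors(n: int) -> list:
        # all positive divisors of n, used as candidate thread counts
        return [i for i in range(1, n + 1) if n % i == 0]

With OMP_NUM_THREADS=8, for example, the m_threads/k_threads/n_threads candidates would each be find_factors(8) == [1, 2, 4, 8], and the constraints above then prune combinations whose product does not divide the thread pool.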
@@ -158,15 +158,15 @@ def __repr__(self) -> str:
    def __str__(self) -> str:
        obj_dict = {
            "MatMulConfig": {
-                "M_threads": self.M_threads,
-                "K_threads": self.K_threads,
-                "N_threads": self.N_threads,
-                "M_block": self.M_block,
-                "K_block": self.K_block,
-                "N_block": self.N_block,
-                "innermostM_block": self.innermostM_block,
-                "innermostK_block": self.innermostK_block,
-                "innermostN_block": self.innermostN_block,
+                "MThreads": self.m_threads,
+                "KThreads": self.k_threads,
+                "NThreads": self.n_threads,
+                "MBlock": self.m_block,
+                "KBlock": self.k_block,
+                "NBlock": self.n_block,
+                "innerMostMBlock": self.innermost_m_block,
+                "innerMostKBlock": self.innermost_k_block,
+                "innerMostNBlock": self.innermost_n_block,
            }
        }
        return json.dumps(obj_dict, indent=4)
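Each constraint lambda above receives the partially-assigned config plus the candidate value for the current field, with None meaning unconstrained. A minimal sketch of the consumption pattern this implies — an assumed illustration, not benchgc's actual search loop:

    def candidate_is_valid(config, field, value) -> bool:
        # fields are checked in insertion order; earlier fields are already
        # set on `config` when a later field's lambda runs
        constraint = config.field_constraints[field]
        return constraint is None or constraint(config, value)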
