fix config field name
xurui1995 committed Oct 8, 2024
1 parent 662cb3b commit 7635044
Showing 2 changed files with 105 additions and 105 deletions.
64 changes: 32 additions & 32 deletions test/benchgc/src/benchgc/tuner/README.md
@@ -80,15 +80,15 @@ Tuning ends in 26.26677966117859 s
Best cost: 0.025292858481407166 ms
Best config: [{
"MatMulConfig": {
"M_threads": 1,
"K_threads": 1,
"N_threads": 1,
"M_block": 64,
"K_block": 32,
"N_block": 64,
"innermostM_block": 16,
"innermostK_block": 16,
"innermostN_block": 16
"MThreads": 1,
"KThreads": 1,
"NThreads": 1,
"MBlock": 128,
"KBlock": 64,
"NBlock": 16,
"innerMostMBlock": 32,
"innerMostKBlock": 16,
"innerMostNBlock": 16
}
}]
mlir:
@@ -97,7 +97,7 @@ mlir:
    %cst = arith.constant 0.000000e+00 : f32
    %0 = tensor.empty() : tensor<128x128xf32>
    %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<128x128xf32>) -> tensor<128x128xf32>
-    %2 = linalg.matmul {KBlock = 32 : i32, Kthreads = 1 : i32, MBlock = 64 : i32, Mthreads = 1 : i32, NBlock = 64 : i32, Nthreads = 1 : i32, cast = #linalg.type_fn<cast_signed>, innermostKBlock = 16 : i32, innermostMBlock = 16 : i32, innermostNBlock = 16 : i32} ins(%arg0, %arg1 : tensor<128x128xf32>, tensor<128x128xf32>) outs(%1 : tensor<128x128xf32>) -> tensor<128x128xf32>
+    %2 = linalg.matmul {KBlock = 64 : i32, KThreads = 1 : i32, MBlock = 128 : i32, MThreads = 1 : i32, NBlock = 16 : i32, NThreads = 1 : i32, cast = #linalg.type_fn<cast_signed>, innerMostKBlock = 16 : i32, innerMostMBlock = 32 : i32, innerMostNBlock = 16 : i32} ins(%arg0, %arg1 : tensor<128x128xf32>, tensor<128x128xf32>) outs(%1 : tensor<128x128xf32>) -> tensor<128x128xf32>
    return %2 : tensor<128x128xf32>
  }
}
@@ -117,31 +117,31 @@ OMP_NUM_THREADS=1 python -m benchgc --mode T --driver pattern --case mlp --batch
[ 400 / 1536 ] skipped: 1131 best: 0.006834045052528381 ms
[ 405 / 1536 ] skipped: 1131 best: 0.006834045052528381 ms
Tuner returns empty batch, early stop now
-Tuning ends in 80.21396946907043 s
-Best cost: 0.006834045052528381 ms
+Tuning ends in 80.10290145874023 s
+Best cost: 0.006632879376411438 ms
Best config: [{
    "MatMulConfig": {
-        "M_threads": 1,
-        "K_threads": 1,
-        "N_threads": 1,
-        "M_block": 32,
-        "K_block": 16,
-        "N_block": 32,
-        "innermostM_block": 16,
-        "innermostK_block": 16,
-        "innermostN_block": 32
+        "MThreads": 1,
+        "KThreads": 1,
+        "NThreads": 1,
+        "MBlock": 32,
+        "KBlock": 16,
+        "NBlock": 32,
+        "innerMostMBlock": 32,
+        "innerMostKBlock": 16,
+        "innerMostNBlock": 16
    }
}, {
    "MatMulConfig": {
-        "M_threads": 1,
-        "K_threads": 1,
-        "N_threads": 1,
-        "M_block": 32,
-        "K_block": 32,
-        "N_block": 64,
-        "innermostM_block": 16,
-        "innermostK_block": 16,
-        "innermostN_block": 32
+        "MThreads": 1,
+        "KThreads": 1,
+        "NThreads": 1,
+        "MBlock": 32,
+        "KBlock": 32,
+        "NBlock": 16,
+        "innerMostMBlock": 16,
+        "innerMostKBlock": 32,
+        "innerMostNBlock": 16
    }
}]
mlir:
@@ -150,7 +150,7 @@ mlir:
    %cst = arith.constant 0.000000e+00 : f32
    %0 = tensor.empty() : tensor<32x32xf32>
    %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<32x32xf32>) -> tensor<32x32xf32>
-    %2 = linalg.matmul {KBlock = 16 : i32, Kthreads = 1 : i32, MBlock = 32 : i32, Mthreads = 1 : i32, NBlock = 32 : i32, Nthreads = 1 : i32, cast = #linalg.type_fn<cast_signed>, innermostKBlock = 16 : i32, innermostMBlock = 16 : i32, innermostNBlock = 32 : i32} ins(%arg0, %arg1 : tensor<32x16xf32>, tensor<16x32xf32>) outs(%1 : tensor<32x32xf32>) -> tensor<32x32xf32>
+    %2 = linalg.matmul {KBlock = 16 : i32, KThreads = 1 : i32, MBlock = 32 : i32, MThreads = 1 : i32, NBlock = 32 : i32, NThreads = 1 : i32, cast = #linalg.type_fn<cast_signed>, innerMostKBlock = 16 : i32, innerMostMBlock = 32 : i32, innerMostNBlock = 16 : i32} ins(%arg0, %arg1 : tensor<32x16xf32>, tensor<16x32xf32>) outs(%1 : tensor<32x32xf32>) -> tensor<32x32xf32>
    %3 = tensor.empty() : tensor<32x32xf32>
    %broadcasted = linalg.broadcast ins(%arg3 : tensor<32xf32>) outs(%3 : tensor<32x32xf32>) dimensions = [0]
    %4 = tensor.empty() : tensor<32x32xf32>
@@ -160,7 +160,7 @@ mlir:
    %7 = linalg.max ins(%5, %cst_0 : tensor<32x32xf32>, tensor<32x32xf32>) outs(%6 : tensor<32x32xf32>) -> tensor<32x32xf32>
    %8 = tensor.empty() : tensor<32x64xf32>
    %9 = linalg.fill ins(%cst : f32) outs(%8 : tensor<32x64xf32>) -> tensor<32x64xf32>
-    %10 = linalg.matmul {KBlock = 32 : i32, Kthreads = 1 : i32, MBlock = 32 : i32, Mthreads = 1 : i32, NBlock = 64 : i32, Nthreads = 1 : i32, cast = #linalg.type_fn<cast_signed>, innermostKBlock = 16 : i32, innermostMBlock = 16 : i32, innermostNBlock = 32 : i32} ins(%7, %arg2 : tensor<32x32xf32>, tensor<32x64xf32>) outs(%9 : tensor<32x64xf32>) -> tensor<32x64xf32>
+    %10 = linalg.matmul {KBlock = 32 : i32, KThreads = 1 : i32, MBlock = 32 : i32, MThreads = 1 : i32, NBlock = 16 : i32, NThreads = 1 : i32, cast = #linalg.type_fn<cast_signed>, innerMostKBlock = 32 : i32, innerMostMBlock = 16 : i32, innerMostNBlock = 16 : i32} ins(%7, %arg2 : tensor<32x32xf32>, tensor<32x64xf32>) outs(%9 : tensor<32x64xf32>) -> tensor<32x64xf32>
    %11 = tensor.empty() : tensor<32x64xf32>
    %broadcasted_1 = linalg.broadcast ins(%arg4 : tensor<64xf32>) outs(%11 : tensor<32x64xf32>) dimensions = [0]
    %12 = tensor.empty() : tensor<32x64xf32>
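Aside: the "Best config" blocks above are plain JSON, so they are easy to consume programmatically. A minimal sketch in Python — parse_best_config is a hypothetical helper, not part of benchgc, and the divisibility checks simply mirror the constraints defined in op_config.py below:

    import json

    # Hypothetical helper (not part of benchgc): parse a "Best config" list as
    # printed by the tuner and sanity-check the block-divisibility invariant
    # that the tuner's constraints enforce.
    def parse_best_config(text: str) -> list:
        configs = json.loads(text)
        for entry in configs:
            cfg = entry["MatMulConfig"]
            # each block size must be a multiple of its innermost block size
            assert cfg["MBlock"] % cfg["innerMostMBlock"] == 0
            assert cfg["KBlock"] % cfg["innerMostKBlock"] == 0
            assert cfg["NBlock"] % cfg["innerMostNBlock"] == 0
        return configs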
146 changes: 73 additions & 73 deletions test/benchgc/src/benchgc/tuner/op_config.py
@@ -53,101 +53,101 @@ class MatMulConfig(Config):
    def __init__(
        self,
        op: OpView,
-        M_threads: int = 1,
-        K_threads: int = 1,
-        N_threads: int = 1,
-        M_block: int = 1,
-        K_block: int = 1,
-        N_block: int = 1,
-        innermostM_block: int = 1,
-        innermostK_block: int = 1,
-        innermostN_block: int = 1,
+        MThreads: int = 1,
+        KThreads: int = 1,
+        NThreads: int = 1,
+        MBlock: int = 1,
+        KBlock: int = 1,
+        NBlock: int = 1,
+        innerMostMBlock: int = 1,
+        innerMostKBlock: int = 1,
+        innerMostNBlock: int = 1,
    ):
        # you can set the default value and candidates by info from matmul_op
-        self.M = op.inputs[0].type.shape[0]
-        self.K = op.inputs[0].type.shape[1]
-        self.N = op.inputs[1].type.shape[1]
+        self.m = op.inputs[0].type.shape[0]
+        self.k = op.inputs[0].type.shape[1]
+        self.n = op.inputs[1].type.shape[1]
        # self.input_a_dtype = str(op.inputs[0].type.element_type)
        self.num_threads = int(os.environ.get("OMP_NUM_THREADS", 1))
-        self.M_threads = M_threads
-        self.K_threads = K_threads
-        self.N_threads = N_threads
-        self.M_block = M_block
-        self.K_block = K_block
-        self.N_block = N_block
-        self.innermostM_block = innermostM_block
-        self.innermostK_block = innermostK_block
-        self.innermostN_block = innermostN_block
+        self.m_threads = MThreads
+        self.k_threads = KThreads
+        self.n_threads = NThreads
+        self.m_block = MBlock
+        self.k_block = KBlock
+        self.n_block = NBlock
+        self.innermost_m_block = innerMostMBlock
+        self.innermost_k_block = innerMostKBlock
+        self.innermost_n_block = innerMostNBlock
        super().__init__()

    def init_candidates(self):
        default_blocks = [16, 32, 64, 128, 256, 512]
        default_innermost_blocks = [16, 32]
-        self.field_candidates["M_threads"] = find_factors(self.num_threads)
-        self.field_candidates["K_threads"] = find_factors(self.num_threads)
-        self.field_candidates["N_threads"] = find_factors(self.num_threads)
-        self.field_candidates["M_block"] = [
-            block for block in default_blocks if self.M >= block
+        self.field_candidates["m_threads"] = find_factors(self.num_threads)
+        self.field_candidates["k_threads"] = find_factors(self.num_threads)
+        self.field_candidates["n_threads"] = find_factors(self.num_threads)
+        self.field_candidates["m_block"] = [
+            block for block in default_blocks if self.m >= block
        ]
-        self.field_candidates["K_block"] = [
-            block for block in default_blocks if self.K >= block
+        self.field_candidates["k_block"] = [
+            block for block in default_blocks if self.k >= block
        ]
-        self.field_candidates["N_block"] = [
-            block for block in default_blocks if self.N >= block
+        self.field_candidates["n_block"] = [
+            block for block in default_blocks if self.n >= block
        ]
-        self.field_candidates["innermostM_block"] = [
-            block for block in default_innermost_blocks if self.M >= block
+        self.field_candidates["innermost_m_block"] = [
+            block for block in default_innermost_blocks if self.m >= block
        ]
-        self.field_candidates["innermostK_block"] = [
-            block for block in default_innermost_blocks if self.K >= block
+        self.field_candidates["innermost_k_block"] = [
+            block for block in default_innermost_blocks if self.k >= block
        ]
-        self.field_candidates["innermostN_block"] = [
-            block for block in default_innermost_blocks if self.N >= block
+        self.field_candidates["innermost_n_block"] = [
+            block for block in default_innermost_blocks if self.n >= block
        ]

    def init_constraints(self):
        # example: using lambda to add constraints, adding constraints by the order of the fields
-        self.field_constraints["M_threads"] = None
-        self.field_constraints["K_threads"] = (
-            lambda MatMulConfig, K_threads: self.num_threads
-            % (MatMulConfig.M_threads * K_threads)
+        self.field_constraints["m_threads"] = None
+        self.field_constraints["k_threads"] = (
+            lambda MatMulConfig, k_threads: self.num_threads
+            % (MatMulConfig.m_threads * k_threads)
            == 0
        )
-        self.field_constraints["N_threads"] = (
-            lambda MatMulConfig, N_threads: self.num_threads
-            % (MatMulConfig.M_threads * MatMulConfig.K_threads * N_threads)
+        self.field_constraints["n_threads"] = (
+            lambda MatMulConfig, n_threads: self.num_threads
+            % (MatMulConfig.m_threads * MatMulConfig.k_threads * n_threads)
            == 0
        )
-        self.field_constraints["M_block"] = None
-        self.field_constraints["K_block"] = None
-        self.field_constraints["N_block"] = None
-        self.field_constraints["innermostM_block"] = (
-            lambda MatMulConfig, innermostM_block: MatMulConfig.M_block
-            % innermostM_block
+        self.field_constraints["m_block"] = None
+        self.field_constraints["k_block"] = None
+        self.field_constraints["n_block"] = None
+        self.field_constraints["innermost_m_block"] = (
+            lambda MatMulConfig, innermost_m_block: MatMulConfig.m_block
+            % innermost_m_block
            == 0
        )
-        self.field_constraints["innermostK_block"] = (
-            lambda MatMulConfig, innermostK_block: MatMulConfig.K_block
-            % innermostK_block
+        self.field_constraints["innermost_k_block"] = (
+            lambda MatMulConfig, innermost_k_block: MatMulConfig.k_block
+            % innermost_k_block
            == 0
        )
-        self.field_constraints["innermostN_block"] = (
-            lambda MatMulConfig, innermostN_block: MatMulConfig.N_block
-            % innermostN_block
+        self.field_constraints["innermost_n_block"] = (
+            lambda MatMulConfig, innermost_n_block: MatMulConfig.n_block
+            % innermost_n_block
            == 0
        )

    def attach_to_ir(self, op: OpView):
        attr_to_field = {
-            "Mthreads": self.M_threads,
-            "Kthreads": self.K_threads,
-            "Nthreads": self.N_threads,
-            "MBlock": self.M_block,
-            "KBlock": self.K_block,
-            "NBlock": self.N_block,
-            "innermostMBlock": self.innermostM_block,
-            "innermostKBlock": self.innermostK_block,
-            "innermostNBlock": self.innermostN_block,
+            "MThreads": self.m_threads,
+            "KThreads": self.k_threads,
+            "NThreads": self.n_threads,
+            "MBlock": self.m_block,
+            "KBlock": self.k_block,
+            "NBlock": self.n_block,
+            "innerMostMBlock": self.innermost_m_block,
+            "innerMostKBlock": self.innermost_k_block,
+            "innerMostNBlock": self.innermost_n_block,
        }
        for name, value in attr_to_field.items():
            op.attributes[name] = IntegerAttr.get(T.i32(), value)
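Note: init_candidates above relies on find_factors, which this diff does not show. A plausible minimal sketch of such a helper — an assumption for illustration, not the repository's actual implementation:

    def find_factors(n: int) -> list:
        # all positive divisors of n, used as candidate thread counts
        return [i for i in range(1, n + 1) if n % i == 0]

With OMP_NUM_THREADS=8, for example, the m_threads/k_threads/n_threads candidates would each be find_factors(8) == [1, 2, 4, 8], and the constraints above then prune combinations whose product does not divide the thread pool.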
@@ -158,15 +158,15 @@ def __repr__(self) -> str:
    def __str__(self) -> str:
        obj_dict = {
            "MatMulConfig": {
-                "M_threads": self.M_threads,
-                "K_threads": self.K_threads,
-                "N_threads": self.N_threads,
-                "M_block": self.M_block,
-                "K_block": self.K_block,
-                "N_block": self.N_block,
-                "innermostM_block": self.innermostM_block,
-                "innermostK_block": self.innermostK_block,
-                "innermostN_block": self.innermostN_block,
+                "MThreads": self.m_threads,
+                "KThreads": self.k_threads,
+                "NThreads": self.n_threads,
+                "MBlock": self.m_block,
+                "KBlock": self.k_block,
+                "NBlock": self.n_block,
+                "innerMostMBlock": self.innermost_m_block,
+                "innerMostKBlock": self.innermost_k_block,
+                "innerMostNBlock": self.innermost_n_block,
            }
        }
        return json.dumps(obj_dict, indent=4)
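Each constraint lambda above receives the partially-assigned config plus the candidate value for the current field, with None meaning unconstrained. A minimal sketch of the consumption pattern this implies — an assumed illustration, not benchgc's actual search loop:

    def candidate_is_valid(config, field, value) -> bool:
        # fields are checked in insertion order; earlier fields are already
        # set on `config` when a later field's lambda runs
        constraint = config.field_constraints[field]
        return constraint is None or constraint(config, value)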
