Skip to content

Commit

Permalink
Merge branch 'main' into tile_printing
Browse files Browse the repository at this point in the history
  • Loading branch information
Abhishek-Varma authored Jan 13, 2025
2 parents bca1c55 + 5658cc6 commit 0fd9766
Show file tree
Hide file tree
Showing 19 changed files with 936 additions and 375 deletions.
69 changes: 66 additions & 3 deletions build_tools/ci/cpu_comparison/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,7 @@ def __init__(
name_suffix="",
use_ukernel=False,
run_on_target=["npu1_4col"],
tile_pipeline="pack-peel",
additional_labels=None,
aie_compilation_flags=None,
n_repeats=1,
Expand All @@ -380,7 +381,7 @@ def __init__(
K=K,
input_type=input_type,
acc_type=acc_type,
tile_pipeline="pack-peel",
tile_pipeline=tile_pipeline,
use_ukernel=use_ukernel,
n_repeats=n_repeats,
n_kernel_runs=n_kernel_runs,
Expand Down Expand Up @@ -417,6 +418,7 @@ def __init__(
name_suffix="",
use_ukernel=False,
run_on_target=["npu1_4col"],
tile_pipeline="pack-peel",
additional_labels=None,
aie_compilation_flags=None,
n_repeats=1,
Expand All @@ -429,6 +431,7 @@ def __init__(
K=K,
input_type=input_type,
acc_type=acc_type,
tile_pipeline=tile_pipeline,
use_ukernel=use_ukernel,
function_name="matmul_transpose_b",
n_repeats=n_repeats,
Expand Down Expand Up @@ -471,6 +474,7 @@ def __init__(
name_suffix="",
use_ukernel=False,
run_on_target=["npu1_4col"],
tile_pipeline="pack-peel",
additional_labels=None,
aie_compilation_flags=None,
n_repeats=1,
Expand All @@ -488,7 +492,7 @@ def __init__(
K=K,
input_type=input_type,
acc_type=acc_type,
tile_pipeline="pack-peel",
tile_pipeline=tile_pipeline,
use_ukernel=use_ukernel,
n_repeats=n_repeats,
n_kernel_runs=n_kernel_runs,
Expand Down Expand Up @@ -526,6 +530,7 @@ def __init__(
name_suffix="",
use_ukernel=False,
run_on_target=["npu1_4col"],
tile_pipeline="pack-peel",
additional_labels=None,
aie_compilation_flags=None,
n_repeats=1,
Expand All @@ -538,6 +543,7 @@ def __init__(
K=K,
input_type=input_type,
acc_type=acc_type,
tile_pipeline=tile_pipeline,
use_ukernel=use_ukernel,
function_name="matmul_transpose_a",
n_repeats=n_repeats,
Expand Down Expand Up @@ -580,6 +586,7 @@ def __init__(
name_suffix="",
use_ukernel=False,
run_on_target=["npu1_4col"],
tile_pipeline="pack-peel",
additional_labels=None,
aie_compilation_flags=None,
n_repeats=1,
Expand All @@ -597,7 +604,7 @@ def __init__(
K=K,
input_type=input_type,
acc_type=acc_type,
tile_pipeline="pack-peel",
tile_pipeline=tile_pipeline,
use_ukernel=use_ukernel,
n_repeats=n_repeats,
n_kernel_runs=n_kernel_runs,
Expand Down Expand Up @@ -1734,6 +1741,7 @@ def __init__(self):
"outline": False,
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
},
{
"M": 512,
Expand All @@ -1744,6 +1752,7 @@ def __init__(self):
"outline": True,
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
},
{
"M": 512,
Expand All @@ -1754,6 +1763,7 @@ def __init__(self):
"outline": False,
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
},
{
"M": 512,
Expand All @@ -1764,6 +1774,7 @@ def __init__(self):
"outline": True,
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
},
{
"M": 512,
Expand All @@ -1774,6 +1785,7 @@ def __init__(self):
"outline": True,
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
},
{
"M": 512,
Expand All @@ -1784,6 +1796,7 @@ def __init__(self):
"outline": True,
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
},
{
"M": 512,
Expand All @@ -1794,6 +1807,7 @@ def __init__(self):
"outline": True,
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
},
{
"M": 512,
Expand All @@ -1804,6 +1818,7 @@ def __init__(self):
"outline": True,
"transpose_a": False,
"transpose_b": True,
"tile_pipeline": "pack-peel",
},
{
"M": 4096,
Expand All @@ -1814,6 +1829,7 @@ def __init__(self):
"outline": True,
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
},
{
"M": 4096,
Expand All @@ -1824,6 +1840,7 @@ def __init__(self):
"outline": True,
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
},
{
"M": 4096,
Expand All @@ -1834,6 +1851,7 @@ def __init__(self):
"outline": True,
"transpose_a": True,
"transpose_b": False,
"tile_pipeline": "pack-peel",
},
# Test where the compute is omitted; this should help triangulate
# how much performance gain can be obtained with better matmul
Expand All @@ -1849,6 +1867,45 @@ def __init__(self):
"transpose_a": False,
"transpose_b": False,
"skip_numerics": True,
"tile_pipeline": "pack-peel",
},
{
"M": 512,
"N": 4096,
"K": 512,
"use_ukernel": False,
"peano_opt_level": 3,
"outline": True,
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel-4-level-tiling",
},
{
"M": 512,
"N": 4096,
"K": 512,
"use_ukernel": True,
"peano_opt_level": 3,
"outline": True,
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel-4-level-tiling",
},
# Test where the compute is omitted; this should help triangulate
# how much performance gain can be obtained with better matmul
# on core vs. data movement.
{
"M": 512,
"N": 4096,
"K": 512,
"use_ukernel": False,
"peano_opt_level": 3,
"outline": True,
"outline_to_empty_function": True,
"transpose_a": False,
"transpose_b": False,
"skip_numerics": True,
"tile_pipeline": "pack-peel-4-level-tiling",
},
]

Expand All @@ -1862,6 +1919,7 @@ def __init__(self):
outline = test["outline"]
transpose_a = test["transpose_a"]
transpose_b = test["transpose_b"]
tile_pipeline = test["tile_pipeline"]

outlining_string = "--iree-amdaie-enable-function-outlining=" + str(
int(outline)
Expand Down Expand Up @@ -1902,6 +1960,9 @@ def __init__(self):
else:
raise ValueError("Transposing both LHS and RHS is not supported.")

if tile_pipeline == "pack-peel-4-level-tiling":
name_suffix += "_4_level_tiling"

# This should only be the case for benchmark tests which we expect
# to not pass numerically.
if "skip_numerics" in test and test["skip_numerics"]:
Expand All @@ -1914,6 +1975,7 @@ def __init__(self):
K,
"bf16",
"f32",
tile_pipeline=tile_pipeline,
use_ukernel=use_ukernel,
n_repeats=2,
aie_compilation_flags=aie_compilation_flags,
Expand All @@ -1929,6 +1991,7 @@ def __init__(self):
K,
"bf16",
"f32",
tile_pipeline=tile_pipeline,
additional_labels=["Performance"],
use_ukernel=use_ukernel,
n_repeats=5,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,11 @@ struct AMDAIEOptions {
clEnumValN(TilePassPipeline::PackPeelPipeline, "pack-peel",
"Use the pack-peel based lowering strategy for "
"matmul-like ops"),
clEnumValN(TilePassPipeline::PackPeel4LevelTilingPipeline,
"pack-peel-4-level-tiling",
"Use the pack-peel based lowering strategy with 4 "
"levels of tiling for "
"matmul-like ops"),
clEnumValN(
TilePassPipeline::PadPackPipeline, "pad-pack",
"Use the pad-pack based lowering strategy for matmul-like ops"),
Expand Down
Loading

0 comments on commit 0fd9766

Please sign in to comment.