Skip to content

Commit 9f9e5aa

Browse files
authored
[BenchGC] attach DLTI for mlir module (#312)
1 parent 2d27682 commit 9f9e5aa

File tree

8 files changed

+203
-1
lines changed

8 files changed

+203
-1
lines changed

python/CMakeLists.txt

+9
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ declare_mlir_python_sources(GcPythonSources.Common
4949
__init__.py
5050
graph_compiler.py
5151
dialects/__init__.py
52+
tools/__init__.py
53+
tools/cpuinfo.py
5254
# init hooks
5355
_mlir_libs/_site_initialize_0.py
5456
)
@@ -86,6 +88,13 @@ declare_mlir_python_extension(GcPythonSources.Extension
8688
GcCAPI
8789
)
8890

91+
declare_mlir_python_extension(GcPythonSources.CpuInfoExtension
92+
MODULE_NAME _cpuinfo
93+
ADD_TO_PARENT GcPythonSources
94+
SOURCES
95+
CPUInfo.cpp
96+
)
97+
8998
################################################################################
9099
# Common CAPI
91100
################################################################################

python/CPUInfo.cpp

+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/*
2+
* Copyright (C) 2024 Intel Corporation
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing,
11+
* software distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions
14+
* and limitations under the License.
15+
*
16+
* SPDX-License-Identifier: Apache-2.0
17+
*/
18+
19+
#include "mlir/Bindings/Python/PybindAdaptors.h"

#ifdef _MSC_VER
#include <intrin.h>
#endif

#include <iterator>
#include <vector>
20+
21+
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) ||             \
    defined(_M_IX86)
// x86 or x86_64 specific code

/// Execute the CPUID instruction for the given leaf/subleaf and store the
/// resulting EAX..EDX into info[0..3].
void cpuid(int info[4], int leaf, int subleaf) {
#ifdef _MSC_VER
  // MSVC does not accept GCC-style inline asm; use the compiler intrinsic.
  __cpuidex(info, leaf, subleaf);
#else
  __asm__ __volatile__("cpuid"
                       : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]),
                         "=d"(info[3])
                       : "a"(leaf), "c"(subleaf));
#endif
}

/// Query the data-cache sizes in bytes via CPUID leaf 04H.
/// Returns a 3-element vector {L1d, L2, L3}; an entry stays 0 when the CPU
/// does not report that cache level.
std::vector<int> getCacheSizes() {
  int info[4];
  cpuid(info, 0, 0);
  int maxLeaf = info[0];
  int caches[3] = {};
  for (int subleaf = 0; subleaf <= maxLeaf; ++subleaf) {
    cpuid(info, 4, subleaf);
    int cacheType = info[0] & 0x1F;
    if (cacheType == 0) {
      // No more cache levels reported.
      break;
    }
    if (cacheType == 2) {
      // skip instruction cache
      continue;
    }
    int cacheLevel = (info[0] >> 5) & 0x7;
    // Intel SDM, CPUID leaf 04H: EBX[11:0] = coherency line size - 1,
    // EBX[21:12] = physical line partitions - 1, EBX[31:22] = ways - 1,
    // ECX = number of sets - 1.
    int cacheLineSize = ((info[1] >> 0) & 0xFFF) + 1;
    int cachePartitions = ((info[1] >> 12) & 0x3FF) + 1;
    int cacheAssociativity = ((info[1] >> 22) & 0x3FF) + 1;
    int cacheSets = info[2] + 1;
    int cacheSize =
        cacheAssociativity * cachePartitions * cacheLineSize * cacheSets;
    if (cacheLevel >= 1 && cacheLevel <= 3) {
      caches[cacheLevel - 1] = cacheSize;
    }
  }
  return std::vector<int>(std::begin(caches), std::end(caches));
}

/// Return true when bit `bit` of CPUID output register `register_idx`
/// (0=EAX, 1=EBX, 2=ECX, 3=EDX) is set for leaf `function_id`, subleaf 0.
bool isFeatureSupported(int function_id, int register_idx, int bit) {
  int info[4];
  cpuid(info, function_id, 0);
  return (info[register_idx] & (1 << bit)) != 0;
}

/// Widest SIMD register width in bits supported by this CPU.
int getMaxVectorWidth() {
  if (isFeatureSupported(7, 1, 16)) { // leaf 7, EBX bit 16: AVX-512F
    return 512;
  } else if (isFeatureSupported(1, 2, 28)) { // leaf 1, ECX bit 28: AVX
    return 256;
  } else if (isFeatureSupported(1, 3, 25)) { // leaf 1, EDX bit 25: SSE
    return 128;
  }
  return 64; // Default to 64 if none of the above features are supported
}
#else
// Non-x86 targets: CPUID is unavailable; report "unknown".
std::vector<int> getCacheSizes() { return {}; }

int getMaxVectorWidth() { return 0; } // fixed: original was missing '()'
#endif
81+
82+
// Python extension module `_cpuinfo`: exposes the CPUID-based queries above
// to the gc_mlir Python package (consumed by gc_mlir/tools/cpuinfo.py).
PYBIND11_MODULE(_cpuinfo, m) {
  m.doc() = "Graph-compiler MLIR Python binding";
  m.def("get_cache_sizes", &getCacheSizes, "Get CPU L1,L2,L3 cache size");
  m.def("get_max_vector_width", &getMaxVectorWidth,
        "Get CPU supported max vector width");
}

python/gc_mlir/tools/__init__.py

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# ===-- __init__.py - init ------------------------------------*- Python -*-===#
2+
#
3+
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# ===-----------------------------------------------------------------------===#

python/gc_mlir/tools/cpuinfo.py

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# ===-- cpuinfo.py - Getting the CPU info ---------------------*- Python -*-===#
2+
#
3+
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# ===-----------------------------------------------------------------------===#
8+
9+
from .._mlir_libs import _cpuinfo
10+
11+
_cache_sizes = []
12+
_max_vector_width = None
13+
14+
15+
def get_cache_sizes():
    """Return the CPU L1/L2/L3 cache sizes, memoized in a module global.

    The native `_cpuinfo` probe is invoked at most once; subsequent calls
    reuse the cached result.
    """
    global _cache_sizes
    if _cache_sizes:
        return _cache_sizes
    _cache_sizes = _cpuinfo.get_cache_sizes()
    return _cache_sizes
20+
21+
22+
def get_max_vector_width():
    """Return the CPU's maximum vector register width, memoized.

    The native `_cpuinfo` probe runs only on the first call; the cached
    value (compared against None so 0 is also cached) is returned after.
    """
    global _max_vector_width
    if _max_vector_width is not None:
        return _max_vector_width
    _max_vector_width = _cpuinfo.get_max_vector_width()
    return _max_vector_width

test/benchgc/README.md

+8
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ Benchgc is a tool used to verify the correctness and performance of graph compil
88
* python >= 3.10
99
* torch >= 2.2
1010
* Enable mlir python binding, Refer to [`python/README.md`](../../python/README.md) for detail
11+
* Set the envs
12+
* OMP_NUM_THREADS [int] : the `num_threads` for dlti attr, default = 1
1113

1214
## Build
1315
There are two ways for using benchgc
@@ -107,6 +109,12 @@ module {
107109
| Pytorch tensor dump | F | dump filename |
108110
| Benchdnn driver | D | driver_name[:driver filling parameter]* |
109111

112+
### --cpu_cache_sizes, --max_vector_width
113+
* BenchGC will automatically obtain target info and add the DLTI attr to the IR
114+
* In some cases, if the system info obtained by BenchGC is not accurate, you can specify the relevant attributes for BenchGC through these options.
115+
* --cpu_cache_sizes: cpu cache sizes in bytes, format: L1:L2:L3, example: `--cpu_cache_sizes 49152:2097152:110100480`
116+
* --max_vector_width: the maximum width of vector registers available in a CPU, example: `--max_vector_width 512`
117+
110118
#### Benchdnn driver filling
111119

112120
| driver_name | driver filling parameter |

test/benchgc/setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,5 @@
2626
packages=setuptools.find_packages("src")
2727
+ setuptools.find_namespace_packages("../../python_packages/gc_mlir_core"),
2828
package_data={"gc_mlir": ["_mlir_libs/*.so"]},
29-
install_requires=["torch", "numpy", "ml_dtypes"],
29+
install_requires=["torch", "numpy"],
3030
)

test/benchgc/src/benchgc/__main__.py

+16
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,20 @@ def add_common_options(parser: argparse.ArgumentParser):
124124
help="if we need print the ir during the pass-pipeline",
125125
)
126126

127+
parser.add_argument(
128+
"--cpu_cache_sizes",
129+
required=False,
130+
help="set the cpu cache sizes, format: L1:L2:L3",
131+
type=str,
132+
)
133+
134+
parser.add_argument(
135+
"--max_vector_width",
136+
required=False,
137+
help="set the cpu max_vector_width",
138+
type=int,
139+
)
140+
127141
if parser.parse_known_args()[0].driver == "linalg":
128142
parser.add_argument(
129143
"--cast",
@@ -269,6 +283,8 @@ def get_module_and_args(flags: argparse.Namespace):
269283
for arg in args:
270284
arg.print_verbose(flags.verbose)
271285

286+
benchgc.mlir.util.attach_dlti(flags, module)
287+
272288
if flags.verbose >= benchgc.util.MODULE_VERBOSE:
273289
print(module)
274290
return module, args

test/benchgc/src/benchgc/mlir/util.py

+49
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,15 @@
1414
# limitations under the License.
1515
################################################################################
1616

17+
import argparse
1718
import ctypes
19+
import os
1820
from typing import Any, List
1921

2022
import torch
2123
from gc_mlir import ir
2224
from gc_mlir.dialects import arith, func, memref
25+
from gc_mlir.tools import cpuinfo
2326

2427

2528
# calling python binding consumes a lot of time e.g. get_name()
@@ -152,3 +155,49 @@ def get_kernel_func_from_module(
152155
if type(f) is func.FuncOp and str(f.name).strip('"') == func_name:
153156
return f
154157
raise ValueError("can not find the entry function")
158+
159+
160+
def attach_dlti(flags: argparse.Namespace, module: ir.Module):
    """Attach a `dlti.target_system_spec` attribute describing the target CPU.

    Cache sizes and max vector width come from --cpu_cache_sizes /
    --max_vector_width when given, otherwise from the native cpuinfo probe;
    num_threads comes from the OMP_NUM_THREADS env var (default 1). No-op
    when the module already carries a DLTI spec, or when the required CPU
    info cannot be determined (a hint is printed in that case).
    """
    # the module already had a dlti attr
    if "dlti.target_system_spec" in module.operation.attributes:
        return
    if flags.cpu_cache_sizes:
        cache_sizes = [int(x) for x in flags.cpu_cache_sizes.strip().split(":")]
    else:
        cache_sizes = cpuinfo.get_cache_sizes()
    if not cache_sizes or len(cache_sizes) != 3:
        print(
            "Failed to get CPU cache sizes, please add them manually by --cpu_cache_sizes"
        )
        return
    if flags.max_vector_width:
        max_vector_width = flags.max_vector_width
    else:
        max_vector_width = cpuinfo.get_max_vector_width()
    if not max_vector_width:
        print(
            "Failed to get CPU max vector width, please add it manually by --max_vector_width"
        )
        return
    l1_data_cache_size, l2_cache_size, l3_cache_size = cache_sizes
    if "OMP_NUM_THREADS" not in os.environ:
        print("OMP_NUM_THREADS is not found, using 1 as default")
    # Normalize to int: the env var value is a string, the default is an int.
    num_threads = int(os.environ.get("OMP_NUM_THREADS", 1))

    dlti_template = f"""
module attributes {{
dlti.target_system_spec = #dlti.target_system_spec<
"CPU": #dlti.target_device_spec<
#dlti.dl_entry<"L1_cache_size_in_bytes", {l1_data_cache_size} : ui32>,
#dlti.dl_entry<"L2_cache_size_in_bytes", {l2_cache_size} : ui64>,
#dlti.dl_entry<"L3_cache_size_in_bytes", {l3_cache_size} : ui64>,
#dlti.dl_entry<"num_threads", {num_threads} : i32>,
#dlti.dl_entry<"max_vector_width", {max_vector_width} : i64>>
>}} {{}}
"""
    # Echo the generated spec so the attached DLTI values are visible in logs.
    print(dlti_template)
    # Parse a throwaway module carrying the spec and copy the attribute over.
    with module.context:
        template_module = ir.Module.parse(dlti_template)
        module.operation.attributes["dlti.target_system_spec"] = (
            template_module.operation.attributes["dlti.target_system_spec"]
        )

0 commit comments

Comments
 (0)