Skip to content

Commit 244e9c9

Browse files
committed
Add Arm®v9-A architecture SME target
Add a new target, ARMV9SME, for Arm®v9-A architecture systems that support the Scalable Matrix Extension (SME) [1]. Initially inherits ARMV8SVE settings with updated compiler flags. This target can only be built with an SME-capable toolchain such as GCC 14 or LLVM 19. Includes some initial FEAT_SME2 feature detection on Linux targets via hwcaps. Target is disabled in DYNAMIC_ARCH builds by default. This is intended as a base target for SME2 kernels. [1] https://developer.arm.com/documentation/109246/0100/SME-Overview/SME-and-SME2
1 parent 89f02ed commit 244e9c9

13 files changed

+78
-6
lines changed

Makefile.arm64

+5
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ FCOMMON_OPT += -march=armv8-a+sve
3030
endif
3131
endif
3232

33+
# ARMV9SME: CCOMMON_OPT gets the armv9-a+sme2 -march flag, FCOMMON_OPT only armv9-a+sve2; both add -O3.
# NOTE(review): presumably the Fortran flags omit +sme2 because of Fortran compiler support - confirm.
ifeq ($(CORE), ARMV9SME)
34+
CCOMMON_OPT += -march=armv9-a+sme2 -O3
35+
FCOMMON_OPT += -march=armv9-a+sve2 -O3
36+
endif
37+
3338
ifeq ($(CORE), CORTEXA53)
3439
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
3540
ifneq ($(F_COMPILER), NAG)

Makefile.system

+10
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,7 @@ ifeq ($(ARCH), arm64)
420420
export MACOSX_DEPLOYMENT_TARGET=11.0
421421
ifeq ($(C_COMPILER), GCC)
422422
export NO_SVE = 1
423+
export NO_SME = 1
423424
endif
424425
else
425426
export MACOSX_DEPLOYMENT_TARGET=10.8
@@ -709,6 +710,11 @@ DYNAMIC_CORE += NEOVERSEN2
709710
DYNAMIC_CORE += ARMV8SVE
710711
DYNAMIC_CORE += A64FX
711712
endif
713+
# Disabled by default while ARMV9SME is WIP: NO_SME defaults to 1 unless already set.
# Setting NO_SME to anything other than 1 (e.g. NO_SME=0) adds ARMV9SME to DYNAMIC_CORE.
714+
NO_SME ?= 1
715+
ifneq ($(NO_SME), 1)
716+
DYNAMIC_CORE += ARMV9SME
717+
endif
712718
DYNAMIC_CORE += THUNDERX
713719
DYNAMIC_CORE += THUNDERX2T99
714720
DYNAMIC_CORE += TSV110
@@ -1474,6 +1480,10 @@ ifeq ($(NO_SVE), 1)
14741480
CCOMMON_OPT += -DNO_SVE
14751481
endif
14761482

1483+
# When NO_SME is 1, pass it on to the C compiler flags as -DNO_SME.
ifeq ($(NO_SME), 1)
1484+
CCOMMON_OPT += -DNO_SME
1485+
endif
1486+
14771487
ifdef SMP
14781488
CCOMMON_OPT += -DSMP_SERVER
14791489

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
188188
- **Apple Vortex**: preliminary support based on ThunderX2/3
189189
- **A64FX**: preliminary support, optimized Level-3 BLAS
190190
- **ARMV8SVE**: any ARMV8 cpu with SVE extensions
191+
- **ARMV9SME**: WIP target, any Arm®v9-A core with SME2 support. Only functional for GEMM.
191192

192193
#### PPC/PPC64
193194

TargetList.txt

+1
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ THUNDERX3T110
111111
VORTEX
112112
A64FX
113113
ARMV8SVE
114+
ARMV9SME
114115
FT2000
115116

116117
9.System Z:

cmake/arch.cmake

+15-3
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,21 @@ endif ()
4444

4545
if (DYNAMIC_ARCH)
4646
if (ARM64)
47-
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
48-
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 9.99)
49-
set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE A64FX)
47+
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
48+
# Extend DYNAMIC_CORE with the SVE and SME targets only when the C compiler is
# recent enough to provide the matching ACLE support.
# The compiler ID and version are quoted so that an empty or unusual value
# cannot produce a malformed if() condition or be re-dereferenced as a variable.
# Compilers whose ID is neither "GNU" nor "Clang" get none of these targets.
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
49+
if ("${CMAKE_C_COMPILER_VERSION}" VERSION_GREATER_EQUAL 10) # SVE ACLE supported in GCC >= 10
50+
set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE A64FX)
51+
endif ()
52+
if ("${CMAKE_C_COMPILER_VERSION}" VERSION_GREATER_EQUAL 14) # SME ACLE supported in GCC >= 14
53+
set(DYNAMIC_CORE ${DYNAMIC_CORE} ARMV9SME)
54+
endif()
55+
elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
56+
if ("${CMAKE_C_COMPILER_VERSION}" VERSION_GREATER_EQUAL 11) # SVE ACLE supported in LLVM >= 11
57+
set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE A64FX)
58+
endif ()
59+
if ("${CMAKE_C_COMPILER_VERSION}" VERSION_GREATER_EQUAL 19) # SME ACLE supported in LLVM >= 19
60+
set(DYNAMIC_CORE ${DYNAMIC_CORE} ARMV9SME)
61+
endif()
5062
endif ()
5163
if (DYNAMIC_LIST)
5264
set(DYNAMIC_CORE ARMV8 ${DYNAMIC_LIST})

cmake/cc.cmake

+6
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,12 @@ if (${CORE} STREQUAL ARMV8SVE)
238238
endif ()
239239
endif ()
240240

241+
# For the ARMV9SME core, append the armv9-a+sme2 -march flag to CCOMMON_OPT,
# but only when this is not a DYNAMIC_ARCH build.
if (${CORE} STREQUAL ARMV9SME)
242+
if (NOT DYNAMIC_ARCH)
243+
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv9-a+sme2")
244+
endif ()
245+
endif ()
246+
241247
if (${CORE} STREQUAL CORTEXA510)
242248
if (NOT DYNAMIC_ARCH)
243249
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")

cmake/prebuild.cmake

+1-1
Original file line numberDiff line numberDiff line change
@@ -1014,7 +1014,7 @@ endif ()
10141014
set(ZGEMM_UNROLL_M 4)
10151015
set(ZGEMM_UNROLL_N 4)
10161016
set(SYMV_P 16)
1017-
elseif ("${TCORE}" STREQUAL "NEOVERSEN2")
1017+
# ARMV9SME takes the same branch as NEOVERSEN2. CMake's logical keywords are
# case-sensitive, so the operator must be spelled OR (lowercase "or" does not parse).
elseif ("${TCORE}" STREQUAL "NEOVERSEN2" OR "${TCORE}" STREQUAL "ARMV9SME")
10181018
file(APPEND ${TARGET_CONF_TEMP}
10191019
"#define L1_CODE_SIZE\t65536\n"
10201020
"#define L1_CODE_LINESIZE\t64\n"

cmake/system.cmake

+3
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,9 @@ if (${TARGET} STREQUAL NEOVERSEV1)
310310
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve")
311311
endif()
312312
endif()
313+
# When the target is ARMV9SME, append the armv9-a+sme2 -march flag and -O3 to KERNEL_DEFINITIONS.
if (${TARGET} STREQUAL ARMV9SME)
314+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv9-a+sme2 -O3")
315+
endif()
313316
if (${TARGET} STREQUAL A64FX)
314317
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
315318
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve-intrinsics -march=armv8.2-a+sve -mtune=a64fx")

common_arm64.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
175175
#define HUGE_PAGESIZE ( 4 << 20)
176176

177177
#ifndef BUFFERSIZE
178-
#if defined(NEOVERSEN1) || defined(NEOVERSEN2) || defined(NEOVERSEV1) || defined(A64FX) || defined(ARMV8SVE)
178+
#if defined(NEOVERSEN1) || defined(NEOVERSEN2) || defined(NEOVERSEV1) || defined(A64FX) || defined(ARMV8SVE) || defined(ARMV9SME)
179179
#define BUFFER_SIZE (32 << 22)
180180
#else
181181
#define BUFFER_SIZE (32 << 20)

driver/others/dynamic_arm64.c

+19
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,11 @@ extern gotoblas_t gotoblas_ARMV8SVE;
115115
#else
116116
#define gotoblas_ARMV8SVE gotoblas_ARMV8
117117
#endif
118+
/* Declare the ARMV9SME table when DYN_ARMV9SME is defined;
   otherwise alias the name to the gotoblas_ARMV8 table. */
#ifdef DYN_ARMV9SME
119+
extern gotoblas_t gotoblas_ARMV9SME;
120+
#else
121+
#define gotoblas_ARMV9SME gotoblas_ARMV8
122+
#endif
118123
#ifdef DYN_CORTEX_A55
119124
extern gotoblas_t gotoblas_CORTEXA55;
120125
#else
@@ -148,6 +153,13 @@ extern gotoblas_t gotoblas_A64FX;
148153
#define gotoblas_ARMV8SVE gotoblas_ARMV8
149154
#define gotoblas_A64FX gotoblas_ARMV8
150155
#endif
156+
157+
/* Unless NO_SME is defined, declare the ARMV9SME table;
   with NO_SME the name is aliased to gotoblas_ARMV8SVE instead. */
#ifndef NO_SME
158+
extern gotoblas_t gotoblas_ARMV9SME;
159+
#else
160+
#define gotoblas_ARMV9SME gotoblas_ARMV8SVE
161+
#endif
162+
151163
extern gotoblas_t gotoblas_THUNDERX3T110;
152164
#endif
153165
#define gotoblas_NEOVERSEV2 gotoblas_NEOVERSEV1
@@ -393,6 +405,13 @@ static gotoblas_t *get_coretype(void) {
393405
snprintf(coremsg, 128, "Unknown CPU model - implementer %x part %x\n",implementer,part);
394406
openblas_warning(1, coremsg);
395407
}
408+
409+
/* Compiled only when NO_SME is not defined and HWCAP2_SME2 is defined:
   return the ARMV9SME table if getauxval(AT_HWCAP2) has the HWCAP2_SME2 bit set. */
#if !defined(NO_SME) && defined(HWCAP2_SME2)
410+
if ((getauxval(AT_HWCAP2) & HWCAP2_SME2)) {
411+
return &gotoblas_ARMV9SME;
412+
}
413+
#endif
414+
396415
#ifndef NO_SVE
397416
if ((getauxval(AT_HWCAP) & HWCAP_SVE)) {
398417
return &gotoblas_ARMV8SVE;

getarch.c

+14
Original file line numberDiff line numberDiff line change
@@ -1289,6 +1289,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12891289
#define CORENAME "ARMV8SVE"
12901290
#endif
12911291

1292+
/* Settings applied when FORCE_ARMV9SME is defined: the architecture, subdirectory,
   library and core names, plus ARCHCONFIG, a string of -D options carrying the
   L1/L2 cache sizes and line sizes, DTB parameters and HAVE_* feature defines
   (including HAVE_SVE and HAVE_SME). */
#ifdef FORCE_ARMV9SME
1293+
#define FORCE
1294+
#define ARCHITECTURE "ARM64"
1295+
#define SUBARCHITECTURE "ARMV9SME"
1296+
#define SUBDIRNAME "arm64"
1297+
#define ARCHCONFIG "-DARMV9SME " \
1298+
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
1299+
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
1300+
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
1301+
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DHAVE_SME -DARMV8 -DARMV9"
1302+
#define LIBNAME "armv9sme"
1303+
#define CORENAME "ARMV9SME"
1304+
#endif
1305+
12921306

12931307
#ifdef FORCE_ARMV8
12941308
#define FORCE

kernel/arm64/KERNEL.ARMV9SME

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# ARMV9SME pulls in the ARMV8SVE kernel configuration file unchanged.
include $(KERNELDIR)/KERNEL.ARMV8SVE

param.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -3667,7 +3667,7 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
36673667
#define CGEMM_DEFAULT_R 4096
36683668
#define ZGEMM_DEFAULT_R 4096
36693669

3670-
#elif defined(ARMV8SVE) || defined(ARMV9) || defined(CORTEXA510)|| defined(CORTEXA710) || defined(CORTEXX2) // 128-bit SVE
3670+
#elif defined(ARMV8SVE) || defined(ARMV9SME) || defined(ARMV9) || defined(CORTEXA510)|| defined(CORTEXA710) || defined(CORTEXX2) // 128-bit SVE
36713671

36723672
#if defined(XDOUBLE) || defined(DOUBLE)
36733673
#define SWITCH_RATIO 8

0 commit comments

Comments
 (0)