Skip to content

Commit

Permalink
[CPU][ARM] Adds SVE F32 implementation for MHASingleToken sub-functions
Browse files Browse the repository at this point in the history
  • Loading branch information
NishantPrabhuFujitsu committed Oct 31, 2024
1 parent 9ec63be commit 43ad2e1
Show file tree
Hide file tree
Showing 10 changed files with 491 additions and 15 deletions.
94 changes: 94 additions & 0 deletions cmake/developer_package/compile_flags/os_flags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

include(ProcessorCount)
include(CheckCXXCompilerFlag)
include(CheckCSourceCompiles)
include(CheckCXXSourceCompiles)

#
# ov_disable_deprecated_warnings()
Expand Down Expand Up @@ -91,6 +93,54 @@ macro(ov_dev_package_no_errors)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ov_c_cxx_dev_no_errors}")
endmacro()

#
# ov_check_compiler_supports_sve(lang flags)
#
# Checks whether compiler for passed language supports SVE code compilation
#
macro(ov_check_compiler_supports_sve lang flags)
# Code to compile
set(SVE_CODE "
#include <arm_sve.h>
int main() {
svfloat64_t a;
a = svdup_n_f64(0);
return 0;
}")

# Save the current state of required flags
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})

# Set the flags necessary for compiling the test code with SVE support
set(CMAKE_REQUIRED_FLAGS "${CMAKE_${lang}_FLAGS_INIT} ${flags}")

# Check if the source code compiles with the given flags for the specified language (C or C++)
if(lang STREQUAL "CXX")
CHECK_CXX_SOURCE_COMPILES("${SVE_CODE}" ${lang}_HAS_SVE)
else()
CHECK_C_SOURCE_COMPILES("${SVE_CODE}" ${lang}_HAS_SVE)
endif()

# If the compilation test is successful, set appropriate variables indicating support
if(${lang}_HAS_SVE)
set(${lang}_SVE_FOUND TRUE CACHE BOOL "SVE available on host")
set(${lang}_SVE_FOUND TRUE CACHE BOOL "${lang} SVE support")
set(${lang}_SVE_FLAGS "${flags}" CACHE STRING "${lang} SVE flags")
endif()

# Restore the original state of required flags
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})

# If the compilation test fails, indicate that the support is not found
if(NOT ${lang}_SVE_FOUND)
set(${lang}_SVE_FOUND FALSE CACHE BOOL "${lang} SVE support")
set(${lang}_SVE_FLAGS "" CACHE STRING "${lang} SVE flags")
endif()

# Mark the variables as advanced to hide them in the default CMake GUI
mark_as_advanced(${lang}_SVE_FOUND ${lang}_SVE_FLAGS)
endmacro()

#
# ov_sse42_optimization_flags(<output flags>)
#
Expand Down Expand Up @@ -208,6 +258,50 @@ macro(ov_arm_neon_fp16_optimization_flags flags)
endif()
endmacro()

#
# ov_arm_sve_optimization_flags(<output flags>)
#
macro(ov_arm_sve_optimization_flags flags)
# Check for compiler SVE support
ov_check_compiler_supports_sve(CXX "-march=armv8-a+sve")
ov_check_compiler_supports_sve(C "-march=armv8-a+sve")

if(OV_COMPILER_IS_INTEL_LLVM)
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# nothing should be required here
elseif(ANDROID)
if(ANDROID_ABI STREQUAL "arm64-v8a")
set(${flags} -Wno-unused-command-line-argument)
if(CXX_SVE_FOUND AND C_SVE_FOUND)
list(APPEND ${flags} -march=armv8-a+sve)
else()
message(WARNING "SVE is not supported on this Android ABI: ${ANDROID_ABI}")
endif()
else()
message(WARNING "SVE is not supported on this Android ABI: ${ANDROID_ABI}")
endif()
else()
if(AARCH64)
set(${flags} -O2)

# Add flag for SVE if supported
if(CXX_SVE_FOUND AND C_SVE_FOUND)
list(APPEND ${flags} -march=armv8-a+sve)
endif()
if(NOT CMAKE_CL_64)
list(APPEND ${flags} -ftree-vectorize)
endif()

set(${flags} ${${flags}})
elseif(ARM)
message(WARNING "SVE is not supported on 32-bit ARM architectures.")
else()
message(WARNING "SVE is not supported by architecture ${CMAKE_SYSTEM_PROCESSOR}")
endif()
endif()
endmacro()

#
# ov_disable_all_warnings(<target1 [target2 target3 ...]>)
#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ set(_CPU_CHECK_ANY "true")
set(_CPU_CHECK_SSE42 "with_cpu_x86_sse42()")
set(_CPU_CHECK_AVX "with_cpu_x86_avx()")
set(_CPU_CHECK_NEON_FP16 "with_cpu_neon_fp16()")
set(_CPU_CHECK_SVE "with_cpu_sve()")
set(_CPU_CHECK_AVX2 "with_cpu_x86_avx2()")
set(_CPU_CHECK_AVX512F "with_cpu_x86_avx512f()")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
#

## list of available instruction sets
set(_ARCH_LIST ANY SSE42 AVX AVX2 AVX512F NEON_FP16)
set(_ARCH_LIST ANY SSE42 AVX AVX2 AVX512F NEON_FP16 SVE)

set(_ACCEPTED_ARCHS_ANY "^(ANY)$")
set(_ACCEPTED_ARCHS_SSE42 "^(ANY|SSE42)$")
set(_ACCEPTED_ARCHS_AVX "^(ANY|SSE42|AVX)$")
set(_ACCEPTED_ARCHS_AVX2 "^(ANY|SSE42|AVX|AVX2)$")
set(_ACCEPTED_ARCHS_AVX512F "^(ANY|SSE42|AVX|AVX2|AVX512F)$")
set(_ACCEPTED_ARCHS_NEON_FP16 "^(ANY|NEON_FP16)$")
set(_ACCEPTED_ARCHS_SVE "^(ANY|SVE)$")

## Arch specific definitions
set(_DEFINE_ANY "")
Expand All @@ -19,12 +20,14 @@ set(_DEFINE_AVX "HAVE_AVX" ${_DEFINE_SSE42})
set(_DEFINE_AVX2 "HAVE_AVX2" ${_DEFINE_AVX})
set(_DEFINE_AVX512F "HAVE_AVX512F" ${_DEFINE_AVX2})
set(_DEFINE_NEON_FP16 "HAVE_NEON_FP16" ${_DEFINE_ANY})
set(_DEFINE_SVE "HAVE_SVE" ${_DEFINE_SVE})

## Arch specific compile options
ov_avx512_optimization_flags(_FLAGS_AVX512F)
ov_avx2_optimization_flags (_FLAGS_AVX2)
ov_sse42_optimization_flags (_FLAGS_SSE42)
ov_arm_neon_fp16_optimization_flags(_FLAGS_NEON_FP16)
ov_arm_sve_optimization_flags(_FLAGS_SVE)
set(_FLAGS_AVX "") ## TBD is not defined for OV project yet
set(_FLAGS_ANY "") ##

Expand Down Expand Up @@ -185,6 +188,8 @@ endfunction()
function(_currently_requested_top_arch VAR)
if(ENABLE_NEON_FP16)
set(RES NEON_FP16)
elseif(ENABLE_SVE)
set(RES SVE)
elseif(ENABLE_AVX512F)
set(RES AVX512F)
elseif(ENABLE_AVX2)
Expand Down
2 changes: 2 additions & 0 deletions cmake/developer_package/features.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ ov_dependent_option (ENABLE_AVX512F "Enable AVX512 optimizations" ON "X86_64 OR

ov_dependent_option(ENABLE_NEON_FP16 "Enable ARM FP16 optimizations" ON "AARCH64" OFF)

ov_dependent_option(ENABLE_SVE "Enable SVE optimizations" ON "AARCH64" OFF)

# Type of build, we add this as an explicit option to default it to ON
get_property(BUILD_SHARED_LIBS_DEFAULT GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS)
ov_option (BUILD_SHARED_LIBS "Build as a shared library" ${BUILD_SHARED_LIBS_DEFAULT})
Expand Down
7 changes: 7 additions & 0 deletions src/inference/dev_api/openvino/runtime/system_conf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ OPENVINO_RUNTIME_API bool with_cpu_x86_sse42();
*/
OPENVINO_RUNTIME_API bool with_cpu_neon_fp16();

/**
* @brief Checks whether CPU supports ARM SVE capability
* @ingroup ov_dev_api_system_conf
* @return `True` if ARM SVE instructions are available, `false` otherwise
*/
OPENVINO_RUNTIME_API bool with_cpu_sve();

/**
* @brief Checks whether CPU supports AVX capability
* @ingroup ov_dev_api_system_conf
Expand Down
19 changes: 19 additions & 0 deletions src/inference/src/system_conf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
# include <sys/auxv.h>
# define ARM_COMPUTE_CPU_FEATURE_HWCAP_FPHP (1 << 9)
# define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDHP (1 << 10)
# define ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE (1 << 24)
#elif defined(__APPLE__) && defined(__aarch64__)
# include <sys/sysctl.h>
# include <sys/types.h>
Expand Down Expand Up @@ -114,6 +115,10 @@ bool with_cpu_neon_fp16() {
return false;
}

bool with_cpu_sve() {
return false;
}

#else // OPENVINO_ARCH_X86 || OPENVINO_ARCH_X86_64

bool with_cpu_x86_sse42() {
Expand Down Expand Up @@ -173,6 +178,20 @@ bool with_cpu_neon_fp16() {
return false;
# endif
}
bool with_cpu_sve() {
# if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
!defined(__arm__) && defined(__aarch64__)
const uint32_t hwcaps = getauxval(AT_HWCAP);
return hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE;
# elif !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
!defined(__aarch64__) && defined(__arm__)
return false;
# elif defined(__aarch64__) && defined(__APPLE__)
return false;
# else
return false;
# endif
}
#endif // OPENVINO_ARCH_X86 || OPENVINO_ARCH_X86_64

bool check_open_mp_env_vars(bool include_omp_num_threads) {
Expand Down
64 changes: 62 additions & 2 deletions src/plugins/intel_cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,66 @@ target_include_directories(${TARGET_NAME} SYSTEM PRIVATE $<TARGET_PROPERTY:dnnl,
# is not (yet) needed.
target_include_directories(${TARGET_NAME} PRIVATE $<TARGET_PROPERTY:openvino::reference,INTERFACE_INCLUDE_DIRECTORIES>)

# Checking for SVE support in CXX and C
INCLUDE(CheckCSourceCompiles)
INCLUDE(CheckCXXSourceCompiles)

SET(SVE_CODE "
#include <arm_sve.h>
int main() {
svfloat64_t a;
a = svdup_n_f64(0);
return 0;
}
")

MACRO(CHECK_SVE lang flags)
# Save the current state of required flags
SET(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})

# Set the flags necessary for compiling the test code with SVE support
SET(CMAKE_REQUIRED_FLAGS "${CMAKE_${lang}_FLAGS_INIT} ${flags}")

# Check if the source code compiles with the given flags for the specified language (C or C++)
IF(lang STREQUAL "CXX")
CHECK_CXX_SOURCE_COMPILES("${SVE_CODE}" ${lang}_HAS_SVE)
ELSE()
CHECK_C_SOURCE_COMPILES("${SVE_CODE}" ${lang}_HAS_SVE)
ENDIF()

# If the compilation test is successful, set appropriate variables indicating support
IF(${lang}_HAS_SVE)
SET(${lang}_SVE_FOUND TRUE CACHE BOOL "SVE available on host")
SET(${lang}_SVE_FOUND TRUE CACHE BOOL "${lang} SVE support")
SET(${lang}_SVE_FLAGS "${flags}" CACHE STRING "${lang} SVE flags")
ENDIF()

# Restore the original state of required flags
SET(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})

# If the compilation test fails, indicate that the support is not found
IF(NOT ${lang}_SVE_FOUND)
SET(${lang}_SVE_FOUND FALSE CACHE BOOL "${lang} SVE support")
SET(${lang}_SVE_FLAGS "" CACHE STRING "${lang} SVE flags")
ENDIF()

# Mark the variables as advanced to hide them in the default CMake GUI
MARK_AS_ADVANCED(${lang}_SVE_FOUND ${lang}_SVE_FLAGS)
ENDMACRO()

CHECK_SVE(CXX "-march=armv8-a+sve")
CHECK_SVE(C "-march=armv8-a+sve")

# ARCH lists for softmax.cpp and mha_single_token.cpp
# Based on result of above calls, decide whether to add SVE
set(SOFTMAX_ARCH_LIST AVX512F AVX2 NEON_FP16 ANY)
set(MHA_SINGLE_TOKEN_ARCH_LIST AVX512F AVX2 NEON_FP16 ANY)

if(CXX_SVE_FOUND AND C_SVE_FOUND)
list(APPEND SOFTMAX_ARCH_LIST SVE)
list(APPEND MHA_SINGLE_TOKEN_ARCH_LIST SVE)
endif()

# Cross compiled function
# TODO: The same for proposal, proposalONNX, topk
cross_compiled_file(${TARGET_NAME}
Expand All @@ -287,14 +347,14 @@ cross_compiled_file(${TARGET_NAME}
NAMESPACE ov::Extensions::Cpu::XARCH
)
cross_compiled_file(${TARGET_NAME}
ARCH AVX512F AVX2 NEON_FP16 ANY
ARCH ${SOFTMAX_ARCH_LIST}
src/nodes/kernels/scaled_attn/softmax.cpp
API src/nodes/kernels/scaled_attn/softmax.hpp
NAME attn_softmax
NAMESPACE ov::Extensions::Cpu::XARCH
)
cross_compiled_file(${TARGET_NAME}
ARCH AVX512F AVX2 NEON_FP16 ANY
ARCH ${MHA_SINGLE_TOKEN_ARCH_LIST}
src/nodes/kernels/scaled_attn/mha_single_token.cpp
API src/nodes/kernels/scaled_attn/mha_single_token.hpp
NAME mha_single_token
Expand Down
Loading

0 comments on commit 43ad2e1

Please sign in to comment.