diff --git a/.circleci/config.yml b/.circleci/config.yml index f714672..22234a5 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -12,48 +12,33 @@ version: 2.1 ##================================================================================================== docker_gcc: &docker_gcc docker: - - image: gcc:latest + - image: compilaction/gcc-dev:latest environment: - COMPILER: g++ -docker_i686: &docker_i686 - docker: - - image: gcc:latest - environment: - COMPILER_PACKAGE: g++-multilib - COMPILER: g++-8 + COMPILER: g++-9 docker_clang: &docker_clang docker: - - image: ubuntu:disco + - image: compilaction/clang-dev:latest environment: - COMPILER: clang++-8 + COMPILER: clang++-9 docker_ppc64: &docker_ppc64 docker: - - image: ubuntu:disco + - image: compilaction/gcc-dev:latest environment: - COMPILER_PACKAGE: g++-8-powerpc64-linux-gnu - COMPILER: powerpc64-linux-gnu-g++-8 RUN_COMMAND: qemu-ppc64 - EXTRA_PATH: /usr/powerpc64-linux-gnu/lib/ - EXTRA_LIB: lib64 - EXTRA_NAME: ld64.so.1 + COMPILER: powerpc64-linux-gnu-g++-9 docker_aarch64: &docker_aarch64 docker: - - image: ubuntu:disco + - image: compilaction/gcc-dev:latest environment: - COMPILER_PACKAGE: g++-8-aarch64-linux-gnu - COMPILER: aarch64-linux-gnu-g++-8 RUN_COMMAND: qemu-aarch64 - EXTRA_PATH: /usr/aarch64-linux-gnu/lib/ - EXTRA_LIB: lib - EXTRA_NAME: ld-linux-aarch64.so.1 + COMPILER: aarch64-linux-gnu-g++-9 + ##================================================================================================== ## Build configurations ##================================================================================================== config_gcc_amd64: &config_gcc_amd64 <<: *docker_gcc -config_gcc_x86: &config_gcc_x86 - <<: *docker_i686 config_clang_amd64: &config_clang_amd64 <<: *docker_clang config_clang_x86: &config_clang_x86 @@ -74,9 +59,6 @@ jobs: <<: *config_gcc_amd64 steps: - checkout - - run: - name: Setup dependencies - command: . 
.circleci/prepare.sh - run: name: Running Basic Tests - SSE2 command: VARIANT="sse2" OPTIONS="-O3 -msse2" . .circleci/run.sh @@ -92,43 +74,14 @@ jobs: - run: name: Running Basic Tests - AVX2 command: VARIANT="avx2" OPTIONS="-O3 -mavx2" . .circleci/run.sh - - gcc_x86: - <<: *config_gcc_x86 - steps: - - checkout - - run: - name: Setup dependencies - command: . .circleci/prepare.sh - - run: - name: Setup cross compiler - command: . .circleci/prepare_cc.sh - - run: - name: Running Basic Tests - SSE2 - command: VARIANT="sse2" OPTIONS="-O3 -msse2 -m32" . .circleci/run.sh - - run: - name: Running Basic Tests - SSE4 - command: VARIANT="sse4" OPTIONS="-O3 -msse4 -m32" . .circleci/run.sh - - run: - name: Running Basic Tests - AVX - command: VARIANT="avx" OPTIONS="-O3 -mavx -m32" . .circleci/run.sh - run: - name: Running Basic Tests - FMA3 - command: VARIANT="fma" OPTIONS="-O3 -mavx -mfma -m32" . .circleci/run.sh - - run: - name: Running Basic Tests - AVX2 - command: VARIANT="avx2" OPTIONS="-O3 -mavx2 -m32" . .circleci/run.sh + name: Running Basic Tests - AVX512 + command: VARIANT="avx512" OPTIONS="-O3 -mavx512f" . .circleci/run.sh clang_amd64: <<: *config_clang_amd64 steps: - checkout - - run: - name: Setup dependencies - command: . .circleci/prepare.sh - - run: - name: Install clang-8 - command: 'apt-get update && sudo apt-get install -y clang-8' - run: name: Running Basic Tests - SSE2 command: VARIANT="sse2" OPTIONS="-O3 -msse2" . .circleci/run.sh @@ -149,12 +102,6 @@ jobs: <<: *config_aarch64 steps: - checkout - - run: - name: Setup dependencies - command: . .circleci/prepare.sh - - run: - name: Setup cross compiler - command: . .circleci/prepare_cc.sh - run: name: Running Basic Tests - AARCH64 NEON command: VARIANT="aarch64" OPTIONS="-O3 -Wno-psabi" . .circleci/run.sh @@ -163,12 +110,6 @@ jobs: <<: *config_gcc_ppc64 steps: - checkout - - run: - name: Setup dependencies - command: . .circleci/prepare.sh - - run: - name: Setup cross compiler - command: . 
.circleci/prepare_cc.sh - run: name: Running Basic Tests - PPC64 command: VARIANT="vsx" OPTIONS="-O3" . .circleci/run.sh @@ -184,7 +125,6 @@ workflows: ## All tests ##============================================================================================== - gcc_amd64 - - gcc_x86 - clang_amd64 - arm_aarch64 - ppc64 diff --git a/.circleci/prepare.sh b/.circleci/prepare.sh deleted file mode 100644 index 136afd5..0000000 --- a/.circleci/prepare.sh +++ /dev/null @@ -1,12 +0,0 @@ -##================================================================================================== -## SPY - C++ Informations Broker -## Copyright 2020 Joel FALCOU -## -## Licensed under the MIT License . -## SPDX-License-Identifier: MIT -##================================================================================================== -#!/usr/bin/env bash - -apt-get update && apt-get install -y sudo && rm -rf /var/lib/apt/lists/* -apt-get update && sudo apt-get install -y cmake -apt-get update && sudo apt-get install -y ninja-build diff --git a/.circleci/prepare_cc.sh b/.circleci/prepare_cc.sh deleted file mode 100644 index 3b24b33..0000000 --- a/.circleci/prepare_cc.sh +++ /dev/null @@ -1,12 +0,0 @@ -##================================================================================================== -## SPY - C++ Informations Broker -## Copyright 2020 Joel FALCOU -## -## Licensed under the MIT License . 
-## SPDX-License-Identifier: MIT -##================================================================================================== -#!/usr/bin/env bash - -apt-get update && sudo apt-get install -y $COMPILER_PACKAGE -apt-get update && sudo apt-get install -y qemu-user -apt-get update && sudo apt-get install -y qemu diff --git a/.circleci/run.sh b/.circleci/run.sh index 9cc64e1..3e3c1af 100644 --- a/.circleci/run.sh +++ b/.circleci/run.sh @@ -25,18 +25,7 @@ else fi ##================================================================================================== -## PATH Infos -##================================================================================================== -if [[ -v EXTRA_PATH ]] -then - echo "Updating path for $EXTRA_PATH/$EXTRA_NAME ..." - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$EXTRA_PATH - export LD_LIBRARY_PATH - ln -sf $EXTRA_PATH/$EXTRA_NAME /$EXTRA_LIB/$EXTRA_NAME -fi - -##================================================================================================== -## Run every test up to SIMD +## Run every test ##================================================================================================== ninja unit -j 8 ctest -j 8 diff --git a/docs/index.html b/docs/index.html index 5c77ad6..f103747 100644 --- a/docs/index.html +++ b/docs/index.html @@ -1,8 +1,9 @@ - ![The C++ Informations Broker](https://github.com/jfalcou/spy/raw/develop/logo.png) + ![**The C++ Informations Broker**](https://github.com/jfalcou/spy/raw/develop/logo.png) -# Purpose +Purpose +==================================================================================================== Detection and versioning of operating systems, compilers, architecture and other element are traditionally done using preprocessor macros. 
Library like [Boost.Predef](https://www.boost.org/doc/libs/release/doc/html/predef.html) provides a sanitized @@ -14,12 +15,17 @@ **References:** + +![**Our CppCon 2019 Lightning talk video**](https://www.youtube.com/watch?v=t406o2EhG-A) + - [Our CppCon 2019 Ligthning talk slides](https://docs.google.com/presentation/d/1nSBhU4pr5EWznni0MYsyDkMCr3O3q2XS-KQdz2_BRRI/edit?usp=sharing) -- [Our CppCon 2019 Ligthning talk video](https://www.youtube.com/watch?v=t406o2EhG-A) - [Boost.Predef](https://www.boost.org/doc/libs/release/doc/html/predef.html) -# How to install -## Using CMake +How to install +==================================================================================================== + +Using CMake +---------------------------------------------------------------------------------------------------- After cloning the repository, run `CMake` with an install prefix path then run the `install` target using `Make` or any other build system. @@ -31,7 +37,8 @@ make install ~~~~~ -## Manually +Manually +---------------------------------------------------------------------------------------------------- After cloning the repository, copy the `include` folder into the folder of your choice. ~~~~~ bash @@ -62,8 +69,17 @@ Just compile this example using a C++17 compliant compiler. Don't forget to add the path to the SPY library files to your favorite compiler's options. -# User manual -## Supported detectors +Redistributable include +---------------------------------------------------------------------------------------------------- +If you want to use SPY in your own project but want to keep a low imprint on your own source code, +you can copy the standalone `spy.hpp` file provided at the root of the include folder. This file +contains the whole SPY library ready to be used and shipped. 
+ +User manual +==================================================================================================== + +Supported detectors +---------------------------------------------------------------------------------------------------- SPY can detect: - Architecture family via the `spy::architecture` object. @@ -91,7 +107,8 @@ ~~~~~ -## Comparing vendors +Comparing vendors +---------------------------------------------------------------------------------------------------- Knowing is half the battle, we may want to compare the current compiler or OS to a given one so you can branch off your code based on this informations. Here is the list of each detected vendor for each SPY objects. @@ -140,7 +157,8 @@ - Checking for POSIX capability via `spy::supports::posix_` -## Comparing versions +Comparing versions +---------------------------------------------------------------------------------------------------- Checking for a vendor is sometimes not enough, we need to check which version of a given component is used. To do this, you can compare a detector to a given version value using any comparison operators. @@ -183,16 +201,17 @@ ~~~~~ -## Handling SIMD extensions +Handling SIMD extensions +---------------------------------------------------------------------------------------------------- SIMD extensions set detection is made so that one can ask if the current SIMD extension is exactly, below or above a given reference instruction set. 
Detectable instructions sets depends on SIMD hardware vendor -| Architecture | Supported SIMD instructions sets | -| ------------- | ------------------------------------------------------------------------ | -| X86 | `sse1_`, `sse2_`, `sse3_`, `ssse3_`, `sse41_`, `sse42_`, `avx_`, `avx2_` | -| Power PC | `vmx_`, `vsx_` | -| ARM | `neon_` | +| Architecture | Supported SIMD instructions sets | +| ------------- | ----------------------------------------------------------------------------------- | +| X86 | `sse1_`, `sse2_`, `sse3_`, `ssse3_`, `sse41_`, `sse42_`, `avx_`, `avx2_`, `avx512_` | +| Power PC | `vmx_`, `vsx_` | +| ARM | `neon_` | Complete set of comparison operators is provided for those sets. Order of instructions sets are built so that if an instructions set supersedes another, it is considered greater than. For @@ -250,8 +269,8 @@ ~~~~~ -Some SIMD instructions set provides supplemental instructions on top of existing system. Those -supplemental instruction set can be checked using the `spy::supports` namespace. +Some SIMD instruction sets provide supplemental instructions on top of the existing system. +Those supplemental instruction sets can be checked using the `spy::supports` namespace. +The complete list of supplemental instruction sets is given in the following table. 
+ +| Architecture | Supported SIMD instructions sets | +| ------------- | ------------------------------------------------------------------------------------------------------------------------------- | +| X86 AVX | `xop_`, `fma_`, `fma4_` | +| X86 AVX512 | `avx512::bw_`, `avx512::cd_`, `avx512::dq_`, `avx512::er_`, `avx512::ifma_`, `avx512::pf_`, `avx512::vl_`, `avx512::popcntdq_`, `avx512::_4fmaps_`, `avx512::vnniw_`, `avx512::vbmi_`, `avx512::bf16_`, `avx512::bitalg_`, `avx512::vbmi2_`, `avx512::vnni_`, `avx512::vpintersect_` | +| ARM | `spy::supports::aarch64_` | -## Caveat with `if constexpr` +Caveat with `if constexpr` +---------------------------------------------------------------------------------------------------- The detection and comparisons of versions using SPY detectors are subject to some caveats that stem from the way `if constexpr` behaves. As both branch of the `if constexpr` are ODR_checked, all functions and type names must be defined even if not used. This means @@ -296,11 +328,11 @@ template auto f(T t) { - if constexpr( spy::clang ) + if constexpr( spy::clang_ ) { return __builtin_bitreverse32(t); } - else if constexpr( spy::gcc ) + else if constexpr( spy::gcc_ ) { return __builtin_bswap32(t); } @@ -314,24 +346,20 @@ ~~~~~ c++ #include -template auto f(T t) requires( spy::clang ) +template auto f(T t) requires( spy::clang_ ) { return __builtin_bitreverse32(t); } -template auto f(T t) requires( spy::gcc ) +template auto f(T t) requires( spy::gcc_ ) { return __builtin_bswap32(t); } ~~~~~ -## Redistribuable include -If you want to use SPY in your own project but want to keep a low imprint on your own source code, -one can copy the standalone `spy.hpp` file provided at the root of the include folder. This file -contains the whole SPY library ready to be used and shipped. 
- -# License +License +==================================================================================================== This library is licensed under the MIT License as specified in the LICENSE.md file. If you use SPY in your project or product, feel free to send us an email so we can advertise it here. diff --git a/include/spy.hpp b/include/spy.hpp index e6af279..299f2a2 100644 --- a/include/spy.hpp +++ b/include/spy.hpp @@ -6,8 +6,8 @@ SPDX-License-Identifier: MIT */ //================================================================================================== -#ifndef SPY_SPY_HPP_INLUDED -#define SPY_SPY_HPP_INLUDED +#ifndef SPY_SPY_HPP_INCLUDED +#define SPY_SPY_HPP_INCLUDED #include namespace spy::detail { @@ -462,7 +462,7 @@ namespace spy { #if defined(_LIBCPP_VERSION) #define SPY_STDLIB_IS_LIBCPP - using stdlib_type = detail::stdcpp_t<(_LIBCPP_VERSION/1000)%10,0,_LIBCPP_VERSION%1000,0>; + using stdlib_type = detail::libcpp_t<(_LIBCPP_VERSION/1000)%10,0,_LIBCPP_VERSION%1000,0>; #elif defined(__GLIBCXX__) #define SPY_STDLIB_IS_GLIBCXX #define SPY0 (__GLIBCXX__/100) @@ -499,6 +499,74 @@ namespace spy::literal } } #include +#if !defined(SPY_SIMD_DETECTED) && defined(__AVX512F__) +# define SPY_SIMD_IS_X86_AVX512 +# define SPY_SIMD_DETECTED ::spy::detail::simd_version::avx512_ +#if defined(__AVX512BW__) +# define SPY_SIMD_IS_X86_AVX512_BW +# define SPY_SIMD_X86_AVX512_SUB 0x0001 +#endif +#if defined(__AVX512CD__) +# define SPY_SIMD_IS_X86_AVX512_CD +# define SPY_SIMD_X86_AVX512_SUB 0x0002 +#endif +#if defined(__AVX512DQ__) +# define SPY_SIMD_IS_X86_AVX512_DQ +# define SPY_SIMD_X86_AVX512_SUB 0x0004 +#endif +#if defined(__AVX512ER__) +# define SPY_SIMD_IS_X86_AVX512_ER +# define SPY_SIMD_X86_AVX512_SUB 0x0008 +#endif +#if defined(__AVX512IFMA__) +# define SPY_SIMD_IS_X86_AVX512_IFMA +# define SPY_SIMD_X86_AVX512_SUB 0x0010 +#endif +#if defined(__AVX512PF__) +# define SPY_SIMD_IS_X86_AVX512_PF +# define SPY_SIMD_X86_AVX512_SUB 0x0020 +#endif +#if 
defined(__AVX512VL__) +# define SPY_SIMD_IS_X86_AVX512_VL +# define SPY_SIMD_X86_AVX512_SUB 0x0040 +#endif +#if defined(__AVX512VPOPCNTDQ__) +# define SPY_SIMD_IS_X86_AVX512_POPCNTDQ +# define SPY_SIMD_X86_AVX512_SUB 0x0080 +#endif +#if defined(__AVX5124FMAPS__) +# define SPY_SIMD_IS_X86_AVX512_4FMAPS +# define SPY_SIMD_X86_AVX512_SUB 0x0100 +#endif +#if defined(__AVX5124VNNIW__) +# define SPY_SIMD_IS_X86_AVX512_VNNIW +# define SPY_SIMD_X86_AVX512_SUB 0x0200 +#endif +#if defined(__AVX512VBMI__) +# define SPY_SIMD_IS_X86_AVX512_VBMI +# define SPY_SIMD_X86_AVX512_SUB 0x0400 +#endif +#if defined(__AVX512BF16__) +# define SPY_SIMD_IS_X86_AVX512_BF16 +# define SPY_SIMD_X86_AVX512_SUB 0x0800 +#endif +#if defined(__AVX512BITALG__) +# define SPY_SIMD_IS_X86_AVX512_BITALG +# define SPY_SIMD_X86_AVX512_SUB 0x1000 +#endif +#if defined(__AVX512VBMI2__) +# define SPY_SIMD_IS_X86_AVX512_VBMI2 +# define SPY_SIMD_X86_AVX512_SUB 0x2000 +#endif +#if defined(__AVX512VNNI__) +# define SPY_SIMD_IS_X86_AVX512_VNNI +# define SPY_SIMD_X86_AVX512_SUB 0x4000 +#endif +#if defined(__AVX512VP2INTERSECT__) +# define SPY_SIMD_IS_X86_AVX512_VP2INTERSECT +# define SPY_SIMD_X86_AVX512_SUB 0x8000 +#endif +#endif #if !defined(SPY_SIMD_DETECTED) && defined(__AVX2__) # define SPY_SIMD_IS_X86_AVX2 # define SPY_SIMD_DETECTED ::spy::detail::simd_version::avx2_ @@ -555,6 +623,105 @@ namespace spy::supports #else constexpr inline auto xop_ = false; #endif +namespace avx512 +{ +#if defined(__AVX512BW__) +# define SPY_SIMD_IS_X86_AVX512_BW + constexpr inline auto bw_ = true; +#else + constexpr inline auto bw_ = false; +#endif +#if defined(__AVX512CD__) +# define SPY_SIMD_IS_X86_AVX512_CD + constexpr inline auto cd_ = true; +#else + constexpr inline auto cd_ = false; +#endif +#if defined(__AVX512DQ__) +# define SPY_SIMD_IS_X86_AVX512_DQ + constexpr inline auto dq_ = true; +#else + constexpr inline auto dq_ = false; +#endif +#if defined(__AVX512ER__) +# define SPY_SIMD_IS_X86_AVX512_ER + constexpr inline auto 
er_ = true; +#else + constexpr inline auto er_ = false; +#endif +#if defined(__AVX512IFMA__) +# define SPY_SIMD_IS_X86_AVX512_IFMA + constexpr inline auto ifma_ = true; +#else + constexpr inline auto ifma_ = false; +#endif +#if defined(__AVX512PF__) +# define SPY_SIMD_IS_X86_AVX512_PF + constexpr inline auto pf_ = true; +#else + constexpr inline auto pf_ = false; +#endif +#if defined(__AVX512VL__) +# define SPY_SIMD_IS_X86_AVX512_VL + constexpr inline auto vl_ = true; +#else + constexpr inline auto vl_ = false; +#endif +#if defined(__AVX512VPOPCNTDQ__) +# define SPY_SIMD_IS_X86_AVX512_POPCNTDQ + constexpr inline auto popcntdq_ = true; +#else + constexpr inline auto popcntdq_ = false; +#endif +#if defined(__AVX5124FMAPS__) +# define SPY_SIMD_IS_X86_AVX512_4FMAPS + constexpr inline auto _4fmaps_ = true; +#else + constexpr inline auto _4fmaps_ = false; +#endif +#if defined(__AVX5124VNNIW__) +# define SPY_SIMD_IS_X86_AVX512_VNNIW + constexpr inline auto vnniw_ = true; +#else + constexpr inline auto vnniw_ = false; +#endif +#if defined(__AVX512VBMI__) +# define SPY_SIMD_IS_X86_AVX512_VBMI + constexpr inline auto vbmi_ = true; +#else + constexpr inline auto vbmi_ = false; +#endif +#if defined(__AVX512BF16__) +# define SPY_SIMD_IS_X86_AVX512_BF16 + constexpr inline auto bf16_ = true; +#else + constexpr inline auto bf16_ = false; +#endif +#if defined(__AVX512BITALG__) +# define SPY_SIMD_IS_X86_AVX512_BITALG + constexpr inline auto bitalg_ = true; +#else + constexpr inline auto bitalg_ = false; +#endif +#if defined(__AVX512VBMI2__) +# define SPY_SIMD_IS_X86_AVX512_VBMI2 + constexpr inline auto vbmi2_ = true; +#else + constexpr inline auto vbmi2_ = false; +#endif +#if defined(__AVX512VNNI__) +# define SPY_SIMD_IS_X86_AVX512_VNNI + constexpr inline auto vnni_ = true; +#else + constexpr inline auto vnni_ = false; +#endif +#if defined(__AVX512VP2INTERSECT__) +# define SPY_SIMD_IS_X86_AVX512_VP2INTERSECT + constexpr inline auto vpintersect_ = true; +#else + constexpr inline auto 
vpintersect_ = false; +#endif +} } #if !defined(SPY_SIMD_DETECTED) && (defined(__ARM_NEON__) || defined(_M_ARM) || defined(__aarch64__)) # define SPY_SIMD_IS_ARM_NEON @@ -589,10 +756,11 @@ namespace spy::detail { enum class simd_isa { undefined_ = -1, x86_ = 1000, ppc_ = 2000, arm_ = 3000 }; enum class simd_version { undefined_ = -1 - , sse1_ = 1110, sse2_ = 1120, sse3_ = 1130, ssse3_ = 1131 - , sse41_ = 1141, sse42_ = 1142, avx_ = 1201, avx2_ = 1202 - , vmx_ = 2001, vsx_ = 2002 - , neon_ = 3001 + , sse1_ = 1110, sse2_ = 1120, sse3_ = 1130, ssse3_ = 1131 + , sse41_ = 1141, sse42_ = 1142, avx_ = 1201, avx2_ = 1202 + , avx512_ = 1300 + , vmx_ = 2001, vsx_ = 2002 + , neon_ = 3001 }; template struct simd_info @@ -601,17 +769,18 @@ namespace spy::detail static constexpr auto version = VERSION; friend std::ostream& operator<<(std::ostream& os, simd_info const&) { - if constexpr ( VERSION == simd_version::sse1_ ) os << "X86 SSE"; - else if constexpr ( VERSION == simd_version::sse2_ ) os << "X86 SSE2"; - else if constexpr ( VERSION == simd_version::sse3_ ) os << "X86 SSE3"; - else if constexpr ( VERSION == simd_version::ssse3_) os << "X86 SSSE3"; - else if constexpr ( VERSION == simd_version::sse41_) os << "X86 SSE4.1"; - else if constexpr ( VERSION == simd_version::sse42_) os << "X86 SSE4.2"; - else if constexpr ( VERSION == simd_version::avx_ ) os << "X86 AVX"; - else if constexpr ( VERSION == simd_version::avx2_ ) os << "X86 AVX2"; - else if constexpr ( VERSION == simd_version::vmx_ ) os << "PPC VMX"; - else if constexpr ( VERSION == simd_version::vsx_ ) os << "PPC VSX"; - else if constexpr ( VERSION == simd_version::neon_ ) os << "ARM NEON"; + if constexpr ( VERSION == simd_version::sse1_ ) os << "X86 SSE"; + else if constexpr ( VERSION == simd_version::sse2_ ) os << "X86 SSE2"; + else if constexpr ( VERSION == simd_version::sse3_ ) os << "X86 SSE3"; + else if constexpr ( VERSION == simd_version::ssse3_ ) os << "X86 SSSE3"; + else if constexpr ( VERSION == 
simd_version::sse41_ ) os << "X86 SSE4.1"; + else if constexpr ( VERSION == simd_version::sse42_ ) os << "X86 SSE4.2"; + else if constexpr ( VERSION == simd_version::avx_ ) os << "X86 AVX"; + else if constexpr ( VERSION == simd_version::avx2_ ) os << "X86 AVX2"; + else if constexpr ( VERSION == simd_version::avx512_ ) os << "X86 AVX512"; + else if constexpr ( VERSION == simd_version::vmx_ ) os << "PPC VMX"; + else if constexpr ( VERSION == simd_version::vsx_ ) os << "PPC VSX"; + else if constexpr ( VERSION == simd_version::neon_ ) os << "ARM NEON"; else return os << "Undefined SIMD instructions set"; if constexpr (spy::supports::aarch64_) os << " (with AARCH64 support)"; if constexpr (spy::supports::fma_) os << " (with FMA3 support)"; @@ -666,14 +835,15 @@ namespace spy template using x86_simd_info = detail::simd_info; constexpr inline auto x86_simd_ = x86_simd_info<>{}; - constexpr inline auto sse1_ = x86_simd_info{}; - constexpr inline auto sse2_ = x86_simd_info{}; - constexpr inline auto sse3_ = x86_simd_info{}; - constexpr inline auto ssse3_ = x86_simd_info{}; - constexpr inline auto sse41_ = x86_simd_info{}; - constexpr inline auto sse42_ = x86_simd_info{}; - constexpr inline auto avx_ = x86_simd_info{}; - constexpr inline auto avx2_ = x86_simd_info{}; + constexpr inline auto sse1_ = x86_simd_info{}; + constexpr inline auto sse2_ = x86_simd_info{}; + constexpr inline auto sse3_ = x86_simd_info{}; + constexpr inline auto ssse3_ = x86_simd_info{}; + constexpr inline auto sse41_ = x86_simd_info{}; + constexpr inline auto sse42_ = x86_simd_info{}; + constexpr inline auto avx_ = x86_simd_info{}; + constexpr inline auto avx2_ = x86_simd_info{}; + constexpr inline auto avx512_ = x86_simd_info{}; template using ppc_simd_info = detail::simd_info; constexpr inline auto ppc_simd_ = ppc_simd_info<>{}; diff --git a/include/spy/arch.hpp b/include/spy/arch.hpp index 72b72da..0a26f16 100644 --- a/include/spy/arch.hpp +++ b/include/spy/arch.hpp @@ -7,8 +7,8 @@ 
SPDX-License-Identifier: MIT */ //================================================================================================== -#ifndef SPY_ARCH_HPP_INLUDED -#define SPY_ARCH_HPP_INLUDED +#ifndef SPY_ARCH_HPP_INCLUDED +#define SPY_ARCH_HPP_INCLUDED #include diff --git a/include/spy/compiler.hpp b/include/spy/compiler.hpp index f73350c..1014d5f 100644 --- a/include/spy/compiler.hpp +++ b/include/spy/compiler.hpp @@ -7,8 +7,8 @@ SPDX-License-Identifier: MIT */ //================================================================================================== -#ifndef SPY_COMPILER_HPP_INLUDED -#define SPY_COMPILER_HPP_INLUDED +#ifndef SPY_COMPILER_HPP_INCLUDED +#define SPY_COMPILER_HPP_INCLUDED #include #include diff --git a/include/spy/data_model.hpp b/include/spy/data_model.hpp index 1b2bac4..2e67950 100644 --- a/include/spy/data_model.hpp +++ b/include/spy/data_model.hpp @@ -7,8 +7,8 @@ SPDX-License-Identifier: MIT */ //================================================================================================== -#ifndef SPY_DATA_MODEL_HPP_INLUDED -#define SPY_DATA_MODEL_HPP_INLUDED +#ifndef SPY_DATA_MODEL_HPP_INCLUDED +#define SPY_DATA_MODEL_HPP_INCLUDED #include diff --git a/include/spy/detail.hpp b/include/spy/detail.hpp index b04b9c6..7e0022e 100644 --- a/include/spy/detail.hpp +++ b/include/spy/detail.hpp @@ -7,8 +7,8 @@ SPDX-License-Identifier: MIT */ //================================================================================================== -#ifndef SPY_DETAIL_HPP_INLUDED -#define SPY_DETAIL_HPP_INLUDED +#ifndef SPY_DETAIL_HPP_INCLUDED +#define SPY_DETAIL_HPP_INCLUDED #include diff --git a/include/spy/libc.hpp b/include/spy/libc.hpp index be91b7c..071d54a 100644 --- a/include/spy/libc.hpp +++ b/include/spy/libc.hpp @@ -7,8 +7,8 @@ SPDX-License-Identifier: MIT */ //================================================================================================== -#ifndef SPY_LIBC_HPP_INLUDED -#define SPY_LIBC_HPP_INLUDED +#ifndef 
SPY_LIBC_HPP_INCLUDED +#define SPY_LIBC_HPP_INCLUDED // Make sure the proper header is included to detect GNU libc #include diff --git a/include/spy/os.hpp b/include/spy/os.hpp index 9efed7d..9bc3fbc 100644 --- a/include/spy/os.hpp +++ b/include/spy/os.hpp @@ -7,8 +7,8 @@ SPDX-License-Identifier: MIT */ //================================================================================================== -#ifndef SPY_OS_HPP_INLUDED -#define SPY_OS_HPP_INLUDED +#ifndef SPY_OS_HPP_INCLUDED +#define SPY_OS_HPP_INCLUDED #include diff --git a/include/spy/simd.hpp b/include/spy/simd.hpp index 41ef201..3fe292f 100644 --- a/include/spy/simd.hpp +++ b/include/spy/simd.hpp @@ -7,8 +7,8 @@ SPDX-License-Identifier: MIT */ //================================================================================================== -#ifndef SPY_SIMD_HPP_INLUDED -#define SPY_SIMD_HPP_INLUDED +#ifndef SPY_SIMD_HPP_INCLUDED +#define SPY_SIMD_HPP_INCLUDED #include #include @@ -20,10 +20,11 @@ namespace spy::detail enum class simd_isa { undefined_ = -1, x86_ = 1000, ppc_ = 2000, arm_ = 3000 }; enum class simd_version { undefined_ = -1 - , sse1_ = 1110, sse2_ = 1120, sse3_ = 1130, ssse3_ = 1131 - , sse41_ = 1141, sse42_ = 1142, avx_ = 1201, avx2_ = 1202 - , vmx_ = 2001, vsx_ = 2002 - , neon_ = 3001 + , sse1_ = 1110, sse2_ = 1120, sse3_ = 1130, ssse3_ = 1131 + , sse41_ = 1141, sse42_ = 1142, avx_ = 1201, avx2_ = 1202 + , avx512_ = 1300 + , vmx_ = 2001, vsx_ = 2002 + , neon_ = 3001 }; template @@ -34,17 +35,18 @@ namespace spy::detail friend std::ostream& operator<<(std::ostream& os, simd_info const&) { - if constexpr ( VERSION == simd_version::sse1_ ) os << "X86 SSE"; - else if constexpr ( VERSION == simd_version::sse2_ ) os << "X86 SSE2"; - else if constexpr ( VERSION == simd_version::sse3_ ) os << "X86 SSE3"; - else if constexpr ( VERSION == simd_version::ssse3_) os << "X86 SSSE3"; - else if constexpr ( VERSION == simd_version::sse41_) os << "X86 SSE4.1"; - else if constexpr ( VERSION == 
simd_version::sse42_) os << "X86 SSE4.2"; - else if constexpr ( VERSION == simd_version::avx_ ) os << "X86 AVX"; - else if constexpr ( VERSION == simd_version::avx2_ ) os << "X86 AVX2"; - else if constexpr ( VERSION == simd_version::vmx_ ) os << "PPC VMX"; - else if constexpr ( VERSION == simd_version::vsx_ ) os << "PPC VSX"; - else if constexpr ( VERSION == simd_version::neon_ ) os << "ARM NEON"; + if constexpr ( VERSION == simd_version::sse1_ ) os << "X86 SSE"; + else if constexpr ( VERSION == simd_version::sse2_ ) os << "X86 SSE2"; + else if constexpr ( VERSION == simd_version::sse3_ ) os << "X86 SSE3"; + else if constexpr ( VERSION == simd_version::ssse3_ ) os << "X86 SSSE3"; + else if constexpr ( VERSION == simd_version::sse41_ ) os << "X86 SSE4.1"; + else if constexpr ( VERSION == simd_version::sse42_ ) os << "X86 SSE4.2"; + else if constexpr ( VERSION == simd_version::avx_ ) os << "X86 AVX"; + else if constexpr ( VERSION == simd_version::avx2_ ) os << "X86 AVX2"; + else if constexpr ( VERSION == simd_version::avx512_ ) os << "X86 AVX512"; + else if constexpr ( VERSION == simd_version::vmx_ ) os << "PPC VMX"; + else if constexpr ( VERSION == simd_version::vsx_ ) os << "PPC VSX"; + else if constexpr ( VERSION == simd_version::neon_ ) os << "ARM NEON"; else return os << "Undefined SIMD instructions set"; if constexpr (spy::supports::aarch64_) os << " (with AARCH64 support)"; @@ -119,14 +121,15 @@ namespace spy using x86_simd_info = detail::simd_info; constexpr inline auto x86_simd_ = x86_simd_info<>{}; - constexpr inline auto sse1_ = x86_simd_info{}; - constexpr inline auto sse2_ = x86_simd_info{}; - constexpr inline auto sse3_ = x86_simd_info{}; - constexpr inline auto ssse3_ = x86_simd_info{}; - constexpr inline auto sse41_ = x86_simd_info{}; - constexpr inline auto sse42_ = x86_simd_info{}; - constexpr inline auto avx_ = x86_simd_info{}; - constexpr inline auto avx2_ = x86_simd_info{}; + constexpr inline auto sse1_ = x86_simd_info{}; + constexpr inline auto 
sse2_ = x86_simd_info{}; + constexpr inline auto sse3_ = x86_simd_info{}; + constexpr inline auto ssse3_ = x86_simd_info{}; + constexpr inline auto sse41_ = x86_simd_info{}; + constexpr inline auto sse42_ = x86_simd_info{}; + constexpr inline auto avx_ = x86_simd_info{}; + constexpr inline auto avx2_ = x86_simd_info{}; + constexpr inline auto avx512_ = x86_simd_info{}; template using ppc_simd_info = detail::simd_info; diff --git a/include/spy/simd/arm.hpp b/include/spy/simd/arm.hpp index 1014487..15c88dc 100644 --- a/include/spy/simd/arm.hpp +++ b/include/spy/simd/arm.hpp @@ -7,8 +7,8 @@ SPDX-License-Identifier: MIT */ //================================================================================================== -#ifndef SPY_SIMD_ARM_HPP_INLUDED -#define SPY_SIMD_ARM_HPP_INLUDED +#ifndef SPY_SIMD_ARM_HPP_INCLUDED +#define SPY_SIMD_ARM_HPP_INCLUDED #if !defined(SPY_SIMD_DETECTED) && (defined(__ARM_NEON__) || defined(_M_ARM) || defined(__aarch64__)) # define SPY_SIMD_IS_ARM_NEON diff --git a/include/spy/simd/ppc.hpp b/include/spy/simd/ppc.hpp index 3c73d96..3c55c51 100644 --- a/include/spy/simd/ppc.hpp +++ b/include/spy/simd/ppc.hpp @@ -7,8 +7,8 @@ SPDX-License-Identifier: MIT */ //================================================================================================== -#ifndef SPY_SIMD_PPC_HPP_INLUDED -#define SPY_SIMD_PPC_HPP_INLUDED +#ifndef SPY_SIMD_PPC_HPP_INCLUDED +#define SPY_SIMD_PPC_HPP_INCLUDED #if !defined(SPY_SIMD_DETECTED) && defined(__VSX__) # define SPY_SIMD_IS_PPC_VSX diff --git a/include/spy/simd/x86.hpp b/include/spy/simd/x86.hpp index e86c4d5..3ef1013 100644 --- a/include/spy/simd/x86.hpp +++ b/include/spy/simd/x86.hpp @@ -7,8 +7,94 @@ SPDX-License-Identifier: MIT */ //================================================================================================== -#ifndef SPY_SIMD_X86_HPP_INLUDED -#define SPY_SIMD_X86_HPP_INLUDED +#ifndef SPY_SIMD_X86_HPP_INCLUDED +#define SPY_SIMD_X86_HPP_INCLUDED + +#if 
!defined(SPY_SIMD_DETECTED) && defined(__AVX512F__) +# define SPY_SIMD_IS_X86_AVX512 +# define SPY_SIMD_DETECTED ::spy::detail::simd_version::avx512_ + +#if defined(__AVX512BW__) +# define SPY_SIMD_IS_X86_AVX512_BW +# define SPY_SIMD_X86_AVX512_SUB 0x0001 +#endif + +#if defined(__AVX512CD__) +# define SPY_SIMD_IS_X86_AVX512_CD +# define SPY_SIMD_X86_AVX512_SUB 0x0002 +#endif + +#if defined(__AVX512DQ__) +# define SPY_SIMD_IS_X86_AVX512_DQ +# define SPY_SIMD_X86_AVX512_SUB 0x0004 +#endif + +#if defined(__AVX512ER__) +# define SPY_SIMD_IS_X86_AVX512_ER +# define SPY_SIMD_X86_AVX512_SUB 0x0008 +#endif + +#if defined(__AVX512IFMA__) +# define SPY_SIMD_IS_X86_AVX512_IFMA +# define SPY_SIMD_X86_AVX512_SUB 0x0010 +#endif + +#if defined(__AVX512PF__) +# define SPY_SIMD_IS_X86_AVX512_PF +# define SPY_SIMD_X86_AVX512_SUB 0x0020 +#endif + +#if defined(__AVX512VL__) +# define SPY_SIMD_IS_X86_AVX512_VL +# define SPY_SIMD_X86_AVX512_SUB 0x0040 +#endif + +#if defined(__AVX512VPOPCNTDQ__) +# define SPY_SIMD_IS_X86_AVX512_POPCNTDQ +# define SPY_SIMD_X86_AVX512_SUB 0x0080 +#endif + +#if defined(__AVX5124FMAPS__) +# define SPY_SIMD_IS_X86_AVX512_4FMAPS +# define SPY_SIMD_X86_AVX512_SUB 0x0100 +#endif + +#if defined(__AVX5124VNNIW__) +# define SPY_SIMD_IS_X86_AVX512_VNNIW +# define SPY_SIMD_X86_AVX512_SUB 0x0200 +#endif + +#if defined(__AVX512VBMI__) +# define SPY_SIMD_IS_X86_AVX512_VBMI +# define SPY_SIMD_X86_AVX512_SUB 0x0400 +#endif + +#if defined(__AVX512BF16__) +# define SPY_SIMD_IS_X86_AVX512_BF16 +# define SPY_SIMD_X86_AVX512_SUB 0x0800 +#endif + +#if defined(__AVX512BITALG__) +# define SPY_SIMD_IS_X86_AVX512_BITALG +# define SPY_SIMD_X86_AVX512_SUB 0x1000 +#endif + +#if defined(__AVX512VBMI2__) +# define SPY_SIMD_IS_X86_AVX512_VBMI2 +# define SPY_SIMD_X86_AVX512_SUB 0x2000 +#endif + +#if defined(__AVX512VNNI__) +# define SPY_SIMD_IS_X86_AVX512_VNNI +# define SPY_SIMD_X86_AVX512_SUB 0x4000 +#endif + +#if defined(__AVX512VP2INTERSECT__) +# define 
SPY_SIMD_IS_X86_AVX512_VP2INTERSECT +# define SPY_SIMD_X86_AVX512_SUB 0x8000 +#endif + +#endif #if !defined(SPY_SIMD_DETECTED) && defined(__AVX2__) # define SPY_SIMD_IS_X86_AVX2 @@ -80,6 +166,122 @@ namespace spy::supports #else constexpr inline auto xop_ = false; #endif + +namespace avx512 +{ +#if defined(__AVX512BW__) +# define SPY_SIMD_IS_X86_AVX512_BW + constexpr inline auto bw_ = true; +#else + constexpr inline auto bw_ = false; +#endif + +#if defined(__AVX512CD__) +# define SPY_SIMD_IS_X86_AVX512_CD + constexpr inline auto cd_ = true; +#else + constexpr inline auto cd_ = false; +#endif + +#if defined(__AVX512DQ__) +# define SPY_SIMD_IS_X86_AVX512_DQ + constexpr inline auto dq_ = true; +#else + constexpr inline auto dq_ = false; +#endif + +#if defined(__AVX512ER__) +# define SPY_SIMD_IS_X86_AVX512_ER + constexpr inline auto er_ = true; +#else + constexpr inline auto er_ = false; +#endif + +#if defined(__AVX512IFMA__) +# define SPY_SIMD_IS_X86_AVX512_IFMA + constexpr inline auto ifma_ = true; +#else + constexpr inline auto ifma_ = false; +#endif + +#if defined(__AVX512PF__) +# define SPY_SIMD_IS_X86_AVX512_PF + constexpr inline auto pf_ = true; +#else + constexpr inline auto pf_ = false; +#endif + +#if defined(__AVX512VL__) +# define SPY_SIMD_IS_X86_AVX512_VL + constexpr inline auto vl_ = true; +#else + constexpr inline auto vl_ = false; +#endif + +#if defined(__AVX512VPOPCNTDQ__) +# define SPY_SIMD_IS_X86_AVX512_POPCNTDQ + constexpr inline auto popcntdq_ = true; +#else + constexpr inline auto popcntdq_ = false; +#endif + +#if defined(__AVX5124FMAPS__) +# define SPY_SIMD_IS_X86_AVX512_4FMAPS + constexpr inline auto _4fmaps_ = true; +#else + constexpr inline auto _4fmaps_ = false; +#endif + +#if defined(__AVX5124VNNIW__) +# define SPY_SIMD_IS_X86_AVX512_VNNIW + constexpr inline auto vnniw_ = true; +#else + constexpr inline auto vnniw_ = false; +#endif + +#if defined(__AVX512VBMI__) +# define SPY_SIMD_IS_X86_AVX512_VBMI + constexpr inline auto vbmi_ = true; 
+#else + constexpr inline auto vbmi_ = false; +#endif + +#if defined(__AVX512BF16__) +# define SPY_SIMD_IS_X86_AVX512_BF16 + constexpr inline auto bf16_ = true; +#else + constexpr inline auto bf16_ = false; +#endif + +#if defined(__AVX512BITALG__) +# define SPY_SIMD_IS_X86_AVX512_BITALG + constexpr inline auto bitalg_ = true; +#else + constexpr inline auto bitalg_ = false; +#endif + +#if defined(__AVX512VBMI2__) +# define SPY_SIMD_IS_X86_AVX512_VBMI2 + constexpr inline auto vbmi2_ = true; +#else + constexpr inline auto vbmi2_ = false; +#endif + +#if defined(__AVX512VNNI__) +# define SPY_SIMD_IS_X86_AVX512_VNNI + constexpr inline auto vnni_ = true; +#else + constexpr inline auto vnni_ = false; +#endif + +#if defined(__AVX512VP2INTERSECT__) +# define SPY_SIMD_IS_X86_AVX512_VP2INTERSECT + constexpr inline auto vpintersect_ = true; +#else + constexpr inline auto vpintersect_ = false; +#endif +} + } #endif diff --git a/include/spy/spy.hpp b/include/spy/spy.hpp index fdea410..6534b6d 100644 --- a/include/spy/spy.hpp +++ b/include/spy/spy.hpp @@ -7,8 +7,8 @@ SPDX-License-Identifier: MIT */ //================================================================================================== -#ifndef SPY_SPY_HPP_INLUDED -#define SPY_SPY_HPP_INLUDED +#ifndef SPY_SPY_HPP_INCLUDED +#define SPY_SPY_HPP_INCLUDED #include #include diff --git a/include/spy/stdlib.hpp b/include/spy/stdlib.hpp index e296326..5987755 100644 --- a/include/spy/stdlib.hpp +++ b/include/spy/stdlib.hpp @@ -7,8 +7,8 @@ SPDX-License-Identifier: MIT */ //================================================================================================== -#ifndef SPY_STDLIB_HPP_INLUDED -#define SPY_STDLIB_HPP_INLUDED +#ifndef SPY_STDLIB_HPP_INCLUDED +#define SPY_STDLIB_HPP_INCLUDED // Make sure the proper header is included to detect libstd #include @@ -44,15 +44,15 @@ namespace spy::detail return os << "Undefined Standard C++ Library"; } - template using libcpp_t = stdlib_info; - template using gnucpp_t = 
stdlib_info; + template using libcpp_t = stdlib_info; + template using gnucpp_t = stdlib_info; } namespace spy { #if defined(_LIBCPP_VERSION) #define SPY_STDLIB_IS_LIBCPP - using stdlib_type = detail::stdcpp_t<(_LIBCPP_VERSION/1000)%10,0,_LIBCPP_VERSION%1000,0>; + using stdlib_type = detail::libcpp_t<(_LIBCPP_VERSION/1000)%10,0,_LIBCPP_VERSION%1000,0>; #elif defined(__GLIBCXX__) #define SPY_STDLIB_IS_GLIBCXX #define SPY0 (__GLIBCXX__/100) diff --git a/test/simd.cpp b/test/simd.cpp index 25ddb89..034acfc 100644 --- a/test/simd.cpp +++ b/test/simd.cpp @@ -15,17 +15,34 @@ int main() std::cout << "Supported SIMD instructions set: " << spy::simd_instruction_set << std::endl; std::cout << "Check that X86 SIMD extension detection is correct: " << std::endl; { - std::cout << "X86 SIMD status: " << std::boolalpha << (spy::simd_instruction_set == spy::x86_simd_ ) << std::endl; - std::cout << "SSE1 status: " << std::boolalpha << (spy::simd_instruction_set >= spy::sse1_ ) << std::endl; - std::cout << "SSE2 status: " << std::boolalpha << (spy::simd_instruction_set >= spy::sse2_ ) << std::endl; - std::cout << "SSE3 status: " << std::boolalpha << (spy::simd_instruction_set >= spy::sse3_ ) << std::endl; - std::cout << "SSE4.1 status: " << std::boolalpha << (spy::simd_instruction_set >= spy::sse41_) << std::endl; - std::cout << "SSE4.2 status: " << std::boolalpha << (spy::simd_instruction_set >= spy::sse42_) << std::endl; - std::cout << "AVX status: " << std::boolalpha << (spy::simd_instruction_set >= spy::avx_ ) << std::endl; - std::cout << "AVX2 status: " << std::boolalpha << (spy::simd_instruction_set >= spy::avx2_ ) << std::endl; + std::cout << "X86 SIMD status: " << std::boolalpha << (spy::simd_instruction_set == spy::x86_simd_) << std::endl; + std::cout << "SSE1 status: " << std::boolalpha << (spy::simd_instruction_set >= spy::sse1_ ) << std::endl; + std::cout << "SSE2 status: " << std::boolalpha << (spy::simd_instruction_set >= spy::sse2_ ) << std::endl; + std::cout << "SSE3 
status: " << std::boolalpha << (spy::simd_instruction_set >= spy::sse3_ ) << std::endl; + std::cout << "SSE4.1 status: " << std::boolalpha << (spy::simd_instruction_set >= spy::sse41_ ) << std::endl; + std::cout << "SSE4.2 status: " << std::boolalpha << (spy::simd_instruction_set >= spy::sse42_ ) << std::endl; + std::cout << "AVX status: " << std::boolalpha << (spy::simd_instruction_set >= spy::avx_ ) << std::endl; + std::cout << "AVX2 status: " << std::boolalpha << (spy::simd_instruction_set >= spy::avx2_ ) << std::endl; std::cout << "FMA3 support: " << std::boolalpha << (spy::supports::fma_ ) << std::endl; std::cout << "FMA4 support: " << std::boolalpha << (spy::supports::fma4_ ) << std::endl; std::cout << "XOP support: " << std::boolalpha << (spy::supports::xop_ ) << std::endl; + std::cout << "AVX512 status: " << std::boolalpha << (spy::simd_instruction_set >= spy::avx512_ ) << std::endl; + std::cout << "|-BW support: " << std::boolalpha << (spy::supports::avx512::bw_ ) << std::endl; + std::cout << "|-CD support: " << std::boolalpha << (spy::supports::avx512::cd_ ) << std::endl; + std::cout << "|-DQ support: " << std::boolalpha << (spy::supports::avx512::dq_ ) << std::endl; + std::cout << "|-ER support: " << std::boolalpha << (spy::supports::avx512::er_ ) << std::endl; + std::cout << "|-IFMA support: " << std::boolalpha << (spy::supports::avx512::ifma_ ) << std::endl; + std::cout << "|-PF support: " << std::boolalpha << (spy::supports::avx512::pf_ ) << std::endl; + std::cout << "|-VL support: " << std::boolalpha << (spy::supports::avx512::vl_ ) << std::endl; + std::cout << "|-POPCNTDQ support: " << std::boolalpha << (spy::supports::avx512::popcntdq_ ) << std::endl; + std::cout << "|-4FMAPS support: " << std::boolalpha << (spy::supports::avx512::_4fmaps_ ) << std::endl; + std::cout << "|-VNNIW support: " << std::boolalpha << (spy::supports::avx512::vnniw_ ) << std::endl; + std::cout << "|-VBMI support: " << std::boolalpha << (spy::supports::avx512::vbmi_ ) << 
std::endl; + std::cout << "|-BF16 support: " << std::boolalpha << (spy::supports::avx512::bf16_ ) << std::endl; + std::cout << "|-BITALG support: " << std::boolalpha << (spy::supports::avx512::bitalg_ ) << std::endl; + std::cout << "|-VBMI2 support: " << std::boolalpha << (spy::supports::avx512::vbmi2_ ) << std::endl; + std::cout << "|-VNNI support: " << std::boolalpha << (spy::supports::avx512::vnni_ ) << std::endl; + std::cout << "|-VPINTERSECT support: " << std::boolalpha << (spy::supports::avx512::vpintersect_ ) << std::endl; } std::cout << std::endl;