diff --git a/.github/scripts/get_cpu_info.sh b/.github/scripts/get_cpu_info.sh new file mode 100755 index 00000000..82c46b84 --- /dev/null +++ b/.github/scripts/get_cpu_info.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Script to get CPU information using platform-agnostic python packages + +# Install python packages if not present in the environment +if ! python -m pip show archspec > /dev/null 2>&1; then + python -m pip install archspec +fi + +if ! python -m pip show py-cpuinfo > /dev/null 2>&1; then + python -m pip install py-cpuinfo +fi + +# Print host microarchitecture +python -c "import archspec.cpu; \ + print('Host Microarchitecture[archspec]:', archspec.cpu.host().name)" + +# Print full CPU information +python -c "import pprint, cpuinfo; \ + print('CPU info[py-cpuinfo]:'); \ + pprint.pprint(cpuinfo.get_cpu_info(), indent=4, compact=True)" diff --git a/.github/scripts/install_sde.sh b/.github/scripts/install_sde.sh new file mode 100755 index 00000000..25310c5a --- /dev/null +++ b/.github/scripts/install_sde.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# Copyright 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +wget --content-disposition "https://downloadmirror.intel.com/850782/sde-external-9.53.0-2025-03-16-lin.tar.xz" +tar -xf sde-external-*-lin.tar.xz +cd sde-external-*/ +echo "$PWD" >> $GITHUB_PATH diff --git a/.github/workflows/build-linux-arm.yml b/.github/workflows/build-linux-arm.yml index 07811f00..32201f97 100644 --- a/.github/workflows/build-linux-arm.yml +++ b/.github/workflows/build-linux-arm.yml @@ -47,6 +47,10 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Get CPU info + run: | + bash ${GITHUB_WORKSPACE}/.github/scripts/get_cpu_info.sh + - name: Configure build working-directory: ${{ runner.temp }} env: @@ -65,8 +69,26 @@ jobs: working-directory: ${{ runner.temp }}/build run: make -j$(nproc) + - name: Validate MicroArch symbols + working-directory: ${{ runner.temp }}/build + run: python ${GITHUB_WORKSPACE}/tools/validate_microarch_symbols.py . + - name: Run tests env: CTEST_OUTPUT_ON_FAILURE: 1 working-directory: ${{ runner.temp }}/build/tests run: ctest -C ${{ matrix.build_type }} + + - name: Build Python Bindings + env: + CXX: ${{ matrix.cxx }} + CC: ${{ matrix.cc }} + run: | + cd bindings/python + python -m pip install . + + - name: Run Python Microarch Test + run: | + cd bindings/python + python -c "import svs; svs.microarch.describe()" + python -m unittest discover -p "test_microarch.py" -s . diff --git a/.github/workflows/build-linux.yml b/.github/workflows/build-linux.yml index a03bdd78..16383f69 100644 --- a/.github/workflows/build-linux.yml +++ b/.github/workflows/build-linux.yml @@ -56,6 +56,12 @@ jobs: source /opt/intel/oneapi/setvars.sh printenv >> $GITHUB_ENV + - name: Install Intel(R) SDE + run: source ${GITHUB_WORKSPACE}/.github/scripts/install_sde.sh + + - name: Get CPU info + run: bash ${GITHUB_WORKSPACE}/.github/scripts/get_cpu_info.sh + - name: Configure build working-directory: ${{ runner.temp }} env: @@ -75,6 +81,10 @@ jobs: working-directory: ${{ runner.temp }}/build run: make -j$(nproc) + - name: Validate MicroArch symbols + working-directory: ${{ runner.temp }}/build + run: python ${GITHUB_WORKSPACE}/tools/validate_microarch_symbols.py . + - name: Run tests env: CTEST_OUTPUT_ON_FAILURE: 1 @@ -86,3 +96,22 @@ jobs: CTEST_OUTPUT_ON_FAILURE: 1 working-directory: ${{ runner.temp }}/build/examples/cpp run: ctest -C RelWithDebugInfo + + - name: Build Python Bindings + env: + CXX: ${{ matrix.cxx }} + CC: ${{ matrix.cc }} + run: | + cd bindings/python + python -m pip install . + + - name: Run Python Microarch Test with SDE + run: | + cd bindings/python + for flag in nhm hsw skx clx icl; do + echo "SDE emulation: $flag" + export SDE_FLAG=$flag + sde64 -$flag -- python -c "import svs; svs.microarch.describe()" + sde64 -$flag -- python -m unittest discover -p "test_microarch.py" -s . + sde64 -$flag -- python -m unittest discover -p "example*.py" -s ${GITHUB_WORKSPACE}/examples/python + done diff --git a/.github/workflows/build-macos.yaml b/.github/workflows/build-macos.yaml index a382d525..9069fda1 100644 --- a/.github/workflows/build-macos.yaml +++ b/.github/workflows/build-macos.yaml @@ -46,6 +46,10 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Get CPU info + run: | + bash ${GITHUB_WORKSPACE}/.github/scripts/get_cpu_info.sh + - name: Install Compiler run: | echo "Installing ${{ matrix.package }}..." @@ -83,3 +87,28 @@ jobs: CTEST_OUTPUT_ON_FAILURE: 1 working-directory: ${{ runner.temp }}/build/tests run: ctest -C ${{ matrix.build_type }} + + - name: Build Python Bindings + env: + CXX: ${{ matrix.cxx }} + CC: ${{ matrix.cc }} + run: | + if [[ "${{ matrix.needs_prefix }}" == "true" ]]; then + # For non-default packages like llvm@15, get the install prefix + COMPILER_PREFIX=$(brew --prefix ${{ matrix.package }}) + export CC="${COMPILER_PREFIX}/bin/${{ matrix.cc_name }}" + export CXX="${COMPILER_PREFIX}/bin/${{ matrix.cxx_name }}" + else + # For versioned GCC installs, the name is usually directly available + export CC="${{ matrix.cc_name }}" + export CXX="${{ matrix.cxx_name }}" + fi + + cd bindings/python + python -m pip install . + + - name: Run Python Microarch Test + run: | + cd bindings/python + python -c "import svs; svs.microarch.describe()" + python -m unittest discover -p "test_microarch.py" -s . diff --git a/.github/workflows/cibuildwheel.yml b/.github/workflows/cibuildwheel.yml index 87198b98..8ca5dc92 100644 --- a/.github/workflows/cibuildwheel.yml +++ b/.github/workflows/cibuildwheel.yml @@ -43,6 +43,10 @@ jobs: - name: Install cibuildwheel run: python -m pip install cibuildwheel + - name: Get CPU info + run: | + bash ${GITHUB_WORKSPACE}/.github/scripts/get_cpu_info.sh + # Install inside the temporary working directory. - name: Build Wheel env: diff --git a/.gitignore b/.gitignore index 6aa7c701..206ce89e 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,6 @@ __pycache__/ # Example generated files. example_data_*/ + +# Auto-generated microarch macros header +include/svs/lib/microarch_macros.h diff --git a/.licenserc.yaml b/.licenserc.yaml index 815de7ee..70c1cc96 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -45,6 +45,12 @@ header: - 'THIRD-PARTY-PROGRAMS' - '.github/renovate.json' - 'cmake/mkl_functions' + - 'cmake/microarch_list_aarch64' + - 'cmake/microarch_list_aarch64_darwin' + - 'cmake/microarch_list_x86_64' + - 'cmake/microarch_targets_aarch64' + - 'cmake/microarch_targets_aarch64_darwin' + - 'cmake/microarch_targets_x86_64' - 'cmake/patches/tomlplusplus_v330.patch' - 'docker/x86_64/manylinux2014/oneAPI.repo' - 'docs/cpp/index/loader-compatibility.csv' diff --git a/CMakeLists.txt b/CMakeLists.txt index 314a6b33..080893bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,9 +34,14 @@ set(SVS_VERSION_MAJOR ${PROJECT_VERSION_MAJOR}) set(SVS_VERSION_MINOR ${PROJECT_VERSION_MINOR}) set(SVS_VERSION_PATCH ${PROJECT_VERSION_PATCH}) +# SVS targets: +# - svs_devel (${SVS_LIB}) - the internal target +# - svs_export (svs::svs) - the exported target for end users of the library add_library(${SVS_LIB} INTERFACE) -set_target_properties(${SVS_LIB} PROPERTIES EXPORT_NAME svs) -add_library(svs::svs ALIAS ${SVS_LIB}) +add_library(svs_export INTERFACE) +set_target_properties(svs_export PROPERTIES EXPORT_NAME svs) +target_link_libraries(svs_export INTERFACE ${SVS_LIB}) +add_library(svs::svs ALIAS svs_export) # Runtime include directories are established in the installation logic. target_include_directories( @@ -61,12 +66,20 @@ target_compile_options( "-DSVS_VERSION_PATCH=${SVS_VERSION_PATCH}" ) +# Provide microarchitecture-specific template instantiations. +set(SVS_MICROARCH_INSTANCE_FILES + cmake/microarch_instantiations/cosine.cpp + cmake/microarch_instantiations/euclidean.cpp + cmake/microarch_instantiations/inner_product.cpp +) + ##### ##### Options and extra build steps ##### include("cmake/options.cmake") +include("cmake/microarch.cmake") include("cmake/clang-tidy.cmake") include("cmake/eve.cmake") include("cmake/pthread.cmake") @@ -112,7 +125,7 @@ set(LIB_CONFIG_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/svs") # Install headers and target information. install( - TARGETS svs_devel svs_compile_options svs_native_options + TARGETS svs_export svs_devel svs_compile_options EXPORT svs-targets INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} ) @@ -163,4 +176,3 @@ install(FILES ${SVS_CMAKE_FIND_FILES} DESTINATION "${LIB_CONFIG_INSTALL_DIR}" ) - diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 5c042c29..56c8ca32 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -102,9 +102,9 @@ endif() target_link_libraries( svs_benchmark_library PUBLIC - ${SVS_LIB} + svs::svs svs_compile_options - svs_native_options + svs_microarch_options_base fmt::fmt ) @@ -112,3 +112,5 @@ target_link_libraries( set(EXE_FILES src/main.cpp) add_executable(svs_benchmark ${EXE_FILES}) target_link_libraries(svs_benchmark PRIVATE svs_benchmark_library) + +svs_register_static_dimensions(96 100 128 200 768) diff --git a/bindings/python/CMakeLists.txt b/bindings/python/CMakeLists.txt index 495eec2c..2be364aa 100644 --- a/bindings/python/CMakeLists.txt +++ b/bindings/python/CMakeLists.txt @@ -24,68 +24,9 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(pybind11) -# Try to find the Python executable. -# -# If it's given as part of the Cmake arguments given by "scikit build", then use that. -# Otherwise, fall back to using plain old "python". -# If *THAT* doesn't work, give up. -if(DEFINED PYTHON_EXECUTABLE) - set(SVS_PYTHON_EXECUTABLE "${PYTHON_EXECUTABLE}") -else() - set(SVS_PYTHON_EXECUTABLE "python") -endif() - -# The micro architectures to compile for. -if(NOT DEFINED SVS_MICROARCHS) - set(SVS_MICROARCHS native) -endif() - # Include the SVS library directly. add_subdirectory("../.." "${CMAKE_CURRENT_BINARY_DIR}/svs") -# Run the python script to get optimization flags for the desired back-ends. -# -# FLAGS_SCRIPT - Path to the Python script that will take the compiler, compiler version, -# and list of desired microarchitectures and generate optimization flags for each -# microarchitecture. -# -# FLAGS_TEXT_FILE - List of optimization flags for each architecture. -# Expected format: -# -march=arch1,-mtune=arch1 -# -march=arch2,-mtune=arch2 -# ... -# -march=archN,-mtune=archN -# -# The number of lines should be equal to the number of microarchitectures. -# NOTE: The entries within each line are separated by a comma on purpose to allow CMake -# to read the whole file as a List and then use string replacement on the commas to turn -# each line into a list in its own right. -# -# TEMP_JSON - JSON Manifest file describing the generated binaries. This is meant to be -# included in the Python package to allow the Python code to reason about the packaged -# libraries and select the correct one for loading. -# -set(FLAGS_SCRIPT "${CMAKE_CURRENT_LIST_DIR}/microarch.py") -set(FLAGS_TEXT_FILE "${CMAKE_CURRENT_BINARY_DIR}/optimization_flags.txt") -set(FLAGS_MANIFEST_JSON "${CMAKE_CURRENT_BINARY_DIR}/flags_manifest.json") - -execute_process( - COMMAND - ${SVS_PYTHON_EXECUTABLE} - ${FLAGS_SCRIPT} - ${FLAGS_TEXT_FILE} - ${FLAGS_MANIFEST_JSON} - --compiler ${CMAKE_CXX_COMPILER_ID} - --compiler-version ${CMAKE_CXX_COMPILER_VERSION} - --microarchitectures ${SVS_MICROARCHS} - COMMAND_ERROR_IS_FATAL ANY -) - -file(STRINGS "${FLAGS_TEXT_FILE}" OPTIMIZATION_FLAGS) -message("Flags: ${OPTIMIZATION_FLAGS}") -list(LENGTH OPTIMIZATION_FLAGS OPT_FLAGS_LENGTH) -message("Length of flags: ${OPT_FLAGS_LENGTH}") - # C++ files makind up the python bindings. set(CPP_FILES src/allocator.cpp @@ -98,51 +39,33 @@ set(CPP_FILES src/svs_mkl.cpp ) -# Generate a shared library for each target microarchitecture. -foreach(MICRO OPT_FLAGS IN ZIP_LISTS SVS_MICROARCHS OPTIMIZATION_FLAGS) - set(LIB_NAME "_svs_${MICRO}") +set(LIB_NAME "_svs") +pybind11_add_module(${LIB_NAME} MODULE ${CPP_FILES}) +target_link_libraries(${LIB_NAME} PRIVATE pybind11::module) +target_link_libraries(${LIB_NAME} PUBLIC svs::svs) +# Dependency "fmt::fmt" obtained from "svs" +target_link_libraries(${LIB_NAME} PRIVATE svs::compile_options fmt::fmt svs::microarch_options_base) +target_include_directories( + ${LIB_NAME} + PUBLIC $ +) - pybind11_add_module(${LIB_NAME} MODULE ${CPP_FILES}) - target_link_libraries(${LIB_NAME} PUBLIC svs::svs) - # Dependency "fmt::fmt" obtained from "svs" - target_link_libraries(${LIB_NAME} PRIVATE svs::compile_options fmt::fmt) +if(DEFINED SKBUILD) + install(TARGETS ${LIB_NAME} DESTINATION .) - string(REPLACE "," ";" OPT_FLAGS ${OPT_FLAGS}) - message("OPT Flags: ${OPT_FLAGS}") - target_compile_options(${LIB_NAME} PRIVATE ${OPT_FLAGS}) + # The extension module may need to load build or included libraries when loaded. - # Header files. - target_include_directories( + # Placing build depedencies in the package and using relative RPATHs that + # don't point outside of the package means that the built package is + # relocatable. This allows for safe binary redistribution. + set_target_properties( ${LIB_NAME} - PUBLIC $ + PROPERTIES + INSTALL_RPATH "$ORIGIN/${CMAKE_INSTALL_LIBDIR}" ) - - # Comunicate to the C++ library the desired name of the library - target_compile_options(${LIB_NAME} PRIVATE "-DSVS_MODULE_NAME=${LIB_NAME}") - - # If scikit build is running the compilation process, - if(DEFINED SKBUILD) - install(TARGETS ${LIB_NAME} DESTINATION .) - - # The extension module may need to load build or included libraries when loaded. - - # Placing build depedencies in the package and using relative RPATHs that - # don't point outside of the package means that the built package is - # relocatable. This allows for safe binary redistribution. - set_target_properties( - ${LIB_NAME} - PROPERTIES - INSTALL_RPATH "$ORIGIN/${CMAKE_INSTALL_LIBDIR}" - ) - endif() -endforeach() +endif() if(DEFINED SKBUILD) - # Install the manifest JSON file. - # This is kind of a hack to avoid the needing to explicitly move JSON file into the - # source folder of the python library. - install(FILES ${FLAGS_MANIFEST_JSON} DESTINATION .) - # Install header files. install( DIRECTORY "${CMAKE_CURRENT_LIST_DIR}/include/svs" diff --git a/bindings/python/setup.py b/bindings/python/setup.py index 5d310749..7e8aa254 100644 --- a/bindings/python/setup.py +++ b/bindings/python/setup.py @@ -13,7 +13,6 @@ # limitations under the License. from skbuild import setup -import archspec.cpu as cpu import os # If building in a cibuildwheel context, compile multiple versions of the library for @@ -25,27 +24,6 @@ "-DCMAKE_EXPORT_COMPILE_COMMANDS=YES", ] -# Utility to convert micro-architecture strings to -def target(arch): - return cpu.TARGETS[arch] - -# N.B.: cibuildwheel must configure the multi-arch environment variable. -# Also, the micro-architectures defined below should be in order of preference. -if os.environ.get("SVS_MULTIARCH", None) is not None: - svs_microarchs = [ - "cascadelake", - "x86_64_v3", # conservative base CPU for x86 CPUs. - ] - - # Add the current host to the list of micro-architecture if it doesn't already exist. - last_target = target(svs_microarchs[-1]) - host_name = cpu.host().name - if host_name not in svs_microarchs and target(host_name) < last_target: - svs_microarchs.append(host_name) - - cmake_array = ";".join(svs_microarchs) - cmake_args.append(f"-DSVS_MICROARCHS={cmake_array}") - # Determine the root of the repository base_dir = os.path.relpath(os.path.join(os.path.dirname(__file__), '..', '..')) diff --git a/bindings/python/src/python_bindings.cpp b/bindings/python/src/python_bindings.cpp index e1ac92b6..fb062e89 100644 --- a/bindings/python/src/python_bindings.cpp +++ b/bindings/python/src/python_bindings.cpp @@ -26,6 +26,7 @@ // SVS dependencies #include "svs/core/distance.h" #include "svs/core/io.h" +#include "svs/lib/arch.h" #include "svs/lib/array.h" #include "svs/lib/datatype.h" #include "svs/lib/float16.h" @@ -42,17 +43,9 @@ // stl #include +#include #include -// Get the expected name of the library -// Make sure CMake stays up to date with defining this parameter. -// -// The variable allows us to customize the name of the python module to support -// micro-architecture versioning. -#if !defined(SVS_MODULE_NAME) -#define SVS_MODULE_NAME _svs_native -#endif - namespace py = pybind11; namespace { @@ -144,7 +137,7 @@ class ScopedModuleNameOverride { } // namespace -PYBIND11_MODULE(SVS_MODULE_NAME, m) { +PYBIND11_MODULE(_svs, m) { // Internall, the top level `__init__.py` imports everything from the C++ module named // `_svs`. // @@ -196,6 +189,62 @@ Convert the `fvecs` file on disk with 32-bit floating point entries to a `fvecs` wrap_conversion(m); + m.def("_print_cpu_extensions_status", []() { + svs::arch::write_extensions_status(std::cout); + }); + + // Wrapper for svs::arch::MicroArchEnvironment + py::class_( + m, "microarch", "Microarchitecture management singleton" + ) + .def_static( + "get", + []() -> svs::arch::MicroArchEnvironment& { + return svs::arch::MicroArchEnvironment::get_instance(); + }, + py::return_value_policy::reference + ) + .def_property_static( + "current", + [](py::object) { + auto& env = svs::arch::MicroArchEnvironment::get_instance(); + return svs::arch::microarch_to_string(env.get_microarch()); + }, + [](py::object, const std::string& arch_name) { + auto& env = svs::arch::MicroArchEnvironment::get_instance(); + env.set_microarch(arch_name); + }, + "Gets or sets the current microarchitecture." + ) + .def_property_readonly_static( + "supported", + [](py::object) { + auto& env = svs::arch::MicroArchEnvironment::get_instance(); + std::vector result; + for (const auto& arch : env.get_supported_microarchs()) { + result.push_back(svs::arch::microarch_to_string(arch)); + } + return result; + }, + "Returns a list of supported microarchitectures." + ) + .def_property_readonly_static( + "compiled", + [](py::object) { + auto& env = svs::arch::MicroArchEnvironment::get_instance(); + std::vector result; + for (const auto& arch : env.get_compiled_microarchs()) { + result.push_back(svs::arch::microarch_to_string(arch)); + } + return result; + }, + "Returns a list of compiled microarchitectures." + ) + .def_static("describe", []() { + auto& env = svs::arch::MicroArchEnvironment::get_instance(); + env.describe(std::cout); + }); + // Allocators svs::python::allocators::wrap(m); diff --git a/bindings/python/src/svs/__init__.py b/bindings/python/src/svs/__init__.py index dd9948e7..25c61af6 100644 --- a/bindings/python/src/svs/__init__.py +++ b/bindings/python/src/svs/__init__.py @@ -12,13 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Dynamic loading logic. -from .loader import library, current_backend, available_backends +import svs._svs as _svs # Reexport all public functions and structs from the inner module. -lib = library() globals().update( - {k : v for (k, v) in lib.__dict__.items() if not k.startswith("__")} + {k : v for (k, v) in _svs.__dict__.items() if not k.startswith("__")} ) # Misc types and functions @@ -33,4 +31,3 @@ # Make the upgrader available without explicit import. from . import upgrader - diff --git a/bindings/python/src/svs/common.py b/bindings/python/src/svs/common.py index a0c8e5e2..8749c2ac 100644 --- a/bindings/python/src/svs/common.py +++ b/bindings/python/src/svs/common.py @@ -13,13 +13,11 @@ # limitations under the License. import itertools -import importlib import struct import os import numpy as np -from .loader import library -lib = library() +import svs._svs as _svs def np_to_svs(nptype): """ @@ -34,29 +32,29 @@ def np_to_svs(nptype): """ # Signed if nptype == np.int8: - return lib.int8 + return _svs.int8 if nptype == np.int16: - return lib.int16 + return _svs.int16 if nptype == np.int32: - return lib.int32 + return _svs.int32 if nptype == np.int64: - return lib.int64 + return _svs.int64 # Unsigned if nptype == np.uint8: - return lib.uint8 + return _svs.uint8 if nptype == np.uint16: - return lib.uint16 + return _svs.uint16 if nptype == np.uint32: - return lib.uint32 + return _svs.uint32 if nptype == np.uint64: - return lib.uint64 + return _svs.uint64 # Float if nptype == np.float16: - return lib.float16 + return _svs.float16 if nptype == np.float32: - return lib.float32 + return _svs.float32 if nptype == np.float64: - return lib.float64 + return _svs.float64 raise Exception(f"Could not convert {nptype} to a svs.DataType enum!"); @@ -221,7 +219,7 @@ def generate_test_dataset( query_seed = None, num_threads = 1, num_neighbors: int = 100, - distance = lib.DistanceType.L2 + distance = _svs.DistanceType.L2 ): """ Generate a sample dataset consisting of the base data, queries, and groundtruth all in @@ -259,7 +257,7 @@ def generate_test_dataset( write_vecs(queries, os.path.join(directory, "queries.fvecs")) print("Generating Groundtruth") - index = lib.Flat(data, distance, num_threads = num_threads) + index = _svs.Flat(data, distance, num_threads = num_threads) I, _ = index.search(queries, num_neighbors) write_vecs(I.astype(np.uint32), os.path.join(directory, "groundtruth.ivecs")) diff --git a/bindings/python/src/svs/loader.py b/bindings/python/src/svs/loader.py deleted file mode 100644 index 1390cf79..00000000 --- a/bindings/python/src/svs/loader.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# dep pre-coms -import archspec.cpu as cpu - -# standard library -import json -import importlib -import os -from pathlib import Path - -# Get environment variables for configuring warnings and overriding backend selection. -def _is_quiet(): - """ - Return whether or not backend loading should be "quiet". - In this context, "quiet" means not warning for older architectures. - """ - return os.environ.get("SVS_QUIET", False) - -def _override_backend(): - """ - Return a manual override for the backend. - If no override is set, return `None`. - """ - return os.environ.get("SVS_OVERRIDE_BACKEND", None) - - -# The name of the manifest file. -FLAGS_MANIFEST = "flags_manifest.json" # Keep in-sync with CMakeLists.txt - -def _library_from_suffix(suffix): - return f"._svs_{suffix}" - -def _message_prehook(spec, host = cpu.host()): - """ - Emit any special messages for the given microarchitecture spec. - """ - if _is_quiet(): - return - - if isinstance(spec, str): - spec = cpu.TARGETS[spec] - - import warnings - if spec <= cpu.TARGETS["skylake_avx512"]: - message = f""" - Loading library for an older CPU architecture ({spec}). - Performance may be degraded. - """ - warnings.warn(message, RuntimeWarning) - - if host < spec: - message = """ - Override backend is target for a newer CPU than the one you're currently using. - Application may crash. - """ - warnings.warn(message, RuntimeWarning) - - -# The backend being used for this session -__CURRENT_BACKEND__ = None -def current_backend(): - """ - Return the name of the current backend. - """ - return __CURRENT_BACKEND__ - -def __set_backend_once__(suffix: str, spec): - global __CURRENT_BACKEND__ - if __CURRENT_BACKEND__ == None: - _message_prehook(spec) - __CURRENT_BACKEND__ = str(suffix) - - return current_backend() - -# The dynamically loaded module. -__LIBRARY__ = None - -def _load_manifest(): - """ - Determine which shared library to load to supply the C++ extentions. - """ - json_file = Path(__file__).parent / FLAGS_MANIFEST - json_file_alternate = Path(__file__).parent.parent / FLAGS_MANIFEST - - # Try to give a somewhat helpful error message if the JSON manifest file was not - # generated properly by Scikit-build/CMake - if json_file.exists(): - with open(json_file, "r") as io: - return json.load(io) - elif json_file_alternate.exists(): - with open(json_file_alternate, "r") as io: - return json.load(io) - else: - print(Path(str(json_file).replace("ai.similarity-search.gss/", ""))) - raise RuntimeError(f""" - Expected a file {FLAGS_MANIFEST} to exist in the source directory to describe the - attributes of the libraries bundled with this application. - - No such file was found. - - Please report this to the project maintainer! - """) - -def available_backends(): - """ - Return a list of the available backends that where compiled when this module was built. - - Each backend in the list may be used to initialize ``SVS_OVERRIDE_BACKEND`` - environment variable prior to application start to override the default loading logic. - """ - return list(_load_manifest()["libraries"].keys()) - -def _find_library(): - """ - Find the appropriate library to load for this micro architecture. - """ - - # Get the current CPU and the manifest of compiled libraries that ship with this - # library. - host = cpu.host() - manifest = _load_manifest() - - # Respect override requests. - # Down stream loading will fail if the given option doesn't exist. - # - # However, if an override is explicitly given, then we can assume that the use knows - # what they're doing and can respond to a loading failure correctly. - override = _override_backend() - if override is not None: - spec = cpu.TARGETS[manifest["libraries"][override]] - return __set_backend_once__(override, spec) - - # Assume architectures in the manifest are place in order of preference. - # TODO: Revisit this assumption. - for (suffix, microarch) in manifest["libraries"].items(): - # Are we compatible with this micro architecture? - spec = cpu.TARGETS[microarch] - if spec <= host: - return __set_backend_once__(suffix, spec) - - raise RuntimeError(f""" - Could not find a suitable backend for your machine ({host}). - Please contact the project maintainers! - """) - -def __load_module_once__(): - global __LIBRARY__ - if __LIBRARY__ is None: - library_name = _library_from_suffix(_find_library()) - __LIBRARY__ = importlib.import_module(library_name, package = "svs") - -def library(): - """ - Return the library backend as a module. Dynamically loads the library when first called. - - Dynamically loading the library may trigger warnings related to correctness or - performance. If you really **really** don't want these warnings, they can be suppressed - by defining the environemtn variable ``SVS_QUIET=YES`` prior to application start. - """ - __load_module_once__() - return __LIBRARY__ diff --git a/bindings/python/src/svs/upgrader.py b/bindings/python/src/svs/upgrader.py index f98a2b46..908eefef 100644 --- a/bindings/python/src/svs/upgrader.py +++ b/bindings/python/src/svs/upgrader.py @@ -18,13 +18,12 @@ import shutil from pathlib import Path -from .loader import library -_lib = library() +import svs._svs as _svs DEFAULT_SCHEMA_FILE = "serialization.toml" def _reformat_toml(path): - _lib.__reformat_toml(path) + _svs.__reformat_toml(path) def _is_reserved(key: str): return key.startswith("__") diff --git a/bindings/python/tests/test_loader.py b/bindings/python/tests/test_loader.py deleted file mode 100644 index c9abb886..00000000 --- a/bindings/python/tests/test_loader.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Test the dynamic loading logic -import archspec.cpu as cpu -import unittest -import os -import warnings - -import svs.loader as loader - -def set_quiet(): - os.environ["SVS_QUIET"] = "YES" - -def clear_quiet(): - os.environ.pop("SVS_QUIET", None) - -def set_override(override: str): - os.environ["SVS_OVERRIDE_BACKEND"] = override - -def clear_override(): - os.environ.pop("SVS_OVERRIDE_BACKEND", None) - -class LoadingTester(unittest.TestCase): - def __unset_environment_variables__(self): - clear_quiet() - clear_override() - - def tearDown(self): - self.__unset_environment_variables__() - - def test_environment_variables(self): - # Clear the environment variables in question. - self.__unset_environment_variables__() - - # Make sure "is_quiet" behaves correctly. - self.assertFalse(loader._is_quiet()) - set_quiet() - self.assertTrue(loader._is_quiet()) - self.__unset_environment_variables__() - self.assertFalse(loader._is_quiet()) - - # Now, check that "override_backend" works. - self.assertEqual(loader._override_backend(), None) - set_override("hello") - self.assertEqual(loader._override_backend(), "hello") - set_override("north") - self.assertEqual(loader._override_backend(), "north") - clear_override() - self.assertEqual(loader._override_backend(), None) - self.__unset_environment_variables__() - - def test_suffix(self): - self.assertEqual(loader._library_from_suffix("native"), "._svs_native") - self.assertEqual(loader._library_from_suffix("cascadelake"), "._svs_cascadelake") - - def test_available_backends(self): - self.assertGreaterEqual(len(loader.available_backends()), 1) - - def test_manifest(self): - manifest = loader._load_manifest() - self.assertTrue("toolchain" in manifest) - self.assertTrue("libraries" in manifest) - - toolchain = manifest["toolchain"] - self.assertTrue("compiler" in toolchain) - self.assertTrue("compiler_version" in toolchain) - - libraries = manifest["libraries"] - self.assertGreaterEqual(len(libraries), 1) - - def test_message_prehook(self): - # Cause all warnings to always be triggered. - warnings.simplefilter("always") - - # Refer to - # https://docs.python.org/3/library/warnings.html#testing-warnings - # for how to test warnings. - - # Warning for the host being greater than the spec. - spec = cpu.TARGETS["icelake"] - host = cpu.TARGETS["skylake"] - with warnings.catch_warnings(record = True) as w: - loader._message_prehook(spec, host) - self.assertTrue(len(w) == 1) - self.assertTrue(issubclass(w[-1].category, RuntimeWarning)) - self.assertTrue("Override" in str(w[-1].message)) - - # Running again with "quiet" enabled should suppress the warning - set_quiet() - with warnings.catch_warnings(record = True) as w: - loader._message_prehook(spec, host) - self.assertTrue(len(w) == 0) - - # Warning for using an old architecture. - clear_quiet() - archs = ["haswell", "skylake", "skylake_avx512"] - for arch in archs: - with warnings.catch_warnings(record = True) as w: - loader._message_prehook(arch) - # Number of warnings can exceed 1 if running on an older CPU. - # In this latter case, we get a "newer CPU" warning as well. - self.assertTrue(len(w) >= 1) - self.assertTrue(issubclass(w[0].category, RuntimeWarning)) - self.assertTrue("older CPU" in str(w[0].message)) - - def test_loaded(self): - libraries = loader._load_manifest()["libraries"] - self.assertTrue(loader.current_backend() in libraries) - self.assertNotEqual(loader.library(), None) diff --git a/bindings/python/tests/test_microarch.py b/bindings/python/tests/test_microarch.py new file mode 100644 index 00000000..670ae779 --- /dev/null +++ b/bindings/python/tests/test_microarch.py @@ -0,0 +1,37 @@ +# Copyright 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import svs +import archspec.cpu as cpu +import os + + +class MicroarchTester(unittest.TestCase): + def test_microarch(self): + # Get emulated microarch from SDE_FLAG or use the host CPU + host_microarch = os.environ.get("SDE_FLAG", cpu.host().name) + mapping = { + "nhm": "nehalem", + "hsw": "haswell", + "skx": "skylake_avx512", + "clx": "cascadelake", + "icl": "icelake_client", + "icelake": "icelake_client", + "spr": "sapphirerapids", + } + host_microarch = mapping.get(host_microarch, host_microarch) + + if host_microarch in svs.microarch.compiled: + self.assertTrue(host_microarch == svs.microarch.current) diff --git a/cmake/microarch.cmake b/cmake/microarch.cmake new file mode 100644 index 00000000..2059d54d --- /dev/null +++ b/cmake/microarch.cmake @@ -0,0 +1,166 @@ +# Copyright 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if(svs_microarch_cmake_included) + return() +endif() +set(svs_microarch_cmake_included true) + +# N.B.: first microarch listed in targets file is treated as "base" microarch +# which is used to build base object files, shared libs and executables +if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") + file(STRINGS "${CMAKE_CURRENT_LIST_DIR}/microarch_targets_x86_64" SVS_SUPPORTED_MICROARCHS) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "arm64") + if(APPLE) + file(STRINGS "${CMAKE_CURRENT_LIST_DIR}/microarch_targets_aarch64_darwin" SVS_SUPPORTED_MICROARCHS) + else() + file(STRINGS "${CMAKE_CURRENT_LIST_DIR}/microarch_targets_aarch64" SVS_SUPPORTED_MICROARCHS) + endif() +else() + message(FATAL_ERROR "Unknown CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") +endif() + +# List all known microarchs +if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") + file(STRINGS "${CMAKE_CURRENT_LIST_DIR}/microarch_list_x86_64" SVS_KNOWN_MICROARCHS) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "arm64") + if(APPLE) + file(STRINGS "${CMAKE_CURRENT_LIST_DIR}/microarch_list_aarch64_darwin" SVS_KNOWN_MICROARCHS) + else() + file(STRINGS "${CMAKE_CURRENT_LIST_DIR}/microarch_list_aarch64" SVS_KNOWN_MICROARCHS) + endif() +else() + message(FATAL_ERROR "Unknown CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") +endif() + +# Try to find the Python executable. +# +# If it's given as part of the Cmake arguments given by "scikit build", then use that. +# Otherwise, fall back to using plain old "python". +# If *THAT* doesn't work, give up. +if(DEFINED PYTHON_EXECUTABLE) + set(SVS_PYTHON_EXECUTABLE "${PYTHON_EXECUTABLE}") +else() + set(SVS_PYTHON_EXECUTABLE "python") +endif() + +# Run the python script to get optimization flags for the desired back-ends. +# +# FLAGS_SCRIPT - Path to the Python script that will take the compiler, compiler version, +# and list of desired microarchitectures and generate optimization flags for each +# microarchitecture. +# +# FLAGS_TEXT_FILE - List of optimization flags for each architecture. +# Expected format: +# -march=arch1,-mtune=arch1 +# -march=arch2,-mtune=arch2 +# ... +# -march=archN,-mtune=archN +# +# The number of lines should be equal to the number of microarchitectures. +# NOTE: The entries within each line are separated by a comma on purpose to allow CMake +# to read the whole file as a List and then use string replacement on the commas to turn +# each line into a list in its own right. +# +set(FLAGS_SCRIPT "${CMAKE_CURRENT_LIST_DIR}/microarch.py") +set(FLAGS_TEXT_FILE "${CMAKE_CURRENT_BINARY_DIR}/optimization_flags.txt") + +execute_process( + COMMAND + ${SVS_PYTHON_EXECUTABLE} + ${FLAGS_SCRIPT} + ${FLAGS_TEXT_FILE} + --compiler ${CMAKE_CXX_COMPILER_ID} + --compiler-version ${CMAKE_CXX_COMPILER_VERSION} + --microarchitectures ${SVS_SUPPORTED_MICROARCHS} + COMMAND_ERROR_IS_FATAL ANY +) +file(STRINGS "${FLAGS_TEXT_FILE}" OPTIMIZATION_FLAGS) + +# Run the python script to generate a header with microarch-specific macros. +set(GENERATOR_SCRIPT "${CMAKE_CURRENT_LIST_DIR}/microarch_generate_macros.py") +set(MICROARCH_MACROS_PROTOTYPE_HEADER "${CMAKE_CURRENT_LIST_DIR}/microarch_macros.h") +set(MICROARCH_MACROS_HEADER "${CMAKE_CURRENT_LIST_DIR}/../include/svs/lib/microarch_macros.h") + +execute_process( + COMMAND + ${SVS_PYTHON_EXECUTABLE} + ${GENERATOR_SCRIPT} + --proto-header-file ${MICROARCH_MACROS_PROTOTYPE_HEADER} + --output-header-file ${MICROARCH_MACROS_HEADER} + --known-uarchs ${SVS_KNOWN_MICROARCHS} + --supported-uarchs ${SVS_SUPPORTED_MICROARCHS} + COMMAND_ERROR_IS_FATAL ANY +) + +##### +##### Helper function to register specified static dimensions for dispatching by microarch. +##### + +set(DIM_REGISTRY_SCRIPT "${CMAKE_CURRENT_LIST_DIR}/microarch_register_dimensions.py") + +function(svs_register_static_dimensions) + execute_process( + COMMAND + ${SVS_PYTHON_EXECUTABLE} + ${DIM_REGISTRY_SCRIPT} + --header-file ${MICROARCH_MACROS_HEADER} + --dimensions ${ARGN} + COMMAND_ERROR_IS_FATAL ANY + ) +endfunction() + +##### +##### Helper targets to support required microarchs and apply relevant compiler optimizations. +##### + +# Set up "base" target to include opt. flags for base microarch +add_library(svs_microarch_options_base INTERFACE) +add_library(svs::microarch_options_base ALIAS svs_microarch_options_base) + +# Get opt. flags for base microarch +list(GET SVS_SUPPORTED_MICROARCHS 0 BASE_MICROARCH) +list(GET OPTIMIZATION_FLAGS 0 BASE_OPT_FLAGS) +string(REPLACE "," ";" BASE_OPT_FLAGS ${BASE_OPT_FLAGS}) +message("Opt.flags[base=${BASE_MICROARCH}]: ${BASE_OPT_FLAGS}") + +target_compile_options(svs_microarch_options_base INTERFACE ${BASE_OPT_FLAGS}) + +# Generate object files for each microarchitecture +add_library(svs_microarchs INTERFACE) +foreach(MICROARCH OPT_FLAGS IN ZIP_LISTS SVS_SUPPORTED_MICROARCHS OPTIMIZATION_FLAGS) + string(REPLACE "," ";" OPT_FLAGS ${OPT_FLAGS}) + message("Opt.flags[${MICROARCH}]: ${OPT_FLAGS}") + + # Object with MicroArch template instantiations + set(OBJ_NAME "microarch_${MICROARCH}") + add_library(${OBJ_NAME} OBJECT ${SVS_MICROARCH_INSTANCE_FILES}) + target_link_libraries(${OBJ_NAME} PRIVATE ${SVS_LIB} svs::compile_options) + target_compile_options(${OBJ_NAME} PRIVATE ${OPT_FLAGS} -DSVS_MICROARCH_TARGET=${MICROARCH} -fPIC) + target_sources(svs_microarchs INTERFACE $) + + install( + TARGETS ${OBJ_NAME} + EXPORT svs-targets + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + ) +endforeach() + +target_link_libraries(svs_export INTERFACE svs_microarchs) + +install( + TARGETS svs_microarch_options_base svs_microarchs + EXPORT svs-targets + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} +) diff --git a/bindings/python/microarch.py b/cmake/microarch.py similarity index 89% rename from bindings/python/microarch.py rename to cmake/microarch.py index 99a4ae36..6e6ce58c 100644 --- a/bindings/python/microarch.py +++ b/cmake/microarch.py @@ -17,21 +17,18 @@ # (1) A text file with compiler optimization flags for each microarchitecture formatted for # relatively easy consumption by CMake. # -# (2) A JSON manifest file describing the micreoarchitecture for each compiled library -# that the python library can use to select the correct shared library. -# import archspec import archspec.cpu as cpu import argparse import json + def build_parser(): parser = argparse.ArgumentParser() parser.add_argument( "cmake_flags_text_file", help = "file path to where CMake's text file will go." ) - parser.add_argument("python_output_json_file") parser.add_argument("--compiler", required = True) parser.add_argument("--compiler-version", required = True) parser.add_argument( @@ -48,6 +45,7 @@ def resolve_microarch(name: str): """ custom_aliases = { "native": cpu.host().name, + "icelake_client": "icelake", } # Allow the custom aliases to override the current name. # If an alias doesn't exist, juse pass the name straight through. @@ -85,6 +83,7 @@ def resolve_compiler(name: str): aliases = { "GNU": "gcc", "Clang": "clang", + "AppleClang": "clang", "IntelLLVM": "oneapi", } return aliases.get(name, name) @@ -96,7 +95,6 @@ def run(): # Extract elements from the parser architectures = args.microarchitectures output_text = args.cmake_flags_text_file - output_json = args.python_output_json_file compiler = resolve_compiler(args.compiler) compiler_version = args.compiler_version @@ -120,16 +118,10 @@ def run(): "toolchain": toolchain, "libraries": suffix_to_microarch, } - with open(output_json, "w") as file: - file.write(json.dumps(pre_json_dict, indent = 4)) # Safe flags to file dump_flags_for_cmake(optimization_flags, output_text) - # Print flags to stdout - for flags in optimization_flags: - print(flags) - ##### ##### Execute as script. ##### diff --git a/cmake/microarch_generate_macros.py b/cmake/microarch_generate_macros.py new file mode 100644 index 00000000..fa8fcd6b --- /dev/null +++ b/cmake/microarch_generate_macros.py @@ -0,0 +1,48 @@ +# Copyright 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse + + +def build_parser(): + parser = argparse.ArgumentParser() + parser.add_argument("--proto-header-file", required=True) + parser.add_argument("--output-header-file", required=True) + parser.add_argument("--known-uarchs", required=True, nargs="+", type=str) + parser.add_argument("--supported-uarchs", required=True, nargs="+", type=str) + return parser + + +def main(): + args = build_parser().parse_args() + with open(args.proto_header_file, "r") as f: + header_content = f.read() + + # generate SVS_FOR_EACH_MICROARCH macro + prefix = "#define SVS_FOR_EACH_MICROARCH" + appendix = "\\\n SVS_MICROARCH_FUNC({uarch})" + for uarch in args.supported_uarchs[::-1]: + header_content = header_content.replace(prefix, prefix + appendix.format(uarch=uarch)) + # generate SVS_FOR_EACH_KNOWN_MICROARCH macro + prefix = "#define SVS_FOR_EACH_KNOWN_MICROARCH" + appendix = "\\\n SVS_MICROARCH_FUNC({uarch})" + for uarch in args.known_uarchs[::-1]: + header_content = header_content.replace(prefix, prefix + appendix.format(uarch=uarch)) + + with open(args.output_header_file, "w") as f: + f.write(header_content) + + +if __name__ == "__main__": + main() diff --git a/cmake/microarch_instantiations/cosine.cpp b/cmake/microarch_instantiations/cosine.cpp new file mode 100644 index 00000000..8cbe01fa --- /dev/null +++ b/cmake/microarch_instantiations/cosine.cpp @@ -0,0 +1,21 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "svs/core/distance/cosine.h" + +using namespace svs::distance; + +SVS_INSTANTIATE_COSINE_DISTANCE_TEMPLATES_BY_MICROARCH(SVS_MICROARCH_TARGET) diff --git a/cmake/microarch_instantiations/euclidean.cpp b/cmake/microarch_instantiations/euclidean.cpp new file mode 100644 index 00000000..eb620a4e --- /dev/null +++ b/cmake/microarch_instantiations/euclidean.cpp @@ -0,0 +1,21 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "svs/core/distance/euclidean.h" + +using namespace svs::distance; + +SVS_INSTANTIATE_L2_DISTANCE_TEMPLATES_BY_MICROARCH(SVS_MICROARCH_TARGET) diff --git a/cmake/microarch_instantiations/inner_product.cpp b/cmake/microarch_instantiations/inner_product.cpp new file mode 100644 index 00000000..3b97baba --- /dev/null +++ b/cmake/microarch_instantiations/inner_product.cpp @@ -0,0 +1,21 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "svs/core/distance/inner_product.h" + +using namespace svs::distance; + +SVS_INSTANTIATE_IP_DISTANCE_TEMPLATES_BY_MICROARCH(SVS_MICROARCH_TARGET) diff --git a/cmake/microarch_list_aarch64 b/cmake/microarch_list_aarch64 new file mode 100644 index 00000000..fe512ced --- /dev/null +++ b/cmake/microarch_list_aarch64 @@ -0,0 +1,2 @@ +neoverse_v1 +neoverse_n2 diff --git a/cmake/microarch_list_aarch64_darwin b/cmake/microarch_list_aarch64_darwin new file mode 100644 index 00000000..b5692e52 --- /dev/null +++ b/cmake/microarch_list_aarch64_darwin @@ -0,0 +1,2 @@ +m1 +m2 diff --git a/cmake/microarch_list_x86_64 b/cmake/microarch_list_x86_64 new file mode 100644 index 00000000..2f634cee --- /dev/null +++ b/cmake/microarch_list_x86_64 @@ -0,0 +1,16 @@ +nehalem +westmere +sandybridge +ivybridge +haswell +broadwell +skylake +x86_64_v4 +skylake_avx512 +cascadelake +cooperlake +icelake_client +icelake_server +sapphirerapids +graniterapids +graniterapids_d diff --git a/cmake/microarch_macros.h b/cmake/microarch_macros.h new file mode 100644 index 00000000..3718614d --- /dev/null +++ b/cmake/microarch_macros.h @@ -0,0 +1,76 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +// clang-format off + +#define SVS_PACK_ARGS(...) __VA_ARGS__ + +#define SVS_DISTANCE_DYNAMIC_TEMPLATES_BY_MICROARCH(dist, spec, uarch) \ + SVS_##dist##_DISTANCE_DYNAMIC_TEMPLATE(spec, uarch, int8_t, int8_t) \ + SVS_##dist##_DISTANCE_DYNAMIC_TEMPLATE(spec, uarch, int8_t, uint8_t) \ + SVS_##dist##_DISTANCE_DYNAMIC_TEMPLATE(spec, uarch, int8_t, float) \ + SVS_##dist##_DISTANCE_DYNAMIC_TEMPLATE(spec, uarch, int8_t, svs::float16::Float16) \ + SVS_##dist##_DISTANCE_DYNAMIC_TEMPLATE(spec, uarch, uint8_t, int8_t) \ + SVS_##dist##_DISTANCE_DYNAMIC_TEMPLATE(spec, uarch, uint8_t, uint8_t) \ + SVS_##dist##_DISTANCE_DYNAMIC_TEMPLATE(spec, uarch, uint8_t, float) \ + SVS_##dist##_DISTANCE_DYNAMIC_TEMPLATE(spec, uarch, uint8_t, svs::float16::Float16) \ + SVS_##dist##_DISTANCE_DYNAMIC_TEMPLATE(spec, uarch, float, int8_t) \ + SVS_##dist##_DISTANCE_DYNAMIC_TEMPLATE(spec, uarch, float, uint8_t) \ + SVS_##dist##_DISTANCE_DYNAMIC_TEMPLATE(spec, uarch, float, float) \ + SVS_##dist##_DISTANCE_DYNAMIC_TEMPLATE(spec, uarch, float, svs::float16::Float16) \ + SVS_##dist##_DISTANCE_DYNAMIC_TEMPLATE(spec, uarch, svs::float16::Float16, int8_t) \ + SVS_##dist##_DISTANCE_DYNAMIC_TEMPLATE(spec, uarch, svs::float16::Float16, uint8_t) \ + SVS_##dist##_DISTANCE_DYNAMIC_TEMPLATE(spec, uarch, svs::float16::Float16, float) \ + SVS_##dist##_DISTANCE_DYNAMIC_TEMPLATE(spec, uarch, svs::float16::Float16, svs::float16::Float16) + +#define SVS_DISTANCE_STATIC_TEMPLATES_BY_MICROARCH(dist, spec, uarch, length) \ + SVS_##dist##_DISTANCE_STATIC_TEMPLATE(spec, uarch, int8_t, int8_t, length) \ + SVS_##dist##_DISTANCE_STATIC_TEMPLATE(spec, uarch, int8_t, uint8_t, length) \ + SVS_##dist##_DISTANCE_STATIC_TEMPLATE(spec, uarch, int8_t, float, length) \ + SVS_##dist##_DISTANCE_STATIC_TEMPLATE(spec, uarch, int8_t, svs::float16::Float16, length) \ + SVS_##dist##_DISTANCE_STATIC_TEMPLATE(spec, uarch, uint8_t, int8_t, length) \ + SVS_##dist##_DISTANCE_STATIC_TEMPLATE(spec, uarch, uint8_t, uint8_t, length) \ + SVS_##dist##_DISTANCE_STATIC_TEMPLATE(spec, uarch, uint8_t, float, length) \ + SVS_##dist##_DISTANCE_STATIC_TEMPLATE(spec, uarch, uint8_t, svs::float16::Float16, length) \ + SVS_##dist##_DISTANCE_STATIC_TEMPLATE(spec, uarch, float, int8_t, length) \ + SVS_##dist##_DISTANCE_STATIC_TEMPLATE(spec, uarch, float, uint8_t, length) \ + SVS_##dist##_DISTANCE_STATIC_TEMPLATE(spec, uarch, float, float, length) \ + SVS_##dist##_DISTANCE_STATIC_TEMPLATE(spec, uarch, float, svs::float16::Float16, length) \ + SVS_##dist##_DISTANCE_STATIC_TEMPLATE(spec, uarch, svs::float16::Float16, int8_t, length) \ + SVS_##dist##_DISTANCE_STATIC_TEMPLATE(spec, uarch, svs::float16::Float16, uint8_t, length) \ + SVS_##dist##_DISTANCE_STATIC_TEMPLATE(spec, uarch, svs::float16::Float16, float, length) \ + SVS_##dist##_DISTANCE_STATIC_TEMPLATE(spec, uarch, svs::float16::Float16, svs::float16::Float16, length) + +#define SVS_DISTANCE_TEMPLATES_BY_MICROARCH(dist, spec, uarch) \ + SVS_DISTANCE_DYNAMIC_TEMPLATES_BY_MICROARCH(dist, spec, uarch) + +// clang-format on + +#define SVS_FOR_EACH_MICROARCH + +#define SVS_FOR_EACH_KNOWN_MICROARCH + +namespace svs::lib { + inline constexpr bool extent_is_registered(size_t n) { + if (n == 0) { + // Unnecessary check to avoid "unused parameter 'n'" compilation error. + throw std::logic_error("Static length cannot be 0!"); + } + return (false); + }; +} diff --git a/cmake/microarch_register_dimensions.py b/cmake/microarch_register_dimensions.py new file mode 100644 index 00000000..940f17af --- /dev/null +++ b/cmake/microarch_register_dimensions.py @@ -0,0 +1,50 @@ +# Copyright 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse + + +def build_parser(): + parser = argparse.ArgumentParser() + parser.add_argument("--header-file", required=True) + parser.add_argument("--dimensions", required=True, nargs="+", type=int) + return parser + + +def main(): + args = build_parser().parse_args() + with open(args.header_file, "r") as f: + header_content = f.read() + + # add dimensions to the `SVS_DISTANCE_TEMPLATES_BY_MICROARCH` macro + # for external linkage and instantiation of static distance templates + prefix = "#define SVS_DISTANCE_TEMPLATES_BY_MICROARCH(dist, spec, uarch)" + appendix = "\\\n SVS_DISTANCE_STATIC_TEMPLATES_BY_MICROARCH(dist, spec, uarch, {dim})" + for dim in args.dimensions: + new_entry = appendix.format(dim=dim) + if new_entry not in header_content: + header_content = header_content.replace(prefix, prefix + new_entry) + + # add dimensions to the `extent_is_registered` constant expression + prefix = "return (false" + appendix = "\n || (n == {dim})" + for dim in args.dimensions: + header_content = header_content.replace(prefix, prefix + appendix.format(dim=dim)) + + with open(args.header_file, "w") as f: + f.write(header_content) + + +if __name__ == "__main__": + main() diff --git a/cmake/microarch_targets_aarch64 b/cmake/microarch_targets_aarch64 new file mode 100644 index 00000000..fe512ced --- /dev/null +++ b/cmake/microarch_targets_aarch64 @@ -0,0 +1,2 @@ +neoverse_v1 +neoverse_n2 diff --git a/cmake/microarch_targets_aarch64_darwin b/cmake/microarch_targets_aarch64_darwin new file mode 100644 index 00000000..b5692e52 --- /dev/null +++ b/cmake/microarch_targets_aarch64_darwin @@ -0,0 +1,2 @@ +m1 +m2 diff --git a/cmake/microarch_targets_x86_64 b/cmake/microarch_targets_x86_64 new file mode 100644 index 00000000..bac14630 --- /dev/null +++ b/cmake/microarch_targets_x86_64 @@ -0,0 +1,7 @@ +nehalem +haswell +x86_64_v4 +skylake_avx512 +cascadelake +icelake_client +sapphirerapids diff --git a/cmake/options.cmake b/cmake/options.cmake index e374a548..7bc7a7be 100644 --- a/cmake/options.cmake +++ b/cmake/options.cmake @@ -140,14 +140,6 @@ else() target_compile_options(${SVS_LIB} INTERFACE -DSVS_INITIALIZE_LOGGER=0) endif() -##### -##### Helper target to apply relevant compiler optimizations. -##### - -add_library(svs_native_options INTERFACE) -add_library(svs::native_options ALIAS svs_native_options) -target_compile_options(svs_native_options INTERFACE -march=native -mtune=native) - # Use an internal INTERFACE target to apply the same build options to both the # unit test and the compiled binaries. add_library(svs_compile_options INTERFACE) diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt index b9f1c98e..2ab74c4f 100644 --- a/examples/cpp/CMakeLists.txt +++ b/examples/cpp/CMakeLists.txt @@ -27,7 +27,7 @@ function(create_simple_example exe test file) add_executable(${exe} ${file}) target_include_directories(${exe} PRIVATE ${CMAKE_CURRENT_LIST_DIR}) # Link to our library - target_link_libraries(${exe} ${SVS_LIB} svs_compile_options svs_native_options) + target_link_libraries(${exe} svs::svs svs_compile_options svs_microarch_options_base) # Create a test. # No-op if the `include(CTest)` line above is not executed. add_test(${test} ${exe}) @@ -37,6 +37,7 @@ endfunction() create_simple_example(saveload test_saveload saveload.cpp) create_simple_example(types test_types types.cpp) create_simple_example(vamana_iterator test_vamana_iterator vamana_iterator.cpp) +create_simple_example(microarch_info test_microarch_info microarch_info.cpp) ## More complicated examples involving more extensive setup. @@ -51,7 +52,7 @@ configure_file(../../data/test_dataset/groundtruth_euclidean.ivecs . COPYONLY) # The vamana test executable. add_executable(vamana vamana.cpp) target_include_directories(vamana PRIVATE ${CMAKE_CURRENT_LIST_DIR}) -target_link_libraries(vamana ${SVS_LIB} svs_compile_options svs_native_options) +target_link_libraries(vamana svs::svs svs_compile_options svs_microarch_options_base) add_test( NAME test_vamana COMMAND @@ -64,7 +65,7 @@ add_test( # The custom thread pool executable. add_executable(custom_thread_pool custom_thread_pool.cpp) target_include_directories(custom_thread_pool PRIVATE ${CMAKE_CURRENT_LIST_DIR}) -target_link_libraries(custom_thread_pool ${SVS_LIB} svs_compile_options svs_native_options) +target_link_libraries(custom_thread_pool svs::svs svs_compile_options svs_microarch_options_base) add_test( NAME test_custom_thread_pool COMMAND @@ -81,7 +82,7 @@ add_test( add_executable(dispatcher dispatcher.cpp) target_include_directories(dispatcher PRIVATE ${CMAKE_CURRENT_LIST_DIR}) -target_link_libraries(dispatcher ${SVS_LIB} svs_compile_options svs_native_options) +target_link_libraries(dispatcher svs::svs svs_compile_options svs_microarch_options_base) # Here we go. add_test( diff --git a/examples/cpp/microarch_info.cpp b/examples/cpp/microarch_info.cpp new file mode 100644 index 00000000..30b72cdb --- /dev/null +++ b/examples/cpp/microarch_info.cpp @@ -0,0 +1,26 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "svs/lib/arch.h" +#include "svs/lib/cpuid.h" +#include + +int main() { + auto& env = svs::arch::MicroArchEnvironment::get_instance(); + env.describe(std::cout); + + return 0; +} diff --git a/examples/cpp/shared/CMakeLists.txt b/examples/cpp/shared/CMakeLists.txt index 3155c4f2..3a816004 100644 --- a/examples/cpp/shared/CMakeLists.txt +++ b/examples/cpp/shared/CMakeLists.txt @@ -37,4 +37,3 @@ SET(CMAKE_CXX_FLAGS "-O3 -DNDEBUG -std=gnu++20 -march=native -mtune=native -Wer add_executable(shared shared.cpp) target_link_libraries(shared PUBLIC ${SVS_SHARED} svs::svs) - diff --git a/include/svs/core/distance/cosine.h b/include/svs/core/distance/cosine.h index 9738e881..7001bf3d 100644 --- a/include/svs/core/distance/cosine.h +++ b/include/svs/core/distance/cosine.h @@ -19,6 +19,7 @@ // svs #include "svs/core/distance/distance_core.h" #include "svs/core/distance/simd_utils.h" +#include "svs/lib/arch.h" #include "svs/lib/saveload.h" #include "svs/lib/static.h" @@ -32,7 +33,8 @@ namespace svs::distance { // Forward declare implementation to allow entry point to be near the top. -template struct CosineSimilarityImpl; +template +struct CosineSimilarityImpl; // Generic Entry Point // Call as one of either: @@ -41,18 +43,18 @@ template struct CosineSimilarityImpl; // (2) CosineSimilarity::compute(a, b) // ``` // Where (2) is when length is known at compile time and (1) is when length is not. -class CosineSimilarity { +template class CosineSimilarity { public: template - static constexpr float compute(const Ea* a, const Eb* b, float a_norm, size_t N) { - return CosineSimilarityImpl::compute( + SVS_NOINLINE static float compute(const Ea* a, const Eb* b, float a_norm, size_t N) { + return CosineSimilarityImpl::compute( a, b, a_norm, lib::MaybeStatic(N) ); } template - static constexpr float compute(const Ea* a, const Eb* b, float a_norm) { - return CosineSimilarityImpl::compute( + SVS_NOINLINE static float compute(const Ea* a, const Eb* b, float a_norm) { + return CosineSimilarityImpl::compute( a, b, a_norm, lib::MaybeStatic() ); } @@ -136,12 +138,42 @@ inline constexpr bool operator==(DistanceCosineSimilarity, DistanceCosineSimilar /// template float compute(DistanceCosineSimilarity distance, std::span a, std::span b) { + using namespace svs::arch; assert(a.size() == b.size()); + auto uarch = MicroArchEnvironment::get_instance().get_microarch(); constexpr size_t extent = lib::extract_extent(Da, Db); - if constexpr (extent == Dynamic) { - return CosineSimilarity::compute(a.data(), b.data(), distance.norm_, a.size()); + if constexpr (extent == Dynamic || !lib::extent_is_registered(extent)) { + switch (uarch) { +#define SVS_MICROARCH_FUNC(uarch) \ + case MicroArch::uarch: \ + return CosineSimilarity::compute( \ + a.data(), b.data(), distance.norm_, a.size() \ + ); \ + break; + SVS_FOR_EACH_MICROARCH +#undef SVS_MICROARCH_FUNC + default: + return CosineSimilarity::compute( + a.data(), b.data(), distance.norm_, a.size() + ); + break; + } } else { - return CosineSimilarity::compute(a.data(), b.data(), distance.norm_); + switch (uarch) { +#define SVS_MICROARCH_FUNC(uarch) \ + case MicroArch::uarch: \ + return CosineSimilarity::compute( \ + a.data(), b.data(), distance.norm_ \ + ); \ + break; + SVS_FOR_EACH_MICROARCH +#undef SVS_MICROARCH_FUNC + default: + return CosineSimilarity::compute( + a.data(), b.data(), distance.norm_ + ); + break; + } } } @@ -166,7 +198,8 @@ float generic_cosine_similarity( return result / (a_norm * std::sqrt(accum)); }; -template struct CosineSimilarityImpl { +template +struct CosineSimilarityImpl { static float compute( const Ea* a, const Eb* b, @@ -224,7 +257,8 @@ template <> struct CosineFloatOp<16> : public svs::simd::ConvertToFloat<16> { // Small Integers SVS_VALIDATE_BOOL_ENV(SVS_AVX512_VNNI) #if SVS_AVX512_VNNI -template struct CosineSimilarityImpl { +template +struct CosineSimilarityImpl { SVS_NOINLINE static float compute(const int8_t* a, const int8_t* b, float a_norm, lib::MaybeStatic length) { auto sum = _mm512_setzero_epi32(); @@ -250,7 +284,8 @@ template struct CosineSimilarityImpl { } }; -template struct CosineSimilarityImpl { +template +struct CosineSimilarityImpl { SVS_NOINLINE static float compute(const uint8_t* a, const uint8_t* b, float a_norm, lib::MaybeStatic length) { auto sum = _mm512_setzero_epi32(); @@ -278,7 +313,8 @@ template struct CosineSimilarityImpl { #endif // Floating and Mixed Types -template struct CosineSimilarityImpl { +template +struct CosineSimilarityImpl { SVS_NOINLINE static float compute(const float* a, const float* b, float a_norm, lib::MaybeStatic length) { auto [sum, norm] = simd::generic_simd_op(CosineFloatOp<16>(), a, b, length); @@ -286,7 +322,8 @@ template struct CosineSimilarityImpl { } }; -template struct CosineSimilarityImpl { +template +struct CosineSimilarityImpl { SVS_NOINLINE static float compute(const float* a, const uint8_t* b, float a_norm, lib::MaybeStatic length) { auto [sum, norm] = simd::generic_simd_op(CosineFloatOp<16>(), a, b, length); @@ -294,7 +331,8 @@ template struct CosineSimilarityImpl { }; }; -template struct CosineSimilarityImpl { +template +struct CosineSimilarityImpl { SVS_NOINLINE static float compute(const float* a, const int8_t* b, float a_norm, lib::MaybeStatic length) { auto [sum, norm] = simd::generic_simd_op(CosineFloatOp<16>(), a, b, length); @@ -302,7 +340,8 @@ template struct CosineSimilarityImpl { }; }; -template struct CosineSimilarityImpl { +template +struct CosineSimilarityImpl { SVS_NOINLINE static float compute(const float* a, const Float16* b, float a_norm, lib::MaybeStatic length) { auto [sum, norm] = simd::generic_simd_op(CosineFloatOp<16>{}, a, b, length); @@ -310,7 +349,8 @@ template struct CosineSimilarityImpl { } }; -template struct CosineSimilarityImpl { +template +struct CosineSimilarityImpl { SVS_NOINLINE static float compute(const Float16* a, const float* b, float a_norm, lib::MaybeStatic length) { auto [sum, norm] = simd::generic_simd_op(CosineFloatOp<16>{}, a, b, length); @@ -318,7 +358,8 @@ template struct CosineSimilarityImpl { } }; -template struct CosineSimilarityImpl { +template +struct CosineSimilarityImpl { SVS_NOINLINE static float compute(const Float16* a, const Float16* b, float a_norm, lib::MaybeStatic length) { auto [sum, norm] = simd::generic_simd_op(CosineFloatOp<16>{}, a, b, length); @@ -327,4 +368,34 @@ template struct CosineSimilarityImpl { }; #endif + +// Templates of `float CosineSimilarity::compute<...>(...)`. +// `spec` value is either `extern template` for external linkage +// or `template` for instantiation. +#define SVS_COSINE_DISTANCE_DYNAMIC_TEMPLATE(spec, uarch, a_type, b_type) \ + spec float CosineSimilarity::compute( \ + a_type const*, b_type const*, float, size_t \ + ); + +#define SVS_COSINE_DISTANCE_STATIC_TEMPLATE(spec, uarch, a_type, b_type, length) \ + spec float \ + CosineSimilarity::compute( \ + a_type const*, b_type const*, float \ + ); + +// NOTE: dispatching doesn't work for other distance instances than the listed below. +#define SVS_COSINE_DISTANCE_TEMPLATES_BY_MICROARCH(spec, uarch) \ + SVS_DISTANCE_TEMPLATES_BY_MICROARCH(COSINE, spec, uarch) + +#define SVS_INSTANTIATE_COSINE_DISTANCE_TEMPLATES_BY_MICROARCH(uarch) \ + SVS_COSINE_DISTANCE_TEMPLATES_BY_MICROARCH(template, uarch) + +#define SVS_EXTERN_COSINE_DISTANCE_TEMPLATES_BY_MICROARCH(uarch) \ + SVS_COSINE_DISTANCE_TEMPLATES_BY_MICROARCH(extern template, uarch) + +// Equal to `foreach(uarch : uarch_list) { extern Cosine::compute(...) }` +#define SVS_MICROARCH_FUNC(uarch) SVS_EXTERN_COSINE_DISTANCE_TEMPLATES_BY_MICROARCH(uarch) +SVS_FOR_EACH_MICROARCH +#undef SVS_MICROARCH_FUNC + } // namespace svs::distance diff --git a/include/svs/core/distance/euclidean.h b/include/svs/core/distance/euclidean.h index 2fc86986..62cbec95 100644 --- a/include/svs/core/distance/euclidean.h +++ b/include/svs/core/distance/euclidean.h @@ -19,6 +19,7 @@ // svs #include "svs/core/distance/distance_core.h" #include "svs/core/distance/simd_utils.h" +#include "svs/lib/arch.h" #include "svs/lib/float16.h" #include "svs/lib/preprocessor.h" #include "svs/lib/saveload.h" @@ -71,7 +72,7 @@ namespace svs::distance { // Forward declare implementation to allow entry point to be near the top. -template struct L2Impl; +template struct L2Impl; // Generic Entry Point // Call as one of either: @@ -80,16 +81,16 @@ template struct L2Impl; // (2) L2::compute(a, b) // ``` // Where (2) is when length is known at compile time and (1) is when length is not. -class L2 { +template class L2 { public: template - static constexpr float compute(const Ea* a, const Eb* b, size_t N) { - return L2Impl::compute(a, b, lib::MaybeStatic(N)); + SVS_NOINLINE static float compute(const Ea* a, const Eb* b, size_t N) { + return L2Impl::compute(a, b, lib::MaybeStatic(N)); } template - static constexpr float compute(const Ea* a, const Eb* b) { - return L2Impl::compute(a, b, lib::MaybeStatic()); + SVS_NOINLINE static float compute(const Ea* a, const Eb* b) { + return L2Impl::compute(a, b, lib::MaybeStatic()); } }; @@ -152,12 +153,34 @@ inline constexpr bool operator==(DistanceL2, DistanceL2) { return true; } /// template float compute(DistanceL2 /*unused*/, std::span a, std::span b) { + using namespace svs::arch; assert(a.size() == b.size()); + auto uarch = MicroArchEnvironment::get_instance().get_microarch(); constexpr size_t extent = lib::extract_extent(Da, Db); - if constexpr (extent == Dynamic) { - return L2::compute(a.data(), b.data(), a.size()); + if constexpr (extent == Dynamic || !lib::extent_is_registered(extent)) { + switch (uarch) { +#define SVS_MICROARCH_FUNC(uarch) \ + case MicroArch::uarch: \ + return L2::compute(a.data(), b.data(), a.size()); \ + break; + SVS_FOR_EACH_MICROARCH +#undef SVS_MICROARCH_FUNC + default: + return L2::compute(a.data(), b.data(), a.size()); + break; + } } else { - return L2::compute(a.data(), b.data()); + switch (uarch) { +#define SVS_MICROARCH_FUNC(uarch) \ + case MicroArch::uarch: \ + return L2::compute(a.data(), b.data()); \ + break; + SVS_FOR_EACH_MICROARCH +#undef SVS_MICROARCH_FUNC + default: + return L2::compute(a.data(), b.data()); + break; + } } } @@ -177,7 +200,7 @@ float generic_l2( return result; } -template struct L2Impl { +template struct L2Impl { static constexpr float compute(const Ea* a, const Eb* b, lib::MaybeStatic length = lib::MaybeStatic()) { return generic_l2(a, b, length); @@ -252,14 +275,14 @@ template <> struct L2VNNIOp : public svs::simd::ConvertForVNNI struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const int8_t* a, const int8_t* b, lib::MaybeStatic length) { return simd::generic_simd_op(L2VNNIOp(), a, b, length); } }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const uint8_t* a, const uint8_t* b, lib::MaybeStatic length) { return simd::generic_simd_op(L2VNNIOp(), a, b, length); @@ -269,42 +292,42 @@ template struct L2Impl { #endif // Floating and Mixed Types -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const float* a, const float* b, lib::MaybeStatic length) { return simd::generic_simd_op(L2FloatOp<16>{}, a, b, length); } }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const float* a, const uint8_t* b, lib::MaybeStatic length) { return simd::generic_simd_op(L2FloatOp<16>{}, a, b, length); }; }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const float* a, const int8_t* b, lib::MaybeStatic length) { return simd::generic_simd_op(L2FloatOp<16>{}, a, b, length); }; }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const float* a, const Float16* b, lib::MaybeStatic length) { return simd::generic_simd_op(L2FloatOp<16>{}, a, b, length); } }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const Float16* a, const float* b, lib::MaybeStatic length) { return simd::generic_simd_op(L2FloatOp<16>{}, a, b, length); } }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const Float16* a, const Float16* b, lib::MaybeStatic length) { return simd::generic_simd_op(L2FloatOp<16>{}, a, b, length); @@ -320,7 +343,7 @@ template struct L2Impl { SVS_VALIDATE_BOOL_ENV(SVS_AVX512_F) SVS_VALIDATE_BOOL_ENV(SVS_AVX2) #if !SVS_AVX512_F && SVS_AVX2 -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const float* a, const float* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -340,7 +363,7 @@ template struct L2Impl { } }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const Float16* a, const Float16* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -362,7 +385,7 @@ template struct L2Impl { } }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const float* a, const Float16* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -383,7 +406,7 @@ template struct L2Impl { } }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const float* a, const int8_t* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -407,7 +430,7 @@ template struct L2Impl { } }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const int8_t* a, const int8_t* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -434,7 +457,7 @@ template struct L2Impl { } }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const uint8_t* a, const uint8_t* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -462,4 +485,32 @@ template struct L2Impl { }; #endif + +// Templates of `float L2::compute<...>(...)`. +// `spec` value is either `extern template` for external linkage +// or `template` for instantiation. +#define SVS_L2_DISTANCE_DYNAMIC_TEMPLATE(spec, uarch, a_type, b_type) \ + spec float L2::compute( \ + a_type const*, b_type const*, size_t \ + ); + +#define SVS_L2_DISTANCE_STATIC_TEMPLATE(spec, uarch, a_type, b_type, length) \ + spec float L2:: \ + compute(a_type const*, b_type const*); + +// NOTE: dispatching doesn't work for other distance instances than the listed below. +#define SVS_L2_DISTANCE_TEMPLATES_BY_MICROARCH(spec, uarch) \ + SVS_DISTANCE_TEMPLATES_BY_MICROARCH(L2, spec, uarch) + +#define SVS_INSTANTIATE_L2_DISTANCE_TEMPLATES_BY_MICROARCH(uarch) \ + SVS_L2_DISTANCE_TEMPLATES_BY_MICROARCH(template, uarch) + +#define SVS_EXTERN_L2_DISTANCE_TEMPLATES_BY_MICROARCH(uarch) \ + SVS_L2_DISTANCE_TEMPLATES_BY_MICROARCH(extern template, uarch) + +// Equal to `foreach(uarch : uarch_list) { extern L2::compute(...) }` +#define SVS_MICROARCH_FUNC(uarch) SVS_EXTERN_L2_DISTANCE_TEMPLATES_BY_MICROARCH(uarch) +SVS_FOR_EACH_MICROARCH +#undef SVS_MICROARCH_FUNC + } // namespace svs::distance diff --git a/include/svs/core/distance/inner_product.h b/include/svs/core/distance/inner_product.h index 2ad51e17..d9842158 100644 --- a/include/svs/core/distance/inner_product.h +++ b/include/svs/core/distance/inner_product.h @@ -19,6 +19,7 @@ // svs #include "svs/core/distance/distance_core.h" #include "svs/core/distance/simd_utils.h" +#include "svs/lib/arch.h" #include "svs/lib/float16.h" #include "svs/lib/preprocessor.h" #include "svs/lib/saveload.h" @@ -32,7 +33,7 @@ namespace svs::distance { // Forward declare implementation to allow entry point to be near the top. -template struct IPImpl; +template struct IPImpl; // Generic Entry Point // Call as one of either: @@ -41,16 +42,16 @@ template struct IPImpl; // (2) IP::compute(a, b) // ``` // Where (2) is when length is known at compile time and (1) is when length is not. -class IP { +template class IP { public: template - static constexpr float compute(const Ea* a, const Eb* b, size_t N) { - return IPImpl::compute(a, b, lib::MaybeStatic(N)); + SVS_NOINLINE static float compute(const Ea* a, const Eb* b, size_t N) { + return IPImpl::compute(a, b, lib::MaybeStatic(N)); } template - static constexpr float compute(const Ea* a, const Eb* b) { - return IPImpl::compute(a, b, lib::MaybeStatic()); + SVS_NOINLINE static float compute(const Ea* a, const Eb* b) { + return IPImpl::compute(a, b, lib::MaybeStatic()); } }; @@ -114,12 +115,34 @@ inline constexpr bool operator==(DistanceIP, DistanceIP) { return true; } /// template float compute(DistanceIP /*unused*/, std::span a, std::span b) { + using namespace svs::arch; assert(a.size() == b.size()); + auto uarch = MicroArchEnvironment::get_instance().get_microarch(); constexpr size_t extent = lib::extract_extent(Da, Db); - if constexpr (extent == Dynamic) { - return IP::compute(a.data(), b.data(), a.size()); + if constexpr (extent == Dynamic || !lib::extent_is_registered(extent)) { + switch (uarch) { +#define SVS_MICROARCH_FUNC(uarch) \ + case MicroArch::uarch: \ + return IP::compute(a.data(), b.data(), a.size()); \ + break; + SVS_FOR_EACH_MICROARCH +#undef SVS_MICROARCH_FUNC + default: + return IP::compute(a.data(), b.data(), a.size()); + break; + } } else { - return IP::compute(a.data(), b.data()); + switch (uarch) { +#define SVS_MICROARCH_FUNC(uarch) \ + case MicroArch::uarch: \ + return IP::compute(a.data(), b.data()); \ + break; + SVS_FOR_EACH_MICROARCH +#undef SVS_MICROARCH_FUNC + default: + return IP::compute(a.data(), b.data()); + break; + } } } @@ -138,7 +161,7 @@ float generic_ip( return result; } -template struct IPImpl { +template struct IPImpl { static float compute(const Ea* a, const Eb* b, lib::MaybeStatic length = lib::MaybeStatic()) { return generic_ip(a, b, length); @@ -207,14 +230,14 @@ template <> struct IPVNNIOp : public svs::simd::ConvertForVNNI struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const int8_t* a, const int8_t* b, lib::MaybeStatic length) { return simd::generic_simd_op(IPVNNIOp(), a, b, length); } }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const uint8_t* a, const uint8_t* b, lib::MaybeStatic length) { return simd::generic_simd_op(IPVNNIOp(), a, b, length); @@ -224,42 +247,42 @@ template struct IPImpl { #endif // Floating and Mixed Types -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const float* a, const float* b, lib::MaybeStatic length) { return svs::simd::generic_simd_op(IPFloatOp<16>{}, a, b, length); } }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const float* a, const uint8_t* b, lib::MaybeStatic length) { return svs::simd::generic_simd_op(IPFloatOp<16>{}, a, b, length); }; }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const float* a, const int8_t* b, lib::MaybeStatic length) { return svs::simd::generic_simd_op(IPFloatOp<16>{}, a, b, length); }; }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const float* a, const Float16* b, lib::MaybeStatic length) { return svs::simd::generic_simd_op(IPFloatOp<16>{}, a, b, length); } }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const Float16* a, const float* b, lib::MaybeStatic length) { return svs::simd::generic_simd_op(IPFloatOp<16>{}, a, b, length); } }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const Float16* a, const Float16* b, lib::MaybeStatic length) { return svs::simd::generic_simd_op(IPFloatOp<16>{}, a, b, length); @@ -274,7 +297,7 @@ template struct IPImpl { SVS_VALIDATE_BOOL_ENV(SVS_AVX512_F) SVS_VALIDATE_BOOL_ENV(SVS_AVX2) #if !SVS_AVX512_F && SVS_AVX2 -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const float* a, const float* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -293,7 +316,7 @@ template struct IPImpl { } }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const Float16* a, const Float16* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -314,7 +337,7 @@ template struct IPImpl { } }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const float* a, const Float16* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -334,7 +357,7 @@ template struct IPImpl { } }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const float* a, const int8_t* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -357,7 +380,7 @@ template struct IPImpl { } }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const int8_t* a, const int8_t* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -383,7 +406,7 @@ template struct IPImpl { } }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const uint8_t* a, const uint8_t* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -410,4 +433,32 @@ template struct IPImpl { }; #endif + +// Templates of `float IP::compute<...>(...)`. +// `spec` value is either `extern template` for external linkage +// or `template` for instantiation. +#define SVS_IP_DISTANCE_DYNAMIC_TEMPLATE(spec, uarch, a_type, b_type) \ + spec float IP::compute( \ + a_type const*, b_type const*, size_t \ + ); + +#define SVS_IP_DISTANCE_STATIC_TEMPLATE(spec, uarch, a_type, b_type, length) \ + spec float IP:: \ + compute(a_type const*, b_type const*); + +// NOTE: dispatching doesn't work for other distance instances than the listed below. +#define SVS_IP_DISTANCE_TEMPLATES_BY_MICROARCH(spec, uarch) \ + SVS_DISTANCE_TEMPLATES_BY_MICROARCH(IP, spec, uarch) + +#define SVS_INSTANTIATE_IP_DISTANCE_TEMPLATES_BY_MICROARCH(uarch) \ + SVS_IP_DISTANCE_TEMPLATES_BY_MICROARCH(template, uarch) + +#define SVS_EXTERN_IP_DISTANCE_TEMPLATES_BY_MICROARCH(uarch) \ + SVS_IP_DISTANCE_TEMPLATES_BY_MICROARCH(extern template, uarch) + +// Equal to `foreach(uarch : uarch_list) { extern IP::compute(...) }` +#define SVS_MICROARCH_FUNC(uarch) SVS_EXTERN_IP_DISTANCE_TEMPLATES_BY_MICROARCH(uarch) +SVS_FOR_EACH_MICROARCH +#undef SVS_MICROARCH_FUNC + } // namespace svs::distance diff --git a/include/svs/lib/arch.h b/include/svs/lib/arch.h new file mode 100644 index 00000000..7ab2107a --- /dev/null +++ b/include/svs/lib/arch.h @@ -0,0 +1,276 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "svs/lib/cpuid.h" +#include "svs/lib/microarch_macros.h" +#include +#include +#include +#include +#include + +namespace svs::arch { + +// Refer to the GCC docs for the list of available uarch targets: +// https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html +// https://gcc.gnu.org/onlinedocs/gcc/AArch64-Options.html +enum class MicroArch { +// Using `foreach (uarch: known_uarch_list)` macro to list all known uarchs +// for uarch support map instead of duplicating the list from microarch_macros.h +#define SVS_MICROARCH_FUNC(uarch) uarch, + SVS_FOR_EACH_KNOWN_MICROARCH +#undef SVS_MICROARCH_FUNC + base = 0, +}; + +struct MicroArchInfo { + std::optional parent; + std::vector extensions; + std::string name; +}; + +// Unordered map with information about each microarchitecture. +inline const std::unordered_map& get_microarch_info_map() { + static const std::unordered_map microarch_info = { +#if defined(__x86_64__) + {MicroArch::nehalem, + {std::nullopt, + {ISAExt::MMX, + ISAExt::SSE, + ISAExt::SSE2, + ISAExt::SSE3, + ISAExt::SSSE3, + ISAExt::SSE4_1, + ISAExt::SSE4_2, + ISAExt::POPCNT, + ISAExt::CX16, + ISAExt::SAHF, + ISAExt::FXSR}, + "nehalem"}}, + {MicroArch::westmere, {MicroArch::nehalem, {ISAExt::PCLMUL}, "westmere"}}, + {MicroArch::sandybridge, + {MicroArch::westmere, {ISAExt::AVX, ISAExt::XSAVE}, "sandybridge"}}, + {MicroArch::ivybridge, + {MicroArch::sandybridge, + {ISAExt::FSGSBASE, ISAExt::RDRND, ISAExt::F16C}, + "ivybridge"}}, + {MicroArch::haswell, + {MicroArch::sandybridge, + {ISAExt::AVX2, + ISAExt::BMI, + ISAExt::BMI2, + ISAExt::LZCNT, + ISAExt::FMA, + ISAExt::MOVBE}, + "haswell"}}, + {MicroArch::broadwell, + {MicroArch::haswell, + {ISAExt::RDSEED, ISAExt::ADCX, ISAExt::PREFETCHW}, + "broadwell"}}, + {MicroArch::skylake, + {MicroArch::broadwell, + {ISAExt::AES, ISAExt::CLFLUSHOPT, ISAExt::XSAVEC, ISAExt::XSAVES}, + "skylake"}}, + {MicroArch::x86_64_v4, + {std::nullopt, + {ISAExt::AVX512_F, + ISAExt::AVX512_VL, + ISAExt::AVX512_BW, + ISAExt::AVX512_DQ, + ISAExt::AVX512_CD}, + "x86_64_v4"}}, + {MicroArch::skylake_avx512, + {MicroArch::skylake, + {ISAExt::AVX512_F, + ISAExt::CLWB, + ISAExt::AVX512_VL, + ISAExt::AVX512_BW, + ISAExt::AVX512_DQ, + ISAExt::AVX512_CD}, + "skylake_avx512"}}, + {MicroArch::cascadelake, + {MicroArch::skylake_avx512, {ISAExt::AVX512_VNNI}, "cascadelake"}}, + {MicroArch::cooperlake, + {MicroArch::cascadelake, {ISAExt::AVX512_BF16}, "cooperlake"}}, + {MicroArch::icelake_client, + {MicroArch::cascadelake, + {ISAExt::PKU, + ISAExt::AVX512_VBMI, + ISAExt::AVX512_IFMA, + ISAExt::SHA, + ISAExt::GFNI, + ISAExt::VAES, + ISAExt::AVX512_VBMI2, + ISAExt::VPCLMULQDQ, + ISAExt::AVX512_BITALG, + ISAExt::RDPID, + ISAExt::AVX512_VPOPCNTDQ}, + "icelake_client"}}, + {MicroArch::icelake_server, + {MicroArch::icelake_client, + {ISAExt::PCONFIG, ISAExt::WBNOINVD, ISAExt::CLWB}, + "icelake_server"}}, + {MicroArch::sapphirerapids, + {MicroArch::icelake_server, + {ISAExt::MOVDIRI, + ISAExt::MOVDIR64B, + ISAExt::ENQCMD, + ISAExt::CLDEMOTE, + ISAExt::PTWRITE, + ISAExt::WAITPKG, + ISAExt::SERIALIZE, + ISAExt::TSXLDTRK, + ISAExt::UINTR, + ISAExt::AMX_BF16, + ISAExt::AMX_TILE, + ISAExt::AMX_INT8, + ISAExt::AVX_VNNI, + ISAExt::AVX512_FP16, + ISAExt::AVX512_BF16}, + "sapphirerapids"}}, + {MicroArch::graniterapids, + {MicroArch::sapphirerapids, + {ISAExt::AMX_FP16, ISAExt::PREFETCHI}, + "graniterapids"}}, + {MicroArch::graniterapids_d, + {MicroArch::graniterapids, {ISAExt::AMX_COMPLEX}, "graniterapids_d"}}, +#elif defined(__aarch64__) +#if defined(__APPLE__) + {MicroArch::m1, {std::nullopt, {ISAExt::M1}, "m1"}}, + {MicroArch::m2, {std::nullopt, {ISAExt::M2}, "m2"}}, +#else + {MicroArch::neoverse_v1, {std::nullopt, {ISAExt::SVE}, "neoverse_v1"}}, + {MicroArch::neoverse_n2, {MicroArch::neoverse_v1, {ISAExt::SVE2}, "neoverse_n2"}}, +#endif +#endif + }; + return microarch_info; +} + +inline bool arch_is_supported(MicroArch arch) { + const auto& info_map = get_microarch_info_map(); + auto it = info_map.find(arch); + if (it == info_map.end()) { + return false; + } + + const auto& info = it->second; + + // First check if parent architecture is supported + if (info.parent.has_value() && !arch_is_supported(info.parent.value())) { + return false; + } + + // Then check additional extensions + return check_extensions(info.extensions); +} + +inline std::string microarch_to_string(MicroArch arch) { + const auto& info_map = get_microarch_info_map(); + auto it = info_map.find(arch); + if (it != info_map.end()) { + return it->second.name; + } + return "unknown"; +} + +inline MicroArch string_to_microarch(const std::string& arch_name) { + const auto& info_map = get_microarch_info_map(); + for (const auto& [arch, info] : info_map) { + if (info.name == arch_name) { + return arch; + } + } + throw std::invalid_argument("Unknown microarchitecture name: " + arch_name); +} + +class MicroArchEnvironment { + public: + static MicroArchEnvironment& get_instance() { + static MicroArchEnvironment instance; + return instance; + } + MicroArch get_microarch() const { return max_arch_; } + + void set_microarch(MicroArch arch) { + if (arch_is_supported(arch)) { + max_arch_ = arch; + } else { + throw std::invalid_argument("Unsupported microarchitecture"); + } + } + + void set_microarch(const std::string& arch) { + set_microarch(string_to_microarch(arch)); + } + + const std::vector& get_supported_microarchs() const { + return supported_archs_; + } + + const std::vector& get_compiled_microarchs() const { + return compiled_archs_; + } + + void describe(std::ostream& out) const { + write_extensions_status(out); + + out << "\nCurrent microarchitecture:\n " << microarch_to_string(max_arch_) + << std::endl; + + output_microarchs(out, "Supported", supported_archs_); + output_microarchs(out, "Compiled", compiled_archs_); + output_microarchs(out, "Known", known_archs_); + } + + private: + MicroArchEnvironment() { +#define SVS_MICROARCH_FUNC(uarch) MicroArch::uarch, + const std::vector known_archs = {SVS_FOR_EACH_KNOWN_MICROARCH}; + const std::vector compiled_archs = {SVS_FOR_EACH_MICROARCH}; +#undef SVS_MICROARCH_FUNC + known_archs_ = known_archs; + compiled_archs_ = compiled_archs; + max_arch_ = MicroArch::base; + for (const auto& arch : compiled_archs_) { + if (arch_is_supported(arch)) { + supported_archs_.push_back(arch); + if (static_cast(arch) > static_cast(max_arch_)) { + max_arch_ = arch; + } + } + } + } + + void output_microarchs( + std::ostream& out, std::string name, const std::vector microarchs + ) const { + out << name << " microarchitectures:\n "; + for (const auto& arch : microarchs) { + out << microarch_to_string(arch) << " "; + } + out << std::endl; + } + + std::vector known_archs_; + std::vector compiled_archs_; + std::vector supported_archs_; + MicroArch max_arch_; +}; + +} // namespace svs::arch diff --git a/include/svs/lib/cpuid.h b/include/svs/lib/cpuid.h new file mode 100644 index 00000000..55dcc9a9 --- /dev/null +++ b/include/svs/lib/cpuid.h @@ -0,0 +1,332 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#if defined(__x86_64__) +#include +#endif + +#if defined(__aarch64__) && defined(__APPLE__) +#include +#endif + +namespace svs::arch { + +#if defined(__x86_64__) + +enum class ISAExt { + // Common extensions + MMX, + SSE, + SSE2, + SSE3, + SSSE3, + SSE4_1, + SSE4_2, + POPCNT, + CX16, + SAHF, + FXSR, + AVX, + XSAVE, + PCLMUL, + FSGSBASE, + RDRND, + F16C, + AVX2, + BMI, + BMI2, + LZCNT, + FMA, + MOVBE, + RDSEED, + ADCX, + PREFETCHW, + AES, + CLFLUSHOPT, + XSAVEC, + XSAVES, + SGX, + CLWB, + PKU, + SHA, + GFNI, + VAES, + VPCLMULQDQ, + RDPID, + PCONFIG, + WBNOINVD, + MOVDIRI, + MOVDIR64B, + ENQCMD, + CLDEMOTE, + PTWRITE, + WAITPKG, + SERIALIZE, + TSXLDTRK, + UINTR, + PREFETCHI, + + // AVX family + AVX_VNNI, + + // AVX512 family + AVX512_F, + AVX512_VL, + AVX512_BW, + AVX512_DQ, + AVX512_CD, + AVX512_VBMI, + AVX512_IFMA, + AVX512_VNNI, + AVX512_VBMI2, + AVX512_BITALG, + AVX512_VPOPCNTDQ, + AVX512_BF16, + AVX512_FP16, + + // AMX family + AMX_BF16, + AMX_TILE, + AMX_INT8, + AMX_FP16, + AMX_COMPLEX +}; + +struct CPUIDFlag { + const uint32_t function; // EAX input for CPUID + const uint32_t subfunction; // ECX input for CPUID + const uint32_t reg; // Register index (0=EAX, 1=EBX, 2=ECX, 3=EDX) + const uint32_t bit; // Bit position in the register + const char* name; + + bool get_value() const { + std::array regs{}; + __cpuid_count(function, subfunction, regs[0], regs[1], regs[2], regs[3]); + return (regs[reg] & (1 << bit)) != 0; + } +}; + +inline const std::unordered_map& get_isa_ext_info() { + static const std::unordered_map isa_ext_info = { + // flags are sorted by function, subfunction, register and bit + {ISAExt::MMX, {1, 0, 3, 23, "MMX"}}, + {ISAExt::FXSR, {1, 0, 3, 24, "FXSR"}}, + {ISAExt::SSE, {1, 0, 3, 25, "SSE"}}, + {ISAExt::SSE2, {1, 0, 3, 26, "SSE2"}}, + {ISAExt::SSE3, {1, 0, 2, 0, "SSE3"}}, + {ISAExt::PCLMUL, {1, 0, 2, 1, "PCLMUL"}}, + {ISAExt::SSSE3, {1, 0, 2, 9, "SSSE3"}}, + {ISAExt::FMA, {1, 0, 2, 12, "FMA"}}, + {ISAExt::CX16, {1, 0, 2, 13, "CX16"}}, + {ISAExt::SSE4_1, {1, 0, 2, 19, "SSE4_1"}}, + {ISAExt::SSE4_2, {1, 0, 2, 20, "SSE4_2"}}, + {ISAExt::MOVBE, {1, 0, 2, 22, "MOVBE"}}, + {ISAExt::POPCNT, {1, 0, 2, 23, "POPCNT"}}, + {ISAExt::AES, {1, 0, 2, 25, "AES"}}, + {ISAExt::XSAVE, {1, 0, 2, 26, "XSAVE"}}, + {ISAExt::AVX, {1, 0, 2, 28, "AVX"}}, + {ISAExt::F16C, {1, 0, 2, 29, "F16C"}}, + {ISAExt::RDRND, {1, 0, 2, 30, "RDRND"}}, + {ISAExt::FSGSBASE, {7, 0, 1, 0, "FSGSBASE"}}, + {ISAExt::SGX, {7, 0, 1, 2, "SGX"}}, + {ISAExt::BMI, {7, 0, 1, 3, "BMI"}}, + {ISAExt::AVX2, {7, 0, 1, 5, "AVX2"}}, + {ISAExt::BMI2, {7, 0, 1, 8, "BMI2"}}, + {ISAExt::AVX512_F, {7, 0, 1, 16, "AVX512_F"}}, + {ISAExt::AVX512_DQ, {7, 0, 1, 17, "AVX512_DQ"}}, + {ISAExt::RDSEED, {7, 0, 1, 18, "RDSEED"}}, + {ISAExt::ADCX, {7, 0, 1, 19, "ADCX"}}, + {ISAExt::AVX512_IFMA, {7, 0, 1, 21, "AVX512_IFMA"}}, + {ISAExt::CLFLUSHOPT, {7, 0, 1, 23, "CLFLUSHOPT"}}, + {ISAExt::CLWB, {7, 0, 1, 24, "CLWB"}}, + {ISAExt::AVX512_CD, {7, 0, 1, 28, "AVX512_CD"}}, + {ISAExt::SHA, {7, 0, 1, 29, "SHA"}}, + {ISAExt::AVX512_BW, {7, 0, 1, 30, "AVX512_BW"}}, + {ISAExt::AVX512_VL, {7, 0, 1, 31, "AVX512_VL"}}, + {ISAExt::AVX512_VBMI, {7, 0, 2, 1, "AVX512_VBMI"}}, + {ISAExt::PKU, {7, 0, 2, 3, "PKU"}}, + {ISAExt::WAITPKG, {7, 0, 2, 5, "WAITPKG"}}, + {ISAExt::AVX512_VBMI2, {7, 0, 2, 6, "AVX512_VBMI2"}}, + {ISAExt::GFNI, {7, 0, 2, 8, "GFNI"}}, + {ISAExt::VAES, {7, 0, 2, 9, "VAES"}}, + {ISAExt::VPCLMULQDQ, {7, 0, 2, 10, "VPCLMULQDQ"}}, + {ISAExt::AVX512_VNNI, {7, 0, 2, 11, "AVX512_VNNI"}}, + {ISAExt::AVX512_BITALG, {7, 0, 2, 12, "AVX512_BITALG"}}, + {ISAExt::AVX512_VPOPCNTDQ, {7, 0, 2, 14, "AVX512_VPOPCNTDQ"}}, + {ISAExt::RDPID, {7, 0, 2, 22, "RDPID"}}, + {ISAExt::CLDEMOTE, {7, 0, 2, 25, "CLDEMOTE"}}, + {ISAExt::MOVDIRI, {7, 0, 2, 27, "MOVDIRI"}}, + {ISAExt::MOVDIR64B, {7, 0, 2, 28, "MOVDIR64B"}}, + {ISAExt::ENQCMD, {7, 0, 2, 29, "ENQCMD"}}, + {ISAExt::UINTR, {7, 0, 3, 5, "UINTR"}}, + {ISAExt::SERIALIZE, {7, 0, 3, 14, "SERIALIZE"}}, + {ISAExt::TSXLDTRK, {7, 0, 3, 16, "TSXLDTRK"}}, + {ISAExt::PCONFIG, {7, 0, 3, 18, "PCONFIG"}}, + {ISAExt::AMX_BF16, {7, 0, 3, 22, "AMX_BF16"}}, + {ISAExt::AVX512_FP16, {7, 0, 3, 23, "AVX512_FP16"}}, + {ISAExt::AMX_TILE, {7, 0, 3, 24, "AMX_TILE"}}, + {ISAExt::AMX_INT8, {7, 0, 3, 25, "AMX_INT8"}}, + {ISAExt::AVX_VNNI, {7, 1, 0, 4, "AVX_VNNI"}}, + {ISAExt::AVX512_BF16, {7, 1, 0, 5, "AVX512_BF16"}}, + {ISAExt::AMX_FP16, {7, 1, 0, 21, "AMX_FP16"}}, + {ISAExt::AMX_COMPLEX, {7, 1, 3, 8, "AMX_COMPLEX"}}, + {ISAExt::PREFETCHI, {7, 1, 3, 14, "PREFETCHI"}}, + {ISAExt::XSAVEC, {0xD, 1, 0, 1, "XSAVEC"}}, + {ISAExt::XSAVES, {0xD, 1, 0, 3, "XSAVES"}}, + {ISAExt::PTWRITE, {0x14, 0, 1, 4, "PTWRITE"}}, + {ISAExt::WBNOINVD, {0x80000008, 0, 1, 9, "WBNOINVD"}}, + {ISAExt::SAHF, {0x80000001, 0, 2, 0, "SAHF"}}, + {ISAExt::LZCNT, {0x80000001, 0, 2, 5, "LZCNT"}}, + {ISAExt::PREFETCHW, {0x80000001, 0, 2, 8, "PREFETCHW"}}, + }; + return isa_ext_info; +} + +#elif defined(__aarch64__) + +#if defined(__APPLE__) + +enum class ISAExt { + M1, + M2, +}; + +struct BrandInfo { + const char* name; + + bool get_value() const { + char buffer[256]; + size_t size = sizeof(buffer); + + if (sysctlbyname("machdep.cpu.brand_string", &buffer, &size, nullptr, 0) == 0) { + std::string brand(buffer); + return brand.find(name) != std::string::npos; + } + + return false; + } +}; + +inline const std::unordered_map& get_isa_ext_info() { + static const std::unordered_map isa_ext_info = { + {ISAExt::M1, {"M1"}}, + {ISAExt::M2, {"M2"}}, + }; + return isa_ext_info; +} + +#else + +enum class ISAExt { + SVE, + SVE2, +}; + +// Define register ID values for ARM features detection +#define ID_AA64PFR0_EL1 0 +#define ID_AA64ZFR0_EL1 1 + +// Helper template to read system registers with mrs instruction +template inline uint64_t read_system_reg() { + uint64_t val; + if constexpr (ID == ID_AA64PFR0_EL1) { + asm("mrs %0, id_aa64pfr0_el1" : "=r"(val)); + } else if constexpr (ID == ID_AA64ZFR0_EL1) { + asm("mrs %0, id_aa64zfr0_el1" : "=r"(val)); + } else { + val = 0; + } + return val; +} + +// Extract bits from register value +inline uint64_t extract_bits(uint64_t val, int pos, int len) { + return (val >> pos) & ((1ULL << len) - 1); +} + +struct MSRFlag { + unsigned int reg_id; // System register ID + int bit_pos; // Bit position in the register + int bit_len; // Number of bits to check + uint64_t expected_val; // Expected value for feature to be present + const char* name; // Feature name + + bool get_value() const { + uint64_t reg_val = 0; + + try { + switch (reg_id) { + case ID_AA64PFR0_EL1: + reg_val = read_system_reg(); + break; + case ID_AA64ZFR0_EL1: + if (extract_bits(read_system_reg(), 32, 4) != 0) { + reg_val = read_system_reg(); + } + break; + default: + return false; + } + + return extract_bits(reg_val, bit_pos, bit_len) == expected_val; + } catch (...) { + // If reading the register fails, the feature is not supported + return false; + } + } +}; + +inline const std::unordered_map& get_isa_ext_info() { + static const std::unordered_map isa_ext_info = { + {ISAExt::SVE, {ID_AA64PFR0_EL1, 32, 4, 1, "sve"}}, + {ISAExt::SVE2, {ID_AA64ZFR0_EL1, 0, 4, 1, "sve2"}}, + }; + return isa_ext_info; +} + +#endif +#endif + +inline bool check_extension(ISAExt ext) { return get_isa_ext_info().at(ext).get_value(); } + +inline bool check_extensions(std::vector exts) { + for (const auto& ext : exts) { + if (!check_extension(ext)) { + return false; + } + } + return true; +} + +inline void write_extensions_status(std::ostream& out) { + const auto& ext_info = get_isa_ext_info(); + out << "CPU Extensions Status:" << std::endl; + for (const auto& [ext, info] : ext_info) { + out << " " << info.name << ": " + << (check_extension(ext) ? "Supported" : "Not supported") << std::endl; + } +} + +} // namespace svs::arch diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ca435d08..c84641ff 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -196,9 +196,9 @@ set(DATA_DIRECTORY "${PROJECT_SOURCE_DIR}/data") # Communicate to the test application where it can find test data sets. target_compile_definitions(tests PRIVATE SVS_TEST_DATA_DIR="${DATA_DIRECTORY}") -target_link_libraries(tests PRIVATE ${SVS_LIB}) +target_link_libraries(tests PRIVATE svs::svs) target_link_libraries( - tests PRIVATE svs_compile_options svs_native_options svs_benchmark_library + tests PRIVATE svs_compile_options svs_microarch_options_base svs_benchmark_library ) target_link_libraries(tests PRIVATE Catch2::Catch2WithMain) @@ -210,3 +210,4 @@ include(CTest) include(Catch) catch_discover_tests(tests) +svs_register_static_dimensions(87 100 128 160 223) diff --git a/tests/svs/core/distances/cosine.cpp b/tests/svs/core/distances/cosine.cpp index 8c68ebc1..2eef3b38 100644 --- a/tests/svs/core/distances/cosine.cpp +++ b/tests/svs/core/distances/cosine.cpp @@ -82,15 +82,40 @@ void test_types(T lo, T hi, size_t num_tests) { .epsilon(COSINE_EPSILON) .margin(COSINE_MARGIN); - // Statically Sized Computation + auto as_span = [](auto v) { return std::span{v.data(), v.size()}; }; + auto& arch_env = svs::arch::MicroArchEnvironment::get_instance(); + auto supported_archs = arch_env.get_supported_microarchs(); + + // Check the distance computation for all supported architectures. + for (auto arch : supported_archs) { + arch_env.set_microarch(arch); + auto dist_type = svs::distance::DistanceCosineSimilarity{}; + dist_type.fix_argument(as_span(a)); + auto dist = svs::distance::compute(dist_type, as_span(a), as_span(b)); + CATCH_INFO("Testing architecture: " << svs::arch::microarch_to_string(arch)); + CATCH_REQUIRE(dist == expected); + } + + // Checking statically and dynamically sized computation requires a direct + // call to the compute function. We pick any MicroArch, since all available + // are already tested above. auto a_norm = svs::distance::norm(std::span{a.data(), a.size()}); - CATCH_REQUIRE( - (svs::distance::CosineSimilarity::compute(a.data(), b.data(), a_norm) == - expected) - ); - // Dynamically Sized Computation - auto dist = svs::distance::CosineSimilarity::compute(a.data(), b.data(), a_norm, N); - CATCH_REQUIRE((dist == expected)); + { + // Statically Sized Computation + auto dist = + svs::distance::CosineSimilarity::compute( + a.data(), b.data(), a_norm + ); + CATCH_REQUIRE(dist == expected); + } + { + // Dynamically Sized Computation + auto dist = + svs::distance::CosineSimilarity::compute( + a.data(), b.data(), a_norm, N + ); + CATCH_REQUIRE(dist == expected); + } } } } // anonymous namespace diff --git a/tests/svs/core/distances/distance_euclidean.cpp b/tests/svs/core/distances/distance_euclidean.cpp index 88c23fe4..ef144b23 100644 --- a/tests/svs/core/distances/distance_euclidean.cpp +++ b/tests/svs/core/distances/distance_euclidean.cpp @@ -67,10 +67,36 @@ void test_types(T lo, T hi, size_t num_tests) { svs_test::populate(b, generator_b, N); auto expected = Catch::Approx(euclidean_reference(a, b)); - // Statically Sized Computation - CATCH_REQUIRE((svs::distance::L2::compute(a.data(), b.data()) == expected)); - // Dynamically Sized Computation - CATCH_REQUIRE((svs::distance::L2::compute(a.data(), b.data(), N) == expected)); + auto as_span = [](auto v) { return std::span{v.data(), v.size()}; }; + auto& arch_env = svs::arch::MicroArchEnvironment::get_instance(); + auto supported_archs = arch_env.get_supported_microarchs(); + + // Check the distance computation for all supported architectures. + for (auto arch : supported_archs) { + arch_env.set_microarch(arch); + auto dist_type = svs::distance::DistanceL2{}; + auto dist = svs::distance::compute(dist_type, as_span(a), as_span(b)); + CATCH_INFO("Testing architecture: " << svs::arch::microarch_to_string(arch)); + CATCH_REQUIRE(dist == expected); + } + + // Checking statically and dynamically sized computation requires a direct + // call to the compute function. We pick any MicroArch, since all available + // are already tested above. + { + // Statically Sized Computation + auto dist = svs::distance::L2::compute( + a.data(), b.data() + ); + CATCH_REQUIRE(dist == expected); + } + { + // Dynamically Sized Computation + auto dist = svs::distance::L2::compute( + a.data(), b.data(), N + ); + CATCH_REQUIRE(dist == expected); + } } } } // namespace diff --git a/tests/svs/core/distances/inner_product.cpp b/tests/svs/core/distances/inner_product.cpp index a074a058..adaa623b 100644 --- a/tests/svs/core/distances/inner_product.cpp +++ b/tests/svs/core/distances/inner_product.cpp @@ -75,10 +75,36 @@ void test_types(T lo, T hi, size_t num_tests) { .epsilon(INNERPRODUCT_EPSILON) .margin(INNERPRODUCT_MARGIN); - // Statically Sized Computation - CATCH_REQUIRE((svs::distance::IP::compute(a.data(), b.data()) == expected)); - // Dynamically Sized Computation - CATCH_REQUIRE((svs::distance::IP::compute(a.data(), b.data(), N) == expected)); + auto as_span = [](auto v) { return std::span{v.data(), v.size()}; }; + auto& arch_env = svs::arch::MicroArchEnvironment::get_instance(); + auto supported_archs = arch_env.get_supported_microarchs(); + + // Check the distance computation for all supported architectures. + for (auto arch : supported_archs) { + arch_env.set_microarch(arch); + auto dist_type = svs::distance::DistanceIP{}; + auto dist = svs::distance::compute(dist_type, as_span(a), as_span(b)); + CATCH_INFO("Testing architecture: " << svs::arch::microarch_to_string(arch)); + CATCH_REQUIRE(dist == expected); + } + + // Checking statically and dynamically sized computation requires a direct + // call to the compute function. We pick any MicroArch, since all available + // are already tested above. + { + // Statically Sized Computation + auto dist = svs::distance::IP::compute( + a.data(), b.data() + ); + CATCH_REQUIRE(dist == expected); + } + { + // Dynamically Sized Computation + auto dist = svs::distance::IP::compute( + a.data(), b.data(), N + ); + CATCH_REQUIRE(dist == expected); + } } } } // anonymous namespace diff --git a/tests/svs/quantization/scalar/scalar.cpp b/tests/svs/quantization/scalar/scalar.cpp index 6893bb53..a81052d4 100644 --- a/tests/svs/quantization/scalar/scalar.cpp +++ b/tests/svs/quantization/scalar/scalar.cpp @@ -141,16 +141,23 @@ template void test_distance_single(T l // A buffer into which we decompress the int8 values std::vector rhs(N); - for (size_t i = 0; i < num_tests; ++i) { - auto datum = compressed.get_datum(i); - std::transform(datum.begin(), datum.end(), rhs.begin(), decompress); - auto rhs_span = std::span(rhs); - float reference = svs::distance::compute(distance, query.get_datum(0), rhs_span); - auto expected = Catch::Approx(reference).epsilon(0.01).margin(0.01); - - // Calculate compressed distance and compare with reference - float result = compressed_distance.compute(compressed.get_datum(i)); - CATCH_REQUIRE(result == expected); + auto& arch_env = svs::arch::MicroArchEnvironment::get_instance(); + auto supported_archs = arch_env.get_supported_microarchs(); + + for (auto arch : supported_archs) { + for (size_t i = 0; i < num_tests; ++i) { + arch_env.set_microarch(arch); + auto datum = compressed.get_datum(i); + std::transform(datum.begin(), datum.end(), rhs.begin(), decompress); + auto rhs_span = std::span(rhs); + float reference = + svs::distance::compute(distance, query.get_datum(0), rhs_span); + auto expected = Catch::Approx(reference).epsilon(0.01).margin(0.01); + + // Calculate compressed distance and compare with reference + float result = compressed_distance.compute(compressed.get_datum(i)); + CATCH_REQUIRE(result == expected); + } } } diff --git a/tools/validate_microarch_symbols.py b/tools/validate_microarch_symbols.py new file mode 100644 index 00000000..ee91d311 --- /dev/null +++ b/tools/validate_microarch_symbols.py @@ -0,0 +1,106 @@ +# Copyright 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import subprocess +import sys + + +def build_parser(): + parser = argparse.ArgumentParser( + description="Validation script to check all microarch-specific " \ + "symbols in base object files are undefined." + ) + parser.add_argument( + "directories", nargs="+", + help="Directories to recursively search for .cpp.o files" + ) + return parser + + +def find_cpp_o_files(directory): + cpp_o_files = [] + + for root, dirs, files in os.walk(directory): + # Skip directories with microarch-specific object-files + dirs[:] = [d for d in dirs if 'microarch' not in d.lower()] + + for file in files: + if file.endswith('.cpp.o'): + cpp_o_files.append(os.path.join(root, file)) + + return cpp_o_files + + +def get_microarch_symbols(obj_file): + """Extract all defined microarch-specific symbols.""" + try: + # Use nm to get the symbols from the object file + result = subprocess.run(["nm", "-C", obj_file], capture_output=True, text=True) + + if result.returncode != 0: + print(f"Error processing {obj_file}: {result.stderr}") + return [] + + lines = result.stdout.splitlines() + microarch_symbols = [] + + for line in lines: + if len(line.split()) > 2: + _, symbol_type, symbol = line.split(maxsplit=2) + if ("<(svs::arch::MicroArch" in symbol and + symbol_type in ('T', 't', 'W', 'w') and + "<(svs::arch::MicroArch)0" not in symbol): + microarch_symbols.append((obj_file, symbol)) + + return microarch_symbols + + except Exception as e: + print(f"Error processing {obj_file}: {str(e)}") + return [] + + +def main(): + args = build_parser().parse_args() + + cpp_o_files = [] + for directory in args.directories: + dir_files = find_cpp_o_files(directory) + print(f"Found {len(dir_files)} .cpp.o files in {directory}") + cpp_o_files.extend(dir_files) + + print(f"Found a total of {len(cpp_o_files)} .cpp.o files in all directories") + + if not cpp_o_files: + print("No .cpp.o files found in any of the provided directories") + return + + all_symbols = [] + for obj_file in cpp_o_files: + symbols = get_microarch_symbols(obj_file) + all_symbols.extend(symbols) + + if all_symbols: + print(f"Found {len(all_symbols)} defined MicroArch symbols:") + for obj_file, symbol in all_symbols: + print(f"{obj_file}: {symbol}") + sys.exit(1) + else: + print("No defined MicroArch symbols found") + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 85c6b316..ce94e05a 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -19,10 +19,10 @@ function(create_utility exe file) PRIVATE ${CMAKE_CURRENT_LIST_DIR} ) # Get header directory and C++ 20 - target_link_libraries(${exe} PRIVATE ${SVS_LIB}) + target_link_libraries(${exe} PRIVATE svs::svs) # Get common compiler options with the unit tests. - target_link_libraries(${exe} PRIVATE svs_compile_options svs_native_options) + target_link_libraries(${exe} PRIVATE svs_compile_options svs_microarch_options_base) # Link with third-party executables. target_link_libraries(${exe} PRIVATE fmt::fmt) @@ -53,3 +53,5 @@ create_utility(logging logging.cpp) # # Benchmark # create_utility(benchmark_index_build benchmarks/index_build.cpp) + +svs_register_static_dimensions(96)