From 14d10feb44ab1edd75853517406ab65fd9211ea5 Mon Sep 17 00:00:00 2001 From: Nikolay Bogoychev Date: Thu, 14 Sep 2023 19:59:01 +0000 Subject: [PATCH 01/21] Add ruy submodule --- .gitmodules | 3 +++ src/3rd_party/ruy | 1 + 2 files changed, 4 insertions(+) create mode 160000 src/3rd_party/ruy diff --git a/.gitmodules b/.gitmodules index a1a876d8b..07791f94f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -20,3 +20,6 @@ [submodule "src/3rd_party/simple-websocket-server"] path = src/3rd_party/simple-websocket-server url = https://github.com/marian-nmt/Simple-WebSocket-Server +[submodule "src/3rd_party/ruy"] + path = src/3rd_party/ruy + url = https://github.com/google/ruy.git diff --git a/src/3rd_party/ruy b/src/3rd_party/ruy new file mode 160000 index 000000000..c04e5e52a --- /dev/null +++ b/src/3rd_party/ruy @@ -0,0 +1 @@ +Subproject commit c04e5e52ae6b144f74ac032652e3c538bda15c9b From 300a8e9b3e0be338459d5bdb60d9a17eb39a99b9 Mon Sep 17 00:00:00 2001 From: Nikolay Bogoychev Date: Thu, 14 Sep 2023 22:12:45 +0000 Subject: [PATCH 02/21] Initial bits --- CMakeLists.txt | 25 +++++- src/3rd_party/CMakeLists.txt | 12 ++- src/tensors/cpu/integer_common.h | 14 ++-- src/tensors/cpu/prod.cpp | 8 -- src/tensors/cpu/prod_blas.h | 135 +++++++++++++++++++++++++++++-- 5 files changed, 173 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c674e68d..eb51bb8ec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,6 +80,29 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "Release") endif() +# ARM bits +if(${CMAKE_TARGET_ARCHITECTURE_CODE} MATCHES "arm") + # Define that we are using ARM + add_compile_definitions(ARM) + set(ARM ON) + option(USE_RUY "Use Ruy" ON) # For 8 bit code, later on + set(EXT_LIBS ${EXT_LIBS} ruy) + + # Apple M1 has Apple Accelerate. 
Otherwise fallback to RUY + if(APPLE) + option(USE_RUY_SGEMM "Compile with Ruy SGEMM" OFF) + else(APPLE) + option(USE_RUY_SGEMM "Compile with Ruy SGEMM" ON) + endif(APPLE) + + set(USE_SIMD_UTILS ON) + + # Some warnings as errors. I don't feel comfortable about the strict aliasing. + set(ARM_WARNINGS "-fno-strict-aliasing -Wno-comment") + +endif(${CMAKE_TARGET_ARCHITECTURE_CODE} MATCHES "arm") + + ############################################################################### # Set compilation flags if(MSVC) @@ -221,7 +244,7 @@ else(MSVC) # Clang-10.0.0 complains when CUDA is newer than 10.1 set(CLANG_IGNORE_UNKNOWN_CUDA "-Wno-unknown-warning-option -Wno-unknown-cuda-version") endif() - set(DISABLE_GLOBALLY "-Wno-unused-result ${CLANG_IGNORE_UNKNOWN_CUDA}") + set(DISABLE_GLOBALLY "-Wno-unused-result ${CLANG_IGNORE_UNKNOWN_CUDA} ${ARM_WARNINGS}") # These are used in src/CMakeLists.txt on a per-target basis list(APPEND ALL_WARNINGS -Wall; -Werror; -Wextra; -Wno-unused-result; -Wno-deprecated; diff --git a/src/3rd_party/CMakeLists.txt b/src/3rd_party/CMakeLists.txt index 838951c50..5fc2c285f 100644 --- a/src/3rd_party/CMakeLists.txt +++ b/src/3rd_party/CMakeLists.txt @@ -9,10 +9,20 @@ add_subdirectory(./faiss) include_directories(./faiss) if(COMPILE_CPU) - if(NOT GENERATE_MARIAN_INSTALL_TARGETS) + if((NOT ARM) AND (NOT GENERATE_MARIAN_INSTALL_TARGETS)) set(INTGEMM_DONT_BUILD_TESTS ON CACHE BOOL "Disable intgemm tests") add_subdirectory(./intgemm) endif() + + if(USE_RUY) + set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE BOOL " " FORCE) + set(CPUINFO_BUILD_BENCHMARKS OFF CACHE BOOL " " FORCE) + set(CPUINFO_BUILD_PKG_CONFIG OFF CACHE BOOL " " FORCE) + set(CPUINFO_BUILD_UNIT_TESTS OFF CACHE BOOL " " FORCE) + set(CPUINFO_BUILD_TOOLS OFF CACHE BOOL " " FORCE) + add_subdirectory(ruy/third_party/cpuinfo EXCLUDE_FROM_ALL) + add_subdirectory(ruy EXCLUDE_FROM_ALL) + endif(USE_RUY) endif(COMPILE_CPU) if(USE_FBGEMM) diff --git a/src/tensors/cpu/integer_common.h 
b/src/tensors/cpu/integer_common.h index f4e632b5c..8a00a7870 100644 --- a/src/tensors/cpu/integer_common.h +++ b/src/tensors/cpu/integer_common.h @@ -5,7 +5,7 @@ #include "tensors/cpu/aligned.h" #include "common/io_item.h" -#if COMPILE_CPU +#if COMPILE_CPU && !defined(ARM) #include "3rd_party/intgemm/intgemm/intgemm.h" #else namespace intgemm { @@ -31,10 +31,12 @@ namespace intgemm { } #endif +#ifndef ARM #include #include #include #include +#endif #include #include @@ -98,7 +100,7 @@ template <> struct intgemm_ { template static inline float& getQuantMult(marian::Tensor val) { -#if COMPILE_CPU +#if COMPILE_CPU && !defined(ARM) ABORT_IF(!isIntgemm(val->type()), "getQuantMult does not work for type {}", val->type()); typedef typename intgemm_::type Integer; return *(reinterpret_cast(val->data() + val->shape().elements())); @@ -109,7 +111,7 @@ static inline float& getQuantMult(marian::Tensor val) { } static inline Type getIntgemmType(Type vtype) { -#if COMPILE_CPU +#if COMPILE_CPU && !defined(ARM) if (vtype == Type::intgemm8) { if (intgemm::kCPU == intgemm::CPUType::AVX512VNNI) { return Type::intgemm8avx512vnni; @@ -142,7 +144,7 @@ static inline Type getIntgemmType(Type vtype) { } static inline bool passOrAbort(Type vtype) { -#if COMPILE_CPU +#if COMPILE_CPU && !defined(ARM) if (vtype == Type::intgemm8 || vtype == Type::intgemm16) { return true; } else if (vtype == Type::intgemm16sse2) { @@ -166,7 +168,7 @@ static inline bool passOrAbort(Type vtype) { template static inline float computeQuantMult(marian::Tensor val) { -#if COMPILE_CPU +#if COMPILE_CPU && !defined(ARM) if(sizeOf(vtype) == 1) return 127.0f / intgemm::MaxAbsolute(val->data(), val->data() + val->shape().elements()); else if(sizeOf(vtype) == 2) @@ -186,7 +188,7 @@ void AddBias(marian::Tensor C, const marian::Tensor Bias); // in our binary format. 
Then we copy the quantizationMultiplier information at the end template void prepareAndTransposeB(io::Item& item, const char * input) { -#if COMPILE_CPU +#if COMPILE_CPU && !defined(ARM) typedef typename intgemm_::type Integer; Integer * output_tensor = reinterpret_cast(&(*item.bytes.begin())); // Sometimes we will end up with misaligned intput (and output) so we can't use them directly. diff --git a/src/tensors/cpu/prod.cpp b/src/tensors/cpu/prod.cpp index 8fcca924b..639027d05 100755 --- a/src/tensors/cpu/prod.cpp +++ b/src/tensors/cpu/prod.cpp @@ -7,14 +7,6 @@ #include "tensors/tensor.h" #include "tensors/tensor_allocator.h" -#if MKL_FOUND -#include -#else -#if BLAS_FOUND -#include -#endif -#endif - #include "integer_common.h" #include "prod_blas.h" diff --git a/src/tensors/cpu/prod_blas.h b/src/tensors/cpu/prod_blas.h index a591fdd26..85234c05b 100644 --- a/src/tensors/cpu/prod_blas.h +++ b/src/tensors/cpu/prod_blas.h @@ -1,11 +1,122 @@ +#pragma once #if MKL_FOUND -#include -#else -#if BLAS_FOUND -#include -#endif + #include +#elif BLAS_FOUND + #include +#elif USE_RUY_SGEMM +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcomment" + #include "ruy/ruy.h" + #include "ruy/system_aligned_alloc.h" +#pragma GCC diagnostic pop #endif +#if USE_RUY_SGEMM +// AlignedVector allocates aligned memory and cleans up after itself. RAII +// wrapper similar to intgemm's AlignedVector. 
+template +class AlignedVector { +public: + AlignedVector(size_t num_elem) + : size_(num_elem), + storage_(reinterpret_cast(ruy::detail::SystemAlignedAlloc(sizeof(T) * num_elem))) {} + + T *begin() { return storage_; } + T *data() { return storage_; } + size_t size() const { return size_; } + size_t memSize() const { return sizeof(T) * size_; } + + // Forbid copy + AlignedVector(const AlignedVector &) = delete; + AlignedVector &operator=(const AlignedVector &) = delete; + + ~AlignedVector() { ruy::detail::SystemAlignedFree(reinterpret_cast(storage_)); } + +private: + size_t size_; + T *storage_; +}; + + +inline void GemmRuy(const bool transA, + const bool transB, + const int M, + const int N, + const int K, + const float alpha, + const float *A, + const int lda, + const float *B, + const int ldb, + const float beta, + float *C, + const int ldc) { + ruy::Context context; + + // If we need to transpose, we can swap dimensions in the layout and claim + // the matrix is just column-major. Set the ordering to get the transpose. + const auto orderA = (transA ? ruy::Order::kColMajor : ruy::Order::kRowMajor); + const auto orderB = (transB ? ruy::Order::kColMajor : ruy::Order::kRowMajor); + + ruy::Matrix lhs; + ruy::MakeSimpleLayout(M, K, orderA, lhs.mutable_layout()); + lhs.set_data(A); + + ruy::Matrix rhs; + ruy::MakeSimpleLayout(K, N, orderB, rhs.mutable_layout()); + rhs.set_data(B); + + ruy::Matrix dst; + ruy::MakeSimpleLayout(M, N, ruy::Order::kRowMajor, dst.mutable_layout()); + + if(beta == 0) { + // For beta = 0, we want to avoid the additional allocation. This covers a + // large share of our inference use-cases. sgemm is called with a nonzero + // `beta` for accumulating gradients in backpropagation; `beta` is 0.0 + // during inference. + + dst.set_data(C); + ruy::MulParams mul_params; + ruy::Mul(lhs, rhs, mul_params, &context, &dst); + + if(alpha != 1.0) { + // Write out C as C = alpha * [op(A) * op(B)] + beta * C + // Can we expect the compiler to autovectorize this? 
+ // TODO: Come back and explicitly use SIMD. + const size_t size = M * N; + const float *opA_opB = C; // Alias. + for(size_t i = 0; i < size; i++) { + C[i] = alpha * opA_opB[i]; + } + } + + } else { + // @jerinphilip has not yet been able to find a ruy primitive that does in + // place addition to obtain full gemm. + // + // Safe bet is to make an additional allocation to store the result of + // multiply and use the existing values in C. + // + // See also: https://github.com/google/ruy/issues/307 + + AlignedVector intermediate(M * N); + dst.set_data(intermediate.data()); + ruy::MulParams mul_params; + ruy::Mul(lhs, rhs, mul_params, &context, &dst); + + // Write out C as C = alpha * [op(A) * op(B)] + beta * C + // Can we expect the compiler to autovectorize this? + // TODO: Come back and explicitly use SIMD. + const size_t size = M * N; + const float *opA_opB = intermediate.data(); + for(size_t i = 0; i < size; i++) { + C[i] = alpha * opA_opB[i] + beta * C[i]; + } + } +} + +#endif // RUY_SGEMM + inline void sgemm(bool transA, bool transB, int rows_a, @@ -34,6 +145,20 @@ inline void sgemm(bool transA, beta, c, ldc); +#elif USE_RUY_SGEMM + GemmRuy(transA, + transB, + rows_a, + rows_b, + width, + alpha, + a, + lda, + b, + ldb, + beta, + c, + ldc); #else transA; transB; rows_a; rows_b; width; alpha; a; lda; b; ldb; beta; c; ldc; // make compiler happy ABORT("Marian must be compiled with a BLAS library"); From 8f8fa562bcdddc5b8c86f0b73693bbef7e5a99ac Mon Sep 17 00:00:00 2001 From: Nikolay Bogoychev Date: Fri, 15 Sep 2023 15:40:28 +0000 Subject: [PATCH 03/21] Add simd utils --- .gitmodules | 3 +++ src/3rd_party/CMakeLists.txt | 2 +- src/3rd_party/simd_utils | 1 + src/tensors/cpu/prod_blas.h | 7 +------ 4 files changed, 6 insertions(+), 7 deletions(-) create mode 160000 src/3rd_party/simd_utils diff --git a/.gitmodules b/.gitmodules index 07791f94f..37b125076 100644 --- a/.gitmodules +++ b/.gitmodules @@ -23,3 +23,6 @@ [submodule "src/3rd_party/ruy"] path = 
src/3rd_party/ruy url = https://github.com/google/ruy.git +[submodule "src/3rd_party/simd_utils"] + path = src/3rd_party/simd_utils + url = https://github.com/JishinMaster/simd_utils.git diff --git a/src/3rd_party/CMakeLists.txt b/src/3rd_party/CMakeLists.txt index 5fc2c285f..1ab9a53b2 100644 --- a/src/3rd_party/CMakeLists.txt +++ b/src/3rd_party/CMakeLists.txt @@ -9,7 +9,7 @@ add_subdirectory(./faiss) include_directories(./faiss) if(COMPILE_CPU) - if((NOT ARM) AND (NOT GENERATE_MARIAN_INSTALL_TARGETS)) + if((NOT ${CMAKE_TARGET_ARCHITECTURE_CODE} MATCHES "arm") AND (NOT GENERATE_MARIAN_INSTALL_TARGETS)) set(INTGEMM_DONT_BUILD_TESTS ON CACHE BOOL "Disable intgemm tests") add_subdirectory(./intgemm) endif() diff --git a/src/3rd_party/simd_utils b/src/3rd_party/simd_utils new file mode 160000 index 000000000..c092ef9dd --- /dev/null +++ b/src/3rd_party/simd_utils @@ -0,0 +1 @@ +Subproject commit c092ef9dd406cd9b9d54da1ff30cc86c39b4c0a5 diff --git a/src/tensors/cpu/prod_blas.h b/src/tensors/cpu/prod_blas.h index 85234c05b..a281aa7bf 100644 --- a/src/tensors/cpu/prod_blas.h +++ b/src/tensors/cpu/prod_blas.h @@ -91,12 +91,7 @@ inline void GemmRuy(const bool transA, } } else { - // @jerinphilip has not yet been able to find a ruy primitive that does in - // place addition to obtain full gemm. - // - // Safe bet is to make an additional allocation to store the result of - // multiply and use the existing values in C. 
- // + // No multiply-add in Ruy // See also: https://github.com/google/ruy/issues/307 AlignedVector intermediate(M * N); From 70ab9c6db53fcd7010a9f92c8497f21dfe241526 Mon Sep 17 00:00:00 2001 From: Nikolay Bogoychev Date: Fri, 15 Sep 2023 16:30:18 +0000 Subject: [PATCH 04/21] First bits working --- CMakeLists.txt | 27 +++- cmake/TargetArch.cmake | 142 ++++++++++++++++++++ src/3rd_party/faiss/VectorTransform.cpp | 4 + src/3rd_party/simd_utils | 2 +- src/common/types.h | 6 +- src/functional/operators.h | 5 +- src/tensors/cpu/expression_graph_packable.h | 2 +- src/tensors/cpu/fbgemm/packed_gemm.cpp | 8 +- src/tensors/cpu/intgemm_interface.h | 4 +- 9 files changed, 189 insertions(+), 11 deletions(-) create mode 100644 cmake/TargetArch.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index eb51bb8ec..dca8e69d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,6 +6,22 @@ if (POLICY CMP0074) endif () project(marian CXX C) + +######### ARCH DETECTION ######### +# Architecture detection +include(TargetArch) + +target_architecture(CMAKE_TARGET_ARCHITECTURES) +list(LENGTH CMAKE_TARGET_ARCHITECTURES cmake_target_arch_len) +if(NOT "${cmake_target_arch_len}" STREQUAL "1") + set(CMAKE_TARGET_ARCHITECTURE_UNIVERSAL TRUE) + set(CMAKE_TARGET_ARCHITECTURE_CODE "universal") +else() + set(CMAKE_TARGET_ARCHITECTURE_UNIVERSAL FALSE) + set(CMAKE_TARGET_ARCHITECTURE_CODE "${CMAKE_TARGET_ARCHITECTURES}") +endif() +######### ARCH DETECTION ######### + set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(BUILD_ARCH native CACHE STRING "Compile for this CPU architecture.") @@ -100,6 +116,15 @@ if(${CMAKE_TARGET_ARCHITECTURE_CODE} MATCHES "arm") # Some warnings as errors. I don't feel comfortable about the strict aliasing. set(ARM_WARNINGS "-fno-strict-aliasing -Wno-comment") + set(USE_SIMD_UTILS ON) + # @TODO this assumes ArmV8. 
We should also look at armv7 + add_compile_definitions(ARM FMA SSE) #added for ARM + if(MSVC) + add_compile_options(/flax-vector-conversions) + else(MSVC) + add_compile_options(-flax-vector-conversions) + endif(MSVC) + endif(${CMAKE_TARGET_ARCHITECTURE_CODE} MATCHES "arm") @@ -534,7 +559,7 @@ endif() ############################################################################### # Find BLAS library if(COMPILE_CPU) - if(NOT GENERATE_MARIAN_INSTALL_TARGETS) + if(NOT GENERATE_MARIAN_INSTALL_TARGETS AND NOT ARM) set(EXT_LIBS ${EXT_LIBS} intgemm) # Enable intgemm when compiling CPU add_definitions(-DCOMPILE_CPU=1) endif() diff --git a/cmake/TargetArch.cmake b/cmake/TargetArch.cmake new file mode 100644 index 000000000..6e0bb3953 --- /dev/null +++ b/cmake/TargetArch.cmake @@ -0,0 +1,142 @@ +# Modified from https://github.com/axr/solar-cmake/blob/73cfea0db0284c5e2010aca23989046e5bda95c9/Solar.cmake +# Based on the Qt 5 processor detection code, so should be very accurate +# https://qt.gitorious.org/qt/qtbase/blobs/master/src/corelib/global/qprocessordetection.h +# Currently handles arm (v5, v6, v7), x86 (32/64), ia64, and ppc (32/64) + +# Regarding POWER/PowerPC, just as is noted in the Qt source, +# "There are many more known variants/revisions that we do not handle/detect." 
+ +set(archdetect_c_code " +#if defined(__arm__) || defined(__TARGET_ARCH_ARM) || defined(_M_ARM) || defined(_M_ARM64) || defined(__aarch64__) || defined(__ARM64__) + #if defined(__ARM_ARCH_8__) || defined(__ARM_ARCH_8) \\ + || defined(__ARM_ARCH_8A__) || defined(__ARM_ARCH_8A) \\ + || defined(__ARM_ARCH_8R__) || defined(__ARM_ARCH_8R) \\ + || defined(__ARM_ARCH_8M__) || defined(__ARM_ARCH_8M) \\ + || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM-0 >= 8) + #error cmake_ARCH armv8 + #elif defined(__ARM_ARCH_7__) \\ + || defined(__ARM_ARCH_7A__) \\ + || defined(__ARM_ARCH_7R__) \\ + || defined(__ARM_ARCH_7M__) \\ + || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM-0 >= 7) + #error cmake_ARCH armv7 + #elif defined(__ARM_ARCH_6__) \\ + || defined(__ARM_ARCH_6J__) \\ + || defined(__ARM_ARCH_6T2__) \\ + || defined(__ARM_ARCH_6Z__) \\ + || defined(__ARM_ARCH_6K__) \\ + || defined(__ARM_ARCH_6ZK__) \\ + || defined(__ARM_ARCH_6M__) \\ + || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM-0 >= 6) + #error cmake_ARCH armv6 + #elif defined(__ARM_ARCH_5TEJ__) \\ + || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM-0 >= 5) + #error cmake_ARCH armv5 + #else + #error cmake_ARCH arm + #endif +#elif defined(__i386) || defined(__i386__) || defined(_M_IX86) + #error cmake_ARCH i386 +#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64) + #error cmake_ARCH x86_64 +#elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64) + #error cmake_ARCH ia64 +#elif defined(__ppc__) || defined(__ppc) || defined(__powerpc__) \\ + || defined(_ARCH_COM) || defined(_ARCH_PWR) || defined(_ARCH_PPC) \\ + || defined(_M_MPPC) || defined(_M_PPC) + #if defined(__ppc64__) || defined(__powerpc64__) || defined(__64BIT__) + #error cmake_ARCH ppc64 + #else + #error cmake_ARCH ppc + #endif +#endif + +#error cmake_ARCH unknown +") + + +# Set ppc_support to TRUE before including this file or ppc and ppc64 +# will be treated as invalid architectures since they are no longer 
supported by Apple + +function(target_architecture output_var) + if(APPLE AND CMAKE_OSX_ARCHITECTURES) + # On OS X we use CMAKE_OSX_ARCHITECTURES *if* it was set + # First let's normalize the order of the values + + # Note that it's not possible to compile PowerPC applications if you are using + # the OS X SDK version 10.6 or later - you'll need 10.4/10.5 for that, so we + # disable it by default + # See this page for more information: + # http://stackoverflow.com/questions/5333490/how-can-we-restore-ppc-ppc64-as-well-as-full-10-4-10-5-sdk-support-to-xcode-4 + + # Architecture defaults to i386 or ppc on OS X 10.5 and earlier, depending on the CPU type detected at runtime. + # On OS X 10.6+ the default is x86_64 if the CPU supports it, i386 otherwise. + + foreach(osx_arch ${CMAKE_OSX_ARCHITECTURES}) + if("${osx_arch}" STREQUAL "ppc" AND ppc_support) + set(osx_arch_ppc TRUE) + elseif("${osx_arch}" STREQUAL "i386") + set(osx_arch_i386 TRUE) + elseif("${osx_arch}" STREQUAL "x86_64") + set(osx_arch_x86_64 TRUE) + elseif("${osx_arch}" STREQUAL "ppc64" AND ppc_support) + set(osx_arch_ppc64 TRUE) + else() + message(FATAL_ERROR "Invalid OS X arch name: ${osx_arch}") + endif() + endforeach() + + # Now add all the architectures in our normalized order + if(osx_arch_ppc) + list(APPEND ARCH ppc) + endif() + + if(osx_arch_i386) + list(APPEND ARCH i386) + endif() + + if(osx_arch_x86_64) + list(APPEND ARCH x86_64) + endif() + + if(osx_arch_ppc64) + list(APPEND ARCH ppc64) + endif() + else() + file(WRITE "${CMAKE_BINARY_DIR}/arch.c" "${archdetect_c_code}") + + enable_language(C) + + # Detect the architecture in a rather creative way... + # This compiles a small C program which is a series of ifdefs that selects a + # particular #error preprocessor directive whose message string contains the + # target architecture. 
The program will always fail to compile (both because + # file is not a valid C program, and obviously because of the presence of the + # #error preprocessor directives... but by exploiting the preprocessor in this + # way, we can detect the correct target architecture even when cross-compiling, + # since the program itself never needs to be run (only the compiler/preprocessor) + try_run( + run_result_unused + compile_result_unused + "${CMAKE_BINARY_DIR}" + "${CMAKE_BINARY_DIR}/arch.c" + COMPILE_OUTPUT_VARIABLE ARCH + CMAKE_FLAGS CMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} + ) + + # Parse the architecture name from the compiler output + string(REGEX MATCH "cmake_ARCH ([a-zA-Z0-9_]+)" ARCH "${ARCH}") + + # Get rid of the value marker leaving just the architecture name + string(REPLACE "cmake_ARCH " "" ARCH "${ARCH}") + + # If we are compiling with an unknown architecture this variable should + # already be set to "unknown" but in the case that it's empty (i.e. due + # to a typo in the code), then set it to unknown + if (NOT ARCH) + set(ARCH unknown) + endif() + endif() + + set(${output_var} "${ARCH}" PARENT_SCOPE) +endfunction() \ No newline at end of file diff --git a/src/3rd_party/faiss/VectorTransform.cpp b/src/3rd_party/faiss/VectorTransform.cpp index 103b0910e..c6bf9d4a1 100644 --- a/src/3rd_party/faiss/VectorTransform.cpp +++ b/src/3rd_party/faiss/VectorTransform.cpp @@ -19,6 +19,10 @@ using namespace faiss; +#ifdef ARM +#include "3rd_party/simd_utils/simd_utils.h" +#endif + extern "C" { diff --git a/src/3rd_party/simd_utils b/src/3rd_party/simd_utils index c092ef9dd..696036258 160000 --- a/src/3rd_party/simd_utils +++ b/src/3rd_party/simd_utils @@ -1 +1 @@ -Subproject commit c092ef9dd406cd9b9d54da1ff30cc86c39b4c0a5 +Subproject commit 6960362584481c977cdae9f6a8f7061a37c766cb diff --git a/src/common/types.h b/src/common/types.h index a0930a0f8..763edb09b 100644 --- a/src/common/types.h +++ b/src/common/types.h @@ -17,7 +17,11 @@ #include #ifndef 
__CUDACC__ // NVCC is very unreliable when it comes to CPU intrinsics, we hide them completely from NVCC-compiled code -#include + #ifndef ARM + #include + #else + #include "3rd_party/simd_utils/simd_utils.h" + #endif #endif #ifdef __CUDACC__ // nvcc is compiling this code diff --git a/src/functional/operators.h b/src/functional/operators.h index 3628fdcb9..6ecc02bd8 100644 --- a/src/functional/operators.h +++ b/src/functional/operators.h @@ -217,8 +217,11 @@ struct Ops { // __CUDACC__ is defined when compiling with NVCC regardless of device type // __CUDA_ARCH__ is defined when compiling device (GPU) code #ifndef __CUDACC__ - +#ifndef ARM #include "3rd_party/sse_mathfun.h" +#else +#include "3rd_party/simd_utils/simd_utils.h" // @TODO this might be dependent on NEON +#endif namespace marian { namespace functional { diff --git a/src/tensors/cpu/expression_graph_packable.h b/src/tensors/cpu/expression_graph_packable.h index 1a233372c..4af69fac9 100644 --- a/src/tensors/cpu/expression_graph_packable.h +++ b/src/tensors/cpu/expression_graph_packable.h @@ -152,7 +152,7 @@ class ExpressionGraphPackable : public ExpressionGraph { #endif } else if (isIntgemm(gemmElementType) && (pName.find("_W") == pName.length() - 3 || pName.find("_W") == pName.length() - 2 /* || pName.find("Wemb") != std::string::npos*/)) { -#if COMPILE_CPU +#if COMPILE_CPU && !defined(ARM) using cpu::integer::cols; using cpu::integer::rows; auto allocator = New(getBackend()); diff --git a/src/tensors/cpu/fbgemm/packed_gemm.cpp b/src/tensors/cpu/fbgemm/packed_gemm.cpp index dd81d0f7f..23ed559f1 100644 --- a/src/tensors/cpu/fbgemm/packed_gemm.cpp +++ b/src/tensors/cpu/fbgemm/packed_gemm.cpp @@ -2,16 +2,16 @@ #include "tensors/tensor_allocator.h" #include "tensors/tensor_operators.h" -#include -#include -#include -#include #include #include #include //#include #if USE_FBGEMM +#include +#include +#include +#include #ifdef _MSC_VER #pragma warning(disable: 4505) // 'fbgemmAlignedAlloc' in fbgemm.h: 
unreferenced local function has been removed (missing 'static inline') #pragma warning(disable: 4251) // 'fbgemm::CompressedSparseColumn::colptr_': class 'std::vector>' needs to have dll-interface to be used by clients of class 'fbgemm::CompressedSparseColumn' diff --git a/src/tensors/cpu/intgemm_interface.h b/src/tensors/cpu/intgemm_interface.h index 88408aa18..80784e0f6 100644 --- a/src/tensors/cpu/intgemm_interface.h +++ b/src/tensors/cpu/intgemm_interface.h @@ -9,7 +9,7 @@ namespace marian { namespace cpu { namespace integer { -#if COMPILE_CPU +#if COMPILE_CPU && !defined(ARM) /* * Prepare an activation matrix into intgemm8/16 format. For now the activation matrix is just quantized. * Expr input: The input tensor @@ -45,7 +45,7 @@ static inline Expr prepareA(Expr a) { */ template static inline Expr affineOrDotTyped(Expr a, Expr bQuant, Expr bias, bool transA, bool /*transB*/, float scale) { -#if COMPILE_CPU +#if COMPILE_CPU && !defined(ARM) ABORT_IF(!isFloat(a->value_type()), "Intgemm expects type of A to be float32 not {}", a->value_type()); ABORT_IF(!isIntgemm(bQuant->value_type()), "Intgemm expects type of B to be a variant of intgemm not {}", bQuant->value_type()); From 3ac5facd45934c472561ad733953df625707e561 Mon Sep 17 00:00:00 2001 From: Nikolay Bogoychev Date: Fri, 15 Sep 2023 16:50:46 +0000 Subject: [PATCH 05/21] Remove m64 --- CMakeLists.txt | 8 ++++---- src/3rd_party/CMakeLists.txt | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index dca8e69d9..71ae4cac1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -289,9 +289,9 @@ else(MSVC) endif(CMAKE_COMPILER_IS_GNUCC) set(CMAKE_CXX_FLAGS "-std=c++11 -pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}") - set(CMAKE_CXX_FLAGS_RELEASE "-O3 -m64 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}") + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}") set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g 
${CMAKE_RDYNAMIC_FLAG}") - set(CMAKE_CXX_FLAGS_SLIM "-O3 -m64 -funroll-loops -DNDEBUG") + set(CMAKE_CXX_FLAGS_SLIM "-O3 -funroll-loops -DNDEBUG") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELEASE}") set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE} -pg") set(CMAKE_CXX_FLAGS_PROFGEN "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-generate -fprofile-correction") @@ -299,9 +299,9 @@ else(MSVC) # these need to be set separately set(CMAKE_C_FLAGS "-pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}") - set(CMAKE_C_FLAGS_RELEASE "-O3 -m64 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}") + set(CMAKE_C_FLAGS_RELEASE "-O3 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}") set(CMAKE_C_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}") - set(CMAKE_C_FLAGS_SLIM "-O3 -m64 -funroll-loops -DNDEBUG") + set(CMAKE_C_FLAGS_SLIM "-O3 -funroll-loops -DNDEBUG") set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELEASE}") set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg") set(CMAKE_C_FLAGS_PROFGEN "${CMAKE_C_FLAGS_RELEASE} -fprofile-generate -fprofile-correction") diff --git a/src/3rd_party/CMakeLists.txt b/src/3rd_party/CMakeLists.txt index 1ab9a53b2..2cec739ba 100644 --- a/src/3rd_party/CMakeLists.txt +++ b/src/3rd_party/CMakeLists.txt @@ -9,7 +9,7 @@ add_subdirectory(./faiss) include_directories(./faiss) if(COMPILE_CPU) - if((NOT ${CMAKE_TARGET_ARCHITECTURE_CODE} MATCHES "arm") AND (NOT GENERATE_MARIAN_INSTALL_TARGETS)) + if(NOT GENERATE_MARIAN_INSTALL_TARGETS AND NOT ARM) set(INTGEMM_DONT_BUILD_TESTS ON CACHE BOOL "Disable intgemm tests") add_subdirectory(./intgemm) endif() From 0128e5d5c356ba6ef07c556790388cffe3ae91ef Mon Sep 17 00:00:00 2001 From: Varun Mathur Date: Tue, 24 Oct 2023 17:47:48 -0700 Subject: [PATCH 06/21] check in changes --- CMakeLists.txt | 59 +- cmake/TargetArch.cmake | 142 ---- cmake/ios.toolchain.cmake | 1099 +++++++++++++++++++++++++ src/3rd_party/CMakeLists.txt | 4 +- src/3rd_party/faiss/VectorTransform.h | 2 + 5 files changed, 
1129 insertions(+), 177 deletions(-) delete mode 100644 cmake/TargetArch.cmake create mode 100644 cmake/ios.toolchain.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 71ae4cac1..98125bcbe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,21 +7,6 @@ endif () project(marian CXX C) -######### ARCH DETECTION ######### -# Architecture detection -include(TargetArch) - -target_architecture(CMAKE_TARGET_ARCHITECTURES) -list(LENGTH CMAKE_TARGET_ARCHITECTURES cmake_target_arch_len) -if(NOT "${cmake_target_arch_len}" STREQUAL "1") - set(CMAKE_TARGET_ARCHITECTURE_UNIVERSAL TRUE) - set(CMAKE_TARGET_ARCHITECTURE_CODE "universal") -else() - set(CMAKE_TARGET_ARCHITECTURE_UNIVERSAL FALSE) - set(CMAKE_TARGET_ARCHITECTURE_CODE "${CMAKE_TARGET_ARCHITECTURES}") -endif() -######### ARCH DETECTION ######### - set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(BUILD_ARCH native CACHE STRING "Compile for this CPU architecture.") @@ -96,37 +81,42 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "Release") endif() -# ARM bits -if(${CMAKE_TARGET_ARCHITECTURE_CODE} MATCHES "arm") - # Define that we are using ARM - add_compile_definitions(ARM) +# iOS support +if(CMAKE_SYSTEM_NAME STREQUAL "iOS" ) set(ARM ON) - option(USE_RUY "Use Ruy" ON) # For 8 bit code, later on - set(EXT_LIBS ${EXT_LIBS} ruy) + # need to ignore this warning for Xcode to be happy + list(APPEND ALL_WARNINGS -Wno-shorten-64-to-32;) +endif() - # Apple M1 has Apple Accelerate. Otherwise fallback to RUY +# ARM support +if(ARM) + # Apple by default has Apple Accelerate. Otherwise fallback to RUY for GEMM if(APPLE) + message(STATUS "Using Apple Accelerate SGEMM") option(USE_RUY_SGEMM "Compile with Ruy SGEMM" OFF) else(APPLE) + message(STATUS "Using Ruy SGEMM") + set(EXT_LIBS ${EXT_LIBS} ruy) option(USE_RUY_SGEMM "Compile with Ruy SGEMM" ON) endif(APPLE) + # Define that we are using ARM + add_compile_definitions(ARM FMA SSE) set(USE_SIMD_UTILS ON) # Some warnings as errors. 
I don't feel comfortable about the strict aliasing. set(ARM_WARNINGS "-fno-strict-aliasing -Wno-comment") - set(USE_SIMD_UTILS ON) # @TODO this assumes ArmV8. We should also look at armv7 - add_compile_definitions(ARM FMA SSE) #added for ARM if(MSVC) add_compile_options(/flax-vector-conversions) else(MSVC) add_compile_options(-flax-vector-conversions) endif(MSVC) +endif(ARM) -endif(${CMAKE_TARGET_ARCHITECTURE_CODE} MATCHES "arm") - +#if(SDK_NAME EQUAL "iphoneos" OR SDK_NAME EQUAL "iphonesimulator") +#endif(SDK_NAME) ############################################################################### # Set compilation flags @@ -187,13 +177,16 @@ else(MSVC) set(INTRINSICS "") list(APPEND INTRINSICS_NVCC) - option(COMPILE_SSE2 "Compile CPU code with SSE2 support" ON) - option(COMPILE_SSE3 "Compile CPU code with SSE3 support" ON) - option(COMPILE_SSE4_1 "Compile CPU code with SSE4.1 support" ON) - option(COMPILE_SSE4_2 "Compile CPU code with SSE4.2 support" ON) - option(COMPILE_AVX "Compile CPU code with AVX support" ON) - option(COMPILE_AVX2 "Compile CPU code with AVX2 support" ON) - option(COMPILE_AVX512 "Compile CPU code with AVX512 support" ON) + if(NOT ARM) + # none of these options are available on ARM + option(COMPILE_SSE2 "Compile CPU code with SSE2 support" ON) + option(COMPILE_SSE3 "Compile CPU code with SSE3 support" ON) + option(COMPILE_SSE4_1 "Compile CPU code with SSE4.1 support" ON) + option(COMPILE_SSE4_2 "Compile CPU code with SSE4.2 support" ON) + option(COMPILE_AVX "Compile CPU code with AVX support" ON) + option(COMPILE_AVX2 "Compile CPU code with AVX2 support" ON) + option(COMPILE_AVX512 "Compile CPU code with AVX512 support" ON) + endif(NOT ARM) if(BUILD_ARCH STREQUAL "native") message(STATUS "Building with -march=native and intrinsics will be chosen automatically by the compiler to match the current machine.") diff --git a/cmake/TargetArch.cmake b/cmake/TargetArch.cmake deleted file mode 100644 index 6e0bb3953..000000000 --- a/cmake/TargetArch.cmake 
+++ /dev/null @@ -1,142 +0,0 @@ -# Modified from https://github.com/axr/solar-cmake/blob/73cfea0db0284c5e2010aca23989046e5bda95c9/Solar.cmake -# Based on the Qt 5 processor detection code, so should be very accurate -# https://qt.gitorious.org/qt/qtbase/blobs/master/src/corelib/global/qprocessordetection.h -# Currently handles arm (v5, v6, v7), x86 (32/64), ia64, and ppc (32/64) - -# Regarding POWER/PowerPC, just as is noted in the Qt source, -# "There are many more known variants/revisions that we do not handle/detect." - -set(archdetect_c_code " -#if defined(__arm__) || defined(__TARGET_ARCH_ARM) || defined(_M_ARM) || defined(_M_ARM64) || defined(__aarch64__) || defined(__ARM64__) - #if defined(__ARM_ARCH_8__) || defined(__ARM_ARCH_8) \\ - || defined(__ARM_ARCH_8A__) || defined(__ARM_ARCH_8A) \\ - || defined(__ARM_ARCH_8R__) || defined(__ARM_ARCH_8R) \\ - || defined(__ARM_ARCH_8M__) || defined(__ARM_ARCH_8M) \\ - || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM-0 >= 8) - #error cmake_ARCH armv8 - #elif defined(__ARM_ARCH_7__) \\ - || defined(__ARM_ARCH_7A__) \\ - || defined(__ARM_ARCH_7R__) \\ - || defined(__ARM_ARCH_7M__) \\ - || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM-0 >= 7) - #error cmake_ARCH armv7 - #elif defined(__ARM_ARCH_6__) \\ - || defined(__ARM_ARCH_6J__) \\ - || defined(__ARM_ARCH_6T2__) \\ - || defined(__ARM_ARCH_6Z__) \\ - || defined(__ARM_ARCH_6K__) \\ - || defined(__ARM_ARCH_6ZK__) \\ - || defined(__ARM_ARCH_6M__) \\ - || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM-0 >= 6) - #error cmake_ARCH armv6 - #elif defined(__ARM_ARCH_5TEJ__) \\ - || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM-0 >= 5) - #error cmake_ARCH armv5 - #else - #error cmake_ARCH arm - #endif -#elif defined(__i386) || defined(__i386__) || defined(_M_IX86) - #error cmake_ARCH i386 -#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64) - #error cmake_ARCH x86_64 -#elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64) - 
#error cmake_ARCH ia64 -#elif defined(__ppc__) || defined(__ppc) || defined(__powerpc__) \\ - || defined(_ARCH_COM) || defined(_ARCH_PWR) || defined(_ARCH_PPC) \\ - || defined(_M_MPPC) || defined(_M_PPC) - #if defined(__ppc64__) || defined(__powerpc64__) || defined(__64BIT__) - #error cmake_ARCH ppc64 - #else - #error cmake_ARCH ppc - #endif -#endif - -#error cmake_ARCH unknown -") - - -# Set ppc_support to TRUE before including this file or ppc and ppc64 -# will be treated as invalid architectures since they are no longer supported by Apple - -function(target_architecture output_var) - if(APPLE AND CMAKE_OSX_ARCHITECTURES) - # On OS X we use CMAKE_OSX_ARCHITECTURES *if* it was set - # First let's normalize the order of the values - - # Note that it's not possible to compile PowerPC applications if you are using - # the OS X SDK version 10.6 or later - you'll need 10.4/10.5 for that, so we - # disable it by default - # See this page for more information: - # http://stackoverflow.com/questions/5333490/how-can-we-restore-ppc-ppc64-as-well-as-full-10-4-10-5-sdk-support-to-xcode-4 - - # Architecture defaults to i386 or ppc on OS X 10.5 and earlier, depending on the CPU type detected at runtime. - # On OS X 10.6+ the default is x86_64 if the CPU supports it, i386 otherwise. 
- - foreach(osx_arch ${CMAKE_OSX_ARCHITECTURES}) - if("${osx_arch}" STREQUAL "ppc" AND ppc_support) - set(osx_arch_ppc TRUE) - elseif("${osx_arch}" STREQUAL "i386") - set(osx_arch_i386 TRUE) - elseif("${osx_arch}" STREQUAL "x86_64") - set(osx_arch_x86_64 TRUE) - elseif("${osx_arch}" STREQUAL "ppc64" AND ppc_support) - set(osx_arch_ppc64 TRUE) - else() - message(FATAL_ERROR "Invalid OS X arch name: ${osx_arch}") - endif() - endforeach() - - # Now add all the architectures in our normalized order - if(osx_arch_ppc) - list(APPEND ARCH ppc) - endif() - - if(osx_arch_i386) - list(APPEND ARCH i386) - endif() - - if(osx_arch_x86_64) - list(APPEND ARCH x86_64) - endif() - - if(osx_arch_ppc64) - list(APPEND ARCH ppc64) - endif() - else() - file(WRITE "${CMAKE_BINARY_DIR}/arch.c" "${archdetect_c_code}") - - enable_language(C) - - # Detect the architecture in a rather creative way... - # This compiles a small C program which is a series of ifdefs that selects a - # particular #error preprocessor directive whose message string contains the - # target architecture. The program will always fail to compile (both because - # file is not a valid C program, and obviously because of the presence of the - # #error preprocessor directives... 
but by exploiting the preprocessor in this - # way, we can detect the correct target architecture even when cross-compiling, - # since the program itself never needs to be run (only the compiler/preprocessor) - try_run( - run_result_unused - compile_result_unused - "${CMAKE_BINARY_DIR}" - "${CMAKE_BINARY_DIR}/arch.c" - COMPILE_OUTPUT_VARIABLE ARCH - CMAKE_FLAGS CMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} - ) - - # Parse the architecture name from the compiler output - string(REGEX MATCH "cmake_ARCH ([a-zA-Z0-9_]+)" ARCH "${ARCH}") - - # Get rid of the value marker leaving just the architecture name - string(REPLACE "cmake_ARCH " "" ARCH "${ARCH}") - - # If we are compiling with an unknown architecture this variable should - # already be set to "unknown" but in the case that it's empty (i.e. due - # to a typo in the code), then set it to unknown - if (NOT ARCH) - set(ARCH unknown) - endif() - endif() - - set(${output_var} "${ARCH}" PARENT_SCOPE) -endfunction() \ No newline at end of file diff --git a/cmake/ios.toolchain.cmake b/cmake/ios.toolchain.cmake new file mode 100644 index 000000000..2131172fd --- /dev/null +++ b/cmake/ios.toolchain.cmake @@ -0,0 +1,1099 @@ +# This file is part of the ios-cmake project. It was retrieved from +# https://github.com/leetal/ios-cmake.git, which is a fork of +# https://github.com/gerstrong/ios-cmake.git, which is a fork of +# https://github.com/cristeab/ios-cmake.git, which is a fork of +# https://code.google.com/p/ios-cmake/. Which in turn is based off of +# the Platform/Darwin.cmake and Platform/UnixPaths.cmake files which +# are included with CMake 2.8.4 +# +# The ios-cmake project is licensed under the new BSD license. +# +# Copyright (c) 2014, Bogdan Cristea and LTE Engineering Software, +# Kitware, Inc., Insight Software Consortium. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# This file is based on the Platform/Darwin.cmake and +# Platform/UnixPaths.cmake files which are included with CMake 2.8.4 +# It has been altered for iOS development. 
+# +# Updated by Alex Stewart (alexs.mac@gmail.com) +# +# ***************************************************************************** +# Now maintained by Alexander Widerberg (widerbergaren [at] gmail.com) +# under the BSD-3-Clause license +# https://github.com/leetal/ios-cmake +# ***************************************************************************** +# +# INFORMATION / HELP +# +############################################################################### +# OPTIONS # +############################################################################### +# +# PLATFORM: (default "OS64") +# OS = Build for iPhoneOS. +# OS64 = Build for arm64 iphoneOS. +# OS64COMBINED = Build for arm64 x86_64 iphoneOS + iphoneOS Simulator. Combined into FAT STATIC lib (only supported on 3.14+ of CMake with "-G Xcode" argument in combination with the "cmake --install" CMake build step) +# SIMULATOR = Build for x86 i386 iphoneOS Simulator. +# SIMULATOR64 = Build for x86_64 iphoneOS Simulator. +# SIMULATORARM64 = Build for arm64 iphoneOS Simulator. +# SIMULATOR64COMBINED = Build for arm64 x86_64 iphoneOS Simulator. Combined into FAT STATIC lib (supported on 3.14+ of CMake with "-G Xcode" argument ONLY) +# TVOS = Build for arm64 tvOS. +# TVOSCOMBINED = Build for arm64 x86_64 tvOS + tvOS Simulator. Combined into FAT STATIC lib (only supported on 3.14+ of CMake with "-G Xcode" argument in combination with the "cmake --install" CMake build step) +# SIMULATOR_TVOS = Build for x86_64 tvOS Simulator. +# SIMULATORARM64_TVOS = Build for arm64 tvOS Simulator. +# WATCHOS = Build for armv7k arm64_32 for watchOS. +# WATCHOSCOMBINED = Build for armv7k arm64_32 x86_64 watchOS + watchOS Simulator. Combined into FAT STATIC lib (only supported on 3.14+ of CMake with "-G Xcode" argument in combination with the "cmake --install" CMake build step) +# SIMULATOR_WATCHOS = Build for x86_64 for watchOS Simulator. +# MAC = Build for x86_64 macOS. +# MAC_ARM64 = Build for Apple Silicon macOS.
+# MAC_UNIVERSAL = Combined build for x86_64 and Apple Silicon on macOS. +# MAC_CATALYST = Build for x86_64 macOS with Catalyst support (iOS toolchain on macOS). +# Note: The build argument "MACOSX_DEPLOYMENT_TARGET" can be used to control min-version of macOS +# MAC_CATALYST_ARM64 = Build for Apple Silicon macOS with Catalyst support (iOS toolchain on macOS). +# Note: The build argument "MACOSX_DEPLOYMENT_TARGET" can be used to control min-version of macOS +# +# CMAKE_OSX_SYSROOT: Path to the SDK to use. By default this is +# automatically determined from PLATFORM and xcodebuild, but +# can also be manually specified (although this should not be required). +# +# CMAKE_DEVELOPER_ROOT: Path to the Developer directory for the platform +# being compiled for. By default, this is automatically determined from +# CMAKE_OSX_SYSROOT, but can also be manually specified (although this should +# not be required). +# +# DEPLOYMENT_TARGET: Minimum SDK version to target. Default 4.0 on watchOS and 11.0 on tvOS+iOS +# +# NAMED_LANGUAGE_SUPPORT: +# ON (default) = Will require "enable_language(OBJC) and/or enable_language(OBJCXX)" for full OBJC|OBJCXX support +# OFF = Will embed the OBJC and OBJCXX flags into the CMAKE_C_FLAGS and CMAKE_CXX_FLAGS (legacy behavior, CMake version < 3.16) +# +# ENABLE_BITCODE: (ON|OFF) Enables or disables bitcode support. Default OFF +# +# ENABLE_ARC: (ON|OFF) Enables or disables ARC support. Default ON (ARC enabled by default) +# +# ENABLE_VISIBILITY: (ON|OFF) Enables or disables symbol visibility support. Default OFF (visibility hidden by default) +# +# ENABLE_STRICT_TRY_COMPILE: (ON|OFF) Enables or disables strict try_compile() on all Check* directives (will run linker +# to actually check if linking is possible).
Default OFF (will set CMAKE_TRY_COMPILE_TARGET_TYPE to STATIC_LIBRARY) +# +# ARCHS: (armv7 armv7s armv7k arm64 arm64_32 i386 x86_64) If specified, will override the default architectures for the given PLATFORM +# OS = armv7 armv7s arm64 (if applicable) +# OS64 = arm64 (if applicable) +# SIMULATOR = i386 +# SIMULATOR64 = x86_64 +# SIMULATORARM64 = arm64 +# TVOS = arm64 +# SIMULATOR_TVOS = x86_64 (i386 has since long been deprecated) +# SIMULATORARM64_TVOS = arm64 +# WATCHOS = armv7k arm64_32 (if applicable) +# SIMULATOR_WATCHOS = x86_64 (i386 has since long been deprecated) +# MAC = x86_64 +# MAC_ARM64 = arm64 +# MAC_UNIVERSAL = x86_64 arm64 +# MAC_CATALYST = x86_64 +# MAC_CATALYST_ARM64 = arm64 +# +# NOTE: When manually specifying ARCHS, put a semi-colon between the entries. E.g., -DARCHS="armv7;arm64" +# +############################################################################### +# END OPTIONS # +############################################################################### +# +# This toolchain defines the following properties (available via get_property()) for use externally: +# +# PLATFORM: The currently targeted platform. +# XCODE_VERSION: Version number (not including Build version) of Xcode detected. +# SDK_VERSION: Version of SDK being used. +# OSX_ARCHITECTURES: Architectures being compiled for (generated from PLATFORM). +# APPLE_TARGET_TRIPLE: Used by autoconf build systems. NOTE: If "ARCHS" is overridden, this will *NOT* be set! +# +# This toolchain defines the following macros for use externally: +# +# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE XCODE_VARIANT) +# A convenience macro for setting xcode specific properties on targets. +# Available variants are: All, Release, RelWithDebInfo, Debug, MinSizeRel +# example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1" "all"). +# +# find_host_package (PROGRAM ARGS) +# A macro used to find executable programs on the host system, not within the +# environment. 
Thanks to the android-cmake project for providing the +# command. +# + +cmake_minimum_required(VERSION 3.8.0) + +# CMake invokes the toolchain file twice during the first build, but only once during subsequent rebuilds. +if(DEFINED ENV{_IOS_TOOLCHAIN_HAS_RUN}) + return() +endif() +set(ENV{_IOS_TOOLCHAIN_HAS_RUN} true) + +# List of supported platform values +list(APPEND _supported_platforms + "OS" "OS64" "OS64COMBINED" "SIMULATOR" "SIMULATOR64" "SIMULATORARM64" "SIMULATOR64COMBINED" + "TVOS" "TVOSCOMBINED" "SIMULATOR_TVOS" "SIMULATORARM64_TVOS" + "WATCHOS" "WATCHOSCOMBINED" "SIMULATOR_WATCHOS" + "MAC" "MAC_ARM64" "MAC_UNIVERSAL" + "VISIONOS" "SIMULATOR_VISIONOS" "SIMULATOR64_VISIONOS" + "MAC_CATALYST" "MAC_CATALYST_ARM64") + +# Cache what generator is used +set(USED_CMAKE_GENERATOR "${CMAKE_GENERATOR}") + +# Check if using a CMake version capable of building combined FAT builds (simulator and target slices combined in one static lib) +if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14") + set(MODERN_CMAKE YES) +endif() + +# Get the Xcode version being used. +# Problem: CMake runs toolchain files multiple times, but can't read cache variables on some runs. +# Workaround: On the first run (in which cache variables are always accessible), set an intermediary environment variable. +# +# NOTE: This pattern is used in many places in this toolchain to speed up checks of all sorts +if(DEFINED XCODE_VERSION_INT) + # Environment variables are always preserved. + set(ENV{_XCODE_VERSION_INT} "${XCODE_VERSION_INT}") +elseif(DEFINED ENV{_XCODE_VERSION_INT}) + set(XCODE_VERSION_INT "$ENV{_XCODE_VERSION_INT}") +elseif(NOT DEFINED XCODE_VERSION_INT) + find_program(XCODEBUILD_EXECUTABLE xcodebuild) + if(NOT XCODEBUILD_EXECUTABLE) + message(FATAL_ERROR "xcodebuild not found. 
Please install either the standalone commandline tools or Xcode.") + endif() + execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -version + OUTPUT_VARIABLE XCODE_VERSION_INT + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) + string(REGEX MATCH "Xcode [0-9\\.]+" XCODE_VERSION_INT "${XCODE_VERSION_INT}") + string(REGEX REPLACE "Xcode ([0-9\\.]+)" "\\1" XCODE_VERSION_INT "${XCODE_VERSION_INT}") + set(XCODE_VERSION_INT "${XCODE_VERSION_INT}" CACHE INTERNAL "") +endif() + +# Assuming that xcode 12.0 is installed you most probably have ios sdk 14.0 or later installed (tested on Big Sur) +# if you don't set a deployment target it will be set the way you only get 64-bit builds +#if(NOT DEFINED DEPLOYMENT_TARGET AND XCODE_VERSION_INT VERSION_GREATER 12.0) +# Temporarily fix the arm64 issues in CMake install-combined by excluding arm64 for simulator builds (needed for Apple Silicon...) +# set(CMAKE_XCODE_ATTRIBUTE_EXCLUDED_ARCHS[sdk=iphonesimulator*] "arm64") +#endif() + +# Check if the platform variable is set +if(DEFINED PLATFORM) + # Environment variables are always preserved. + set(ENV{_PLATFORM} "${PLATFORM}") +elseif(DEFINED ENV{_PLATFORM}) + set(PLATFORM "$ENV{_PLATFORM}") +elseif(NOT DEFINED PLATFORM) + message(FATAL_ERROR "PLATFORM argument not set. Bailing configure since I don't know what target you want to build for!") +endif () + +if(PLATFORM MATCHES ".*COMBINED" AND NOT CMAKE_GENERATOR MATCHES "Xcode") + message(FATAL_ERROR "The combined builds support requires Xcode to be used as a generator via '-G Xcode' command-line argument in CMake") +endif() + +# Safeguard that the platform value is set and is one of the supported values +list(FIND _supported_platforms ${PLATFORM} contains_PLATFORM) +if("${contains_PLATFORM}" EQUAL "-1") + string(REPLACE ";" "\n * " _supported_platforms_formatted "${_supported_platforms}") + message(FATAL_ERROR " Invalid PLATFORM specified! 
Current value: ${PLATFORM}.\n" + " Supported PLATFORM values: \n * ${_supported_platforms_formatted}") +endif() + +# Check if Apple Silicon is supported +if(PLATFORM MATCHES "^(MAC_ARM64)$|^(MAC_CATALYST_ARM64)$|^(MAC_UNIVERSAL)$" AND ${CMAKE_VERSION} VERSION_LESS "3.19.5") + message(FATAL_ERROR "Apple Silicon builds requires a minimum of CMake 3.19.5") +endif() + +# Touch the toolchain variable to suppress the "unused variable" warning. +# This happens if CMake is invoked with the same command line the second time. +if(CMAKE_TOOLCHAIN_FILE) +endif() + +# Fix for PThread library not in path +set(CMAKE_THREAD_LIBS_INIT "-lpthread") +set(CMAKE_HAVE_THREADS_LIBRARY 1) +set(CMAKE_USE_WIN32_THREADS_INIT 0) +set(CMAKE_USE_PTHREADS_INIT 1) + +# Specify named language support defaults. +if(NOT DEFINED NAMED_LANGUAGE_SUPPORT AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.16") + set(NAMED_LANGUAGE_SUPPORT ON) + message(STATUS "[DEFAULTS] Using explicit named language support! E.g., enable_language(CXX) is needed in the project files.") +elseif(NOT DEFINED NAMED_LANGUAGE_SUPPORT AND ${CMAKE_VERSION} VERSION_LESS "3.16") + set(NAMED_LANGUAGE_SUPPORT OFF) + message(STATUS "[DEFAULTS] Disabling explicit named language support. Falling back to legacy behavior.") +elseif(DEFINED NAMED_LANGUAGE_SUPPORT AND ${CMAKE_VERSION} VERSION_LESS "3.16") + message(FATAL_ERROR "CMake named language support for OBJC and OBJCXX was added in CMake 3.16.") +endif() +set(NAMED_LANGUAGE_SUPPORT_INT ${NAMED_LANGUAGE_SUPPORT} CACHE BOOL + "Whether or not to enable explicit named language support" FORCE) + +# Specify the minimum version of the deployment target. +if(NOT DEFINED DEPLOYMENT_TARGET) + if (PLATFORM MATCHES "WATCHOS") + # Unless specified, SDK version 4.0 is used by default as minimum target version (watchOS). + set(DEPLOYMENT_TARGET "4.0") + elseif(PLATFORM STREQUAL "MAC") + # Unless specified, SDK version 10.13 (High Sierra) is used by default as the minimum target version (macos). 
+ set(DEPLOYMENT_TARGET "10.13") + elseif(PLATFORM STREQUAL "VISIONOS" OR PLATFORM STREQUAL "SIMULATOR_VISIONOS" OR PLATFORM STREQUAL "SIMULATOR64_VISIONOS") + # Unless specified, SDK version 1.0 is used by default as minimum target version (visionOS). + set(DEPLOYMENT_TARGET "1.0") + elseif(PLATFORM STREQUAL "MAC_ARM64") + # Unless specified, SDK version 11.0 (Big Sur) is used by default as the minimum target version (macOS on arm). + set(DEPLOYMENT_TARGET "11.0") + elseif(PLATFORM STREQUAL "MAC_UNIVERSAL") + # Unless specified, SDK version 11.0 (Big Sur) is used by default as minimum target version for universal builds. + set(DEPLOYMENT_TARGET "11.0") + elseif(PLATFORM STREQUAL "MAC_CATALYST" OR PLATFORM STREQUAL "MAC_CATALYST_ARM64") + # Unless specified, SDK version 13.1 is used by default as the minimum target version (mac catalyst minimum requirement). + set(DEPLOYMENT_TARGET "13.1") + else() + # Unless specified, SDK version 11.0 is used by default as the minimum target version (iOS, tvOS). + set(DEPLOYMENT_TARGET "11.0") + endif() + message(STATUS "[DEFAULTS] Using the default min-version since DEPLOYMENT_TARGET not provided!") +elseif(DEFINED DEPLOYMENT_TARGET AND PLATFORM MATCHES "^MAC_CATALYST" AND ${DEPLOYMENT_TARGET} VERSION_LESS "13.1") + message(FATAL_ERROR "Mac Catalyst builds requires a minimum deployment target of 13.1!") +endif() + +# Store the DEPLOYMENT_TARGET in the cache +set(DEPLOYMENT_TARGET "${DEPLOYMENT_TARGET}" CACHE INTERNAL "") + +# Handle the case where we are targeting iOS and a version above 10.3.4 (32-bit support dropped officially) +if(PLATFORM STREQUAL "OS" AND DEPLOYMENT_TARGET VERSION_GREATER_EQUAL 10.3.4) + set(PLATFORM "OS64") + message(STATUS "Targeting minimum SDK version ${DEPLOYMENT_TARGET}. Dropping 32-bit support.") +elseif(PLATFORM STREQUAL "SIMULATOR" AND DEPLOYMENT_TARGET VERSION_GREATER_EQUAL 10.3.4) + set(PLATFORM "SIMULATOR64") + message(STATUS "Targeting minimum SDK version ${DEPLOYMENT_TARGET}.
Dropping 32-bit support.") +endif() + +set(PLATFORM_INT "${PLATFORM}") + +if(DEFINED ARCHS) + string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}") +endif() + +# Determine the platform name and architectures for use in xcodebuild commands +# from the specified PLATFORM_INT name. +if(PLATFORM_INT STREQUAL "OS") + set(SDK_NAME iphoneos) + if(NOT ARCHS) + set(ARCHS armv7 armv7s arm64) + set(APPLE_TARGET_TRIPLE_INT arm-apple-ios${DEPLOYMENT_TARGET}) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}) + endif() +elseif(PLATFORM_INT STREQUAL "OS64") + set(SDK_NAME iphoneos) + if(NOT ARCHS) + if (XCODE_VERSION_INT VERSION_GREATER 10.0) + set(ARCHS arm64) # FIXME: Add arm64e when Apple has fixed the integration issues with it, libarclite_iphoneos.a is currently missing bitcode markers for example + else() + set(ARCHS arm64) + endif() + set(APPLE_TARGET_TRIPLE_INT arm64-apple-ios${DEPLOYMENT_TARGET}) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}) + endif() +elseif(PLATFORM_INT STREQUAL "OS64COMBINED") + set(SDK_NAME iphoneos) + if(MODERN_CMAKE) + if(NOT ARCHS) + if (XCODE_VERSION_INT VERSION_GREATER 12.0) + set(ARCHS arm64 x86_64) + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphoneos*] "arm64") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphonesimulator*] "x86_64 arm64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphoneos*] "arm64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphonesimulator*] "x86_64 arm64") + else() + set(ARCHS arm64 x86_64) + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphoneos*] "arm64") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphonesimulator*] "x86_64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphoneos*] "arm64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphonesimulator*] "x86_64") + endif() + set(APPLE_TARGET_TRIPLE_INT arm64-x86_64-apple-ios${DEPLOYMENT_TARGET}) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}) + endif() + else() + message(FATAL_ERROR "Please make sure 
that you are running CMake 3.14+ to make the OS64COMBINED setting work") + endif() +elseif(PLATFORM_INT STREQUAL "SIMULATOR64COMBINED") + set(SDK_NAME iphonesimulator) + if(MODERN_CMAKE) + if(NOT ARCHS) + if (XCODE_VERSION_INT VERSION_GREATER 12.0) + set(ARCHS arm64 x86_64) # FIXME: Add arm64e when Apple have fixed the integration issues with it, libarclite_iphoneos.a is currently missing bitcode markers for example + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphoneos*] "") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphonesimulator*] "x86_64 arm64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphoneos*] "") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphonesimulator*] "x86_64 arm64") + else() + set(ARCHS arm64 x86_64) + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphoneos*] "") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphonesimulator*] "x86_64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphoneos*] "") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphonesimulator*] "x86_64") + endif() + set(APPLE_TARGET_TRIPLE_INT aarch64-x86_64-apple-ios${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-simulator) + endif() + else() + message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the SIMULATOR64COMBINED setting work") + endif() +elseif(PLATFORM_INT STREQUAL "SIMULATOR") + set(SDK_NAME iphonesimulator) + if(NOT ARCHS) + set(ARCHS i386) + set(APPLE_TARGET_TRIPLE_INT i386-apple-ios${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-simulator) + endif() + message(DEPRECATION "SIMULATOR IS DEPRECATED. 
Consider using SIMULATOR64 instead.") +elseif(PLATFORM_INT STREQUAL "SIMULATOR64") + set(SDK_NAME iphonesimulator) + if(NOT ARCHS) + set(ARCHS x86_64) + set(APPLE_TARGET_TRIPLE_INT x86_64-apple-ios${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-simulator) + endif() +elseif(PLATFORM_INT STREQUAL "SIMULATORARM64") + set(SDK_NAME iphonesimulator) + if(NOT ARCHS) + set(ARCHS arm64) + set(APPLE_TARGET_TRIPLE_INT arm64-apple-ios${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-simulator) + endif() +elseif(PLATFORM_INT STREQUAL "TVOS") + set(SDK_NAME appletvos) + if(NOT ARCHS) + set(ARCHS arm64) + set(APPLE_TARGET_TRIPLE_INT arm64-apple-tvos${DEPLOYMENT_TARGET}) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos${DEPLOYMENT_TARGET}) + endif() +elseif (PLATFORM_INT STREQUAL "TVOSCOMBINED") + set(SDK_NAME appletvos) + if(MODERN_CMAKE) + if(NOT ARCHS) + set(ARCHS arm64 x86_64) + set(APPLE_TARGET_TRIPLE_INT arm64-x86_64-apple-tvos${DEPLOYMENT_TARGET}) + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=appletvos*] "arm64") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=appletvsimulator*] "x86_64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=appletvos*] "arm64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=appletvsimulator*] "x86_64") + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos${DEPLOYMENT_TARGET}) + endif() + else() + message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the TVOSCOMBINED setting work") + endif() +elseif(PLATFORM_INT STREQUAL "SIMULATOR_TVOS") + set(SDK_NAME appletvsimulator) + if(NOT ARCHS) + set(ARCHS x86_64) + set(APPLE_TARGET_TRIPLE_INT x86_64-apple-tvos${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos${DEPLOYMENT_TARGET}-simulator) + endif() +elseif(PLATFORM_INT STREQUAL "SIMULATORARM64_TVOS") + set(SDK_NAME appletvsimulator) + if(NOT ARCHS) 
+ set(ARCHS arm64) + set(APPLE_TARGET_TRIPLE_INT arm64-apple-tvos${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos${DEPLOYMENT_TARGET}-simulator) + endif() +elseif(PLATFORM_INT STREQUAL "WATCHOS") + set(SDK_NAME watchos) + if(NOT ARCHS) + if (XCODE_VERSION_INT VERSION_GREATER 10.0) + set(ARCHS armv7k arm64_32) + set(APPLE_TARGET_TRIPLE_INT arm64_32-apple-watchos${DEPLOYMENT_TARGET}) + else() + set(ARCHS armv7k) + set(APPLE_TARGET_TRIPLE_INT arm-apple-watchos${DEPLOYMENT_TARGET}) + endif() + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos${DEPLOYMENT_TARGET}) + endif() +elseif(PLATFORM_INT STREQUAL "WATCHOSCOMBINED") + set(SDK_NAME watchos) + if(MODERN_CMAKE) + if(NOT ARCHS) + if (XCODE_VERSION_INT VERSION_GREATER 10.0) + set(ARCHS armv7k arm64_32 i386) + set(APPLE_TARGET_TRIPLE_INT arm64_32-i386-apple-watchos${DEPLOYMENT_TARGET}) + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchos*] "armv7k arm64_32") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchsimulator*] "i386") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchos*] "armv7k arm64_32") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchsimulator*] "i386") + else() + set(ARCHS armv7k i386) + set(APPLE_TARGET_TRIPLE_INT arm-i386-apple-watchos${DEPLOYMENT_TARGET}) + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchos*] "armv7k") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchsimulator*] "i386") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchos*] "armv7k") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchsimulator*] "i386") + endif() + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos${DEPLOYMENT_TARGET}) + endif() + else() + message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the WATCHOSCOMBINED setting work") + endif() +elseif(PLATFORM_INT STREQUAL "SIMULATOR_WATCHOS") + set(SDK_NAME watchsimulator) + if(NOT ARCHS) + set(ARCHS i386) + set(APPLE_TARGET_TRIPLE_INT i386-apple-watchos${DEPLOYMENT_TARGET}-simulator) + else() + 
set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos${DEPLOYMENT_TARGET}-simulator) + endif() +elseif(PLATFORM_INT STREQUAL "SIMULATOR64_VISIONOS") + set(SDK_NAME xrsimulator) + if(NOT ARCHS) + set(ARCHS x86_64) + set(APPLE_TARGET_TRIPLE_INT x86_64-apple-xros${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-xros${DEPLOYMENT_TARGET}-simulator) + endif() +elseif(PLATFORM_INT STREQUAL "SIMULATOR_VISIONOS") + set(SDK_NAME xrsimulator) + if(NOT ARCHS) + set(ARCHS arm64) + set(APPLE_TARGET_TRIPLE_INT arm64-apple-xros${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-xros${DEPLOYMENT_TARGET}-simulator) + endif() +elseif(PLATFORM_INT STREQUAL "VISIONOS") + set(SDK_NAME xros) + if(NOT ARCHS) + set(ARCHS arm64) + set(APPLE_TARGET_TRIPLE_INT arm64-apple-xros${DEPLOYMENT_TARGET}) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-xros${DEPLOYMENT_TARGET}) + endif() +elseif(PLATFORM_INT STREQUAL "MAC" OR PLATFORM_INT STREQUAL "MAC_CATALYST") + set(SDK_NAME macosx) + if(NOT ARCHS) + set(ARCHS x86_64) + endif() + string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}") + if(PLATFORM_INT STREQUAL "MAC") + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-macosx${DEPLOYMENT_TARGET}) + elseif(PLATFORM_INT STREQUAL "MAC_CATALYST") + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-macabi) + endif() +elseif(PLATFORM_INT MATCHES "^(MAC_ARM64)$|^(MAC_CATALYST_ARM64)$") + set(SDK_NAME macosx) + if(NOT ARCHS) + set(ARCHS arm64) + endif() + string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}") + if(PLATFORM_INT STREQUAL "MAC_ARM64") + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-macosx${DEPLOYMENT_TARGET}) + elseif(PLATFORM_INT STREQUAL "MAC_CATALYST_ARM64") + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-macabi) + endif() +elseif(PLATFORM_INT STREQUAL "MAC_UNIVERSAL") + set(SDK_NAME macosx) + if(NOT ARCHS) + set(ARCHS "x86_64;arm64") + endif() + 
string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}") + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-macosx${DEPLOYMENT_TARGET}) +else() + message(FATAL_ERROR "Invalid PLATFORM: ${PLATFORM_INT}") +endif() + +string(REPLACE ";" " " ARCHS_SPACED "${ARCHS}") + +if(MODERN_CMAKE AND PLATFORM_INT MATCHES ".*COMBINED" AND NOT CMAKE_GENERATOR MATCHES "Xcode") + message(FATAL_ERROR "The COMBINED options only work with Xcode generator, -G Xcode") +endif() + +if(CMAKE_GENERATOR MATCHES "Xcode" AND PLATFORM_INT MATCHES "^MAC_CATALYST") + set(CMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++") + set(CMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS "macosx") + set(CMAKE_XCODE_ATTRIBUTE_SUPPORTS_MACCATALYST "YES") + if(NOT DEFINED MACOSX_DEPLOYMENT_TARGET) + set(CMAKE_XCODE_ATTRIBUTE_MACOSX_DEPLOYMENT_TARGET "10.15") + else() + set(CMAKE_XCODE_ATTRIBUTE_MACOSX_DEPLOYMENT_TARGET "${MACOSX_DEPLOYMENT_TARGET}") + endif() +elseif(CMAKE_GENERATOR MATCHES "Xcode") + set(CMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++") + set(CMAKE_XCODE_ATTRIBUTE_IPHONEOS_DEPLOYMENT_TARGET "${DEPLOYMENT_TARGET}") + if(NOT PLATFORM_INT MATCHES ".*COMBINED") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=${SDK_NAME}*] "${ARCHS_SPACED}") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=${SDK_NAME}*] "${ARCHS_SPACED}") + endif() +endif() + +# If the user did not specify the SDK root to use, then query xcodebuild for it. +if(DEFINED CMAKE_OSX_SYSROOT_INT) + # Environment variables are always preserved. 
+ set(ENV{_CMAKE_OSX_SYSROOT_INT} "${CMAKE_OSX_SYSROOT_INT}") +elseif(DEFINED ENV{_CMAKE_OSX_SYSROOT_INT}) + set(CMAKE_OSX_SYSROOT_INT "$ENV{_CMAKE_OSX_SYSROOT_INT}") +elseif(NOT DEFINED CMAKE_OSX_SYSROOT_INT) + execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -version -sdk ${SDK_NAME} Path + OUTPUT_VARIABLE CMAKE_OSX_SYSROOT_INT + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() + +if (NOT DEFINED CMAKE_OSX_SYSROOT_INT AND NOT DEFINED CMAKE_OSX_SYSROOT) + message(SEND_ERROR "Please make sure that Xcode is installed and that the toolchain" + "is pointing to the correct path. Please run:" + "sudo xcode-select -s /Applications/Xcode.app/Contents/Developer" + "and see if that fixes the problem for you.") + message(FATAL_ERROR "Invalid CMAKE_OSX_SYSROOT: ${CMAKE_OSX_SYSROOT} " + "does not exist.") +elseif(DEFINED CMAKE_OSX_SYSROOT_INT) + set(CMAKE_OSX_SYSROOT_INT "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "") + # Specify the location or name of the platform SDK to be used in CMAKE_OSX_SYSROOT. + set(CMAKE_OSX_SYSROOT "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "") +endif() + +# Use bitcode or not +if(NOT DEFINED ENABLE_BITCODE) + message(STATUS "[DEFAULTS] Disabling bitcode support by default. ENABLE_BITCODE not provided for override!") + set(ENABLE_BITCODE OFF) +endif() +set(ENABLE_BITCODE_INT ${ENABLE_BITCODE} CACHE BOOL + "Whether or not to enable bitcode" FORCE) +# Use ARC or not +if(NOT DEFINED ENABLE_ARC) + # Unless specified, enable ARC support by default + set(ENABLE_ARC ON) + message(STATUS "[DEFAULTS] Enabling ARC support by default. ENABLE_ARC not provided!") +endif() +set(ENABLE_ARC_INT ${ENABLE_ARC} CACHE BOOL "Whether or not to enable ARC" FORCE) +# Use hidden visibility or not +if(NOT DEFINED ENABLE_VISIBILITY) + # Unless specified, disable symbols visibility by default + set(ENABLE_VISIBILITY OFF) + message(STATUS "[DEFAULTS] Hiding symbols visibility by default. 
ENABLE_VISIBILITY not provided!") +endif() +set(ENABLE_VISIBILITY_INT ${ENABLE_VISIBILITY} CACHE BOOL "Whether or not to hide symbols from the dynamic linker (-fvisibility=hidden)" FORCE) +# Set strict compiler checks or not +if(NOT DEFINED ENABLE_STRICT_TRY_COMPILE) + # Unless specified, disable strict try_compile() + set(ENABLE_STRICT_TRY_COMPILE OFF) + message(STATUS "[DEFAULTS] Using NON-strict compiler checks by default. ENABLE_STRICT_TRY_COMPILE not provided!") +endif() +set(ENABLE_STRICT_TRY_COMPILE_INT ${ENABLE_STRICT_TRY_COMPILE} CACHE BOOL + "Whether or not to use strict compiler checks" FORCE) + +# Get the SDK version information. +if(DEFINED SDK_VERSION) + # Environment variables are always preserved. + set(ENV{_SDK_VERSION} "${SDK_VERSION}") +elseif(DEFINED ENV{_SDK_VERSION}) + set(SDK_VERSION "$ENV{_SDK_VERSION}") +elseif(NOT DEFINED SDK_VERSION) + execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -sdk ${CMAKE_OSX_SYSROOT_INT} -version SDKVersion + OUTPUT_VARIABLE SDK_VERSION + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() + +# Find the Developer root for the specific iOS platform being compiled for +# from CMAKE_OSX_SYSROOT. Should be ../../ from SDK specified in +# CMAKE_OSX_SYSROOT. There does not appear to be a direct way to obtain +# this information from xcrun or xcodebuild. +if (NOT DEFINED CMAKE_DEVELOPER_ROOT AND NOT CMAKE_GENERATOR MATCHES "Xcode") + get_filename_component(PLATFORM_SDK_DIR ${CMAKE_OSX_SYSROOT_INT} PATH) + get_filename_component(CMAKE_DEVELOPER_ROOT ${PLATFORM_SDK_DIR} PATH) + if (NOT EXISTS "${CMAKE_DEVELOPER_ROOT}") + message(FATAL_ERROR "Invalid CMAKE_DEVELOPER_ROOT: ${CMAKE_DEVELOPER_ROOT} does not exist.") + endif() +endif() + +# Find the C & C++ compilers for the specified SDK. +if(DEFINED CMAKE_C_COMPILER) + # Environment variables are always preserved. 
+ set(ENV{_CMAKE_C_COMPILER} "${CMAKE_C_COMPILER}") +elseif(DEFINED ENV{_CMAKE_C_COMPILER}) + set(CMAKE_C_COMPILER "$ENV{_CMAKE_C_COMPILER}") + set(CMAKE_ASM_COMPILER ${CMAKE_C_COMPILER}) +elseif(NOT DEFINED CMAKE_C_COMPILER) + execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find clang + OUTPUT_VARIABLE CMAKE_C_COMPILER + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) + set(CMAKE_ASM_COMPILER ${CMAKE_C_COMPILER}) +endif() +if(DEFINED CMAKE_CXX_COMPILER) + # Environment variables are always preserved. + set(ENV{_CMAKE_CXX_COMPILER} "${CMAKE_CXX_COMPILER}") +elseif(DEFINED ENV{_CMAKE_CXX_COMPILER}) + set(CMAKE_CXX_COMPILER "$ENV{_CMAKE_CXX_COMPILER}") +elseif(NOT DEFINED CMAKE_CXX_COMPILER) + execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find clang++ + OUTPUT_VARIABLE CMAKE_CXX_COMPILER + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() +# Find (Apple's) libtool. +if(DEFINED BUILD_LIBTOOL) + # Environment variables are always preserved. + set(ENV{_BUILD_LIBTOOL} "${BUILD_LIBTOOL}") +elseif(DEFINED ENV{_BUILD_LIBTOOL}) + set(BUILD_LIBTOOL "$ENV{_BUILD_LIBTOOL}") +elseif(NOT DEFINED BUILD_LIBTOOL) + execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find libtool + OUTPUT_VARIABLE BUILD_LIBTOOL + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() +# Find the toolchain's provided install_name_tool if none is found on the host +if(DEFINED CMAKE_INSTALL_NAME_TOOL) + # Environment variables are always preserved. 
+ set(ENV{_CMAKE_INSTALL_NAME_TOOL} "${CMAKE_INSTALL_NAME_TOOL}") +elseif(DEFINED ENV{_CMAKE_INSTALL_NAME_TOOL}) + set(CMAKE_INSTALL_NAME_TOOL "$ENV{_CMAKE_INSTALL_NAME_TOOL}") +elseif(NOT DEFINED CMAKE_INSTALL_NAME_TOOL) + execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find install_name_tool + OUTPUT_VARIABLE CMAKE_INSTALL_NAME_TOOL_INT + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) + set(CMAKE_INSTALL_NAME_TOOL ${CMAKE_INSTALL_NAME_TOOL_INT} CACHE INTERNAL "") +endif() + +# Configure libtool to be used instead of ar + ranlib to build static libraries. +# This is required on Xcode 7+, but should also work on previous versions of +# Xcode. +get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES) +foreach(lang ${languages}) + set(CMAKE_${lang}_CREATE_STATIC_LIBRARY "${BUILD_LIBTOOL} -static -o " CACHE INTERNAL "") +endforeach() + +# CMake 3.14+ support building for iOS, watchOS, and tvOS out of the box. +if(MODERN_CMAKE) + if(SDK_NAME MATCHES "iphone") + set(CMAKE_SYSTEM_NAME iOS) + elseif(SDK_NAME MATCHES "xros") + set(CMAKE_SYSTEM_NAME visionOS) + elseif(SDK_NAME MATCHES "xrsimulator") + set(CMAKE_SYSTEM_NAME visionOS) + elseif(SDK_NAME MATCHES "macosx") + set(CMAKE_SYSTEM_NAME Darwin) + elseif(SDK_NAME MATCHES "appletv") + set(CMAKE_SYSTEM_NAME tvOS) + elseif(SDK_NAME MATCHES "watch") + set(CMAKE_SYSTEM_NAME watchOS) + endif() + # Provide flags for a combined FAT library build on newer CMake versions + if(PLATFORM_INT MATCHES ".*COMBINED") + set(CMAKE_IOS_INSTALL_COMBINED YES) + if(CMAKE_GENERATOR MATCHES "Xcode") + # Set the SDKROOT Xcode properties to a Xcode-friendly value (the SDK_NAME, E.g, iphoneos) + # This way, Xcode will automatically switch between the simulator and device SDK when building. + set(CMAKE_XCODE_ATTRIBUTE_SDKROOT "${SDK_NAME}") + # Force to not build just one ARCH, but all! 
+ set(CMAKE_XCODE_ATTRIBUTE_ONLY_ACTIVE_ARCH "NO") + endif() + endif() +elseif(NOT DEFINED CMAKE_SYSTEM_NAME AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.10") + # Legacy code path prior to CMake 3.14 or fallback if no CMAKE_SYSTEM_NAME specified + set(CMAKE_SYSTEM_NAME iOS) +elseif(NOT DEFINED CMAKE_SYSTEM_NAME) + # Legacy code path before CMake 3.14 or fallback if no CMAKE_SYSTEM_NAME specified + set(CMAKE_SYSTEM_NAME Darwin) +endif() +# Standard settings. +set(CMAKE_SYSTEM_VERSION ${SDK_VERSION} CACHE INTERNAL "") +set(UNIX ON CACHE BOOL "") +set(APPLE ON CACHE BOOL "") +if(PLATFORM STREQUAL "MAC" OR PLATFORM STREQUAL "MAC_ARM64" OR PLATFORM STREQUAL "MAC_UNIVERSAL") + set(IOS OFF CACHE BOOL "") + set(MACOS ON CACHE BOOL "") +elseif(PLATFORM STREQUAL "MAC_CATALYST" OR PLATFORM STREQUAL "MAC_CATALYST_ARM64") + set(IOS ON CACHE BOOL "") + set(MACOS ON CACHE BOOL "") +else() + set(IOS ON CACHE BOOL "") +endif() +# Set the architectures for which to build. +set(CMAKE_OSX_ARCHITECTURES ${ARCHS} CACHE INTERNAL "") +# Change the type of target generated for try_compile() so it'll work when cross-compiling, weak compiler checks +if(NOT ENABLE_STRICT_TRY_COMPILE_INT) + set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +endif() +# All iOS/Darwin specific settings - some may be redundant. 
+if (NOT DEFINED CMAKE_MACOSX_BUNDLE) + set(CMAKE_MACOSX_BUNDLE YES) +endif() +set(CMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED "NO") +set(CMAKE_SHARED_LIBRARY_PREFIX "lib") +set(CMAKE_SHARED_LIBRARY_SUFFIX ".dylib") +set(CMAKE_SHARED_MODULE_PREFIX "lib") +set(CMAKE_SHARED_MODULE_SUFFIX ".so") +set(CMAKE_C_COMPILER_ABI ELF) +set(CMAKE_CXX_COMPILER_ABI ELF) +set(CMAKE_C_HAS_ISYSROOT 1) +set(CMAKE_CXX_HAS_ISYSROOT 1) +set(CMAKE_MODULE_EXISTS 1) +set(CMAKE_DL_LIBS "") +set(CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ") +set(CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ") +set(CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}") +set(CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}") + +if(ARCHS MATCHES "((^|;|, )(arm64|arm64e|x86_64))+") + set(CMAKE_C_SIZEOF_DATA_PTR 8) + set(CMAKE_CXX_SIZEOF_DATA_PTR 8) + if(ARCHS MATCHES "((^|;|, )(arm64|arm64e))+") + set(CMAKE_SYSTEM_PROCESSOR "aarch64") + else() + set(CMAKE_SYSTEM_PROCESSOR "x86_64") + endif() +else() + set(CMAKE_C_SIZEOF_DATA_PTR 4) + set(CMAKE_CXX_SIZEOF_DATA_PTR 4) + set(CMAKE_SYSTEM_PROCESSOR "arm") +endif() + +# Note that only Xcode 7+ supports the newer more specific: +# -m${SDK_NAME}-version-min flags, older versions of Xcode use: +# -m(ios/ios-simulator)-version-min instead. +if(${CMAKE_VERSION} VERSION_LESS "3.11") + if(PLATFORM_INT STREQUAL "OS" OR PLATFORM_INT STREQUAL "OS64") + if(XCODE_VERSION_INT VERSION_LESS 7.0) + set(SDK_NAME_VERSION_FLAGS + "-mios-version-min=${DEPLOYMENT_TARGET}") + else() + # Xcode 7.0+ uses flags we can build directly from SDK_NAME. 
+ set(SDK_NAME_VERSION_FLAGS + "-m${SDK_NAME}-version-min=${DEPLOYMENT_TARGET}") + endif() + elseif(PLATFORM_INT STREQUAL "TVOS") + set(SDK_NAME_VERSION_FLAGS + "-mtvos-version-min=${DEPLOYMENT_TARGET}") + elseif(PLATFORM_INT STREQUAL "SIMULATOR_TVOS") + set(SDK_NAME_VERSION_FLAGS + "-mtvos-simulator-version-min=${DEPLOYMENT_TARGET}") +elseif(PLATFORM_INT STREQUAL "SIMULATORARM64_TVOS") + set(SDK_NAME_VERSION_FLAGS + "-mtvos-simulator-version-min=${DEPLOYMENT_TARGET}") + elseif(PLATFORM_INT STREQUAL "WATCHOS") + set(SDK_NAME_VERSION_FLAGS + "-mwatchos-version-min=${DEPLOYMENT_TARGET}") + elseif(PLATFORM_INT STREQUAL "SIMULATOR_WATCHOS") + set(SDK_NAME_VERSION_FLAGS + "-mwatchos-simulator-version-min=${DEPLOYMENT_TARGET}") + elseif(PLATFORM_INT STREQUAL "MAC") + set(SDK_NAME_VERSION_FLAGS + "-mmacosx-version-min=${DEPLOYMENT_TARGET}") + else() + # SIMULATOR or SIMULATOR64 both use -mios-simulator-version-min. + set(SDK_NAME_VERSION_FLAGS + "-mios-simulator-version-min=${DEPLOYMENT_TARGET}") + endif() +elseif(NOT PLATFORM_INT MATCHES "^MAC_CATALYST") + # Newer versions of CMake sets the version min flags correctly, skip this for Mac Catalyst targets + set(CMAKE_OSX_DEPLOYMENT_TARGET ${DEPLOYMENT_TARGET}) +endif() + +if(DEFINED APPLE_TARGET_TRIPLE_INT) + set(APPLE_TARGET_TRIPLE ${APPLE_TARGET_TRIPLE_INT} CACHE INTERNAL "") + set(CMAKE_C_COMPILER_TARGET ${APPLE_TARGET_TRIPLE}) + set(CMAKE_CXX_COMPILER_TARGET ${APPLE_TARGET_TRIPLE}) + set(CMAKE_ASM_COMPILER_TARGET ${APPLE_TARGET_TRIPLE}) +endif() + +if(PLATFORM_INT MATCHES "^MAC_CATALYST") + set(C_TARGET_FLAGS "-isystem ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/usr/include -iframework ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/System/Library/Frameworks") +endif() + +if(ENABLE_BITCODE_INT) + set(BITCODE "-fembed-bitcode") + set(CMAKE_XCODE_ATTRIBUTE_BITCODE_GENERATION_MODE "bitcode") + set(CMAKE_XCODE_ATTRIBUTE_ENABLE_BITCODE "YES") +else() + set(BITCODE "") + set(CMAKE_XCODE_ATTRIBUTE_ENABLE_BITCODE "NO") +endif() + 
+if(ENABLE_ARC_INT) + set(FOBJC_ARC "-fobjc-arc") + set(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "YES") +else() + set(FOBJC_ARC "-fno-objc-arc") + set(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "NO") +endif() + +if(NAMED_LANGUAGE_SUPPORT_INT) + set(OBJC_VARS "-fobjc-abi-version=2 -DOBJC_OLD_DISPATCH_PROTOTYPES=0") + set(OBJC_LEGACY_VARS "") +else() + set(OBJC_VARS "") + set(OBJC_LEGACY_VARS "-fobjc-abi-version=2 -DOBJC_OLD_DISPATCH_PROTOTYPES=0") +endif() + +if(NOT ENABLE_VISIBILITY_INT) + foreach(lang ${languages}) + set(CMAKE_${lang}_VISIBILITY_PRESET "hidden" CACHE INTERNAL "") + endforeach() + set(CMAKE_XCODE_ATTRIBUTE_GCC_SYMBOLS_PRIVATE_EXTERN "YES") + set(VISIBILITY "-fvisibility=hidden -fvisibility-inlines-hidden") +else() + foreach(lang ${languages}) + set(CMAKE_${lang}_VISIBILITY_PRESET "default" CACHE INTERNAL "") + endforeach() + set(CMAKE_XCODE_ATTRIBUTE_GCC_SYMBOLS_PRIVATE_EXTERN "NO") + set(VISIBILITY "-fvisibility=default") +endif() + +if(DEFINED APPLE_TARGET_TRIPLE) + set(APPLE_TARGET_TRIPLE_FLAG "-target ${APPLE_TARGET_TRIPLE}") +endif() + +#Check if Xcode generator is used since that will handle these flags automagically +if(CMAKE_GENERATOR MATCHES "Xcode") + message(STATUS "Not setting any manual command-line buildflags, since Xcode is selected as the generator. 
Modifying the Xcode build-settings directly instead.") +else() + set(CMAKE_C_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${OBJC_LEGACY_VARS} ${BITCODE} ${VISIBILITY} ${CMAKE_C_FLAGS}") + set(CMAKE_C_FLAGS_DEBUG "-O0 -g ${CMAKE_C_FLAGS_DEBUG}") + set(CMAKE_C_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_C_FLAGS_MINSIZEREL}") + set(CMAKE_C_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_C_FLAGS_RELWITHDEBINFO}") + set(CMAKE_C_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_C_FLAGS_RELEASE}") + set(CMAKE_CXX_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${OBJC_LEGACY_VARS} ${BITCODE} ${VISIBILITY} ${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g ${CMAKE_CXX_FLAGS_DEBUG}") + set(CMAKE_CXX_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_CXX_FLAGS_MINSIZEREL}") + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") + set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_CXX_FLAGS_RELEASE}") + if(NAMED_LANGUAGE_SUPPORT_INT) + set(CMAKE_OBJC_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${BITCODE} ${VISIBILITY} ${FOBJC_ARC} ${OBJC_VARS} ${CMAKE_OBJC_FLAGS}") + set(CMAKE_OBJC_FLAGS_DEBUG "-O0 -g ${CMAKE_OBJC_FLAGS_DEBUG}") + set(CMAKE_OBJC_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_OBJC_FLAGS_MINSIZEREL}") + set(CMAKE_OBJC_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_OBJC_FLAGS_RELWITHDEBINFO}") + set(CMAKE_OBJC_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_OBJC_FLAGS_RELEASE}") + set(CMAKE_OBJCXX_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${BITCODE} ${VISIBILITY} ${FOBJC_ARC} ${OBJC_VARS} ${CMAKE_OBJCXX_FLAGS}") + set(CMAKE_OBJCXX_FLAGS_DEBUG "-O0 -g ${CMAKE_OBJCXX_FLAGS_DEBUG}") + set(CMAKE_OBJCXX_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_OBJCXX_FLAGS_MINSIZEREL}") + set(CMAKE_OBJCXX_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_OBJCXX_FLAGS_RELWITHDEBINFO}") + set(CMAKE_OBJCXX_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_OBJCXX_FLAGS_RELEASE}") + 
endif() + set(CMAKE_C_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_C_LINK_FLAGS}") + set(CMAKE_CXX_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}") + if(NAMED_LANGUAGE_SUPPORT_INT) + set(CMAKE_OBJC_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_OBJC_LINK_FLAGS}") + set(CMAKE_OBJCXX_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_OBJCXX_LINK_FLAGS}") + endif() + set(CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS} -x assembler-with-cpp -arch ${CMAKE_OSX_ARCHITECTURES} ${APPLE_TARGET_TRIPLE_FLAG}") +endif() + +## Print status messages to inform of the current state +message(STATUS "Configuring ${SDK_NAME} build for platform: ${PLATFORM_INT}, architecture(s): ${ARCHS}") +message(STATUS "Using SDK: ${CMAKE_OSX_SYSROOT_INT}") +message(STATUS "Using C compiler: ${CMAKE_C_COMPILER}") +message(STATUS "Using CXX compiler: ${CMAKE_CXX_COMPILER}") +message(STATUS "Using libtool: ${BUILD_LIBTOOL}") +message(STATUS "Using install name tool: ${CMAKE_INSTALL_NAME_TOOL}") +if(DEFINED APPLE_TARGET_TRIPLE) + message(STATUS "Autoconf target triple: ${APPLE_TARGET_TRIPLE}") +endif() +message(STATUS "Using minimum deployment version: ${DEPLOYMENT_TARGET}" + " (SDK version: ${SDK_VERSION})") +if(MODERN_CMAKE) + message(STATUS "Merging integrated CMake 3.14+ iOS,tvOS,watchOS,macOS toolchain(s) with this toolchain!") + if(PLATFORM_INT MATCHES ".*COMBINED") + message(STATUS "Will combine built (static) artifacts into FAT lib...") + endif() +endif() +if(CMAKE_GENERATOR MATCHES "Xcode") + message(STATUS "Using Xcode version: ${XCODE_VERSION_INT}") +endif() +message(STATUS "CMake version: ${CMAKE_VERSION}") +if(DEFINED SDK_NAME_VERSION_FLAGS) + message(STATUS "Using version flags: ${SDK_NAME_VERSION_FLAGS}") +endif() +message(STATUS "Using a data_ptr size of: ${CMAKE_CXX_SIZEOF_DATA_PTR}") +if(ENABLE_BITCODE_INT) + message(STATUS 
"Bitcode: Enabled") +else() + message(STATUS "Bitcode: Disabled") +endif() + +if(ENABLE_ARC_INT) + message(STATUS "ARC: Enabled") +else() + message(STATUS "ARC: Disabled") +endif() + +if(ENABLE_VISIBILITY_INT) + message(STATUS "Hiding symbols: Disabled") +else() + message(STATUS "Hiding symbols: Enabled") +endif() + +# Set global properties +set_property(GLOBAL PROPERTY PLATFORM "${PLATFORM}") +set_property(GLOBAL PROPERTY APPLE_TARGET_TRIPLE "${APPLE_TARGET_TRIPLE_INT}") +set_property(GLOBAL PROPERTY SDK_VERSION "${SDK_VERSION}") +set_property(GLOBAL PROPERTY XCODE_VERSION "${XCODE_VERSION_INT}") +set_property(GLOBAL PROPERTY OSX_ARCHITECTURES "${CMAKE_OSX_ARCHITECTURES}") + +# Export configurable variables for the try_compile() command. +set(CMAKE_TRY_COMPILE_PLATFORM_VARIABLES + PLATFORM + XCODE_VERSION_INT + SDK_VERSION + NAMED_LANGUAGE_SUPPORT + DEPLOYMENT_TARGET + CMAKE_DEVELOPER_ROOT + CMAKE_OSX_SYSROOT_INT + ENABLE_BITCODE + ENABLE_ARC + CMAKE_ASM_COMPILER + CMAKE_C_COMPILER + CMAKE_C_COMPILER_TARGET + CMAKE_CXX_COMPILER + CMAKE_CXX_COMPILER_TARGET + BUILD_LIBTOOL + CMAKE_INSTALL_NAME_TOOL + CMAKE_C_FLAGS + CMAKE_C_DEBUG + CMAKE_C_MINSIZEREL + CMAKE_C_RELWITHDEBINFO + CMAKE_C_RELEASE + CMAKE_CXX_FLAGS + CMAKE_CXX_FLAGS_DEBUG + CMAKE_CXX_FLAGS_MINSIZEREL + CMAKE_CXX_FLAGS_RELWITHDEBINFO + CMAKE_CXX_FLAGS_RELEASE + CMAKE_C_LINK_FLAGS + CMAKE_CXX_LINK_FLAGS + CMAKE_ASM_FLAGS +) + +if(NAMED_LANGUAGE_SUPPORT_INT) + list(APPEND CMAKE_TRY_COMPILE_PLATFORM_VARIABLES + CMAKE_OBJC_FLAGS + CMAKE_OBJC_DEBUG + CMAKE_OBJC_MINSIZEREL + CMAKE_OBJC_RELWITHDEBINFO + CMAKE_OBJC_RELEASE + CMAKE_OBJCXX_FLAGS + CMAKE_OBJCXX_DEBUG + CMAKE_OBJCXX_MINSIZEREL + CMAKE_OBJCXX_RELWITHDEBINFO + CMAKE_OBJCXX_RELEASE + CMAKE_OBJC_LINK_FLAGS + CMAKE_OBJCXX_LINK_FLAGS + ) +endif() + +set(CMAKE_PLATFORM_HAS_INSTALLNAME 1) +set(CMAKE_SHARED_LINKER_FLAGS "-rpath @executable_path/Frameworks -rpath @loader_path/Frameworks") +set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib 
-Wl,-headerpad_max_install_names") +set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -Wl,-headerpad_max_install_names") +set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,") +set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,") +set(CMAKE_FIND_LIBRARY_SUFFIXES ".tbd" ".dylib" ".so" ".a") +set(CMAKE_SHARED_LIBRARY_SONAME_C_FLAG "-install_name") + +# Set the find root to the SDK developer roots. +# Note: CMAKE_FIND_ROOT_PATH is only useful when cross-compiling. Thus, do not set on macOS builds. +if(NOT PLATFORM_INT MATCHES "^MAC.*$") + list(APPEND CMAKE_FIND_ROOT_PATH "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "") + set(CMAKE_IGNORE_PATH "/System/Library/Frameworks;/usr/local/lib;/opt/homebrew" CACHE INTERNAL "") +endif() + +# Default to searching for frameworks first. +IF(NOT DEFINED CMAKE_FIND_FRAMEWORK) + set(CMAKE_FIND_FRAMEWORK FIRST) +ENDIF(NOT DEFINED CMAKE_FIND_FRAMEWORK) + +# Set up the default search directories for frameworks. +if(PLATFORM_INT MATCHES "^MAC_CATALYST") + set(CMAKE_FRAMEWORK_PATH + ${CMAKE_DEVELOPER_ROOT}/Library/PrivateFrameworks + ${CMAKE_OSX_SYSROOT_INT}/System/Library/Frameworks + ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/System/Library/Frameworks + ${CMAKE_FRAMEWORK_PATH} CACHE INTERNAL "") +else() + set(CMAKE_FRAMEWORK_PATH + ${CMAKE_DEVELOPER_ROOT}/Library/PrivateFrameworks + ${CMAKE_OSX_SYSROOT_INT}/System/Library/Frameworks + ${CMAKE_FRAMEWORK_PATH} CACHE INTERNAL "") +endif() + +# By default, search both the specified iOS SDK and the remainder of the host filesystem. 
+if(NOT CMAKE_FIND_ROOT_PATH_MODE_PROGRAM) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH CACHE INTERNAL "") +endif() +if(NOT CMAKE_FIND_ROOT_PATH_MODE_LIBRARY) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH CACHE INTERNAL "") +endif() +if(NOT CMAKE_FIND_ROOT_PATH_MODE_INCLUDE) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH CACHE INTERNAL "") +endif() +if(NOT CMAKE_FIND_ROOT_PATH_MODE_PACKAGE) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH CACHE INTERNAL "") +endif() + +# +# Some helper-macros below to simplify and beautify the CMakeFile +# + +# This little macro lets you set any Xcode specific property. +macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE XCODE_RELVERSION) + set(XCODE_RELVERSION_I "${XCODE_RELVERSION}") + if(XCODE_RELVERSION_I STREQUAL "All") + set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} "${XCODE_VALUE}") + else() + set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY}[variant=${XCODE_RELVERSION_I}] "${XCODE_VALUE}") + endif() +endmacro(set_xcode_property) + +# This macro lets you find executable programs on the host system. 
+macro(find_host_package) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE NEVER) + set(_TOOLCHAIN_IOS ${IOS}) + set(IOS OFF) + find_package(${ARGN}) + set(IOS ${_TOOLCHAIN_IOS}) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH) +endmacro(find_host_package) \ No newline at end of file diff --git a/src/3rd_party/CMakeLists.txt b/src/3rd_party/CMakeLists.txt index 2cec739ba..cf7cc1359 100644 --- a/src/3rd_party/CMakeLists.txt +++ b/src/3rd_party/CMakeLists.txt @@ -14,7 +14,7 @@ if(COMPILE_CPU) add_subdirectory(./intgemm) endif() - if(USE_RUY) + if(USE_RUY_SGEMM) set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE BOOL " " FORCE) set(CPUINFO_BUILD_BENCHMARKS OFF CACHE BOOL " " FORCE) set(CPUINFO_BUILD_PKG_CONFIG OFF CACHE BOOL " " FORCE) @@ -22,7 +22,7 @@ if(COMPILE_CPU) set(CPUINFO_BUILD_TOOLS OFF CACHE BOOL " " FORCE) add_subdirectory(ruy/third_party/cpuinfo EXCLUDE_FROM_ALL) add_subdirectory(ruy EXCLUDE_FROM_ALL) - endif(USE_RUY) + endif(USE_RUY_SGEMM) endif(COMPILE_CPU) if(USE_FBGEMM) diff --git a/src/3rd_party/faiss/VectorTransform.h b/src/3rd_party/faiss/VectorTransform.h index 5fc96bc46..e8689bc15 100644 --- a/src/3rd_party/faiss/VectorTransform.h +++ b/src/3rd_party/faiss/VectorTransform.h @@ -19,8 +19,10 @@ #include #ifdef __APPLE__ +#ifndef ARM #include #endif +#endif namespace faiss { From f001ab4ae1c46e638af874564afb03b01cf12559 Mon Sep 17 00:00:00 2001 From: Varun Mathur Date: Thu, 26 Oct 2023 14:03:23 -0700 Subject: [PATCH 07/21] simd change --- src/3rd_party/simd_utils | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/3rd_party/simd_utils b/src/3rd_party/simd_utils index 696036258..c3f774c98 160000 --- a/src/3rd_party/simd_utils +++ b/src/3rd_party/simd_utils @@ -1 +1 @@ -Subproject 
commit 6960362584481c977cdae9f6a8f7061a37c766cb +Subproject commit c3f774c98a73356f5bca52f6bf32e9f4a6191bc0 From 9b6a278c700099403808e1d1fe5c5ae671ed0599 Mon Sep 17 00:00:00 2001 From: Varun Mathur Date: Thu, 26 Oct 2023 17:33:23 -0700 Subject: [PATCH 08/21] update simd --- src/3rd_party/simd_utils | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/3rd_party/simd_utils b/src/3rd_party/simd_utils index c3f774c98..658dac3b6 160000 --- a/src/3rd_party/simd_utils +++ b/src/3rd_party/simd_utils @@ -1 +1 @@ -Subproject commit c3f774c98a73356f5bca52f6bf32e9f4a6191bc0 +Subproject commit 658dac3b678ec25bd2ea4d075d0498b3946a0d52 From db8312dd04b7d754bf9d2b4d06bb5a4764c3ea98 Mon Sep 17 00:00:00 2001 From: Varun Mathur Date: Sat, 4 Nov 2023 15:54:18 -0700 Subject: [PATCH 09/21] fix TranslateService --- src/translator/translator.h | 77 +++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 24 deletions(-) diff --git a/src/translator/translator.h b/src/translator/translator.h index f0fc0b908..abb5a5775 100644 --- a/src/translator/translator.h +++ b/src/translator/translator.h @@ -248,6 +248,9 @@ class TranslateService : public ModelServiceTask { size_t numDevices_; + std::vector model_mmaps_; // map + std::vector> model_items_; // non-mmap + public: virtual ~TranslateService() {} @@ -282,36 +285,62 @@ class TranslateService : public ModelServiceTask { auto devices = Config::getDevices(options_); numDevices_ = devices.size(); + ThreadPool threadPool(numDevices_, numDevices_); + scorers_.resize(numDevices_); + graphs_.resize(numDevices_); + // preload models - std::vector> model_items_; auto models = options->get>("models"); - for(auto model : models) { - auto items = io::loadItems(model); - model_items_.push_back(std::move(items)); + if(options_->get("model-mmap", false)) { + for(auto model : models) { + ABORT_IF(!io::isBin(model), "Non-binarized models cannot be mmapped"); + LOG(info, "Loading model from {}", model); + 
model_mmaps_.push_back(mio::mmap_source(model)); + } + } + else { + for(auto model : models) { + LOG(info, "Loading model from {}", model); + auto items = io::loadItems(model); + model_items_.push_back(std::move(items)); + } } // initialize scorers + size_t id = 0; for(auto device : devices) { - auto graph = New(true); - - auto precison = options_->get>("precision", {"float32"}); - graph->setDefaultElementType(typeFromString(precison[0])); // only use first type, used for parameter type in graph - graph->setDevice(device); - if (device.type == DeviceType::cpu) { - graph->getBackend()->setOptimized(options_->get("optimize")); - graph->getBackend()->setGemmType(options_->get("gemm-type")); - graph->getBackend()->setQuantizeRange(options_->get("quantize-range")); - } - graph->reserveWorkspaceMB(options_->get("workspace")); - graphs_.push_back(graph); - - auto scorers = createScorers(options_, model_items_); - for(auto scorer : scorers) { - scorer->init(graph); - if(shortlistGenerator_) - scorer->setShortlistGenerator(shortlistGenerator_); - } - scorers_.push_back(scorers); + auto task = [&](DeviceId device, size_t id) { + auto graph = New(true); + auto prec = options_->get>("precision", {"float32"}); + graph->setDefaultElementType(typeFromString(prec[0])); + graph->setDevice(device); + if (device.type == DeviceType::cpu) { + graph->getBackend()->setOptimized(options_->get("optimize")); + graph->getBackend()->setGemmType(options_->get("gemm-type")); + graph->getBackend()->setQuantizeRange(options_->get("quantize-range")); + } + graph->reserveWorkspaceMB(options_->get("workspace")); + graphs_[id] = graph; + + std::vector> scorers; + if(options_->get("model-mmap", false)) { + scorers = createScorers(options_, model_mmaps_); + } + else { + scorers = createScorers(options_, model_items_); + } + + for(auto scorer : scorers) { + scorer->init(graph); + if(shortlistGenerator_) + scorer->setShortlistGenerator(shortlistGenerator_); + } + + scorers_[id] = scorers; + 
graph->forward(); + }; + + threadPool.enqueue(task, device, id++); } } From dc6566b286a3900ba4dee39f07778d9059b625bc Mon Sep 17 00:00:00 2001 From: Varun Mathur Date: Sat, 4 Nov 2023 15:59:02 -0700 Subject: [PATCH 10/21] more changes --- .github/workflows/ios.yml | 43 +++++++++++++++++++++++++++++++++++++++ CMakeLists.txt | 2 -- 2 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/ios.yml diff --git a/.github/workflows/ios.yml b/.github/workflows/ios.yml new file mode 100644 index 000000000..4dfa8905d --- /dev/null +++ b/.github/workflows/ios.yml @@ -0,0 +1,43 @@ +name: iOS + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build-macos: + name: iOS CPU-only + runs-on: macos-12 + + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + submodules: recursive + + - name: Install dependencies + run: brew install boost openblas openssl protobuf + + - name: Configure CMake + run: | + export LDFLAGS="-L/usr/local/opt/openblas/lib" + export CPPFLAGS="-I/usr/local/opt/openblas/include" + mkdir -p build + cd build + cmake .. \ + -DCOMPILE_CPU=on \ + -DCOMPILE_CUDA=off \ + -DCOMPILE_EXAMPLES=on \ + -DCOMPILE_SERVER=off \ + -DCOMPILE_TESTS=on \ + -DUSE_SENTENCEPIECE=on \ + -DCMAKE_TOOLCHAIN_FILE=../cmake/ios.toolchain.cmake \ + -DUSE_SENTENCEPIECE=on \ + -DPLATFORM=OS64 \ + -DDEPLOYMENT_TARGET=13.0 + + - name: Compile + working-directory: build + run: cmake --build . --config Release \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 98125bcbe..ebca39a9f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -102,8 +102,6 @@ if(ARM) # Define that we are using ARM add_compile_definitions(ARM FMA SSE) - set(USE_SIMD_UTILS ON) - # Some warnings as errors. I don't feel comfortable about the strict aliasing. 
set(ARM_WARNINGS "-fno-strict-aliasing -Wno-comment") From c4efce72aa2ed64e7e3afb1308a4ac3312c73fc8 Mon Sep 17 00:00:00 2001 From: Varun Mathur Date: Sat, 4 Nov 2023 16:00:28 -0700 Subject: [PATCH 11/21] update simd to master --- src/3rd_party/simd_utils | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/3rd_party/simd_utils b/src/3rd_party/simd_utils index 658dac3b6..fe9fa82c9 160000 --- a/src/3rd_party/simd_utils +++ b/src/3rd_party/simd_utils @@ -1 +1 @@ -Subproject commit 658dac3b678ec25bd2ea4d075d0498b3946a0d52 +Subproject commit fe9fa82c9d7e6297913bc6c98fe079acc6e157e9 From 0261395d30a04ebb7b01776aa984fe4751906bd0 Mon Sep 17 00:00:00 2001 From: Varun Mathur Date: Sat, 4 Nov 2023 16:01:18 -0700 Subject: [PATCH 12/21] update sentencepiece commit --- src/3rd_party/sentencepiece | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/3rd_party/sentencepiece b/src/3rd_party/sentencepiece index fb6f8e408..e8e46f594 160000 --- a/src/3rd_party/sentencepiece +++ b/src/3rd_party/sentencepiece @@ -1 +1 @@ -Subproject commit fb6f8e408d2078ebfedc8ccc33985fef03c50b0e +Subproject commit e8e46f5940d1cec3f59c6d89df5500cac953a089 From 10e78c0a2dff3ef4bdb8448a58c71460e5e0410e Mon Sep 17 00:00:00 2001 From: Varun Mathur Date: Sat, 4 Nov 2023 16:22:13 -0700 Subject: [PATCH 13/21] replace spdlog includes --- src/3rd_party/spdlog/async_logger.h | 6 +++--- src/3rd_party/spdlog/common.h | 2 +- src/3rd_party/spdlog/details/async_log_helper.h | 12 ++++++------ src/3rd_party/spdlog/details/async_logger_impl.h | 4 ++-- src/3rd_party/spdlog/details/file_helper.h | 4 ++-- src/3rd_party/spdlog/details/line_logger_fwd.h | 4 ++-- src/3rd_party/spdlog/details/line_logger_impl.h | 6 +++--- src/3rd_party/spdlog/details/log_msg.h | 4 ++-- src/3rd_party/spdlog/details/logger_impl.h | 2 +- src/3rd_party/spdlog/details/mpmc_bounded_q.h | 2 +- src/3rd_party/spdlog/details/os.h | 2 +- .../spdlog/details/pattern_formatter_impl.h | 8 ++++---- 
src/3rd_party/spdlog/details/registry.h | 8 ++++---- src/3rd_party/spdlog/details/spdlog_impl.h | 12 ++++++------ src/3rd_party/spdlog/formatter.h | 4 ++-- src/3rd_party/spdlog/logger.h | 10 +++++----- src/3rd_party/spdlog/sinks/android_sink.h | 4 ++-- src/3rd_party/spdlog/sinks/ansicolor_sink.h | 4 ++-- src/3rd_party/spdlog/sinks/base_sink.h | 8 ++++---- src/3rd_party/spdlog/sinks/dist_sink.h | 8 ++++---- src/3rd_party/spdlog/sinks/file_sinks.h | 8 ++++---- src/3rd_party/spdlog/sinks/msvc_sink.h | 4 ++-- src/3rd_party/spdlog/sinks/null_sink.h | 4 ++-- src/3rd_party/spdlog/sinks/ostream_sink.h | 4 ++-- src/3rd_party/spdlog/sinks/sink.h | 2 +- src/3rd_party/spdlog/sinks/stdout_sinks.h | 2 +- src/3rd_party/spdlog/sinks/syslog_sink.h | 6 +++--- src/3rd_party/spdlog/spdlog.h | 8 ++++---- 28 files changed, 76 insertions(+), 76 deletions(-) diff --git a/src/3rd_party/spdlog/async_logger.h b/src/3rd_party/spdlog/async_logger.h index 21075614d..e6d75fef0 100644 --- a/src/3rd_party/spdlog/async_logger.h +++ b/src/3rd_party/spdlog/async_logger.h @@ -15,8 +15,8 @@ // 3. will throw spdlog_ex upon log exceptions // Upong destruction, logs all remaining messages in the queue before destructing.. 
-#include -#include +#include "spdlog/common.h" +#include "spdlog/logger.h" #include #include @@ -70,5 +70,5 @@ class async_logger :public logger } -#include +#include "spdlog/details/async_logger_impl.h" diff --git a/src/3rd_party/spdlog/common.h b/src/3rd_party/spdlog/common.h index 556692787..cc1db3eeb 100644 --- a/src/3rd_party/spdlog/common.h +++ b/src/3rd_party/spdlog/common.h @@ -17,7 +17,7 @@ #include #endif -#include +#include "spdlog/details/null_mutex.h" //visual studio does not support noexcept yet #ifndef _MSC_VER diff --git a/src/3rd_party/spdlog/details/async_log_helper.h b/src/3rd_party/spdlog/details/async_log_helper.h index c14365f93..48090d697 100644 --- a/src/3rd_party/spdlog/details/async_log_helper.h +++ b/src/3rd_party/spdlog/details/async_log_helper.h @@ -14,12 +14,12 @@ #pragma once -#include -#include -#include -#include -#include -#include +#include "spdlog/common.h" +#include "spdlog/sinks/sink.h" +#include "spdlog/details/mpmc_bounded_q.h" +#include "spdlog/details/log_msg.h" +#include "spdlog/details/os.h" +#include "spdlog/formatter.h" #include #include diff --git a/src/3rd_party/spdlog/details/async_logger_impl.h b/src/3rd_party/spdlog/details/async_logger_impl.h index 140d45f48..9d1ca8455 100644 --- a/src/3rd_party/spdlog/details/async_logger_impl.h +++ b/src/3rd_party/spdlog/details/async_logger_impl.h @@ -8,8 +8,8 @@ // Async Logger implementation // Use an async_sink (queue per logger) to perform the logging in a worker thread -#include -#include +#include "spdlog/details/async_log_helper.h" +#include "spdlog/async_logger.h" #include #include diff --git a/src/3rd_party/spdlog/details/file_helper.h b/src/3rd_party/spdlog/details/file_helper.h index 024a21b55..c879a2efc 100644 --- a/src/3rd_party/spdlog/details/file_helper.h +++ b/src/3rd_party/spdlog/details/file_helper.h @@ -10,8 +10,8 @@ // Can be set to auto flush on every line // Throw spdlog_ex exception on errors -#include -#include +#include "spdlog/details/os.h" +#include 
"spdlog/details/log_msg.h" #include #include diff --git a/src/3rd_party/spdlog/details/line_logger_fwd.h b/src/3rd_party/spdlog/details/line_logger_fwd.h index a8bc58ff5..ad2d7445b 100644 --- a/src/3rd_party/spdlog/details/line_logger_fwd.h +++ b/src/3rd_party/spdlog/details/line_logger_fwd.h @@ -4,8 +4,8 @@ // #pragma once -#include -#include +#include "spdlog/common.h" +#include "spdlog/details/log_msg.h" #include diff --git a/src/3rd_party/spdlog/details/line_logger_impl.h b/src/3rd_party/spdlog/details/line_logger_impl.h index d61225afb..c1ee3f094 100644 --- a/src/3rd_party/spdlog/details/line_logger_impl.h +++ b/src/3rd_party/spdlog/details/line_logger_impl.h @@ -5,9 +5,9 @@ #pragma once #include -#include -#include -#include +#include "spdlog/details/line_logger_fwd.h" +#include "spdlog/common.h" +#include "spdlog/logger.h" #include #include diff --git a/src/3rd_party/spdlog/details/log_msg.h b/src/3rd_party/spdlog/details/log_msg.h index 0d50b6848..96c3b2811 100644 --- a/src/3rd_party/spdlog/details/log_msg.h +++ b/src/3rd_party/spdlog/details/log_msg.h @@ -5,8 +5,8 @@ #pragma once -#include -#include +#include "spdlog/common.h" +#include "spdlog/details/format.h" #include #include diff --git a/src/3rd_party/spdlog/details/logger_impl.h b/src/3rd_party/spdlog/details/logger_impl.h index 428cd1896..9f5e5cf2d 100644 --- a/src/3rd_party/spdlog/details/logger_impl.h +++ b/src/3rd_party/spdlog/details/logger_impl.h @@ -5,7 +5,7 @@ #pragma once -#include +#include "spdlog/logger.h" #include #include diff --git a/src/3rd_party/spdlog/details/mpmc_bounded_q.h b/src/3rd_party/spdlog/details/mpmc_bounded_q.h index ad14d6f25..83afd6086 100644 --- a/src/3rd_party/spdlog/details/mpmc_bounded_q.h +++ b/src/3rd_party/spdlog/details/mpmc_bounded_q.h @@ -43,7 +43,7 @@ Distributed under the MIT License (http://opensource.org/licenses/MIT) #pragma once -#include +#include "spdlog/common.h" #include #include diff --git a/src/3rd_party/spdlog/details/os.h 
b/src/3rd_party/spdlog/details/os.h index f4720648a..004368e36 100644 --- a/src/3rd_party/spdlog/details/os.h +++ b/src/3rd_party/spdlog/details/os.h @@ -4,7 +4,7 @@ // #pragma once -#include +#include "spdlog/common.h" #include #include diff --git a/src/3rd_party/spdlog/details/pattern_formatter_impl.h b/src/3rd_party/spdlog/details/pattern_formatter_impl.h index 3965b831f..b7afcb383 100644 --- a/src/3rd_party/spdlog/details/pattern_formatter_impl.h +++ b/src/3rd_party/spdlog/details/pattern_formatter_impl.h @@ -5,10 +5,10 @@ #pragma once -#include -#include -#include -#include +#include "spdlog/formatter.h" +#include "spdlog/details/log_msg.h" +#include "spdlog/details/os.h" +#include "spdlog/details/format.h" #include #include diff --git a/src/3rd_party/spdlog/details/registry.h b/src/3rd_party/spdlog/details/registry.h index 0a35451c3..35e91cafb 100644 --- a/src/3rd_party/spdlog/details/registry.h +++ b/src/3rd_party/spdlog/details/registry.h @@ -10,10 +10,10 @@ // If user requests a non existing logger, nullptr will be returned // This class is thread safe -#include -#include -#include -#include +#include "spdlog/details/null_mutex.h" +#include "spdlog/logger.h" +#include "spdlog/async_logger.h" +#include "spdlog/common.h" #include #include diff --git a/src/3rd_party/spdlog/details/spdlog_impl.h b/src/3rd_party/spdlog/details/spdlog_impl.h index 3942e5a70..c359cc481 100644 --- a/src/3rd_party/spdlog/details/spdlog_impl.h +++ b/src/3rd_party/spdlog/details/spdlog_impl.h @@ -8,12 +8,12 @@ // // Global registry functions // -#include -#include -#include -#include -#include -#include +#include "spdlog/spdlog.h" +#include "spdlog/details/registry.h" +#include "spdlog/sinks/file_sinks.h" +#include "spdlog/sinks/stdout_sinks.h" +#include "spdlog/sinks/syslog_sink.h" +#include "spdlog/sinks/ansicolor_sink.h" #include #include diff --git a/src/3rd_party/spdlog/formatter.h b/src/3rd_party/spdlog/formatter.h index 0ffcec03e..89fc03300 100644 --- 
a/src/3rd_party/spdlog/formatter.h +++ b/src/3rd_party/spdlog/formatter.h @@ -5,7 +5,7 @@ #pragma once -#include +#include "spdlog/details/log_msg.h" #include #include @@ -41,5 +41,5 @@ class pattern_formatter : public formatter }; } -#include +#include "spdlog/details/pattern_formatter_impl.h" diff --git a/src/3rd_party/spdlog/logger.h b/src/3rd_party/spdlog/logger.h index 41d51fbf8..8e9aeb8d0 100644 --- a/src/3rd_party/spdlog/logger.h +++ b/src/3rd_party/spdlog/logger.h @@ -12,9 +12,9 @@ // 2. Format the message using the formatter function // 3. Pass the formatted message to its sinks to performa the actual logging -#include -#include -#include +#include "spdlog/sinks/base_sink.h" +#include "spdlog/common.h" +#include "spdlog/details/line_logger_fwd.h" #include #include @@ -107,6 +107,6 @@ class logger }; } -#include -#include +#include "spdlog/details/logger_impl.h" +#include "spdlog/details/line_logger_impl.h" diff --git a/src/3rd_party/spdlog/sinks/android_sink.h b/src/3rd_party/spdlog/sinks/android_sink.h index 885f78da7..1e2ba7cf0 100644 --- a/src/3rd_party/spdlog/sinks/android_sink.h +++ b/src/3rd_party/spdlog/sinks/android_sink.h @@ -7,8 +7,8 @@ #if defined(__ANDROID__) -#include -#include +#include "spdlog/sinks/base_sink.h" +#include "spdlog/details/null_mutex.h" #include diff --git a/src/3rd_party/spdlog/sinks/ansicolor_sink.h b/src/3rd_party/spdlog/sinks/ansicolor_sink.h index 664b25992..08e40c70c 100644 --- a/src/3rd_party/spdlog/sinks/ansicolor_sink.h +++ b/src/3rd_party/spdlog/sinks/ansicolor_sink.h @@ -5,8 +5,8 @@ #pragma once -#include -#include +#include "spdlog/sinks/base_sink.h" +#include "spdlog/common.h" #include #include diff --git a/src/3rd_party/spdlog/sinks/base_sink.h b/src/3rd_party/spdlog/sinks/base_sink.h index 615bb6f0c..68c1306d7 100644 --- a/src/3rd_party/spdlog/sinks/base_sink.h +++ b/src/3rd_party/spdlog/sinks/base_sink.h @@ -10,10 +10,10 @@ // all locking is taken care of here so no locking needed by the implementors.. 
// -#include -#include -#include -#include +#include "spdlog/sinks/sink.h" +#include "spdlog/formatter.h" +#include "spdlog/common.h" +#include "spdlog/details/log_msg.h" #include diff --git a/src/3rd_party/spdlog/sinks/dist_sink.h b/src/3rd_party/spdlog/sinks/dist_sink.h index 0e7cfc1e9..b957f47af 100644 --- a/src/3rd_party/spdlog/sinks/dist_sink.h +++ b/src/3rd_party/spdlog/sinks/dist_sink.h @@ -5,10 +5,10 @@ #pragma once -#include -#include -#include -#include +#include "spdlog/details/log_msg.h" +#include "spdlog/details/null_mutex.h" +#include "spdlog/sinks/base_sink.h" +#include "spdlog/sinks/sink.h" #include #include diff --git a/src/3rd_party/spdlog/sinks/file_sinks.h b/src/3rd_party/spdlog/sinks/file_sinks.h index e28572709..e64b2361c 100644 --- a/src/3rd_party/spdlog/sinks/file_sinks.h +++ b/src/3rd_party/spdlog/sinks/file_sinks.h @@ -5,10 +5,10 @@ #pragma once -#include -#include -#include -#include +#include "spdlog/sinks/base_sink.h" +#include "spdlog/details/null_mutex.h" +#include "spdlog/details/file_helper.h" +#include "spdlog/details/format.h" #include #include diff --git a/src/3rd_party/spdlog/sinks/msvc_sink.h b/src/3rd_party/spdlog/sinks/msvc_sink.h index 16342ca26..5382cd3e4 100644 --- a/src/3rd_party/spdlog/sinks/msvc_sink.h +++ b/src/3rd_party/spdlog/sinks/msvc_sink.h @@ -7,8 +7,8 @@ #if defined(_MSC_VER) -#include -#include +#include "spdlog/sinks/base_sink.h" +#include "spdlog/details/null_mutex.h" #include diff --git a/src/3rd_party/spdlog/sinks/null_sink.h b/src/3rd_party/spdlog/sinks/null_sink.h index 68bd9c94d..e6a656053 100644 --- a/src/3rd_party/spdlog/sinks/null_sink.h +++ b/src/3rd_party/spdlog/sinks/null_sink.h @@ -5,8 +5,8 @@ #pragma once -#include -#include +#include "spdlog/sinks/base_sink.h" +#include "spdlog/details/null_mutex.h" #include diff --git a/src/3rd_party/spdlog/sinks/ostream_sink.h b/src/3rd_party/spdlog/sinks/ostream_sink.h index feb5efa18..51459c10a 100644 --- a/src/3rd_party/spdlog/sinks/ostream_sink.h +++ 
b/src/3rd_party/spdlog/sinks/ostream_sink.h @@ -5,8 +5,8 @@ #pragma once -#include -#include +#include "spdlog/details/null_mutex.h" +#include "spdlog/sinks/base_sink.h" #include #include diff --git a/src/3rd_party/spdlog/sinks/sink.h b/src/3rd_party/spdlog/sinks/sink.h index 39dc771ad..a24c78fca 100644 --- a/src/3rd_party/spdlog/sinks/sink.h +++ b/src/3rd_party/spdlog/sinks/sink.h @@ -6,7 +6,7 @@ #pragma once -#include +#include "spdlog/details/log_msg.h" namespace spdlog { diff --git a/src/3rd_party/spdlog/sinks/stdout_sinks.h b/src/3rd_party/spdlog/sinks/stdout_sinks.h index ca4c55ac8..380539f21 100644 --- a/src/3rd_party/spdlog/sinks/stdout_sinks.h +++ b/src/3rd_party/spdlog/sinks/stdout_sinks.h @@ -5,7 +5,7 @@ #pragma once -#include +#include "spdlog/details/null_mutex.h" #include #include diff --git a/src/3rd_party/spdlog/sinks/syslog_sink.h b/src/3rd_party/spdlog/sinks/syslog_sink.h index 5d7ccf871..eb88d4bff 100644 --- a/src/3rd_party/spdlog/sinks/syslog_sink.h +++ b/src/3rd_party/spdlog/sinks/syslog_sink.h @@ -7,9 +7,9 @@ #if defined(__linux__) || defined(__APPLE__) -#include -#include -#include +#include "spdlog/sinks/sink.h" +#include "spdlog/common.h" +#include "spdlog/details/log_msg.h" #include #include diff --git a/src/3rd_party/spdlog/spdlog.h b/src/3rd_party/spdlog/spdlog.h index 2fd5e4b8f..6f3ea02fc 100644 --- a/src/3rd_party/spdlog/spdlog.h +++ b/src/3rd_party/spdlog/spdlog.h @@ -8,9 +8,9 @@ #pragma once -#include -#include -#include +#include "spdlog/tweakme.h" +#include "spdlog/common.h" +#include "spdlog/logger.h" #include #include @@ -136,4 +136,4 @@ void drop_all(); } -#include +#include "spdlog/details/spdlog_impl.h" From ff97193a2d414cff5447befcc45b69407500f181 Mon Sep 17 00:00:00 2001 From: Varun Mathur Date: Sat, 4 Nov 2023 16:48:55 -0700 Subject: [PATCH 14/21] memory map configs correctly --- src/common/binary.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/binary.cpp b/src/common/binary.cpp index 
6bb90c508..f8dc26f12 100644 --- a/src/common/binary.cpp +++ b/src/common/binary.cpp @@ -109,7 +109,7 @@ void loadItems(const std::string& fileName, std::vector& items) { io::Item getItem(const void* current, const std::string& varName) { std::vector items; - loadItems(current, items); + loadItems(current, items, /*mapped=*/true); for(auto& item : items) if(item.name == varName) From 41dbe2da265259c73abb99160df8ee344b163e54 Mon Sep 17 00:00:00 2001 From: Varun Mathur Date: Sat, 4 Nov 2023 16:49:27 -0700 Subject: [PATCH 15/21] remove -march native --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ebca39a9f..cf5c5405d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -279,7 +279,7 @@ else(MSVC) set(CMAKE_RDYNAMIC_FLAG "-rdynamic") endif(CMAKE_COMPILER_IS_GNUCC) - set(CMAKE_CXX_FLAGS "-std=c++11 -pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}") + set(CMAKE_CXX_FLAGS "-std=c++11 -pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} ${INTRINSICS}") set(CMAKE_CXX_FLAGS_RELEASE "-O3 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}") set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}") set(CMAKE_CXX_FLAGS_SLIM "-O3 -funroll-loops -DNDEBUG") @@ -289,7 +289,7 @@ else(MSVC) set(CMAKE_CXX_FLAGS_PROFUSE "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-use -fprofile-correction") # these need to be set separately - set(CMAKE_C_FLAGS "-pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}") + set(CMAKE_C_FLAGS "-pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} ${INTRINSICS}") set(CMAKE_C_FLAGS_RELEASE "-O3 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}") set(CMAKE_C_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}") set(CMAKE_C_FLAGS_SLIM "-O3 -funroll-loops -DNDEBUG") From 87a974a05af51820487f9495548b91731a22609f Mon Sep 17 00:00:00 2001 From: Varun Mathur Date: Mon, 6 Nov 2023 22:01:08 -0800 Subject: [PATCH 16/21] update sp to latest --- 
src/3rd_party/sentencepiece | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/3rd_party/sentencepiece b/src/3rd_party/sentencepiece index e8e46f594..5978fc9eb 160000 --- a/src/3rd_party/sentencepiece +++ b/src/3rd_party/sentencepiece @@ -1 +1 @@ -Subproject commit e8e46f5940d1cec3f59c6d89df5500cac953a089 +Subproject commit 5978fc9ebb6067a3a1b11370bf46b8da0f89e592 From b2b3e26a5c6807e6a1d9c98b785b55807ae4d87e Mon Sep 17 00:00:00 2001 From: Varun Mathur Date: Mon, 6 Nov 2023 22:17:10 -0800 Subject: [PATCH 17/21] don't use -march --- CMakeLists.txt | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ebca39a9f..c44c496a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -279,7 +279,7 @@ else(MSVC) set(CMAKE_RDYNAMIC_FLAG "-rdynamic") endif(CMAKE_COMPILER_IS_GNUCC) - set(CMAKE_CXX_FLAGS "-std=c++11 -pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}") + set(CMAKE_CXX_FLAGS "-std=c++11 -pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} ${INTRINSICS}") set(CMAKE_CXX_FLAGS_RELEASE "-O3 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}") set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}") set(CMAKE_CXX_FLAGS_SLIM "-O3 -funroll-loops -DNDEBUG") @@ -289,7 +289,7 @@ else(MSVC) set(CMAKE_CXX_FLAGS_PROFUSE "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-use -fprofile-correction") # these need to be set separately - set(CMAKE_C_FLAGS "-pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}") + set(CMAKE_C_FLAGS "-pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} ${INTRINSICS}") set(CMAKE_C_FLAGS_RELEASE "-O3 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}") set(CMAKE_C_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}") set(CMAKE_C_FLAGS_SLIM "-O3 -funroll-loops -DNDEBUG") @@ -297,6 +297,12 @@ else(MSVC) set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg") set(CMAKE_C_FLAGS_PROFGEN "${CMAKE_C_FLAGS_RELEASE} -fprofile-generate 
-fprofile-correction") set(CMAKE_C_FLAGS_PROFUSE "${CMAKE_C_FLAGS_RELEASE} -fprofile-use -fprofile-correction") + + # set -march for all builds except iOS cross compilation + if(NOT CMAKE_SYSTEM_NAME STREQUAL "iOS" ) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=${BUILD_ARCH}") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${BUILD_ARCH}") + endif() endif(MSVC) # with gcc 7.0 and above we need to mark fallthrough in switch case statements From 8124cf003e751fff7bad11bf97281231c9bd66fe Mon Sep 17 00:00:00 2001 From: Varun Mathur Date: Tue, 6 Feb 2024 10:48:26 -0800 Subject: [PATCH 18/21] add comment explaining build --- src/3rd_party/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/3rd_party/CMakeLists.txt b/src/3rd_party/CMakeLists.txt index cf7cc1359..f586d6ec4 100644 --- a/src/3rd_party/CMakeLists.txt +++ b/src/3rd_party/CMakeLists.txt @@ -9,11 +9,14 @@ add_subdirectory(./faiss) include_directories(./faiss) if(COMPILE_CPU) + # intgemm is not ARM-compatible. do not build it if we are on ARM if(NOT GENERATE_MARIAN_INSTALL_TARGETS AND NOT ARM) set(INTGEMM_DONT_BUILD_TESTS ON CACHE BOOL "Disable intgemm tests") add_subdirectory(./intgemm) endif() + # the default codepath does not use ruy so there is no need to add these directories + # to the build unless it is explicitly enabled. 
RUY is intended mostly for ARM support if(USE_RUY_SGEMM) set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE BOOL " " FORCE) set(CPUINFO_BUILD_BENCHMARKS OFF CACHE BOOL " " FORCE) From 7604996aad019eae8c09e01e04ed3bc2c3318801 Mon Sep 17 00:00:00 2001 From: Varun Mathur Date: Tue, 6 Feb 2024 11:22:29 -0800 Subject: [PATCH 19/21] revert spdlog changes --- src/3rd_party/spdlog/async_logger.h | 6 +++--- src/3rd_party/spdlog/common.h | 2 +- src/3rd_party/spdlog/details/async_log_helper.h | 12 ++++++------ src/3rd_party/spdlog/details/async_logger_impl.h | 4 ++-- src/3rd_party/spdlog/details/file_helper.h | 4 ++-- src/3rd_party/spdlog/details/line_logger_fwd.h | 4 ++-- src/3rd_party/spdlog/details/line_logger_impl.h | 6 +++--- src/3rd_party/spdlog/details/log_msg.h | 4 ++-- src/3rd_party/spdlog/details/logger_impl.h | 2 +- src/3rd_party/spdlog/details/mpmc_bounded_q.h | 2 +- src/3rd_party/spdlog/details/os.h | 2 +- .../spdlog/details/pattern_formatter_impl.h | 8 ++++---- src/3rd_party/spdlog/details/registry.h | 8 ++++---- src/3rd_party/spdlog/details/spdlog_impl.h | 12 ++++++------ src/3rd_party/spdlog/formatter.h | 4 ++-- src/3rd_party/spdlog/logger.h | 10 +++++----- src/3rd_party/spdlog/sinks/android_sink.h | 4 ++-- src/3rd_party/spdlog/sinks/ansicolor_sink.h | 4 ++-- src/3rd_party/spdlog/sinks/base_sink.h | 8 ++++---- src/3rd_party/spdlog/sinks/dist_sink.h | 8 ++++---- src/3rd_party/spdlog/sinks/file_sinks.h | 8 ++++---- src/3rd_party/spdlog/sinks/msvc_sink.h | 4 ++-- src/3rd_party/spdlog/sinks/null_sink.h | 4 ++-- src/3rd_party/spdlog/sinks/ostream_sink.h | 4 ++-- src/3rd_party/spdlog/sinks/sink.h | 2 +- src/3rd_party/spdlog/sinks/stdout_sinks.h | 2 +- src/3rd_party/spdlog/sinks/syslog_sink.h | 6 +++--- src/3rd_party/spdlog/spdlog.h | 8 ++++---- 28 files changed, 76 insertions(+), 76 deletions(-) diff --git a/src/3rd_party/spdlog/async_logger.h b/src/3rd_party/spdlog/async_logger.h index e6d75fef0..21075614d 100644 --- a/src/3rd_party/spdlog/async_logger.h +++ 
b/src/3rd_party/spdlog/async_logger.h @@ -15,8 +15,8 @@ // 3. will throw spdlog_ex upon log exceptions // Upong destruction, logs all remaining messages in the queue before destructing.. -#include "spdlog/common.h" -#include "spdlog/logger.h" +#include +#include #include #include @@ -70,5 +70,5 @@ class async_logger :public logger } -#include "spdlog/details/async_logger_impl.h" +#include diff --git a/src/3rd_party/spdlog/common.h b/src/3rd_party/spdlog/common.h index cc1db3eeb..556692787 100644 --- a/src/3rd_party/spdlog/common.h +++ b/src/3rd_party/spdlog/common.h @@ -17,7 +17,7 @@ #include #endif -#include "spdlog/details/null_mutex.h" +#include //visual studio does not support noexcept yet #ifndef _MSC_VER diff --git a/src/3rd_party/spdlog/details/async_log_helper.h b/src/3rd_party/spdlog/details/async_log_helper.h index 48090d697..c14365f93 100644 --- a/src/3rd_party/spdlog/details/async_log_helper.h +++ b/src/3rd_party/spdlog/details/async_log_helper.h @@ -14,12 +14,12 @@ #pragma once -#include "spdlog/common.h" -#include "spdlog/sinks/sink.h" -#include "spdlog/details/mpmc_bounded_q.h" -#include "spdlog/details/log_msg.h" -#include "spdlog/details/os.h" -#include "spdlog/formatter.h" +#include +#include +#include +#include +#include +#include #include #include diff --git a/src/3rd_party/spdlog/details/async_logger_impl.h b/src/3rd_party/spdlog/details/async_logger_impl.h index 9d1ca8455..140d45f48 100644 --- a/src/3rd_party/spdlog/details/async_logger_impl.h +++ b/src/3rd_party/spdlog/details/async_logger_impl.h @@ -8,8 +8,8 @@ // Async Logger implementation // Use an async_sink (queue per logger) to perform the logging in a worker thread -#include "spdlog/details/async_log_helper.h" -#include "spdlog/async_logger.h" +#include +#include #include #include diff --git a/src/3rd_party/spdlog/details/file_helper.h b/src/3rd_party/spdlog/details/file_helper.h index c879a2efc..024a21b55 100644 --- a/src/3rd_party/spdlog/details/file_helper.h +++ 
b/src/3rd_party/spdlog/details/file_helper.h @@ -10,8 +10,8 @@ // Can be set to auto flush on every line // Throw spdlog_ex exception on errors -#include "spdlog/details/os.h" -#include "spdlog/details/log_msg.h" +#include +#include #include #include diff --git a/src/3rd_party/spdlog/details/line_logger_fwd.h b/src/3rd_party/spdlog/details/line_logger_fwd.h index ad2d7445b..a8bc58ff5 100644 --- a/src/3rd_party/spdlog/details/line_logger_fwd.h +++ b/src/3rd_party/spdlog/details/line_logger_fwd.h @@ -4,8 +4,8 @@ // #pragma once -#include "spdlog/common.h" -#include "spdlog/details/log_msg.h" +#include +#include #include diff --git a/src/3rd_party/spdlog/details/line_logger_impl.h b/src/3rd_party/spdlog/details/line_logger_impl.h index c1ee3f094..d61225afb 100644 --- a/src/3rd_party/spdlog/details/line_logger_impl.h +++ b/src/3rd_party/spdlog/details/line_logger_impl.h @@ -5,9 +5,9 @@ #pragma once #include -#include "spdlog/details/line_logger_fwd.h" -#include "spdlog/common.h" -#include "spdlog/logger.h" +#include +#include +#include #include #include diff --git a/src/3rd_party/spdlog/details/log_msg.h b/src/3rd_party/spdlog/details/log_msg.h index 96c3b2811..0d50b6848 100644 --- a/src/3rd_party/spdlog/details/log_msg.h +++ b/src/3rd_party/spdlog/details/log_msg.h @@ -5,8 +5,8 @@ #pragma once -#include "spdlog/common.h" -#include "spdlog/details/format.h" +#include +#include #include #include diff --git a/src/3rd_party/spdlog/details/logger_impl.h b/src/3rd_party/spdlog/details/logger_impl.h index 9f5e5cf2d..428cd1896 100644 --- a/src/3rd_party/spdlog/details/logger_impl.h +++ b/src/3rd_party/spdlog/details/logger_impl.h @@ -5,7 +5,7 @@ #pragma once -#include "spdlog/logger.h" +#include #include #include diff --git a/src/3rd_party/spdlog/details/mpmc_bounded_q.h b/src/3rd_party/spdlog/details/mpmc_bounded_q.h index 83afd6086..ad14d6f25 100644 --- a/src/3rd_party/spdlog/details/mpmc_bounded_q.h +++ b/src/3rd_party/spdlog/details/mpmc_bounded_q.h @@ -43,7 +43,7 @@ 
Distributed under the MIT License (http://opensource.org/licenses/MIT) #pragma once -#include "spdlog/common.h" +#include #include #include diff --git a/src/3rd_party/spdlog/details/os.h b/src/3rd_party/spdlog/details/os.h index 004368e36..f4720648a 100644 --- a/src/3rd_party/spdlog/details/os.h +++ b/src/3rd_party/spdlog/details/os.h @@ -4,7 +4,7 @@ // #pragma once -#include "spdlog/common.h" +#include #include #include diff --git a/src/3rd_party/spdlog/details/pattern_formatter_impl.h b/src/3rd_party/spdlog/details/pattern_formatter_impl.h index b7afcb383..3965b831f 100644 --- a/src/3rd_party/spdlog/details/pattern_formatter_impl.h +++ b/src/3rd_party/spdlog/details/pattern_formatter_impl.h @@ -5,10 +5,10 @@ #pragma once -#include "spdlog/formatter.h" -#include "spdlog/details/log_msg.h" -#include "spdlog/details/os.h" -#include "spdlog/details/format.h" +#include +#include +#include +#include #include #include diff --git a/src/3rd_party/spdlog/details/registry.h b/src/3rd_party/spdlog/details/registry.h index 35e91cafb..0a35451c3 100644 --- a/src/3rd_party/spdlog/details/registry.h +++ b/src/3rd_party/spdlog/details/registry.h @@ -10,10 +10,10 @@ // If user requests a non existing logger, nullptr will be returned // This class is thread safe -#include "spdlog/details/null_mutex.h" -#include "spdlog/logger.h" -#include "spdlog/async_logger.h" -#include "spdlog/common.h" +#include +#include +#include +#include #include #include diff --git a/src/3rd_party/spdlog/details/spdlog_impl.h b/src/3rd_party/spdlog/details/spdlog_impl.h index c359cc481..3942e5a70 100644 --- a/src/3rd_party/spdlog/details/spdlog_impl.h +++ b/src/3rd_party/spdlog/details/spdlog_impl.h @@ -8,12 +8,12 @@ // // Global registry functions // -#include "spdlog/spdlog.h" -#include "spdlog/details/registry.h" -#include "spdlog/sinks/file_sinks.h" -#include "spdlog/sinks/stdout_sinks.h" -#include "spdlog/sinks/syslog_sink.h" -#include "spdlog/sinks/ansicolor_sink.h" +#include +#include +#include 
+#include +#include +#include #include #include diff --git a/src/3rd_party/spdlog/formatter.h b/src/3rd_party/spdlog/formatter.h index 89fc03300..0ffcec03e 100644 --- a/src/3rd_party/spdlog/formatter.h +++ b/src/3rd_party/spdlog/formatter.h @@ -5,7 +5,7 @@ #pragma once -#include "spdlog/details/log_msg.h" +#include #include #include @@ -41,5 +41,5 @@ class pattern_formatter : public formatter }; } -#include "spdlog/details/pattern_formatter_impl.h" +#include diff --git a/src/3rd_party/spdlog/logger.h b/src/3rd_party/spdlog/logger.h index 8e9aeb8d0..41d51fbf8 100644 --- a/src/3rd_party/spdlog/logger.h +++ b/src/3rd_party/spdlog/logger.h @@ -12,9 +12,9 @@ // 2. Format the message using the formatter function // 3. Pass the formatted message to its sinks to performa the actual logging -#include "spdlog/sinks/base_sink.h" -#include "spdlog/common.h" -#include "spdlog/details/line_logger_fwd.h" +#include +#include +#include #include #include @@ -107,6 +107,6 @@ class logger }; } -#include "spdlog/details/logger_impl.h" -#include "spdlog/details/line_logger_impl.h" +#include +#include diff --git a/src/3rd_party/spdlog/sinks/android_sink.h b/src/3rd_party/spdlog/sinks/android_sink.h index 1e2ba7cf0..885f78da7 100644 --- a/src/3rd_party/spdlog/sinks/android_sink.h +++ b/src/3rd_party/spdlog/sinks/android_sink.h @@ -7,8 +7,8 @@ #if defined(__ANDROID__) -#include "spdlog/sinks/base_sink.h" -#include "spdlog/details/null_mutex.h" +#include +#include #include diff --git a/src/3rd_party/spdlog/sinks/ansicolor_sink.h b/src/3rd_party/spdlog/sinks/ansicolor_sink.h index 08e40c70c..664b25992 100644 --- a/src/3rd_party/spdlog/sinks/ansicolor_sink.h +++ b/src/3rd_party/spdlog/sinks/ansicolor_sink.h @@ -5,8 +5,8 @@ #pragma once -#include "spdlog/sinks/base_sink.h" -#include "spdlog/common.h" +#include +#include #include #include diff --git a/src/3rd_party/spdlog/sinks/base_sink.h b/src/3rd_party/spdlog/sinks/base_sink.h index 68c1306d7..615bb6f0c 100644 --- 
a/src/3rd_party/spdlog/sinks/base_sink.h +++ b/src/3rd_party/spdlog/sinks/base_sink.h @@ -10,10 +10,10 @@ // all locking is taken care of here so no locking needed by the implementors.. // -#include "spdlog/sinks/sink.h" -#include "spdlog/formatter.h" -#include "spdlog/common.h" -#include "spdlog/details/log_msg.h" +#include +#include +#include +#include #include diff --git a/src/3rd_party/spdlog/sinks/dist_sink.h b/src/3rd_party/spdlog/sinks/dist_sink.h index b957f47af..0e7cfc1e9 100644 --- a/src/3rd_party/spdlog/sinks/dist_sink.h +++ b/src/3rd_party/spdlog/sinks/dist_sink.h @@ -5,10 +5,10 @@ #pragma once -#include "spdlog/details/log_msg.h" -#include "spdlog/details/null_mutex.h" -#include "spdlog/sinks/base_sink.h" -#include "spdlog/sinks/sink.h" +#include +#include +#include +#include #include #include diff --git a/src/3rd_party/spdlog/sinks/file_sinks.h b/src/3rd_party/spdlog/sinks/file_sinks.h index e64b2361c..e28572709 100644 --- a/src/3rd_party/spdlog/sinks/file_sinks.h +++ b/src/3rd_party/spdlog/sinks/file_sinks.h @@ -5,10 +5,10 @@ #pragma once -#include "spdlog/sinks/base_sink.h" -#include "spdlog/details/null_mutex.h" -#include "spdlog/details/file_helper.h" -#include "spdlog/details/format.h" +#include +#include +#include +#include #include #include diff --git a/src/3rd_party/spdlog/sinks/msvc_sink.h b/src/3rd_party/spdlog/sinks/msvc_sink.h index 5382cd3e4..16342ca26 100644 --- a/src/3rd_party/spdlog/sinks/msvc_sink.h +++ b/src/3rd_party/spdlog/sinks/msvc_sink.h @@ -7,8 +7,8 @@ #if defined(_MSC_VER) -#include "spdlog/sinks/base_sink.h" -#include "spdlog/details/null_mutex.h" +#include +#include #include diff --git a/src/3rd_party/spdlog/sinks/null_sink.h b/src/3rd_party/spdlog/sinks/null_sink.h index e6a656053..68bd9c94d 100644 --- a/src/3rd_party/spdlog/sinks/null_sink.h +++ b/src/3rd_party/spdlog/sinks/null_sink.h @@ -5,8 +5,8 @@ #pragma once -#include "spdlog/sinks/base_sink.h" -#include "spdlog/details/null_mutex.h" +#include +#include #include diff 
--git a/src/3rd_party/spdlog/sinks/ostream_sink.h b/src/3rd_party/spdlog/sinks/ostream_sink.h index 51459c10a..feb5efa18 100644 --- a/src/3rd_party/spdlog/sinks/ostream_sink.h +++ b/src/3rd_party/spdlog/sinks/ostream_sink.h @@ -5,8 +5,8 @@ #pragma once -#include "spdlog/details/null_mutex.h" -#include "spdlog/sinks/base_sink.h" +#include +#include #include #include diff --git a/src/3rd_party/spdlog/sinks/sink.h b/src/3rd_party/spdlog/sinks/sink.h index a24c78fca..39dc771ad 100644 --- a/src/3rd_party/spdlog/sinks/sink.h +++ b/src/3rd_party/spdlog/sinks/sink.h @@ -6,7 +6,7 @@ #pragma once -#include "spdlog/details/log_msg.h" +#include namespace spdlog { diff --git a/src/3rd_party/spdlog/sinks/stdout_sinks.h b/src/3rd_party/spdlog/sinks/stdout_sinks.h index 380539f21..ca4c55ac8 100644 --- a/src/3rd_party/spdlog/sinks/stdout_sinks.h +++ b/src/3rd_party/spdlog/sinks/stdout_sinks.h @@ -5,7 +5,7 @@ #pragma once -#include "spdlog/details/null_mutex.h" +#include #include #include diff --git a/src/3rd_party/spdlog/sinks/syslog_sink.h b/src/3rd_party/spdlog/sinks/syslog_sink.h index eb88d4bff..5d7ccf871 100644 --- a/src/3rd_party/spdlog/sinks/syslog_sink.h +++ b/src/3rd_party/spdlog/sinks/syslog_sink.h @@ -7,9 +7,9 @@ #if defined(__linux__) || defined(__APPLE__) -#include "spdlog/sinks/sink.h" -#include "spdlog/common.h" -#include "spdlog/details/log_msg.h" +#include +#include +#include #include #include diff --git a/src/3rd_party/spdlog/spdlog.h b/src/3rd_party/spdlog/spdlog.h index 6f3ea02fc..2fd5e4b8f 100644 --- a/src/3rd_party/spdlog/spdlog.h +++ b/src/3rd_party/spdlog/spdlog.h @@ -8,9 +8,9 @@ #pragma once -#include "spdlog/tweakme.h" -#include "spdlog/common.h" -#include "spdlog/logger.h" +#include +#include +#include #include #include @@ -136,4 +136,4 @@ void drop_all(); } -#include "spdlog/details/spdlog_impl.h" +#include From ebdfcba377d6edae643a9895c4f877ea5772e0ac Mon Sep 17 00:00:00 2001 From: Varun Mathur Date: Wed, 7 Feb 2024 12:38:45 -0800 Subject: [PATCH 20/21] 
resolve comments --- CMakeLists.txt | 8 +++++--- src/functional/operators.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 75d17c2bd..ab1c32723 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -88,8 +88,11 @@ if(CMAKE_SYSTEM_NAME STREQUAL "iOS" ) list(APPEND ALL_WARNINGS -Wno-shorten-64-to-32;) endif() -# ARM support +# ARM support: currently ONLY armv8. armv8 includes NEON by default +# we do not currently have good support for automatic architecture detection, including for cross-compilation +# this is planned for future PRs if(ARM) + # Apple by default has Apple Accelerate. Otherwise fallback to RUY for GEMM if(APPLE) message(STATUS "Using Apple Accelerate SGEMM") @@ -100,12 +103,11 @@ if(ARM) option(USE_RUY_SGEMM "Compile with Ruy SGEMM" ON) endif(APPLE) - # Define that we are using ARM + # Define that we are using ARM as required by simd_utils. See their README for info add_compile_definitions(ARM FMA SSE) # Some warnings as errors. I don't feel comfortable about the strict aliasing. set(ARM_WARNINGS "-fno-strict-aliasing -Wno-comment") - # @TODO this assumes ArmV8. 
We should also look at armv7 if(MSVC) add_compile_options(/flax-vector-conversions) else(MSVC) diff --git a/src/functional/operators.h b/src/functional/operators.h index 6ecc02bd8..e7dcea3c6 100644 --- a/src/functional/operators.h +++ b/src/functional/operators.h @@ -220,7 +220,7 @@ struct Ops { #ifndef ARM #include "3rd_party/sse_mathfun.h" #else -#include "3rd_party/simd_utils/simd_utils.h" // @TODO this might be dependent on NEON +#include "3rd_party/simd_utils/simd_utils.h" #endif namespace marian { From d052891fc3c0532dee0e56e6e315862733d68984 Mon Sep 17 00:00:00 2001 From: Varun Mathur Date: Fri, 16 Feb 2024 10:10:23 -0500 Subject: [PATCH 21/21] address comment --- src/3rd_party/faiss/VectorTransform.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/3rd_party/faiss/VectorTransform.cpp b/src/3rd_party/faiss/VectorTransform.cpp index c6bf9d4a1..22fecbf78 100644 --- a/src/3rd_party/faiss/VectorTransform.cpp +++ b/src/3rd_party/faiss/VectorTransform.cpp @@ -20,6 +20,8 @@ using namespace faiss; #ifdef ARM +// we use various AVX/SSE instructions in this file +// simd_utils translates these into ARM/NEON compatible instructions #include "3rd_party/simd_utils/simd_utils.h" #endif