From 2d067afb9ce5e3a0b6c32585706affc6e7295920 Mon Sep 17 00:00:00 2001 From: Varun Mathur Date: Fri, 16 Feb 2024 11:44:13 -0500 Subject: [PATCH] add iOS and armv8 build (#1014) add iOS and armv8 build (#1014) Co-authored-by: Nikolay Bogoychev Co-authored-by: Varun Mathur --- .github/workflows/ios.yml | 43 + .gitmodules | 6 + CMakeLists.txt | 75 +- cmake/ios.toolchain.cmake | 1099 +++++++++++++++++++ src/3rd_party/CMakeLists.txt | 15 +- src/3rd_party/faiss/VectorTransform.cpp | 6 + src/3rd_party/faiss/VectorTransform.h | 2 + src/3rd_party/ruy | 1 + src/3rd_party/sentencepiece | 2 +- src/3rd_party/simd_utils | 1 + src/common/binary.cpp | 2 +- src/common/types.h | 6 +- src/functional/operators.h | 5 +- src/tensors/cpu/expression_graph_packable.h | 2 +- src/tensors/cpu/fbgemm/packed_gemm.cpp | 8 +- src/tensors/cpu/integer_common.h | 14 +- src/tensors/cpu/intgemm_interface.h | 4 +- src/tensors/cpu/prod.cpp | 8 - src/tensors/cpu/prod_blas.h | 130 ++- src/translator/translator.h | 77 +- 20 files changed, 1435 insertions(+), 71 deletions(-) create mode 100644 .github/workflows/ios.yml create mode 100644 cmake/ios.toolchain.cmake create mode 160000 src/3rd_party/ruy create mode 160000 src/3rd_party/simd_utils diff --git a/.github/workflows/ios.yml b/.github/workflows/ios.yml new file mode 100644 index 000000000..4dfa8905d --- /dev/null +++ b/.github/workflows/ios.yml @@ -0,0 +1,43 @@ +name: iOS + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build-macos: + name: iOS CPU-only + runs-on: macos-12 + + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + submodules: recursive + + - name: Install dependencies + run: brew install boost openblas openssl protobuf + + - name: Configure CMake + run: | + export LDFLAGS="-L/usr/local/opt/openblas/lib" + export CPPFLAGS="-I/usr/local/opt/openblas/include" + mkdir -p build + cd build + cmake .. \ + -DCOMPILE_CPU=on \ + -DCOMPILE_CUDA=off \ + -DCOMPILE_EXAMPLES=on \ + -DCOMPILE_SERVER=off \ + -DCOMPILE_TESTS=on \ + -DUSE_SENTENCEPIECE=on \ + -DCMAKE_TOOLCHAIN_FILE=../cmake/ios.toolchain.cmake \ + -DUSE_SENTENCEPIECE=on \ + -DPLATFORM=OS64 \ + -DDEPLOYMENT_TARGET=13.0 + + - name: Compile + working-directory: build + run: cmake --build . --config Release \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index a1a876d8b..37b125076 100644 --- a/.gitmodules +++ b/.gitmodules @@ -20,3 +20,9 @@ [submodule "src/3rd_party/simple-websocket-server"] path = src/3rd_party/simple-websocket-server url = https://github.com/marian-nmt/Simple-WebSocket-Server +[submodule "src/3rd_party/ruy"] + path = src/3rd_party/ruy + url = https://github.com/google/ruy.git +[submodule "src/3rd_party/simd_utils"] + path = src/3rd_party/simd_utils + url = https://github.com/JishinMaster/simd_utils.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ff5e2c07..ab1c32723 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,6 +6,7 @@ if (POLICY CMP0074) endif () project(marian CXX C) + set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(BUILD_ARCH native CACHE STRING "Compile for this CPU architecture.") @@ -80,6 +81,40 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "Release") endif() +# iOS support +if(CMAKE_SYSTEM_NAME STREQUAL "iOS" ) + set(ARM ON) + # need to ignore this warning for Xcode to be happy + list(APPEND ALL_WARNINGS -Wno-shorten-64-to-32;) +endif() + +# ARM support: currently ONLY armv8. armv8 includes NEON by default +# we do not currently have good support for automatic architecture detection, including for cross-compilation +# this is planned for future PRs +if(ARM) + + # Apple by default has Apple Accelerate. Otherwise fallback to RUY for GEMM + if(APPLE) + message(STATUS "Using Apple Accelerate SGEMM") + option(USE_RUY_SGEMM "Compile with Ruy SGEMM" OFF) + else(APPLE) + message(STATUS "Using Ruy SGEMM") + set(EXT_LIBS ${EXT_LIBS} ruy) + option(USE_RUY_SGEMM "Compile with Ruy SGEMM" ON) + endif(APPLE) + + # Define that we are using ARM as required by simd_utils. See their README for info + add_compile_definitions(ARM FMA SSE) + # Some warnings as errors. I don't feel comfortable about the strict aliasing. + set(ARM_WARNINGS "-fno-strict-aliasing -Wno-comment") + + if(MSVC) + add_compile_options(/flax-vector-conversions) + else(MSVC) + add_compile_options(-flax-vector-conversions) + endif(MSVC) +endif(ARM) + ######## # pThreads: consider it as EXT_LIBS for a more portable binary set(CMAKE_THREAD_PREFER_PTHREAD TRUE) @@ -88,7 +123,6 @@ find_package(Threads REQUIRED) set(EXT_LIBS ${EXT_LIBS} Threads::Threads) ######## - ############################################################################### # Set compilation flags if(MSVC) @@ -148,13 +182,16 @@ else(MSVC) set(INTRINSICS "") list(APPEND INTRINSICS_NVCC) - option(COMPILE_SSE2 "Compile CPU code with SSE2 support" ON) - option(COMPILE_SSE3 "Compile CPU code with SSE3 support" ON) - option(COMPILE_SSE4_1 "Compile CPU code with SSE4.1 support" ON) - option(COMPILE_SSE4_2 "Compile CPU code with SSE4.2 support" ON) - option(COMPILE_AVX "Compile CPU code with AVX support" ON) - option(COMPILE_AVX2 "Compile CPU code with AVX2 support" ON) - option(COMPILE_AVX512 "Compile CPU code with AVX512 support" ON) + if(NOT ARM) + # none of these options are available on ARM + option(COMPILE_SSE2 "Compile CPU code with SSE2 support" ON) + option(COMPILE_SSE3 "Compile CPU code with SSE3 support" ON) + option(COMPILE_SSE4_1 "Compile CPU code with SSE4.1 support" ON) + option(COMPILE_SSE4_2 "Compile CPU code with SSE4.2 support" ON) + option(COMPILE_AVX "Compile CPU code with AVX support" ON) + option(COMPILE_AVX2 "Compile CPU code with AVX2 support" ON) + option(COMPILE_AVX512 "Compile CPU code with AVX512 support" ON) + endif(NOT ARM) if(BUILD_ARCH STREQUAL "native") message(STATUS "Building with -march=native and intrinsics will be chosen automatically by the compiler to match the current machine.") @@ -230,7 +267,7 @@ else(MSVC) # Clang-10.0.0 complains when CUDA is newer than 10.1 set(CLANG_IGNORE_UNKNOWN_CUDA "-Wno-unknown-warning-option -Wno-unknown-cuda-version") endif() - set(DISABLE_GLOBALLY "-Wno-unused-result ${CLANG_IGNORE_UNKNOWN_CUDA}") + set(DISABLE_GLOBALLY "-Wno-unused-result ${CLANG_IGNORE_UNKNOWN_CUDA} ${ARM_WARNINGS}") # These are used in src/CMakeLists.txt on a per-target basis list(APPEND ALL_WARNINGS -Wall; -Werror; -Wextra; -Wno-unused-result; -Wno-deprecated; @@ -249,24 +286,30 @@ else(MSVC) set(CMAKE_RDYNAMIC_FLAG "-rdynamic") endif(CMAKE_COMPILER_IS_GNUCC) - set(CMAKE_CXX_FLAGS "-std=c++11 -pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}") - set(CMAKE_CXX_FLAGS_RELEASE "-O3 -m64 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}") + set(CMAKE_CXX_FLAGS "-std=c++11 -pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} ${INTRINSICS}") + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}") set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}") - set(CMAKE_CXX_FLAGS_SLIM "-O3 -m64 -funroll-loops -DNDEBUG") + set(CMAKE_CXX_FLAGS_SLIM "-O3 -funroll-loops -DNDEBUG") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELEASE}") set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE} -pg") set(CMAKE_CXX_FLAGS_PROFGEN "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-generate -fprofile-correction") set(CMAKE_CXX_FLAGS_PROFUSE "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-use -fprofile-correction") # these need to be set separately - set(CMAKE_C_FLAGS "-pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}") - set(CMAKE_C_FLAGS_RELEASE "-O3 -m64 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}") + set(CMAKE_C_FLAGS "-pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} ${INTRINSICS}") + set(CMAKE_C_FLAGS_RELEASE "-O3 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}") set(CMAKE_C_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}") - set(CMAKE_C_FLAGS_SLIM "-O3 -m64 -funroll-loops -DNDEBUG") + set(CMAKE_C_FLAGS_SLIM "-O3 -funroll-loops -DNDEBUG") set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELEASE}") set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg") set(CMAKE_C_FLAGS_PROFGEN "${CMAKE_C_FLAGS_RELEASE} -fprofile-generate -fprofile-correction") set(CMAKE_C_FLAGS_PROFUSE "${CMAKE_C_FLAGS_RELEASE} -fprofile-use -fprofile-correction") + + # set -march for all builds except iOS cross compilation + if(NOT CMAKE_SYSTEM_NAME STREQUAL "iOS" ) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=${BUILD_ARCH}") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${BUILD_ARCH}") + endif() endif(MSVC) # with gcc 7.0 and above we need to mark fallthrough in switch case statements @@ -520,7 +563,7 @@ endif() ############################################################################### # Find BLAS library if(COMPILE_CPU) - if(NOT GENERATE_MARIAN_INSTALL_TARGETS) + if(NOT GENERATE_MARIAN_INSTALL_TARGETS AND NOT ARM) set(EXT_LIBS ${EXT_LIBS} intgemm) # Enable intgemm when compiling CPU add_definitions(-DCOMPILE_CPU=1) endif() diff --git a/cmake/ios.toolchain.cmake b/cmake/ios.toolchain.cmake new file mode 100644 index 000000000..2131172fd --- /dev/null +++ b/cmake/ios.toolchain.cmake @@ -0,0 +1,1099 @@ +# This file is part of the ios-cmake project. It was retrieved from +# https://github.com/leetal/ios-cmake.git, which is a fork of +# https://github.com/gerstrong/ios-cmake.git, which is a fork of +# https://github.com/cristeab/ios-cmake.git, which is a fork of +# https://code.google.com/p/ios-cmake/. Which in turn is based off of +# the Platform/Darwin.cmake and Platform/UnixPaths.cmake files which +# are included with CMake 2.8.4 +# +# The ios-cmake project is licensed under the new BSD license. +# +# Copyright (c) 2014, Bogdan Cristea and LTE Engineering Software, +# Kitware, Inc., Insight Software Consortium. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# This file is based on the Platform/Darwin.cmake and +# Platform/UnixPaths.cmake files which are included with CMake 2.8.4 +# It has been altered for iOS development. +# +# Updated by Alex Stewart (alexs.mac@gmail.com) +# +# ***************************************************************************** +# Now maintained by Alexander Widerberg (widerbergaren [at] gmail.com) +# under the BSD-3-Clause license +# https://github.com/leetal/ios-cmake +# ***************************************************************************** +# +# INFORMATION / HELP +# +############################################################################### +# OPTIONS # +############################################################################### +# +# PLATFORM: (default "OS64") +# OS = Build for iPhoneOS. +# OS64 = Build for arm64 iphoneOS. +# OS64COMBINED = Build for arm64 x86_64 iphoneOS + iphoneOS Simulator. Combined into FAT STATIC lib (only supported on 3.14+ of CMake with "-G Xcode" argument in combination with the "cmake --install" CMake build step) +# SIMULATOR = Build for x86 i386 iphoneOS Simulator. +# SIMULATOR64 = Build for x86_64 iphoneOS Simulator. +# SIMULATORARM64 = Build for arm64 iphoneOS Simulator. +# SIMULATOR64COMBINED = Build for arm64 x86_64 iphoneOS Simulator. Combined into FAT STATIC lib (supported on 3.14+ of CMakewith "-G Xcode" argument ONLY) +# TVOS = Build for arm64 tvOS. +# TVOSCOMBINED = Build for arm64 x86_64 tvOS + tvOS Simulator. Combined into FAT STATIC lib (only supported on 3.14+ of CMake with "-G Xcode" argument in combination with the "cmake --install" CMake build step) +# SIMULATOR_TVOS = Build for x86_64 tvOS Simulator. +# SIMULATORARM64_TVOS = Build for arm64 tvOS Simulator. +# WATCHOS = Build for armv7k arm64_32 for watchOS. +# WATCHOSCOMBINED = Build for armv7k arm64_32 x86_64 watchOS + watchOS Simulator. Combined into FAT STATIC lib (only supported on 3.14+ of CMake with "-G Xcode" argument in combination with the "cmake --install" CMake build step) +# SIMULATOR_WATCHOS = Build for x86_64 for watchOS Simulator. +# MAC = Build for x86_64 macOS. +# MAC_ARM64 = Build for Apple Silicon macOS. +# MAC_UNIVERSAL = Combined build for x86_64 and Apple Silicon on macOS. +# MAC_CATALYST = Build for x86_64 macOS with Catalyst support (iOS toolchain on macOS). +# Note: The build argument "MACOSX_DEPLOYMENT_TARGET" can be used to control min-version of macOS +# MAC_CATALYST_ARM64 = Build for Apple Silicon macOS with Catalyst support (iOS toolchain on macOS). +# Note: The build argument "MACOSX_DEPLOYMENT_TARGET" can be used to control min-version of macOS +# +# CMAKE_OSX_SYSROOT: Path to the SDK to use. By default this is +# automatically determined from PLATFORM and xcodebuild, but +# can also be manually specified (although this should not be required). +# +# CMAKE_DEVELOPER_ROOT: Path to the Developer directory for the platform +# being compiled for. By default, this is automatically determined from +# CMAKE_OSX_SYSROOT, but can also be manually specified (although this should +# not be required). +# +# DEPLOYMENT_TARGET: Minimum SDK version to target. Default 2.0 on watchOS and 9.0 on tvOS+iOS +# +# NAMED_LANGUAGE_SUPPORT: +# ON (default) = Will require "enable_language(OBJC) and/or enable_language(OBJCXX)" for full OBJC|OBJCXX support +# OFF = Will embed the OBJC and OBJCXX flags into the CMAKE_C_FLAGS and CMAKE_CXX_FLAGS (legacy behavior, CMake version < 3.16) +# +# ENABLE_BITCODE: (ON|OFF) Enables or disables bitcode support. Default OFF +# +# ENABLE_ARC: (ON|OFF) Enables or disables ARC support. Default ON (ARC enabled by default) +# +# ENABLE_VISIBILITY: (ON|OFF) Enables or disables symbol visibility support. Default OFF (visibility hidden by default) +# +# ENABLE_STRICT_TRY_COMPILE: (ON|OFF) Enables or disables strict try_compile() on all Check* directives (will run linker +# to actually check if linking is possible). Default OFF (will set CMAKE_TRY_COMPILE_TARGET_TYPE to STATIC_LIBRARY) +# +# ARCHS: (armv7 armv7s armv7k arm64 arm64_32 i386 x86_64) If specified, will override the default architectures for the given PLATFORM +# OS = armv7 armv7s arm64 (if applicable) +# OS64 = arm64 (if applicable) +# SIMULATOR = i386 +# SIMULATOR64 = x86_64 +# SIMULATORARM64 = arm64 +# TVOS = arm64 +# SIMULATOR_TVOS = x86_64 (i386 has since long been deprecated) +# SIMULATORARM64_TVOS = arm64 +# WATCHOS = armv7k arm64_32 (if applicable) +# SIMULATOR_WATCHOS = x86_64 (i386 has since long been deprecated) +# MAC = x86_64 +# MAC_ARM64 = arm64 +# MAC_UNIVERSAL = x86_64 arm64 +# MAC_CATALYST = x86_64 +# MAC_CATALYST_ARM64 = arm64 +# +# NOTE: When manually specifying ARCHS, put a semi-colon between the entries. E.g., -DARCHS="armv7;arm64" +# +############################################################################### +# END OPTIONS # +############################################################################### +# +# This toolchain defines the following properties (available via get_property()) for use externally: +# +# PLATFORM: The currently targeted platform. +# XCODE_VERSION: Version number (not including Build version) of Xcode detected. +# SDK_VERSION: Version of SDK being used. +# OSX_ARCHITECTURES: Architectures being compiled for (generated from PLATFORM). +# APPLE_TARGET_TRIPLE: Used by autoconf build systems. NOTE: If "ARCHS" is overridden, this will *NOT* be set! +# +# This toolchain defines the following macros for use externally: +# +# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE XCODE_VARIANT) +# A convenience macro for setting xcode specific properties on targets. +# Available variants are: All, Release, RelWithDebInfo, Debug, MinSizeRel +# example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1" "all"). +# +# find_host_package (PROGRAM ARGS) +# A macro used to find executable programs on the host system, not within the +# environment. Thanks to the android-cmake project for providing the +# command. +# + +cmake_minimum_required(VERSION 3.8.0) + +# CMake invokes the toolchain file twice during the first build, but only once during subsequent rebuilds. +if(DEFINED ENV{_IOS_TOOLCHAIN_HAS_RUN}) + return() +endif() +set(ENV{_IOS_TOOLCHAIN_HAS_RUN} true) + +# List of supported platform values +list(APPEND _supported_platforms + "OS" "OS64" "OS64COMBINED" "SIMULATOR" "SIMULATOR64" "SIMULATORARM64" "SIMULATOR64COMBINED" + "TVOS" "TVOSCOMBINED" "SIMULATOR_TVOS" "SIMULATORARM64_TVOS" + "WATCHOS" "WATCHOSCOMBINED" "SIMULATOR_WATCHOS" + "MAC" "MAC_ARM64" "MAC_UNIVERSAL" + "VISIONOS" "SIMULATOR_VISIONOS" "SIMULATOR64_VISIONOS" + "MAC_CATALYST" "MAC_CATALYST_ARM64") + +# Cache what generator is used +set(USED_CMAKE_GENERATOR "${CMAKE_GENERATOR}") + +# Check if using a CMake version capable of building combined FAT builds (simulator and target slices combined in one static lib) +if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14") + set(MODERN_CMAKE YES) +endif() + +# Get the Xcode version being used. +# Problem: CMake runs toolchain files multiple times, but can't read cache variables on some runs. +# Workaround: On the first run (in which cache variables are always accessible), set an intermediary environment variable. +# +# NOTE: This pattern is used in many places in this toolchain to speed up checks of all sorts +if(DEFINED XCODE_VERSION_INT) + # Environment variables are always preserved. + set(ENV{_XCODE_VERSION_INT} "${XCODE_VERSION_INT}") +elseif(DEFINED ENV{_XCODE_VERSION_INT}) + set(XCODE_VERSION_INT "$ENV{_XCODE_VERSION_INT}") +elseif(NOT DEFINED XCODE_VERSION_INT) + find_program(XCODEBUILD_EXECUTABLE xcodebuild) + if(NOT XCODEBUILD_EXECUTABLE) + message(FATAL_ERROR "xcodebuild not found. Please install either the standalone commandline tools or Xcode.") + endif() + execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -version + OUTPUT_VARIABLE XCODE_VERSION_INT + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) + string(REGEX MATCH "Xcode [0-9\\.]+" XCODE_VERSION_INT "${XCODE_VERSION_INT}") + string(REGEX REPLACE "Xcode ([0-9\\.]+)" "\\1" XCODE_VERSION_INT "${XCODE_VERSION_INT}") + set(XCODE_VERSION_INT "${XCODE_VERSION_INT}" CACHE INTERNAL "") +endif() + +# Assuming that xcode 12.0 is installed you most probably have ios sdk 14.0 or later installed (tested on Big Sur) +# if you don't set a deployment target it will be set the way you only get 64-bit builds +#if(NOT DEFINED DEPLOYMENT_TARGET AND XCODE_VERSION_INT VERSION_GREATER 12.0) +# Temporarily fix the arm64 issues in CMake install-combined by excluding arm64 for simulator builds (needed for Apple Silicon...) +# set(CMAKE_XCODE_ATTRIBUTE_EXCLUDED_ARCHS[sdk=iphonesimulator*] "arm64") +#endif() + +# Check if the platform variable is set +if(DEFINED PLATFORM) + # Environment variables are always preserved. + set(ENV{_PLATFORM} "${PLATFORM}") +elseif(DEFINED ENV{_PLATFORM}) + set(PLATFORM "$ENV{_PLATFORM}") +elseif(NOT DEFINED PLATFORM) + message(FATAL_ERROR "PLATFORM argument not set. Bailing configure since I don't know what target you want to build for!") +endif () + +if(PLATFORM MATCHES ".*COMBINED" AND NOT CMAKE_GENERATOR MATCHES "Xcode") + message(FATAL_ERROR "The combined builds support requires Xcode to be used as a generator via '-G Xcode' command-line argument in CMake") +endif() + +# Safeguard that the platform value is set and is one of the supported values +list(FIND _supported_platforms ${PLATFORM} contains_PLATFORM) +if("${contains_PLATFORM}" EQUAL "-1") + string(REPLACE ";" "\n * " _supported_platforms_formatted "${_supported_platforms}") + message(FATAL_ERROR " Invalid PLATFORM specified! Current value: ${PLATFORM}.\n" + " Supported PLATFORM values: \n * ${_supported_platforms_formatted}") +endif() + +# Check if Apple Silicon is supported +if(PLATFORM MATCHES "^(MAC_ARM64)$|^(MAC_CATALYST_ARM64)$|^(MAC_UNIVERSAL)$" AND ${CMAKE_VERSION} VERSION_LESS "3.19.5") + message(FATAL_ERROR "Apple Silicon builds requires a minimum of CMake 3.19.5") +endif() + +# Touch the toolchain variable to suppress the "unused variable" warning. +# This happens if CMake is invoked with the same command line the second time. +if(CMAKE_TOOLCHAIN_FILE) +endif() + +# Fix for PThread library not in path +set(CMAKE_THREAD_LIBS_INIT "-lpthread") +set(CMAKE_HAVE_THREADS_LIBRARY 1) +set(CMAKE_USE_WIN32_THREADS_INIT 0) +set(CMAKE_USE_PTHREADS_INIT 1) + +# Specify named language support defaults. +if(NOT DEFINED NAMED_LANGUAGE_SUPPORT AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.16") + set(NAMED_LANGUAGE_SUPPORT ON) + message(STATUS "[DEFAULTS] Using explicit named language support! E.g., enable_language(CXX) is needed in the project files.") +elseif(NOT DEFINED NAMED_LANGUAGE_SUPPORT AND ${CMAKE_VERSION} VERSION_LESS "3.16") + set(NAMED_LANGUAGE_SUPPORT OFF) + message(STATUS "[DEFAULTS] Disabling explicit named language support. Falling back to legacy behavior.") +elseif(DEFINED NAMED_LANGUAGE_SUPPORT AND ${CMAKE_VERSION} VERSION_LESS "3.16") + message(FATAL_ERROR "CMake named language support for OBJC and OBJCXX was added in CMake 3.16.") +endif() +set(NAMED_LANGUAGE_SUPPORT_INT ${NAMED_LANGUAGE_SUPPORT} CACHE BOOL + "Whether or not to enable explicit named language support" FORCE) + +# Specify the minimum version of the deployment target. +if(NOT DEFINED DEPLOYMENT_TARGET) + if (PLATFORM MATCHES "WATCHOS") + # Unless specified, SDK version 4.0 is used by default as minimum target version (watchOS). + set(DEPLOYMENT_TARGET "4.0") + elseif(PLATFORM STREQUAL "MAC") + # Unless specified, SDK version 10.13 (High Sierra) is used by default as the minimum target version (macos). + set(DEPLOYMENT_TARGET "10.13") + elseif(PLATFORM STREQUAL "VISIONOS" OR PLATFORM STREQUAL "SIMULATOR_VISIONOS" OR PLATFORM STREQUAL "SIMULATOR64_VISIONOS") + # Unless specified, SDK version 1.0 is used by default as minimum target version (visionOS). + set(DEPLOYMENT_TARGET "1.0") + elseif(PLATFORM STREQUAL "MAC_ARM64") + # Unless specified, SDK version 11.0 (Big Sur) is used by default as the minimum target version (macOS on arm). + set(DEPLOYMENT_TARGET "11.0") + elseif(PLATFORM STREQUAL "MAC_UNIVERSAL") + # Unless specified, SDK version 11.0 (Big Sur) is used by default as minimum target version for universal builds. + set(DEPLOYMENT_TARGET "11.0") + elseif(PLATFORM STREQUAL "MAC_CATALYST" OR PLATFORM STREQUAL "MAC_CATALYST_ARM64") + # Unless specified, SDK version 13.0 is used by default as the minimum target version (mac catalyst minimum requirement). + set(DEPLOYMENT_TARGET "13.1") + else() + # Unless specified, SDK version 11.0 is used by default as the minimum target version (iOS, tvOS). + set(DEPLOYMENT_TARGET "11.0") + endif() + message(STATUS "[DEFAULTS] Using the default min-version since DEPLOYMENT_TARGET not provided!") +elseif(DEFINED DEPLOYMENT_TARGET AND PLATFORM MATCHES "^MAC_CATALYST" AND ${DEPLOYMENT_TARGET} VERSION_LESS "13.1") + message(FATAL_ERROR "Mac Catalyst builds requires a minimum deployment target of 13.1!") +endif() + +# Store the DEPLOYMENT_TARGET in the cache +set(DEPLOYMENT_TARGET "${DEPLOYMENT_TARGET}" CACHE INTERNAL "") + +# Handle the case where we are targeting iOS and a version above 10.3.4 (32-bit support dropped officially) +if(PLATFORM STREQUAL "OS" AND DEPLOYMENT_TARGET VERSION_GREATER_EQUAL 10.3.4) + set(PLATFORM "OS64") + message(STATUS "Targeting minimum SDK version ${DEPLOYMENT_TARGET}. Dropping 32-bit support.") +elseif(PLATFORM STREQUAL "SIMULATOR" AND DEPLOYMENT_TARGET VERSION_GREATER_EQUAL 10.3.4) + set(PLATFORM "SIMULATOR64") + message(STATUS "Targeting minimum SDK version ${DEPLOYMENT_TARGET}. Dropping 32-bit support.") +endif() + +set(PLATFORM_INT "${PLATFORM}") + +if(DEFINED ARCHS) + string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}") +endif() + +# Determine the platform name and architectures for use in xcodebuild commands +# from the specified PLATFORM_INT name. +if(PLATFORM_INT STREQUAL "OS") + set(SDK_NAME iphoneos) + if(NOT ARCHS) + set(ARCHS armv7 armv7s arm64) + set(APPLE_TARGET_TRIPLE_INT arm-apple-ios${DEPLOYMENT_TARGET}) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}) + endif() +elseif(PLATFORM_INT STREQUAL "OS64") + set(SDK_NAME iphoneos) + if(NOT ARCHS) + if (XCODE_VERSION_INT VERSION_GREATER 10.0) + set(ARCHS arm64) # FIXME: Add arm64e when Apple has fixed the integration issues with it, libarclite_iphoneos.a is currently missing bitcode markers for example + else() + set(ARCHS arm64) + endif() + set(APPLE_TARGET_TRIPLE_INT arm64-apple-ios${DEPLOYMENT_TARGET}) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}) + endif() +elseif(PLATFORM_INT STREQUAL "OS64COMBINED") + set(SDK_NAME iphoneos) + if(MODERN_CMAKE) + if(NOT ARCHS) + if (XCODE_VERSION_INT VERSION_GREATER 12.0) + set(ARCHS arm64 x86_64) + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphoneos*] "arm64") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphonesimulator*] "x86_64 arm64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphoneos*] "arm64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphonesimulator*] "x86_64 arm64") + else() + set(ARCHS arm64 x86_64) + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphoneos*] "arm64") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphonesimulator*] "x86_64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphoneos*] "arm64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphonesimulator*] "x86_64") + endif() + set(APPLE_TARGET_TRIPLE_INT arm64-x86_64-apple-ios${DEPLOYMENT_TARGET}) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}) + endif() + else() + message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the OS64COMBINED setting work") + endif() +elseif(PLATFORM_INT STREQUAL "SIMULATOR64COMBINED") + set(SDK_NAME iphonesimulator) + if(MODERN_CMAKE) + if(NOT ARCHS) + if (XCODE_VERSION_INT VERSION_GREATER 12.0) + set(ARCHS arm64 x86_64) # FIXME: Add arm64e when Apple have fixed the integration issues with it, libarclite_iphoneos.a is currently missing bitcode markers for example + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphoneos*] "") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphonesimulator*] "x86_64 arm64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphoneos*] "") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphonesimulator*] "x86_64 arm64") + else() + set(ARCHS arm64 x86_64) + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphoneos*] "") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphonesimulator*] "x86_64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphoneos*] "") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphonesimulator*] "x86_64") + endif() + set(APPLE_TARGET_TRIPLE_INT aarch64-x86_64-apple-ios${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-simulator) + endif() + else() + message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the SIMULATOR64COMBINED setting work") + endif() +elseif(PLATFORM_INT STREQUAL "SIMULATOR") + set(SDK_NAME iphonesimulator) + if(NOT ARCHS) + set(ARCHS i386) + set(APPLE_TARGET_TRIPLE_INT i386-apple-ios${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-simulator) + endif() + message(DEPRECATION "SIMULATOR IS DEPRECATED. Consider using SIMULATOR64 instead.") +elseif(PLATFORM_INT STREQUAL "SIMULATOR64") + set(SDK_NAME iphonesimulator) + if(NOT ARCHS) + set(ARCHS x86_64) + set(APPLE_TARGET_TRIPLE_INT x86_64-apple-ios${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-simulator) + endif() +elseif(PLATFORM_INT STREQUAL "SIMULATORARM64") + set(SDK_NAME iphonesimulator) + if(NOT ARCHS) + set(ARCHS arm64) + set(APPLE_TARGET_TRIPLE_INT arm64-apple-ios${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-simulator) + endif() +elseif(PLATFORM_INT STREQUAL "TVOS") + set(SDK_NAME appletvos) + if(NOT ARCHS) + set(ARCHS arm64) + set(APPLE_TARGET_TRIPLE_INT arm64-apple-tvos${DEPLOYMENT_TARGET}) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos${DEPLOYMENT_TARGET}) + endif() +elseif (PLATFORM_INT STREQUAL "TVOSCOMBINED") + set(SDK_NAME appletvos) + if(MODERN_CMAKE) + if(NOT ARCHS) + set(ARCHS arm64 x86_64) + set(APPLE_TARGET_TRIPLE_INT arm64-x86_64-apple-tvos${DEPLOYMENT_TARGET}) + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=appletvos*] "arm64") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=appletvsimulator*] "x86_64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=appletvos*] "arm64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=appletvsimulator*] "x86_64") + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos${DEPLOYMENT_TARGET}) + endif() + else() + message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the TVOSCOMBINED setting work") + endif() +elseif(PLATFORM_INT STREQUAL "SIMULATOR_TVOS") + set(SDK_NAME appletvsimulator) + if(NOT ARCHS) + set(ARCHS x86_64) + set(APPLE_TARGET_TRIPLE_INT x86_64-apple-tvos${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos${DEPLOYMENT_TARGET}-simulator) + endif() +elseif(PLATFORM_INT STREQUAL "SIMULATORARM64_TVOS") + set(SDK_NAME appletvsimulator) + if(NOT ARCHS) + set(ARCHS arm64) + set(APPLE_TARGET_TRIPLE_INT arm64-apple-tvos${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos${DEPLOYMENT_TARGET}-simulator) + endif() +elseif(PLATFORM_INT STREQUAL "WATCHOS") + set(SDK_NAME watchos) + if(NOT ARCHS) + if (XCODE_VERSION_INT VERSION_GREATER 10.0) + set(ARCHS armv7k arm64_32) + set(APPLE_TARGET_TRIPLE_INT arm64_32-apple-watchos${DEPLOYMENT_TARGET}) + else() + set(ARCHS armv7k) + set(APPLE_TARGET_TRIPLE_INT arm-apple-watchos${DEPLOYMENT_TARGET}) + endif() + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos${DEPLOYMENT_TARGET}) + endif() +elseif(PLATFORM_INT STREQUAL "WATCHOSCOMBINED") + set(SDK_NAME watchos) + if(MODERN_CMAKE) + if(NOT ARCHS) + if (XCODE_VERSION_INT VERSION_GREATER 10.0) + set(ARCHS armv7k arm64_32 i386) + set(APPLE_TARGET_TRIPLE_INT arm64_32-i386-apple-watchos${DEPLOYMENT_TARGET}) + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchos*] "armv7k arm64_32") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchsimulator*] "i386") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchos*] "armv7k arm64_32") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchsimulator*] "i386") + else() + set(ARCHS armv7k i386) + set(APPLE_TARGET_TRIPLE_INT arm-i386-apple-watchos${DEPLOYMENT_TARGET}) + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchos*] "armv7k") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchsimulator*] "i386") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchos*] "armv7k") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchsimulator*] "i386") + endif() + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos${DEPLOYMENT_TARGET}) + endif() + else() + message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the WATCHOSCOMBINED setting work") + endif() +elseif(PLATFORM_INT STREQUAL "SIMULATOR_WATCHOS") + set(SDK_NAME watchsimulator) + if(NOT ARCHS) + set(ARCHS i386) + set(APPLE_TARGET_TRIPLE_INT i386-apple-watchos${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos${DEPLOYMENT_TARGET}-simulator) + endif() +elseif(PLATFORM_INT STREQUAL "SIMULATOR64_VISIONOS") + set(SDK_NAME xrsimulator) + if(NOT ARCHS) + set(ARCHS x86_64) + set(APPLE_TARGET_TRIPLE_INT x86_64-apple-xros${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-xros${DEPLOYMENT_TARGET}-simulator) + endif() +elseif(PLATFORM_INT STREQUAL "SIMULATOR_VISIONOS") + set(SDK_NAME xrsimulator) + if(NOT ARCHS) + set(ARCHS arm64) + set(APPLE_TARGET_TRIPLE_INT arm64-apple-xros${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-xros${DEPLOYMENT_TARGET}-simulator) + endif() +elseif(PLATFORM_INT STREQUAL "VISIONOS") + set(SDK_NAME xros) + if(NOT ARCHS) + set(ARCHS arm64) + set(APPLE_TARGET_TRIPLE_INT arm64-apple-xros${DEPLOYMENT_TARGET}) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-xros${DEPLOYMENT_TARGET}) + endif() +elseif(PLATFORM_INT STREQUAL "MAC" OR PLATFORM_INT STREQUAL "MAC_CATALYST") + set(SDK_NAME macosx) + if(NOT ARCHS) + set(ARCHS x86_64) + endif() + string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}") + if(PLATFORM_INT STREQUAL "MAC") + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-macosx${DEPLOYMENT_TARGET}) + elseif(PLATFORM_INT STREQUAL "MAC_CATALYST") + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-macabi) + endif() +elseif(PLATFORM_INT MATCHES "^(MAC_ARM64)$|^(MAC_CATALYST_ARM64)$") + set(SDK_NAME macosx) + if(NOT ARCHS) + set(ARCHS arm64) + endif() + string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}") + if(PLATFORM_INT STREQUAL "MAC_ARM64") + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-macosx${DEPLOYMENT_TARGET}) + elseif(PLATFORM_INT STREQUAL "MAC_CATALYST_ARM64") + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-macabi) + endif() +elseif(PLATFORM_INT STREQUAL "MAC_UNIVERSAL") + set(SDK_NAME macosx) + if(NOT ARCHS) + set(ARCHS "x86_64;arm64") + endif() + string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}") + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-macosx${DEPLOYMENT_TARGET}) +else() + message(FATAL_ERROR "Invalid PLATFORM: ${PLATFORM_INT}") +endif() + +string(REPLACE ";" " " ARCHS_SPACED "${ARCHS}") + +if(MODERN_CMAKE AND PLATFORM_INT MATCHES ".*COMBINED" AND NOT CMAKE_GENERATOR MATCHES "Xcode") + message(FATAL_ERROR "The COMBINED options only work with Xcode generator, -G Xcode") +endif() + +if(CMAKE_GENERATOR MATCHES "Xcode" AND PLATFORM_INT MATCHES "^MAC_CATALYST") + set(CMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++") + set(CMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS "macosx") + set(CMAKE_XCODE_ATTRIBUTE_SUPPORTS_MACCATALYST "YES") + if(NOT DEFINED MACOSX_DEPLOYMENT_TARGET) + set(CMAKE_XCODE_ATTRIBUTE_MACOSX_DEPLOYMENT_TARGET "10.15") + else() + set(CMAKE_XCODE_ATTRIBUTE_MACOSX_DEPLOYMENT_TARGET "${MACOSX_DEPLOYMENT_TARGET}") + endif() +elseif(CMAKE_GENERATOR MATCHES "Xcode") + set(CMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++") + set(CMAKE_XCODE_ATTRIBUTE_IPHONEOS_DEPLOYMENT_TARGET "${DEPLOYMENT_TARGET}") + if(NOT PLATFORM_INT MATCHES ".*COMBINED") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=${SDK_NAME}*] "${ARCHS_SPACED}") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=${SDK_NAME}*] "${ARCHS_SPACED}") + endif() +endif() + +# If the user did not specify the SDK root to use, then query xcodebuild for it. +if(DEFINED CMAKE_OSX_SYSROOT_INT) + # Environment variables are always preserved. + set(ENV{_CMAKE_OSX_SYSROOT_INT} "${CMAKE_OSX_SYSROOT_INT}") +elseif(DEFINED ENV{_CMAKE_OSX_SYSROOT_INT}) + set(CMAKE_OSX_SYSROOT_INT "$ENV{_CMAKE_OSX_SYSROOT_INT}") +elseif(NOT DEFINED CMAKE_OSX_SYSROOT_INT) + execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -version -sdk ${SDK_NAME} Path + OUTPUT_VARIABLE CMAKE_OSX_SYSROOT_INT + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() + +if (NOT DEFINED CMAKE_OSX_SYSROOT_INT AND NOT DEFINED CMAKE_OSX_SYSROOT) + message(SEND_ERROR "Please make sure that Xcode is installed and that the toolchain" + "is pointing to the correct path. Please run:" + "sudo xcode-select -s /Applications/Xcode.app/Contents/Developer" + "and see if that fixes the problem for you.") + message(FATAL_ERROR "Invalid CMAKE_OSX_SYSROOT: ${CMAKE_OSX_SYSROOT} " + "does not exist.") +elseif(DEFINED CMAKE_OSX_SYSROOT_INT) + set(CMAKE_OSX_SYSROOT_INT "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "") + # Specify the location or name of the platform SDK to be used in CMAKE_OSX_SYSROOT. + set(CMAKE_OSX_SYSROOT "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "") +endif() + +# Use bitcode or not +if(NOT DEFINED ENABLE_BITCODE) + message(STATUS "[DEFAULTS] Disabling bitcode support by default. ENABLE_BITCODE not provided for override!") + set(ENABLE_BITCODE OFF) +endif() +set(ENABLE_BITCODE_INT ${ENABLE_BITCODE} CACHE BOOL + "Whether or not to enable bitcode" FORCE) +# Use ARC or not +if(NOT DEFINED ENABLE_ARC) + # Unless specified, enable ARC support by default + set(ENABLE_ARC ON) + message(STATUS "[DEFAULTS] Enabling ARC support by default. ENABLE_ARC not provided!") +endif() +set(ENABLE_ARC_INT ${ENABLE_ARC} CACHE BOOL "Whether or not to enable ARC" FORCE) +# Use hidden visibility or not +if(NOT DEFINED ENABLE_VISIBILITY) + # Unless specified, disable symbols visibility by default + set(ENABLE_VISIBILITY OFF) + message(STATUS "[DEFAULTS] Hiding symbols visibility by default. ENABLE_VISIBILITY not provided!") +endif() +set(ENABLE_VISIBILITY_INT ${ENABLE_VISIBILITY} CACHE BOOL "Whether or not to hide symbols from the dynamic linker (-fvisibility=hidden)" FORCE) +# Set strict compiler checks or not +if(NOT DEFINED ENABLE_STRICT_TRY_COMPILE) + # Unless specified, disable strict try_compile() + set(ENABLE_STRICT_TRY_COMPILE OFF) + message(STATUS "[DEFAULTS] Using NON-strict compiler checks by default. ENABLE_STRICT_TRY_COMPILE not provided!") +endif() +set(ENABLE_STRICT_TRY_COMPILE_INT ${ENABLE_STRICT_TRY_COMPILE} CACHE BOOL + "Whether or not to use strict compiler checks" FORCE) + +# Get the SDK version information. +if(DEFINED SDK_VERSION) + # Environment variables are always preserved. + set(ENV{_SDK_VERSION} "${SDK_VERSION}") +elseif(DEFINED ENV{_SDK_VERSION}) + set(SDK_VERSION "$ENV{_SDK_VERSION}") +elseif(NOT DEFINED SDK_VERSION) + execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -sdk ${CMAKE_OSX_SYSROOT_INT} -version SDKVersion + OUTPUT_VARIABLE SDK_VERSION + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() + +# Find the Developer root for the specific iOS platform being compiled for +# from CMAKE_OSX_SYSROOT. Should be ../../ from SDK specified in +# CMAKE_OSX_SYSROOT. There does not appear to be a direct way to obtain +# this information from xcrun or xcodebuild. +if (NOT DEFINED CMAKE_DEVELOPER_ROOT AND NOT CMAKE_GENERATOR MATCHES "Xcode") + get_filename_component(PLATFORM_SDK_DIR ${CMAKE_OSX_SYSROOT_INT} PATH) + get_filename_component(CMAKE_DEVELOPER_ROOT ${PLATFORM_SDK_DIR} PATH) + if (NOT EXISTS "${CMAKE_DEVELOPER_ROOT}") + message(FATAL_ERROR "Invalid CMAKE_DEVELOPER_ROOT: ${CMAKE_DEVELOPER_ROOT} does not exist.") + endif() +endif() + +# Find the C & C++ compilers for the specified SDK. +if(DEFINED CMAKE_C_COMPILER) + # Environment variables are always preserved. + set(ENV{_CMAKE_C_COMPILER} "${CMAKE_C_COMPILER}") +elseif(DEFINED ENV{_CMAKE_C_COMPILER}) + set(CMAKE_C_COMPILER "$ENV{_CMAKE_C_COMPILER}") + set(CMAKE_ASM_COMPILER ${CMAKE_C_COMPILER}) +elseif(NOT DEFINED CMAKE_C_COMPILER) + execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find clang + OUTPUT_VARIABLE CMAKE_C_COMPILER + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) + set(CMAKE_ASM_COMPILER ${CMAKE_C_COMPILER}) +endif() +if(DEFINED CMAKE_CXX_COMPILER) + # Environment variables are always preserved. + set(ENV{_CMAKE_CXX_COMPILER} "${CMAKE_CXX_COMPILER}") +elseif(DEFINED ENV{_CMAKE_CXX_COMPILER}) + set(CMAKE_CXX_COMPILER "$ENV{_CMAKE_CXX_COMPILER}") +elseif(NOT DEFINED CMAKE_CXX_COMPILER) + execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find clang++ + OUTPUT_VARIABLE CMAKE_CXX_COMPILER + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() +# Find (Apple's) libtool. +if(DEFINED BUILD_LIBTOOL) + # Environment variables are always preserved. + set(ENV{_BUILD_LIBTOOL} "${BUILD_LIBTOOL}") +elseif(DEFINED ENV{_BUILD_LIBTOOL}) + set(BUILD_LIBTOOL "$ENV{_BUILD_LIBTOOL}") +elseif(NOT DEFINED BUILD_LIBTOOL) + execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find libtool + OUTPUT_VARIABLE BUILD_LIBTOOL + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() +# Find the toolchain's provided install_name_tool if none is found on the host +if(DEFINED CMAKE_INSTALL_NAME_TOOL) + # Environment variables are always preserved. + set(ENV{_CMAKE_INSTALL_NAME_TOOL} "${CMAKE_INSTALL_NAME_TOOL}") +elseif(DEFINED ENV{_CMAKE_INSTALL_NAME_TOOL}) + set(CMAKE_INSTALL_NAME_TOOL "$ENV{_CMAKE_INSTALL_NAME_TOOL}") +elseif(NOT DEFINED CMAKE_INSTALL_NAME_TOOL) + execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find install_name_tool + OUTPUT_VARIABLE CMAKE_INSTALL_NAME_TOOL_INT + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) + set(CMAKE_INSTALL_NAME_TOOL ${CMAKE_INSTALL_NAME_TOOL_INT} CACHE INTERNAL "") +endif() + +# Configure libtool to be used instead of ar + ranlib to build static libraries. +# This is required on Xcode 7+, but should also work on previous versions of +# Xcode. +get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES) +foreach(lang ${languages}) + set(CMAKE_${lang}_CREATE_STATIC_LIBRARY "${BUILD_LIBTOOL} -static -o " CACHE INTERNAL "") +endforeach() + +# CMake 3.14+ support building for iOS, watchOS, and tvOS out of the box. +if(MODERN_CMAKE) + if(SDK_NAME MATCHES "iphone") + set(CMAKE_SYSTEM_NAME iOS) + elseif(SDK_NAME MATCHES "xros") + set(CMAKE_SYSTEM_NAME visionOS) + elseif(SDK_NAME MATCHES "xrsimulator") + set(CMAKE_SYSTEM_NAME visionOS) + elseif(SDK_NAME MATCHES "macosx") + set(CMAKE_SYSTEM_NAME Darwin) + elseif(SDK_NAME MATCHES "appletv") + set(CMAKE_SYSTEM_NAME tvOS) + elseif(SDK_NAME MATCHES "watch") + set(CMAKE_SYSTEM_NAME watchOS) + endif() + # Provide flags for a combined FAT library build on newer CMake versions + if(PLATFORM_INT MATCHES ".*COMBINED") + set(CMAKE_IOS_INSTALL_COMBINED YES) + if(CMAKE_GENERATOR MATCHES "Xcode") + # Set the SDKROOT Xcode properties to a Xcode-friendly value (the SDK_NAME, E.g, iphoneos) + # This way, Xcode will automatically switch between the simulator and device SDK when building. + set(CMAKE_XCODE_ATTRIBUTE_SDKROOT "${SDK_NAME}") + # Force to not build just one ARCH, but all! + set(CMAKE_XCODE_ATTRIBUTE_ONLY_ACTIVE_ARCH "NO") + endif() + endif() +elseif(NOT DEFINED CMAKE_SYSTEM_NAME AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.10") + # Legacy code path prior to CMake 3.14 or fallback if no CMAKE_SYSTEM_NAME specified + set(CMAKE_SYSTEM_NAME iOS) +elseif(NOT DEFINED CMAKE_SYSTEM_NAME) + # Legacy code path before CMake 3.14 or fallback if no CMAKE_SYSTEM_NAME specified + set(CMAKE_SYSTEM_NAME Darwin) +endif() +# Standard settings. +set(CMAKE_SYSTEM_VERSION ${SDK_VERSION} CACHE INTERNAL "") +set(UNIX ON CACHE BOOL "") +set(APPLE ON CACHE BOOL "") +if(PLATFORM STREQUAL "MAC" OR PLATFORM STREQUAL "MAC_ARM64" OR PLATFORM STREQUAL "MAC_UNIVERSAL") + set(IOS OFF CACHE BOOL "") + set(MACOS ON CACHE BOOL "") +elseif(PLATFORM STREQUAL "MAC_CATALYST" OR PLATFORM STREQUAL "MAC_CATALYST_ARM64") + set(IOS ON CACHE BOOL "") + set(MACOS ON CACHE BOOL "") +else() + set(IOS ON CACHE BOOL "") +endif() +# Set the architectures for which to build. +set(CMAKE_OSX_ARCHITECTURES ${ARCHS} CACHE INTERNAL "") +# Change the type of target generated for try_compile() so it'll work when cross-compiling, weak compiler checks +if(NOT ENABLE_STRICT_TRY_COMPILE_INT) + set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +endif() +# All iOS/Darwin specific settings - some may be redundant. +if (NOT DEFINED CMAKE_MACOSX_BUNDLE) + set(CMAKE_MACOSX_BUNDLE YES) +endif() +set(CMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED "NO") +set(CMAKE_SHARED_LIBRARY_PREFIX "lib") +set(CMAKE_SHARED_LIBRARY_SUFFIX ".dylib") +set(CMAKE_SHARED_MODULE_PREFIX "lib") +set(CMAKE_SHARED_MODULE_SUFFIX ".so") +set(CMAKE_C_COMPILER_ABI ELF) +set(CMAKE_CXX_COMPILER_ABI ELF) +set(CMAKE_C_HAS_ISYSROOT 1) +set(CMAKE_CXX_HAS_ISYSROOT 1) +set(CMAKE_MODULE_EXISTS 1) +set(CMAKE_DL_LIBS "") +set(CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ") +set(CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ") +set(CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}") +set(CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}") + +if(ARCHS MATCHES "((^|;|, )(arm64|arm64e|x86_64))+") + set(CMAKE_C_SIZEOF_DATA_PTR 8) + set(CMAKE_CXX_SIZEOF_DATA_PTR 8) + if(ARCHS MATCHES "((^|;|, )(arm64|arm64e))+") + set(CMAKE_SYSTEM_PROCESSOR "aarch64") + else() + set(CMAKE_SYSTEM_PROCESSOR "x86_64") + endif() +else() + set(CMAKE_C_SIZEOF_DATA_PTR 4) + set(CMAKE_CXX_SIZEOF_DATA_PTR 4) + set(CMAKE_SYSTEM_PROCESSOR "arm") +endif() + +# Note that only Xcode 7+ supports the newer more specific: +# -m${SDK_NAME}-version-min flags, older versions of Xcode use: +# -m(ios/ios-simulator)-version-min instead. +if(${CMAKE_VERSION} VERSION_LESS "3.11") + if(PLATFORM_INT STREQUAL "OS" OR PLATFORM_INT STREQUAL "OS64") + if(XCODE_VERSION_INT VERSION_LESS 7.0) + set(SDK_NAME_VERSION_FLAGS + "-mios-version-min=${DEPLOYMENT_TARGET}") + else() + # Xcode 7.0+ uses flags we can build directly from SDK_NAME. + set(SDK_NAME_VERSION_FLAGS + "-m${SDK_NAME}-version-min=${DEPLOYMENT_TARGET}") + endif() + elseif(PLATFORM_INT STREQUAL "TVOS") + set(SDK_NAME_VERSION_FLAGS + "-mtvos-version-min=${DEPLOYMENT_TARGET}") + elseif(PLATFORM_INT STREQUAL "SIMULATOR_TVOS") + set(SDK_NAME_VERSION_FLAGS + "-mtvos-simulator-version-min=${DEPLOYMENT_TARGET}") +elseif(PLATFORM_INT STREQUAL "SIMULATORARM64_TVOS") + set(SDK_NAME_VERSION_FLAGS + "-mtvos-simulator-version-min=${DEPLOYMENT_TARGET}") + elseif(PLATFORM_INT STREQUAL "WATCHOS") + set(SDK_NAME_VERSION_FLAGS + "-mwatchos-version-min=${DEPLOYMENT_TARGET}") + elseif(PLATFORM_INT STREQUAL "SIMULATOR_WATCHOS") + set(SDK_NAME_VERSION_FLAGS + "-mwatchos-simulator-version-min=${DEPLOYMENT_TARGET}") + elseif(PLATFORM_INT STREQUAL "MAC") + set(SDK_NAME_VERSION_FLAGS + "-mmacosx-version-min=${DEPLOYMENT_TARGET}") + else() + # SIMULATOR or SIMULATOR64 both use -mios-simulator-version-min. + set(SDK_NAME_VERSION_FLAGS + "-mios-simulator-version-min=${DEPLOYMENT_TARGET}") + endif() +elseif(NOT PLATFORM_INT MATCHES "^MAC_CATALYST") + # Newer versions of CMake sets the version min flags correctly, skip this for Mac Catalyst targets + set(CMAKE_OSX_DEPLOYMENT_TARGET ${DEPLOYMENT_TARGET}) +endif() + +if(DEFINED APPLE_TARGET_TRIPLE_INT) + set(APPLE_TARGET_TRIPLE ${APPLE_TARGET_TRIPLE_INT} CACHE INTERNAL "") + set(CMAKE_C_COMPILER_TARGET ${APPLE_TARGET_TRIPLE}) + set(CMAKE_CXX_COMPILER_TARGET ${APPLE_TARGET_TRIPLE}) + set(CMAKE_ASM_COMPILER_TARGET ${APPLE_TARGET_TRIPLE}) +endif() + +if(PLATFORM_INT MATCHES "^MAC_CATALYST") + set(C_TARGET_FLAGS "-isystem ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/usr/include -iframework ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/System/Library/Frameworks") +endif() + +if(ENABLE_BITCODE_INT) + set(BITCODE "-fembed-bitcode") + set(CMAKE_XCODE_ATTRIBUTE_BITCODE_GENERATION_MODE "bitcode") + set(CMAKE_XCODE_ATTRIBUTE_ENABLE_BITCODE "YES") +else() + set(BITCODE "") + set(CMAKE_XCODE_ATTRIBUTE_ENABLE_BITCODE "NO") +endif() + +if(ENABLE_ARC_INT) + set(FOBJC_ARC "-fobjc-arc") + set(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "YES") +else() + set(FOBJC_ARC "-fno-objc-arc") + set(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "NO") +endif() + +if(NAMED_LANGUAGE_SUPPORT_INT) + set(OBJC_VARS "-fobjc-abi-version=2 -DOBJC_OLD_DISPATCH_PROTOTYPES=0") + set(OBJC_LEGACY_VARS "") +else() + set(OBJC_VARS "") + set(OBJC_LEGACY_VARS "-fobjc-abi-version=2 -DOBJC_OLD_DISPATCH_PROTOTYPES=0") +endif() + +if(NOT ENABLE_VISIBILITY_INT) + foreach(lang ${languages}) + set(CMAKE_${lang}_VISIBILITY_PRESET "hidden" CACHE INTERNAL "") + endforeach() + set(CMAKE_XCODE_ATTRIBUTE_GCC_SYMBOLS_PRIVATE_EXTERN "YES") + set(VISIBILITY "-fvisibility=hidden -fvisibility-inlines-hidden") +else() + foreach(lang ${languages}) + set(CMAKE_${lang}_VISIBILITY_PRESET "default" CACHE INTERNAL "") + endforeach() + set(CMAKE_XCODE_ATTRIBUTE_GCC_SYMBOLS_PRIVATE_EXTERN "NO") + set(VISIBILITY "-fvisibility=default") +endif() + +if(DEFINED APPLE_TARGET_TRIPLE) + set(APPLE_TARGET_TRIPLE_FLAG "-target ${APPLE_TARGET_TRIPLE}") +endif() + +#Check if Xcode generator is used since that will handle these flags automagically +if(CMAKE_GENERATOR MATCHES "Xcode") + message(STATUS "Not setting any manual command-line buildflags, since Xcode is selected as the generator. Modifying the Xcode build-settings directly instead.") +else() + set(CMAKE_C_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${OBJC_LEGACY_VARS} ${BITCODE} ${VISIBILITY} ${CMAKE_C_FLAGS}") + set(CMAKE_C_FLAGS_DEBUG "-O0 -g ${CMAKE_C_FLAGS_DEBUG}") + set(CMAKE_C_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_C_FLAGS_MINSIZEREL}") + set(CMAKE_C_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_C_FLAGS_RELWITHDEBINFO}") + set(CMAKE_C_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_C_FLAGS_RELEASE}") + set(CMAKE_CXX_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${OBJC_LEGACY_VARS} ${BITCODE} ${VISIBILITY} ${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g ${CMAKE_CXX_FLAGS_DEBUG}") + set(CMAKE_CXX_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_CXX_FLAGS_MINSIZEREL}") + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") + set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_CXX_FLAGS_RELEASE}") + if(NAMED_LANGUAGE_SUPPORT_INT) + set(CMAKE_OBJC_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${BITCODE} ${VISIBILITY} ${FOBJC_ARC} ${OBJC_VARS} ${CMAKE_OBJC_FLAGS}") + set(CMAKE_OBJC_FLAGS_DEBUG "-O0 -g ${CMAKE_OBJC_FLAGS_DEBUG}") + set(CMAKE_OBJC_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_OBJC_FLAGS_MINSIZEREL}") + set(CMAKE_OBJC_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_OBJC_FLAGS_RELWITHDEBINFO}") + set(CMAKE_OBJC_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_OBJC_FLAGS_RELEASE}") + set(CMAKE_OBJCXX_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${BITCODE} ${VISIBILITY} ${FOBJC_ARC} ${OBJC_VARS} ${CMAKE_OBJCXX_FLAGS}") + set(CMAKE_OBJCXX_FLAGS_DEBUG "-O0 -g ${CMAKE_OBJCXX_FLAGS_DEBUG}") + set(CMAKE_OBJCXX_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_OBJCXX_FLAGS_MINSIZEREL}") + set(CMAKE_OBJCXX_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_OBJCXX_FLAGS_RELWITHDEBINFO}") + set(CMAKE_OBJCXX_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_OBJCXX_FLAGS_RELEASE}") + endif() + set(CMAKE_C_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_C_LINK_FLAGS}") + set(CMAKE_CXX_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}") + if(NAMED_LANGUAGE_SUPPORT_INT) + set(CMAKE_OBJC_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_OBJC_LINK_FLAGS}") + set(CMAKE_OBJCXX_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_OBJCXX_LINK_FLAGS}") + endif() + set(CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS} -x assembler-with-cpp -arch ${CMAKE_OSX_ARCHITECTURES} ${APPLE_TARGET_TRIPLE_FLAG}") +endif() + +## Print status messages to inform of the current state +message(STATUS "Configuring ${SDK_NAME} build for platform: ${PLATFORM_INT}, architecture(s): ${ARCHS}") +message(STATUS "Using SDK: ${CMAKE_OSX_SYSROOT_INT}") +message(STATUS "Using C compiler: ${CMAKE_C_COMPILER}") +message(STATUS "Using CXX compiler: ${CMAKE_CXX_COMPILER}") +message(STATUS "Using libtool: ${BUILD_LIBTOOL}") +message(STATUS "Using install name tool: ${CMAKE_INSTALL_NAME_TOOL}") +if(DEFINED APPLE_TARGET_TRIPLE) + message(STATUS "Autoconf target triple: ${APPLE_TARGET_TRIPLE}") +endif() +message(STATUS "Using minimum deployment version: ${DEPLOYMENT_TARGET}" + " (SDK version: ${SDK_VERSION})") +if(MODERN_CMAKE) + message(STATUS "Merging integrated CMake 3.14+ iOS,tvOS,watchOS,macOS toolchain(s) with this toolchain!") + if(PLATFORM_INT MATCHES ".*COMBINED") + message(STATUS "Will combine built (static) artifacts into FAT lib...") + endif() +endif() +if(CMAKE_GENERATOR MATCHES "Xcode") + message(STATUS "Using Xcode version: ${XCODE_VERSION_INT}") +endif() +message(STATUS "CMake version: ${CMAKE_VERSION}") +if(DEFINED SDK_NAME_VERSION_FLAGS) + message(STATUS "Using version flags: ${SDK_NAME_VERSION_FLAGS}") +endif() +message(STATUS "Using a data_ptr size of: ${CMAKE_CXX_SIZEOF_DATA_PTR}") +if(ENABLE_BITCODE_INT) + message(STATUS "Bitcode: Enabled") +else() + message(STATUS "Bitcode: Disabled") +endif() + +if(ENABLE_ARC_INT) + message(STATUS "ARC: Enabled") +else() + message(STATUS "ARC: Disabled") +endif() + +if(ENABLE_VISIBILITY_INT) + message(STATUS "Hiding symbols: Disabled") +else() + message(STATUS "Hiding symbols: Enabled") +endif() + +# Set global properties +set_property(GLOBAL PROPERTY PLATFORM "${PLATFORM}") +set_property(GLOBAL PROPERTY APPLE_TARGET_TRIPLE "${APPLE_TARGET_TRIPLE_INT}") +set_property(GLOBAL PROPERTY SDK_VERSION "${SDK_VERSION}") +set_property(GLOBAL PROPERTY XCODE_VERSION "${XCODE_VERSION_INT}") +set_property(GLOBAL PROPERTY OSX_ARCHITECTURES "${CMAKE_OSX_ARCHITECTURES}") + +# Export configurable variables for the try_compile() command. +set(CMAKE_TRY_COMPILE_PLATFORM_VARIABLES + PLATFORM + XCODE_VERSION_INT + SDK_VERSION + NAMED_LANGUAGE_SUPPORT + DEPLOYMENT_TARGET + CMAKE_DEVELOPER_ROOT + CMAKE_OSX_SYSROOT_INT + ENABLE_BITCODE + ENABLE_ARC + CMAKE_ASM_COMPILER + CMAKE_C_COMPILER + CMAKE_C_COMPILER_TARGET + CMAKE_CXX_COMPILER + CMAKE_CXX_COMPILER_TARGET + BUILD_LIBTOOL + CMAKE_INSTALL_NAME_TOOL + CMAKE_C_FLAGS + CMAKE_C_DEBUG + CMAKE_C_MINSIZEREL + CMAKE_C_RELWITHDEBINFO + CMAKE_C_RELEASE + CMAKE_CXX_FLAGS + CMAKE_CXX_FLAGS_DEBUG + CMAKE_CXX_FLAGS_MINSIZEREL + CMAKE_CXX_FLAGS_RELWITHDEBINFO + CMAKE_CXX_FLAGS_RELEASE + CMAKE_C_LINK_FLAGS + CMAKE_CXX_LINK_FLAGS + CMAKE_ASM_FLAGS +) + +if(NAMED_LANGUAGE_SUPPORT_INT) + list(APPEND CMAKE_TRY_COMPILE_PLATFORM_VARIABLES + CMAKE_OBJC_FLAGS + CMAKE_OBJC_DEBUG + CMAKE_OBJC_MINSIZEREL + CMAKE_OBJC_RELWITHDEBINFO + CMAKE_OBJC_RELEASE + CMAKE_OBJCXX_FLAGS + CMAKE_OBJCXX_DEBUG + CMAKE_OBJCXX_MINSIZEREL + CMAKE_OBJCXX_RELWITHDEBINFO + CMAKE_OBJCXX_RELEASE + CMAKE_OBJC_LINK_FLAGS + CMAKE_OBJCXX_LINK_FLAGS + ) +endif() + +set(CMAKE_PLATFORM_HAS_INSTALLNAME 1) +set(CMAKE_SHARED_LINKER_FLAGS "-rpath @executable_path/Frameworks -rpath @loader_path/Frameworks") +set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -Wl,-headerpad_max_install_names") +set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -Wl,-headerpad_max_install_names") +set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,") +set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,") +set(CMAKE_FIND_LIBRARY_SUFFIXES ".tbd" ".dylib" ".so" ".a") +set(CMAKE_SHARED_LIBRARY_SONAME_C_FLAG "-install_name") + +# Set the find root to the SDK developer roots. +# Note: CMAKE_FIND_ROOT_PATH is only useful when cross-compiling. Thus, do not set on macOS builds. +if(NOT PLATFORM_INT MATCHES "^MAC.*$") + list(APPEND CMAKE_FIND_ROOT_PATH "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "") + set(CMAKE_IGNORE_PATH "/System/Library/Frameworks;/usr/local/lib;/opt/homebrew" CACHE INTERNAL "") +endif() + +# Default to searching for frameworks first. +IF(NOT DEFINED CMAKE_FIND_FRAMEWORK) + set(CMAKE_FIND_FRAMEWORK FIRST) +ENDIF(NOT DEFINED CMAKE_FIND_FRAMEWORK) + +# Set up the default search directories for frameworks. +if(PLATFORM_INT MATCHES "^MAC_CATALYST") + set(CMAKE_FRAMEWORK_PATH + ${CMAKE_DEVELOPER_ROOT}/Library/PrivateFrameworks + ${CMAKE_OSX_SYSROOT_INT}/System/Library/Frameworks + ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/System/Library/Frameworks + ${CMAKE_FRAMEWORK_PATH} CACHE INTERNAL "") +else() + set(CMAKE_FRAMEWORK_PATH + ${CMAKE_DEVELOPER_ROOT}/Library/PrivateFrameworks + ${CMAKE_OSX_SYSROOT_INT}/System/Library/Frameworks + ${CMAKE_FRAMEWORK_PATH} CACHE INTERNAL "") +endif() + +# By default, search both the specified iOS SDK and the remainder of the host filesystem. +if(NOT CMAKE_FIND_ROOT_PATH_MODE_PROGRAM) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH CACHE INTERNAL "") +endif() +if(NOT CMAKE_FIND_ROOT_PATH_MODE_LIBRARY) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH CACHE INTERNAL "") +endif() +if(NOT CMAKE_FIND_ROOT_PATH_MODE_INCLUDE) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH CACHE INTERNAL "") +endif() +if(NOT CMAKE_FIND_ROOT_PATH_MODE_PACKAGE) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH CACHE INTERNAL "") +endif() + +# +# Some helper-macros below to simplify and beautify the CMakeFile +# + +# This little macro lets you set any Xcode specific property. +macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE XCODE_RELVERSION) + set(XCODE_RELVERSION_I "${XCODE_RELVERSION}") + if(XCODE_RELVERSION_I STREQUAL "All") + set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} "${XCODE_VALUE}") + else() + set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY}[variant=${XCODE_RELVERSION_I}] "${XCODE_VALUE}") + endif() +endmacro(set_xcode_property) + +# This macro lets you find executable programs on the host system. +macro(find_host_package) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE NEVER) + set(_TOOLCHAIN_IOS ${IOS}) + set(IOS OFF) + find_package(${ARGN}) + set(IOS ${_TOOLCHAIN_IOS}) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH) +endmacro(find_host_package) \ No newline at end of file diff --git a/src/3rd_party/CMakeLists.txt b/src/3rd_party/CMakeLists.txt index 838951c50..f586d6ec4 100644 --- a/src/3rd_party/CMakeLists.txt +++ b/src/3rd_party/CMakeLists.txt @@ -9,10 +9,23 @@ add_subdirectory(./faiss) include_directories(./faiss) if(COMPILE_CPU) - if(NOT GENERATE_MARIAN_INSTALL_TARGETS) + # intgemm is not ARM-compatible. do not build it if we are on ARM + if(NOT GENERATE_MARIAN_INSTALL_TARGETS AND NOT ARM) set(INTGEMM_DONT_BUILD_TESTS ON CACHE BOOL "Disable intgemm tests") add_subdirectory(./intgemm) endif() + + # the default codepath does not use ruy so there is no need to add these directories + # to the build unless it is explicitly enabled. RUY is intended mostly for ARM support + if(USE_RUY_SGEMM) + set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE BOOL " " FORCE) + set(CPUINFO_BUILD_BENCHMARKS OFF CACHE BOOL " " FORCE) + set(CPUINFO_BUILD_PKG_CONFIG OFF CACHE BOOL " " FORCE) + set(CPUINFO_BUILD_UNIT_TESTS OFF CACHE BOOL " " FORCE) + set(CPUINFO_BUILD_TOOLS OFF CACHE BOOL " " FORCE) + add_subdirectory(ruy/third_party/cpuinfo EXCLUDE_FROM_ALL) + add_subdirectory(ruy EXCLUDE_FROM_ALL) + endif(USE_RUY_SGEMM) endif(COMPILE_CPU) if(USE_FBGEMM) diff --git a/src/3rd_party/faiss/VectorTransform.cpp b/src/3rd_party/faiss/VectorTransform.cpp index 103b0910e..22fecbf78 100644 --- a/src/3rd_party/faiss/VectorTransform.cpp +++ b/src/3rd_party/faiss/VectorTransform.cpp @@ -19,6 +19,12 @@ using namespace faiss; +#ifdef ARM +// we use various AVX/SSE instructions in this file +// simd_utils translates these into ARM/NEON compatible instructions +#include "3rd_party/simd_utils/simd_utils.h" +#endif + extern "C" { diff --git a/src/3rd_party/faiss/VectorTransform.h b/src/3rd_party/faiss/VectorTransform.h index 5fc96bc46..e8689bc15 100644 --- a/src/3rd_party/faiss/VectorTransform.h +++ b/src/3rd_party/faiss/VectorTransform.h @@ -19,8 +19,10 @@ #include #ifdef __APPLE__ +#ifndef ARM #include #endif +#endif namespace faiss { diff --git a/src/3rd_party/ruy b/src/3rd_party/ruy new file mode 160000 index 000000000..c04e5e52a --- /dev/null +++ b/src/3rd_party/ruy @@ -0,0 +1 @@ +Subproject commit c04e5e52ae6b144f74ac032652e3c538bda15c9b diff --git a/src/3rd_party/sentencepiece b/src/3rd_party/sentencepiece index fb6f8e408..5978fc9eb 160000 --- a/src/3rd_party/sentencepiece +++ b/src/3rd_party/sentencepiece @@ -1 +1 @@ -Subproject commit fb6f8e408d2078ebfedc8ccc33985fef03c50b0e +Subproject commit 5978fc9ebb6067a3a1b11370bf46b8da0f89e592 diff --git a/src/3rd_party/simd_utils b/src/3rd_party/simd_utils new file mode 160000 index 000000000..fe9fa82c9 --- /dev/null +++ b/src/3rd_party/simd_utils @@ -0,0 +1 @@ +Subproject commit fe9fa82c9d7e6297913bc6c98fe079acc6e157e9 diff --git a/src/common/binary.cpp b/src/common/binary.cpp index 6bb90c508..f8dc26f12 100644 --- a/src/common/binary.cpp +++ b/src/common/binary.cpp @@ -109,7 +109,7 @@ void loadItems(const std::string& fileName, std::vector& items) { io::Item getItem(const void* current, const std::string& varName) { std::vector items; - loadItems(current, items); + loadItems(current, items, /*mapped=*/true); for(auto& item : items) if(item.name == varName) diff --git a/src/common/types.h b/src/common/types.h index a0930a0f8..763edb09b 100644 --- a/src/common/types.h +++ b/src/common/types.h @@ -17,7 +17,11 @@ #include #ifndef __CUDACC__ // NVCC is very unreliable when it comes to CPU intrinsics, we hide them completely from NVCC-compiled code -#include + #ifndef ARM + #include + #else + #include "3rd_party/simd_utils/simd_utils.h" + #endif #endif #ifdef __CUDACC__ // nvcc is compiling this code diff --git a/src/functional/operators.h b/src/functional/operators.h index 3628fdcb9..e7dcea3c6 100644 --- a/src/functional/operators.h +++ b/src/functional/operators.h @@ -217,8 +217,11 @@ struct Ops { // __CUDACC__ is defined when compiling with NVCC regardless of device type // __CUDA_ARCH__ is defined when compiling device (GPU) code #ifndef __CUDACC__ - +#ifndef ARM #include "3rd_party/sse_mathfun.h" +#else +#include "3rd_party/simd_utils/simd_utils.h" +#endif namespace marian { namespace functional { diff --git a/src/tensors/cpu/expression_graph_packable.h b/src/tensors/cpu/expression_graph_packable.h index 1a233372c..4af69fac9 100644 --- a/src/tensors/cpu/expression_graph_packable.h +++ b/src/tensors/cpu/expression_graph_packable.h @@ -152,7 +152,7 @@ class ExpressionGraphPackable : public ExpressionGraph { #endif } else if (isIntgemm(gemmElementType) && (pName.find("_W") == pName.length() - 3 || pName.find("_W") == pName.length() - 2 /* || pName.find("Wemb") != std::string::npos*/)) { -#if COMPILE_CPU +#if COMPILE_CPU && !defined(ARM) using cpu::integer::cols; using cpu::integer::rows; auto allocator = New(getBackend()); diff --git a/src/tensors/cpu/fbgemm/packed_gemm.cpp b/src/tensors/cpu/fbgemm/packed_gemm.cpp index dd81d0f7f..23ed559f1 100644 --- a/src/tensors/cpu/fbgemm/packed_gemm.cpp +++ b/src/tensors/cpu/fbgemm/packed_gemm.cpp @@ -2,16 +2,16 @@ #include "tensors/tensor_allocator.h" #include "tensors/tensor_operators.h" -#include -#include -#include -#include #include #include #include //#include #if USE_FBGEMM +#include +#include +#include +#include #ifdef _MSC_VER #pragma warning(disable: 4505) // 'fbgemmAlignedAlloc' in fbgemm.h: unreferenced local function has been removed (missing 'static inline') #pragma warning(disable: 4251) // 'fbgemm::CompressedSparseColumn::colptr_': class 'std::vector>' needs to have dll-interface to be used by clients of class 'fbgemm::CompressedSparseColumn' diff --git a/src/tensors/cpu/integer_common.h b/src/tensors/cpu/integer_common.h index f4e632b5c..8a00a7870 100644 --- a/src/tensors/cpu/integer_common.h +++ b/src/tensors/cpu/integer_common.h @@ -5,7 +5,7 @@ #include "tensors/cpu/aligned.h" #include "common/io_item.h" -#if COMPILE_CPU +#if COMPILE_CPU && !defined(ARM) #include "3rd_party/intgemm/intgemm/intgemm.h" #else namespace intgemm { @@ -31,10 +31,12 @@ namespace intgemm { } #endif +#ifndef ARM #include #include #include #include +#endif #include #include @@ -98,7 +100,7 @@ template <> struct intgemm_ { template static inline float& getQuantMult(marian::Tensor val) { -#if COMPILE_CPU +#if COMPILE_CPU && !defined(ARM) ABORT_IF(!isIntgemm(val->type()), "getQuantMult does not work for type {}", val->type()); typedef typename intgemm_::type Integer; return *(reinterpret_cast(val->data() + val->shape().elements())); @@ -109,7 +111,7 @@ static inline float& getQuantMult(marian::Tensor val) { } static inline Type getIntgemmType(Type vtype) { -#if COMPILE_CPU +#if COMPILE_CPU && !defined(ARM) if (vtype == Type::intgemm8) { if (intgemm::kCPU == intgemm::CPUType::AVX512VNNI) { return Type::intgemm8avx512vnni; @@ -142,7 +144,7 @@ static inline Type getIntgemmType(Type vtype) { } static inline bool passOrAbort(Type vtype) { -#if COMPILE_CPU +#if COMPILE_CPU && !defined(ARM) if (vtype == Type::intgemm8 || vtype == Type::intgemm16) { return true; } else if (vtype == Type::intgemm16sse2) { @@ -166,7 +168,7 @@ static inline bool passOrAbort(Type vtype) { template static inline float computeQuantMult(marian::Tensor val) { -#if COMPILE_CPU +#if COMPILE_CPU && !defined(ARM) if(sizeOf(vtype) == 1) return 127.0f / intgemm::MaxAbsolute(val->data(), val->data() + val->shape().elements()); else if(sizeOf(vtype) == 2) @@ -186,7 +188,7 @@ void AddBias(marian::Tensor C, const marian::Tensor Bias); // in our binary format. Then we copy the quantizationMultiplier information at the end template void prepareAndTransposeB(io::Item& item, const char * input) { -#if COMPILE_CPU +#if COMPILE_CPU && !defined(ARM) typedef typename intgemm_::type Integer; Integer * output_tensor = reinterpret_cast(&(*item.bytes.begin())); // Sometimes we will end up with misaligned intput (and output) so we can't use them directly. diff --git a/src/tensors/cpu/intgemm_interface.h b/src/tensors/cpu/intgemm_interface.h index 88408aa18..80784e0f6 100644 --- a/src/tensors/cpu/intgemm_interface.h +++ b/src/tensors/cpu/intgemm_interface.h @@ -9,7 +9,7 @@ namespace marian { namespace cpu { namespace integer { -#if COMPILE_CPU +#if COMPILE_CPU && !defined(ARM) /* * Prepare an activation matrix into intgemm8/16 format. For now the activation matrix is just quantized. * Expr input: The input tensor @@ -45,7 +45,7 @@ static inline Expr prepareA(Expr a) { */ template static inline Expr affineOrDotTyped(Expr a, Expr bQuant, Expr bias, bool transA, bool /*transB*/, float scale) { -#if COMPILE_CPU +#if COMPILE_CPU && !defined(ARM) ABORT_IF(!isFloat(a->value_type()), "Intgemm expects type of A to be float32 not {}", a->value_type()); ABORT_IF(!isIntgemm(bQuant->value_type()), "Intgemm expects type of B to be a variant of intgemm not {}", bQuant->value_type()); diff --git a/src/tensors/cpu/prod.cpp b/src/tensors/cpu/prod.cpp index 8fcca924b..639027d05 100755 --- a/src/tensors/cpu/prod.cpp +++ b/src/tensors/cpu/prod.cpp @@ -7,14 +7,6 @@ #include "tensors/tensor.h" #include "tensors/tensor_allocator.h" -#if MKL_FOUND -#include -#else -#if BLAS_FOUND -#include -#endif -#endif - #include "integer_common.h" #include "prod_blas.h" diff --git a/src/tensors/cpu/prod_blas.h b/src/tensors/cpu/prod_blas.h index a591fdd26..a281aa7bf 100644 --- a/src/tensors/cpu/prod_blas.h +++ b/src/tensors/cpu/prod_blas.h @@ -1,11 +1,117 @@ +#pragma once #if MKL_FOUND -#include -#else -#if BLAS_FOUND -#include -#endif + #include +#elif BLAS_FOUND + #include +#elif USE_RUY_SGEMM +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcomment" + #include "ruy/ruy.h" + #include "ruy/system_aligned_alloc.h" +#pragma GCC pop #endif +#if USE_RUY_SGEMM +// AlignedVector allocates aligned memory and cleans up after itself. RAII +// wrapper similar to intgemm's AlignedVector. +template +class AlignedVector { +public: + AlignedVector(size_t num_elem) + : size_(num_elem), + storage_(reinterpret_cast(ruy::detail::SystemAlignedAlloc(sizeof(T) * num_elem))) {} + + T *begin() { return storage_; } + T *data() { return storage_; } + size_t size() const { return size_; } + size_t memSize() const { return sizeof(T) * size_; } + + // Forbid copy + AlignedVector(const AlignedVector &) = delete; + AlignedVector &operator=(const AlignedVector &) = delete; + + ~AlignedVector() { ruy::detail::SystemAlignedFree(reinterpret_cast(storage_)); } + +private: + size_t size_; + T *storage_; +}; + + +inline void GemmRuy(const bool transA, + const bool transB, + const int M, + const int N, + const int K, + const float alpha, + const float *A, + const int lda, + const float *B, + const int ldb, + const float beta, + float *C, + const int ldc) { + ruy::Context context; + + // If we need to transpose, we can swap dimensions in layout claim the matrix + // is just column-major. Set ordering so transpose. + const auto orderA = (transA ? ruy::Order::kColMajor : ruy::Order::kRowMajor); + const auto orderB = (transB ? ruy::Order::kColMajor : ruy::Order::kRowMajor); + + ruy::Matrix lhs; + ruy::MakeSimpleLayout(M, K, orderA, lhs.mutable_layout()); + lhs.set_data(A); + + ruy::Matrix rhs; + ruy::MakeSimpleLayout(K, N, orderB, rhs.mutable_layout()); + rhs.set_data(B); + + ruy::Matrix dst; + ruy::MakeSimpleLayout(M, N, ruy::Order::kRowMajor, dst.mutable_layout()); + + if(beta == 0) { + // For beta = 0, we want to avoid the additional allocation. This is a + // large amount of our inference use-cases. sgemm is called with `beta` for + // accumulating gradients in backpropogation, which is 0.0 during + // inference. + + dst.set_data(C); + ruy::MulParams mul_params; + ruy::Mul(lhs, rhs, mul_params, &context, &dst); + + if(alpha != 1.0) { + // Write out C as C = alpha * [op(A) * op(B)] + beta * C + // Can we expect the compiler to autovectorize this? + // TODO: Come back and explicitly use SIMD. + const size_t size = M * N; + const float *opA_opB = C; // Alias. + for(size_t i = 0; i < size; i++) { + C[i] = alpha * opA_opB[i]; + } + } + + } else { + // No multiply-add in Ruy + // See also: https://github.com/google/ruy/issues/307 + + AlignedVector intermediate(M * N); + dst.set_data(intermediate.data()); + ruy::MulParams mul_params; + ruy::Mul(lhs, rhs, mul_params, &context, &dst); + + // Write out C as C = alpha * [op(A) * op(B)] + beta * C + // Can we expect the compiler to autovectorize this? + // TODO: Come back and explicitly use SIMD. + const size_t size = M * N; + const float *opA_opB = intermediate.data(); + for(size_t i = 0; i < size; i++) { + C[i] = alpha * opA_opB[i] + beta * C[i]; + } + } +} + +#endif // RUY_SGEMM + inline void sgemm(bool transA, bool transB, int rows_a, @@ -34,6 +140,20 @@ inline void sgemm(bool transA, beta, c, ldc); +#elif USE_RUY_SGEMM + GemmRuy(transA, + transB, + rows_a, + rows_b, + width, + alpha, + a, + lda, + b, + ldb, + beta, + c, + ldc); #else transA; transB; rows_a; rows_b; width; alpha; a; lda; b; ldb; beta; c; ldc; // make compiler happy ABORT("Marian must be compiled with a BLAS library"); diff --git a/src/translator/translator.h b/src/translator/translator.h index f0fc0b908..abb5a5775 100644 --- a/src/translator/translator.h +++ b/src/translator/translator.h @@ -248,6 +248,9 @@ class TranslateService : public ModelServiceTask { size_t numDevices_; + std::vector model_mmaps_; // map + std::vector> model_items_; // non-mmap + public: virtual ~TranslateService() {} @@ -282,36 +285,62 @@ class TranslateService : public ModelServiceTask { auto devices = Config::getDevices(options_); numDevices_ = devices.size(); + ThreadPool threadPool(numDevices_, numDevices_); + scorers_.resize(numDevices_); + graphs_.resize(numDevices_); + // preload models - std::vector> model_items_; auto models = options->get>("models"); - for(auto model : models) { - auto items = io::loadItems(model); - model_items_.push_back(std::move(items)); + if(options_->get("model-mmap", false)) { + for(auto model : models) { + ABORT_IF(!io::isBin(model), "Non-binarized models cannot be mmapped"); + LOG(info, "Loading model from {}", model); + model_mmaps_.push_back(mio::mmap_source(model)); + } + } + else { + for(auto model : models) { + LOG(info, "Loading model from {}", model); + auto items = io::loadItems(model); + model_items_.push_back(std::move(items)); + } } // initialize scorers + size_t id = 0; for(auto device : devices) { - auto graph = New(true); - - auto precison = options_->get>("precision", {"float32"}); - graph->setDefaultElementType(typeFromString(precison[0])); // only use first type, used for parameter type in graph - graph->setDevice(device); - if (device.type == DeviceType::cpu) { - graph->getBackend()->setOptimized(options_->get("optimize")); - graph->getBackend()->setGemmType(options_->get("gemm-type")); - graph->getBackend()->setQuantizeRange(options_->get("quantize-range")); - } - graph->reserveWorkspaceMB(options_->get("workspace")); - graphs_.push_back(graph); - - auto scorers = createScorers(options_, model_items_); - for(auto scorer : scorers) { - scorer->init(graph); - if(shortlistGenerator_) - scorer->setShortlistGenerator(shortlistGenerator_); - } - scorers_.push_back(scorers); + auto task = [&](DeviceId device, size_t id) { + auto graph = New(true); + auto prec = options_->get>("precision", {"float32"}); + graph->setDefaultElementType(typeFromString(prec[0])); + graph->setDevice(device); + if (device.type == DeviceType::cpu) { + graph->getBackend()->setOptimized(options_->get("optimize")); + graph->getBackend()->setGemmType(options_->get("gemm-type")); + graph->getBackend()->setQuantizeRange(options_->get("quantize-range")); + } + graph->reserveWorkspaceMB(options_->get("workspace")); + graphs_[id] = graph; + + std::vector> scorers; + if(options_->get("model-mmap", false)) { + scorers = createScorers(options_, model_mmaps_); + } + else { + scorers = createScorers(options_, model_items_); + } + + for(auto scorer : scorers) { + scorer->init(graph); + if(shortlistGenerator_) + scorer->setShortlistGenerator(shortlistGenerator_); + } + + scorers_[id] = scorers; + graph->forward(); + }; + + threadPool.enqueue(task, device, id++); } }