diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..3209d28 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,133 @@ +name: CI + +on: [push, pull_request] + +jobs: + ######################### + # Build and test with GCC + ######################### + CPU: + # The type of runner that the job will run on + runs-on: ubuntu-18.04 + strategy: + fail-fast: false + matrix: + build_type: [release, debug] + compiler: [g++] + use_omp: [true] + use_mpi: [true, false] + use_float: [true] + include: + - build_type: debug + compiler: clang++ + use_omp: false + use_mpi: true + use_float: false + + env: + USE_OMP: ${{ matrix.use_omp }} + USE_MPI: ${{ matrix.use_mpi }} + USE_FLOAT: ${{ matrix.use_float }} + BUILD_TYPE: ${{ matrix.build_type }} + COMPILER: ${{ matrix.compiler }} + + steps: + # Checks-out your repository under $GITHUB_WORKSPACE + - uses: actions/checkout@v2 + + + - name: Print build config + run: | + echo "Compiler: ${COMPILER}, Build type: ${BUILD_TYPE}, OpenMP: ${USE_OMP}, MPI: ${USE_MPI}, FLOAT: ${USE_FLOAT}" + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y libfftw3-dev make g++ clang wget git make + cd ${HOME} && wget https://github.com/Kitware/CMake/releases/download/v3.11.4/cmake-3.11.4-Linux-x86_64.tar.gz && tar -xzvf cmake-3.11.4-Linux-x86_64.tar.gz + + - name: Install MPI + if: ${{ matrix.use_mpi }} + run: | + sudo apt-get install -y mpi-default-dev + + - name: Build and install + run: | + mkdir -p build + cd build + mkdir -p install_dir + export INSTALL_DIR=$(pwd)/install_dir + CXX=${COMPILER} ${HOME}/cmake-3.11.4-Linux-x86_64/bin/cmake .. -DSPFFT_BUILD_TESTS=OFF -DSPFFT_OMP=${USE_OMP} -DSPFFT_MPI=${USE_MPI} -DSPFFT_SINGLE_PRECISION=${USE_FLOAT} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} + make -j2 + make VERBOSE=1 install + test -f ${INSTALL_DIR}/lib/libspfft.so + test -f ${INSTALL_DIR}/include/spfft/spfft.hpp + test -f ${INSTALL_DIR}/include/spfft/spfft.h + + - name: Build tests + run: | + cd ${GITHUB_WORKSPACE} + rm -rf build + mkdir -p build + cd build + CXX=${COMPILER} ${HOME}/cmake-3.11.4-Linux-x86_64/bin/cmake .. -DSPFFT_BUILD_TESTS=ON -DSPFFT_OMP=${USE_OMP} -DSPFFT_MPI=${USE_MPI} -DSPFFT_SINGLE_PRECISION=${USE_FLOAT} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} + make -j2 + + - name: Run tests + env: + OMPI_MCA_btl_vader_single_copy_mechanism: none + run: ${GITHUB_WORKSPACE}/build/tests/run_local_tests + + - name: Run tests with MPI + if: ${{ matrix.use_mpi }} + env: + OMPI_MCA_btl_vader_single_copy_mechanism: none + run: mpirun -n 2 ${GITHUB_WORKSPACE}/build/tests/run_mpi_tests + + + ################# + # Build with CUDA + ################# + CUDA: + runs-on: ubuntu-18.04 + container: nvidia/cuda:9.2-devel-ubuntu18.04 + + steps: + # Checks-out your repository under $GITHUB_WORKSPACE + - uses: actions/checkout@v2 + + - name: Install dependencies + run: | + apt-get update + apt-get install -y libfftw3-dev make g++ mpi-default-dev wget git make + cd ${HOME} && wget https://github.com/Kitware/CMake/releases/download/v3.14.6/cmake-3.14.6-Linux-x86_64.tar.gz && tar -xzvf cmake-3.14.6-Linux-x86_64.tar.gz + + - name: Build + run: | + cd ${GITHUB_WORKSPACE} + mkdir -p build + cd build + ${HOME}/cmake-3.14.6-Linux-x86_64/bin/cmake .. -DSPFFT_BUILD_TESTS=ON -DSPFFT_GPU_BACKEND=CUDA -DSPFFT_OMP=OFF + make -j2 + + + ################# + # Build with ROCm + ################# + ROCM: + runs-on: ubuntu-18.04 + container: adhocman/master:ubuntu18.04_rocm + + steps: + # Checks-out your repository under $GITHUB_WORKSPACE + - uses: actions/checkout@v2 + + - name: Build + run: | + cd ${GITHUB_WORKSPACE} + mkdir -p build + cd build + /root/cmake-3.11.4-Linux-x86_64/bin/cmake .. -DSPFFT_BUILD_TESTS=ON -DSPFFT_GPU_BACKEND=ROCM + make -j2 + + diff --git a/CI/azure_templates/ubuntu_build_test.yml b/CI/azure_templates/ubuntu_build_test.yml deleted file mode 100644 index 5d6ab5a..0000000 --- a/CI/azure_templates/ubuntu_build_test.yml +++ /dev/null @@ -1,69 +0,0 @@ -parameters: -# job config - job_name: 'Ubuntu_GCC' - run_mpi_tests: false - run_local_tests: false -#cmake parameters - cm_mpi: 'ON' - cm_omp: 'ON' - cm_single_precision: 'OFF' - cm_gpu_backend: 'OFF' - -jobs: -- job: ${{ parameters.job_name }} - pool: - vmImage: 'ubuntu-latest' - - container: adhocman/master:ubuntu_18_04_cuda_9 - - strategy: - matrix: - Release: - BUILD_TYPE: 'RELEASE' - Debug: - BUILD_TYPE: 'DEBUG' - maxParallel: 2 - - variables: - SPFFT_MPI: ${{ parameters.cm_mpi }} - SPFFT_OMP: ${{ parameters.cm_omp }} - SPFFT_SINGLE_PRECISION: ${{ parameters.cm_single_precision }} - SPFFT_GPU_BACKEND: ${{ parameters.cm_gpu_backend }} - - steps: - - script: | - mkdir -p build - cd build - CUDACXX=/usr/local/cuda-9.0/bin/nvcc /opt/cmake-3.11.0/bin/cmake .. -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DSPFFT_BUILD_TESTS=OFF -DSPFFT_MPI=${SPFFT_MPI} -DSPFFT_OMP=${SPFFT_OMP} -DSPFFT_SINGLE_PRECISION=${SPFFT_SINGLE_PRECISION} -DSPFFT_GPU_BACKEND=${SPFFT_GPU_BACKEND} - make VERBOSE=1 - displayName: 'Build library' - - - script: | - cd build - mkdir -p install_dir - INSTALL_DIR=$(pwd)/install_dir - /opt/cmake-3.11.0/bin/cmake .. -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} - make VERBOSE=1 install - test -f ${INSTALL_DIR}/lib/libspfft.so - test -f ${INSTALL_DIR}/include/spfft/spfft.hpp - test -f ${INSTALL_DIR}/include/spfft/spfft.h - displayName: 'Install library' - - - script: | - mkdir -p build - cd build - /opt/cmake-3.11.0/bin/cmake .. -DSPFFT_BUILD_TESTS=ON - make VERBOSE=1 - displayName: 'Build tests' - - - script: | - export OMP_NUM_THREADS=2 - ./build/tests/run_local_tests - displayName: 'Run local tests' - condition: and(succeeded(), eq('${{ parameters.run_local_tests }}', 'true')) - - - script: | - export OMP_NUM_THREADS=2 - mpirun -n 2 ./build/tests/run_mpi_tests - displayName: 'Run MPI tests' - condition: and(succeeded(), eq('${{ parameters.run_mpi_tests }}', 'true')) diff --git a/CMakeLists.txt b/CMakeLists.txt index c8bc50e..9cbcf30 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.11 FATAL_ERROR) # 3.11 to avoid issues with OpenMP + CUDA -project(SpFFT LANGUAGES CXX VERSION 0.9.10) +project(SpFFT LANGUAGES CXX VERSION 0.9.11) set(SPFFT_SO_VERSION 0) set(SPFFT_VERSION ${PROJECT_VERSION}) @@ -110,10 +110,15 @@ endif() # ROCM if(SPFFT_ROCM) - find_package(ROCM REQUIRED COMPONENTS rocfft) - list(APPEND SPFFT_EXTERNAL_INCLUDE_DIRS ${ROCM_INCLUDE_DIRS}) - list(APPEND SPFFT_EXTERNAL_LIBS ${ROCM_LIBRARIES}) - list(APPEND SPFFT_EXTERNAL_COMPILE_OPTIONS ${ROCM_DEFINITIONS}) + if(NOT HIP_HCC_FLAGS) + message(STATUS "Using default AMD gpu targets: gfx803, gfx900, gfx906. Set HIP_HCC_FLAGS to override.") + set(HIP_HCC_FLAGS ${HIP_HCC_FLAGS} --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906) + endif() + find_package(HIP REQUIRED) + find_package(HIPLIBS REQUIRED) + find_package(ROCFFT REQUIRED) + list(APPEND SPFFT_EXTERNAL_LIBS HIPLIBS::hiplibs ROCFFT::rocfft) + list(APPEND SPFFT_EXTERNAL_COMPILE_OPTIONS -D__HIP_PLATFORM_HCC__) # required for parsing HIP headers with another compiler endif() diff --git a/README.md b/README.md index 1c45725..aa0a8f7 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,11 @@ -[![Build Status](https://dev.azure.com/zeadreamplay/SpFFT/_apis/build/status/eth-cscs.SpFFT?branchName=master)](https://dev.azure.com/zeadreamplay/SpFFT/_build/latest?definitionId=3&branchName=master) +[![CI](https://github.com/eth-cscs/SpFFT/workflows/CI/badge.svg)](https://github.com/eth-cscs/SpFFT/actions?query=workflow%3ACI) [![Documentation](https://readthedocs.org/projects/spfft/badge/?version=latest)](https://spfft.readthedocs.io/en/latest/?badge=latest) [![License](https://img.shields.io/badge/license-BSD-blue.svg)](https://raw.githubusercontent.com/eth-cscs/SpFFT/master/LICENSE) # SpFFT SpFFT - A 3D FFT library for sparse frequency domain data written in C++ with support for MPI, OpenMP, CUDA and ROCm. -It was originally intended for transforms of data with spherical cutoff in frequency domain, as required by some computational material science codes. -For distributed computations, SpFFT uses a slab decomposition in space domain and pencil decomposition in frequency domain (all sparse data within a pencil must be on one rank). +Inspired by the need of some computational material science applications with spherical cutoff data in frequency domain, SpFFT provides Fast Fourier Transformations of sparse frequency domain data. For distributed computations with MPI, slab decomposition in space domain and pencil decomposition in frequency domain (sparse data within a pencil / column must be on one rank) is used. @@ -15,17 +14,17 @@ For distributed computations, SpFFT uses a slab decomposition in space domain an ### Design Goals - Sparse frequency domain input - Reuse of pre-allocated memory -- Support of negative indexing for frequency domain data +- Support for shifted indexing with centered zero-frequency +- Optional parallelization and GPU acceleration - Unified interface for calculations on CPUs and GPUs -- Support of Complex-To-Real and Real-To-Complex transforms, where the full hermitian symmetry property is utilized. +- Support of Complex-To-Real and Real-To-Complex transforms, where the full hermitian symmetry property is utilized - C++, C and Fortran interfaces -- Parallelization and acceleration are optional ### Interface Design To allow for pre-allocation and reuse of memory, the design is based on two classes: -- **Grid**: Allocates memory for transforms up to a given size in each dimension. -- **Transform**: Is associated with a *Grid* and can have any size up to the *Grid* dimensions. A *Transform* holds a counted reference to the underlying *Grid*. Therefore, *Transforms* created with the same *Grid* share memory, which is only freed, once the *Grid* and all associated *Transforms* are destroyed. +- **Grid**: Provides memory for transforms up to a given size. +- **Transform**: Created with information on sparse input data and is associated with a *Grid*. Maximum size is limited by *Grid* dimensions. Internal reference counting to *Grid* objects guarantee a valid state until *Transform* object destruction. The user provides memory for storing sparse frequency domain data, while a *Transform* provides memory for space domain data. This implies, that executing a *Transform* will override the space domain data of all other *Transforms* associated with the same *Grid*. @@ -159,3 +158,12 @@ int main(int argc, char** argv) { return 0; } ``` + +## Acknowledgements +This work was supported by: + + +|![ethz](docs/images/logo_ethz.png) | [**Swiss Federal Institute of Technology in Zurich**](https://www.ethz.ch/) | +|:----:|:----:| +|![cscs](docs/images/logo_cscs.png) | [**Swiss National Supercomputing Centre**](https://www.cscs.ch/) | +|![max](docs/images/logo_max.png) | [**MAterials design at the eXascale**](http://www.max-centre.eu)
(Horizon2020, grant agreement MaX CoE, No. 824143) | diff --git a/azure-pipelines.yml b/azure-pipelines.yml deleted file mode 100644 index 84c2037..0000000 --- a/azure-pipelines.yml +++ /dev/null @@ -1,69 +0,0 @@ -####################### -# Linux -####################### -jobs: -# CUDA build -- template: CI/azure_templates/ubuntu_build_test.yml - parameters: - job_name: "Ubuntu_GCC_CUDA" - run_mpi_tests: false - run_local_tests: false - cm_mpi: 'ON' - cm_omp: 'ON' - cm_single_precision: 'ON' - cm_gpu_backend: 'CUDA' - -# buld and test without MPI and OpenMP -- template: CI/azure_templates/ubuntu_build_test.yml - parameters: - job_name: "Ubuntu_GCC_no_parallel" - run_mpi_tests: false - run_local_tests: true - cm_mpi: 'OFF' - cm_omp: 'OFF' - cm_single_precision: 'OFF' - cm_gpu_backend: 'OFF' - -# buld and test with full parallelization -- template: CI/azure_templates/ubuntu_build_test.yml - parameters: - job_name: "Ubuntu_GCC_parallel" - run_mpi_tests: true - run_local_tests: true - cm_mpi: 'ON' - cm_omp: 'ON' - cm_single_precision: 'OFF' - cm_gpu_backend: 'OFF' - -####################### -# macOS -####################### -- job: macOS_Clang - pool: - vmImage: 'macOS-10.14' - - steps: - - script: | - /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" - displayName: 'Install Homebrew' - - - script: | - brew install fftw - brew install open-mpi - displayName: 'Install dependencies' - - # Apple Clang does not support OpenMP - - script: | - mkdir -p build - cd build - cmake .. -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DSPFFT_BUILD_TESTS=ON -DSPFFT_MPI=ON -DSPFFT_OMP=OFF - make VERBOSE=1 - displayName: 'Build' - - - script: | - ./build/tests/run_local_tests - displayName: 'Run local tests' - - - script: | - mpirun -n 2 ./build/tests/run_mpi_tests - displayName: 'Run MPI tests' diff --git a/cmake/modules/FindHIP.cmake b/cmake/modules/FindHIP.cmake new file mode 100644 index 0000000..1a8b5e5 --- /dev/null +++ b/cmake/modules/FindHIP.cmake @@ -0,0 +1,593 @@ +############################################################################### +# FindHIP.cmake +############################################################################### + +# Copyright (c) 2008-2020 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +############################################################################### +# SET: Variable defaults +############################################################################### +# User defined flags +set(HIP_HIPCC_FLAGS "" CACHE STRING "Semicolon delimited flags for HIPCC") +set(HIP_HCC_FLAGS "" CACHE STRING "Semicolon delimited flags for HCC") +set(HIP_NVCC_FLAGS "" CACHE STRING "Semicolon delimted flags for NVCC") +mark_as_advanced(HIP_HIPCC_FLAGS HIP_HCC_FLAGS HIP_NVCC_FLAGS) +set(_hip_configuration_types ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE} Debug MinSizeRel Release RelWithDebInfo) +list(REMOVE_DUPLICATES _hip_configuration_types) +foreach(config ${_hip_configuration_types}) + string(TOUPPER ${config} config_upper) + set(HIP_HIPCC_FLAGS_${config_upper} "" CACHE STRING "Semicolon delimited flags for HIPCC") + set(HIP_HCC_FLAGS_${config_upper} "" CACHE STRING "Semicolon delimited flags for HCC") + set(HIP_NVCC_FLAGS_${config_upper} "" CACHE STRING "Semicolon delimited flags for NVCC") + mark_as_advanced(HIP_HIPCC_FLAGS_${config_upper} HIP_HCC_FLAGS_${config_upper} HIP_NVCC_FLAGS_${config_upper}) +endforeach() +option(HIP_HOST_COMPILATION_CPP "Host code compilation mode" ON) +option(HIP_VERBOSE_BUILD "Print out the commands run while compiling the HIP source file. With the Makefile generator this defaults to VERBOSE variable specified on the command line, but can be forced on with this option." OFF) +mark_as_advanced(HIP_HOST_COMPILATION_CPP) + +############################################################################### +# Set HIP CMAKE Flags +############################################################################### +# Copy the invocation styles from CXX to HIP +set(CMAKE_HIP_ARCHIVE_CREATE ${CMAKE_CXX_ARCHIVE_CREATE}) +set(CMAKE_HIP_ARCHIVE_APPEND ${CMAKE_CXX_ARCHIVE_APPEND}) +set(CMAKE_HIP_ARCHIVE_FINISH ${CMAKE_CXX_ARCHIVE_FINISH}) +set(CMAKE_SHARED_LIBRARY_SONAME_HIP_FLAG ${CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG}) +set(CMAKE_SHARED_LIBRARY_CREATE_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS}) +set(CMAKE_SHARED_LIBRARY_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_CXX_FLAGS}) +#set(CMAKE_SHARED_LIBRARY_LINK_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS}) +set(CMAKE_SHARED_LIBRARY_RUNTIME_HIP_FLAG ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG}) +set(CMAKE_SHARED_LIBRARY_RUNTIME_HIP_FLAG_SEP ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG_SEP}) +set(CMAKE_SHARED_LIBRARY_LINK_STATIC_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_LINK_STATIC_CXX_FLAGS}) +set(CMAKE_SHARED_LIBRARY_LINK_DYNAMIC_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_LINK_DYNAMIC_CXX_FLAGS}) + +# Set the CMake Flags to use the HCC Compilier. +set(CMAKE_HIP_CREATE_SHARED_LIBRARY "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_PATH} -o ") +set(CMAKE_HIP_CREATE_SHARED_MODULE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_PATH} -o -shared" ) +set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_PATH} -o ") + +############################################################################### +# FIND: HIP and associated helper binaries +############################################################################### + +get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_DIR}/../" REALPATH) + +# HIP is supported on Linux only +if(UNIX AND NOT APPLE AND NOT CYGWIN) + # Search for HIP installation + if(NOT HIP_ROOT_DIR) + # Search in user specified path first + find_path( + HIP_ROOT_DIR + NAMES bin/hipconfig + PATHS + "$ENV{ROCM_PATH}/hip" + ENV HIP_PATH + ${_IMPORT_PREFIX} + /opt/rocm/hip + DOC "HIP installed location" + NO_DEFAULT_PATH + ) + if(NOT EXISTS ${HIP_ROOT_DIR}) + if(HIP_FIND_REQUIRED) + message(FATAL_ERROR "Specify HIP_ROOT_DIR") + elseif(NOT HIP_FIND_QUIETLY) + message("HIP_ROOT_DIR not found or specified") + endif() + endif() + # And push it back to the cache + set(HIP_ROOT_DIR ${HIP_ROOT_DIR} CACHE PATH "HIP installed location" FORCE) + endif() + + # Find HIPCC executable + find_program( + HIP_HIPCC_EXECUTABLE + NAMES hipcc + PATHS + "${HIP_ROOT_DIR}" + ENV ROCM_PATH + ENV HIP_PATH + /opt/rocm + /opt/rocm/hip + PATH_SUFFIXES bin + NO_DEFAULT_PATH + ) + if(NOT HIP_HIPCC_EXECUTABLE) + # Now search in default paths + find_program(HIP_HIPCC_EXECUTABLE hipcc) + endif() + mark_as_advanced(HIP_HIPCC_EXECUTABLE) + + # Find HIPCONFIG executable + find_program( + HIP_HIPCONFIG_EXECUTABLE + NAMES hipconfig + PATHS + "${HIP_ROOT_DIR}" + ENV ROCM_PATH + ENV HIP_PATH + /opt/rocm + /opt/rocm/hip + PATH_SUFFIXES bin + NO_DEFAULT_PATH + ) + if(NOT HIP_HIPCONFIG_EXECUTABLE) + # Now search in default paths + find_program(HIP_HIPCONFIG_EXECUTABLE hipconfig) + endif() + mark_as_advanced(HIP_HIPCONFIG_EXECUTABLE) + + # Find HIPCC_CMAKE_LINKER_HELPER executable + find_program( + HIP_HIPCC_CMAKE_LINKER_HELPER + NAMES hipcc_cmake_linker_helper + PATHS + "${HIP_ROOT_DIR}" + ENV ROCM_PATH + ENV HIP_PATH + /opt/rocm + /opt/rocm/hip + PATH_SUFFIXES bin + NO_DEFAULT_PATH + ) + if(NOT HIP_HIPCC_CMAKE_LINKER_HELPER) + # Now search in default paths + find_program(HIP_HIPCC_CMAKE_LINKER_HELPER hipcc_cmake_linker_helper) + endif() + mark_as_advanced(HIP_HIPCC_CMAKE_LINKER_HELPER) + + if(HIP_HIPCONFIG_EXECUTABLE AND NOT HIP_VERSION) + # Compute the version + execute_process( + COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --version + OUTPUT_VARIABLE _hip_version + ERROR_VARIABLE _hip_error + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_STRIP_TRAILING_WHITESPACE + ) + if(NOT _hip_error) + set(HIP_VERSION ${_hip_version} CACHE STRING "Version of HIP as computed from hipcc") + else() + set(HIP_VERSION "0.0.0" CACHE STRING "Version of HIP as computed by FindHIP()") + endif() + mark_as_advanced(HIP_VERSION) + endif() + if(HIP_VERSION) + string(REPLACE "." ";" _hip_version_list "${HIP_VERSION}") + list(GET _hip_version_list 0 HIP_VERSION_MAJOR) + list(GET _hip_version_list 1 HIP_VERSION_MINOR) + list(GET _hip_version_list 2 HIP_VERSION_PATCH) + set(HIP_VERSION_STRING "${HIP_VERSION}") + endif() + + if(HIP_HIPCONFIG_EXECUTABLE AND NOT HIP_PLATFORM) + # Compute the platform + execute_process( + COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --platform + OUTPUT_VARIABLE _hip_platform + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + set(HIP_PLATFORM ${_hip_platform} CACHE STRING "HIP platform as computed by hipconfig") + mark_as_advanced(HIP_PLATFORM) + endif() +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( + HIP + REQUIRED_VARS + HIP_ROOT_DIR + HIP_HIPCC_EXECUTABLE + HIP_HIPCONFIG_EXECUTABLE + HIP_PLATFORM + VERSION_VAR HIP_VERSION + ) + +############################################################################### +# MACRO: Locate helper files +############################################################################### +macro(HIP_FIND_HELPER_FILE _name _extension) + set(_hip_full_name "${_name}.${_extension}") + get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) + set(HIP_${_name} "${CMAKE_CURRENT_LIST_DIR}/FindHIP/${_hip_full_name}") + if(NOT EXISTS "${HIP_${_name}}") + set(error_message "${_hip_full_name} not found in ${CMAKE_CURRENT_LIST_DIR}/FindHIP") + if(HIP_FIND_REQUIRED) + message(FATAL_ERROR "${error_message}") + else() + if(NOT HIP_FIND_QUIETLY) + message(STATUS "${error_message}") + endif() + endif() + endif() + # Set this variable as internal, so the user isn't bugged with it. + set(HIP_${_name} ${HIP_${_name}} CACHE INTERNAL "Location of ${_full_name}" FORCE) +endmacro() + +############################################################################### +hip_find_helper_file(run_make2cmake cmake) +hip_find_helper_file(run_hipcc cmake) +############################################################################### + +############################################################################### +# MACRO: Reset compiler flags +############################################################################### +macro(HIP_RESET_FLAGS) + unset(HIP_HIPCC_FLAGS) + unset(HIP_HCC_FLAGS) + unset(HIP_NVCC_FLAGS) + foreach(config ${_hip_configuration_types}) + string(TOUPPER ${config} config_upper) + unset(HIP_HIPCC_FLAGS_${config_upper}) + unset(HIP_HCC_FLAGS_${config_upper}) + unset(HIP_NVCC_FLAGS_${config_upper}) + endforeach() +endmacro() + +############################################################################### +# MACRO: Separate the options from the sources +############################################################################### +macro(HIP_GET_SOURCES_AND_OPTIONS _sources _cmake_options _hipcc_options _hcc_options _nvcc_options) + set(${_sources}) + set(${_cmake_options}) + set(${_hipcc_options}) + set(${_hcc_options}) + set(${_nvcc_options}) + set(_hipcc_found_options FALSE) + set(_hcc_found_options FALSE) + set(_nvcc_found_options FALSE) + foreach(arg ${ARGN}) + if("x${arg}" STREQUAL "xHIPCC_OPTIONS") + set(_hipcc_found_options TRUE) + set(_hcc_found_options FALSE) + set(_nvcc_found_options FALSE) + elseif("x${arg}" STREQUAL "xHCC_OPTIONS") + set(_hipcc_found_options FALSE) + set(_hcc_found_options TRUE) + set(_nvcc_found_options FALSE) + elseif("x${arg}" STREQUAL "xNVCC_OPTIONS") + set(_hipcc_found_options FALSE) + set(_hcc_found_options FALSE) + set(_nvcc_found_options TRUE) + elseif( + "x${arg}" STREQUAL "xEXCLUDE_FROM_ALL" OR + "x${arg}" STREQUAL "xSTATIC" OR + "x${arg}" STREQUAL "xSHARED" OR + "x${arg}" STREQUAL "xMODULE" + ) + list(APPEND ${_cmake_options} ${arg}) + else() + if(_hipcc_found_options) + list(APPEND ${_hipcc_options} ${arg}) + elseif(_hcc_found_options) + list(APPEND ${_hcc_options} ${arg}) + elseif(_nvcc_found_options) + list(APPEND ${_nvcc_options} ${arg}) + else() + # Assume this is a file + list(APPEND ${_sources} ${arg}) + endif() + endif() + endforeach() +endmacro() + +############################################################################### +# MACRO: Add include directories to pass to the hipcc command +############################################################################### +set(HIP_HIPCC_INCLUDE_ARGS_USER "") +macro(HIP_INCLUDE_DIRECTORIES) + foreach(dir ${ARGN}) + list(APPEND HIP_HIPCC_INCLUDE_ARGS_USER $<$:-I${dir}>) + endforeach() +endmacro() + +############################################################################### +# FUNCTION: Helper to avoid clashes of files with the same basename but different paths +############################################################################### +function(HIP_COMPUTE_BUILD_PATH path build_path) + # Convert to cmake style paths + file(TO_CMAKE_PATH "${path}" bpath) + if(IS_ABSOLUTE "${bpath}") + string(FIND "${bpath}" "${CMAKE_CURRENT_BINARY_DIR}" _binary_dir_pos) + if(_binary_dir_pos EQUAL 0) + file(RELATIVE_PATH bpath "${CMAKE_CURRENT_BINARY_DIR}" "${bpath}") + else() + file(RELATIVE_PATH bpath "${CMAKE_CURRENT_SOURCE_DIR}" "${bpath}") + endif() + endif() + + # Remove leading / + string(REGEX REPLACE "^[/]+" "" bpath "${bpath}") + # Avoid absolute paths by removing ':' + string(REPLACE ":" "_" bpath "${bpath}") + # Avoid relative paths that go up the tree + string(REPLACE "../" "__/" bpath "${bpath}") + # Avoid spaces + string(REPLACE " " "_" bpath "${bpath}") + # Strip off the filename + get_filename_component(bpath "${bpath}" PATH) + + set(${build_path} "${bpath}" PARENT_SCOPE) +endfunction() + +############################################################################### +# MACRO: Parse OPTIONS from ARGN & set variables prefixed by _option_prefix +############################################################################### +macro(HIP_PARSE_HIPCC_OPTIONS _option_prefix) + set(_hip_found_config) + foreach(arg ${ARGN}) + # Determine if we are dealing with a per-configuration flag + foreach(config ${_hip_configuration_types}) + string(TOUPPER ${config} config_upper) + if(arg STREQUAL "${config_upper}") + set(_hip_found_config _${arg}) + # Clear arg to prevent it from being processed anymore + set(arg) + endif() + endforeach() + if(arg) + list(APPEND ${_option_prefix}${_hip_found_config} "${arg}") + endif() + endforeach() +endmacro() + +############################################################################### +# MACRO: Try and include dependency file if it exists +############################################################################### +macro(HIP_INCLUDE_HIPCC_DEPENDENCIES dependency_file) + set(HIP_HIPCC_DEPEND) + set(HIP_HIPCC_DEPEND_REGENERATE FALSE) + + # Create the dependency file if it doesn't exist + if(NOT EXISTS ${dependency_file}) + file(WRITE ${dependency_file} "# Generated by: FindHIP.cmake. Do not edit.\n") + endif() + # Include the dependency file + include(${dependency_file}) + + # Verify the existence of all the included files + if(HIP_HIPCC_DEPEND) + foreach(f ${HIP_HIPCC_DEPEND}) + if(NOT EXISTS ${f}) + # If they aren't there, regenerate the file again + set(HIP_HIPCC_DEPEND_REGENERATE TRUE) + endif() + endforeach() + else() + # No dependencies, so regenerate the file + set(HIP_HIPCC_DEPEND_REGENERATE TRUE) + endif() + + # Regenerate the dependency file if needed + if(HIP_HIPCC_DEPEND_REGENERATE) + set(HIP_HIPCC_DEPEND ${dependency_file}) + file(WRITE ${dependency_file} "# Generated by: FindHIP.cmake. Do not edit.\n") + endif() +endmacro() + +############################################################################### +# MACRO: Prepare cmake commands for the target +############################################################################### +macro(HIP_PREPARE_TARGET_COMMANDS _target _format _generated_files _source_files) + set(_hip_flags "") + string(TOUPPER "${CMAKE_BUILD_TYPE}" _hip_build_configuration) + if(HIP_HOST_COMPILATION_CPP) + set(HIP_C_OR_CXX CXX) + else() + set(HIP_C_OR_CXX C) + endif() + set(generated_extension ${CMAKE_${HIP_C_OR_CXX}_OUTPUT_EXTENSION}) + + # Initialize list of includes with those specified by the user. Append with + # ones specified to cmake directly. + set(HIP_HIPCC_INCLUDE_ARGS ${HIP_HIPCC_INCLUDE_ARGS_USER}) + + # Add the include directories + set(include_directories_generator $) + list(APPEND HIP_HIPCC_INCLUDE_ARGS $<$:-I$>) + + get_directory_property(_hip_include_directories INCLUDE_DIRECTORIES) + list(REMOVE_DUPLICATES _hip_include_directories) + if(_hip_include_directories) + foreach(dir ${_hip_include_directories}) + list(APPEND HIP_HIPCC_INCLUDE_ARGS $<$:-I${dir}>) + endforeach() + endif() + + HIP_GET_SOURCES_AND_OPTIONS(_hip_sources _hip_cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN}) + HIP_PARSE_HIPCC_OPTIONS(HIP_HIPCC_FLAGS ${_hipcc_options}) + HIP_PARSE_HIPCC_OPTIONS(HIP_HCC_FLAGS ${_hcc_options}) + HIP_PARSE_HIPCC_OPTIONS(HIP_NVCC_FLAGS ${_nvcc_options}) + + # Add the compile definitions + set(compile_definition_generator $) + list(APPEND HIP_HIPCC_FLAGS $<$:-D$>) + + # Check if we are building shared library. + set(_hip_build_shared_libs FALSE) + list(FIND _hip_cmake_options SHARED _hip_found_SHARED) + list(FIND _hip_cmake_options MODULE _hip_found_MODULE) + if(_hip_found_SHARED GREATER -1 OR _hip_found_MODULE GREATER -1) + set(_hip_build_shared_libs TRUE) + endif() + list(FIND _hip_cmake_options STATIC _hip_found_STATIC) + if(_hip_found_STATIC GREATER -1) + set(_hip_build_shared_libs FALSE) + endif() + + # If we are building a shared library, add extra flags to HIP_HIPCC_FLAGS + if(_hip_build_shared_libs) + list(APPEND HIP_HCC_FLAGS "-fPIC") + list(APPEND HIP_NVCC_FLAGS "--shared -Xcompiler '-fPIC'") + endif() + + # Set host compiler + set(HIP_HOST_COMPILER "${CMAKE_${HIP_C_OR_CXX}_COMPILER}") + + # Set compiler flags + set(_HIP_HOST_FLAGS "set(CMAKE_HOST_FLAGS ${CMAKE_${HIP_C_OR_CXX}_FLAGS})") + set(_HIP_HIPCC_FLAGS "set(HIP_HIPCC_FLAGS ${HIP_HIPCC_FLAGS})") + set(_HIP_HCC_FLAGS "set(HIP_HCC_FLAGS ${HIP_HCC_FLAGS})") + set(_HIP_NVCC_FLAGS "set(HIP_NVCC_FLAGS ${HIP_NVCC_FLAGS})") + foreach(config ${_hip_configuration_types}) + string(TOUPPER ${config} config_upper) + set(_HIP_HOST_FLAGS "${_HIP_HOST_FLAGS}\nset(CMAKE_HOST_FLAGS_${config_upper} ${CMAKE_${HIP_C_OR_CXX}_FLAGS_${config_upper}})") + set(_HIP_HIPCC_FLAGS "${_HIP_HIPCC_FLAGS}\nset(HIP_HIPCC_FLAGS_${config_upper} ${HIP_HIPCC_FLAGS_${config_upper}})") + set(_HIP_HCC_FLAGS "${_HIP_HCC_FLAGS}\nset(HIP_HCC_FLAGS_${config_upper} ${HIP_HCC_FLAGS_${config_upper}})") + set(_HIP_NVCC_FLAGS "${_HIP_NVCC_FLAGS}\nset(HIP_NVCC_FLAGS_${config_upper} ${HIP_NVCC_FLAGS_${config_upper}})") + endforeach() + + # Reset the output variable + set(_hip_generated_files "") + set(_hip_source_files "") + + # Iterate over all arguments and create custom commands for all source files + foreach(file ${ARGN}) + # Ignore any file marked as a HEADER_FILE_ONLY + get_source_file_property(_is_header ${file} HEADER_FILE_ONLY) + # Allow per source file overrides of the format. Also allows compiling non .cu files. + get_source_file_property(_hip_source_format ${file} HIP_SOURCE_PROPERTY_FORMAT) + if((${file} MATCHES "\\.cu$" OR _hip_source_format) AND NOT _is_header) + set(host_flag FALSE) + else() + set(host_flag TRUE) + endif() + + if(NOT host_flag) + # Determine output directory + HIP_COMPUTE_BUILD_PATH("${file}" hip_build_path) + set(hip_compile_output_dir "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${_target}.dir/${hip_build_path}") + + get_filename_component(basename ${file} NAME) + set(generated_file_path "${hip_compile_output_dir}/${CMAKE_CFG_INTDIR}") + set(generated_file_basename "${_target}_generated_${basename}${generated_extension}") + + # Set file names + set(generated_file "${generated_file_path}/${generated_file_basename}") + set(cmake_dependency_file "${hip_compile_output_dir}/${generated_file_basename}.depend") + set(custom_target_script_pregen "${hip_compile_output_dir}/${generated_file_basename}.cmake.pre-gen") + set(custom_target_script "${hip_compile_output_dir}/${generated_file_basename}.cmake") + + # Set properties for object files + set_source_files_properties("${generated_file}" + PROPERTIES + EXTERNAL_OBJECT true # This is an object file not to be compiled, but only be linked + ) + + # Don't add CMAKE_CURRENT_SOURCE_DIR if the path is already an absolute path + get_filename_component(file_path "${file}" PATH) + if(IS_ABSOLUTE "${file_path}") + set(source_file "${file}") + else() + set(source_file "${CMAKE_CURRENT_SOURCE_DIR}/${file}") + endif() + + # Bring in the dependencies + HIP_INCLUDE_HIPCC_DEPENDENCIES(${cmake_dependency_file}) + + # Configure the build script + configure_file("${HIP_run_hipcc}" "${custom_target_script_pregen}" @ONLY) + file(GENERATE + OUTPUT "${custom_target_script}" + INPUT "${custom_target_script_pregen}" + ) + set(main_dep DEPENDS ${source_file}) + if(CMAKE_GENERATOR MATCHES "Makefiles") + set(verbose_output "$(VERBOSE)") + elseif(HIP_VERBOSE_BUILD) + set(verbose_output ON) + else() + set(verbose_output OFF) + endif() + + # Create up the comment string + file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}") + set(hip_build_comment_string "Building HIPCC object ${generated_file_relative_path}") + + # Build the generated file and dependency file + add_custom_command( + OUTPUT ${generated_file} + # These output files depend on the source_file and the contents of cmake_dependency_file + ${main_dep} + DEPENDS ${HIP_HIPCC_DEPEND} + DEPENDS ${custom_target_script} + # Make sure the output directory exists before trying to write to it. + COMMAND ${CMAKE_COMMAND} -E make_directory "${generated_file_path}" + COMMAND ${CMAKE_COMMAND} ARGS + -D verbose:BOOL=${verbose_output} + -D build_configuration:STRING=${_hip_build_configuration} + -D "generated_file:STRING=${generated_file}" + -P "${custom_target_script}" + WORKING_DIRECTORY "${hip_compile_output_dir}" + COMMENT "${hip_build_comment_string}" + ) + + # Make sure the build system knows the file is generated + set_source_files_properties(${generated_file} PROPERTIES GENERATED TRUE) + list(APPEND _hip_generated_files ${generated_file}) + list(APPEND _hip_source_files ${file}) + endif() + endforeach() + + # Set the return parameter + set(${_generated_files} ${_hip_generated_files}) + set(${_source_files} ${_hip_source_files}) +endmacro() + +############################################################################### +# HIP_ADD_EXECUTABLE +############################################################################### +macro(HIP_ADD_EXECUTABLE hip_target) + # Separate the sources from the options + HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN}) + HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options}) + if(_source_files) + list(REMOVE_ITEM _sources ${_source_files}) + endif() + if("x${HCC_HOME}" STREQUAL "x") + if (DEFINED $ENV{ROCM_PATH}) + set(HCC_HOME "$ENV{ROCM_PATH}/hcc") + elseif( DEFINED $ENV{HIP_PATH}) + set(HCC_HOME "$ENV{HIP_PATH}/../hcc") + else() + set(HCC_HOME "/opt/rocm/hcc") + endif() + endif() + set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} -o ") + add_executable(${hip_target} ${_cmake_options} ${_generated_files} ${_sources}) + set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE HIP) +endmacro() + +############################################################################### +# HIP_ADD_LIBRARY +############################################################################### +macro(HIP_ADD_LIBRARY hip_target) + # Separate the sources from the options + HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN}) + HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files _source_files ${_sources} ${_cmake_options} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options}) + if(_source_files) + list(REMOVE_ITEM _sources ${_source_files}) + endif() + add_library(${hip_target} ${_cmake_options} ${_generated_files} ${_sources}) + set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE ${HIP_C_OR_CXX}) +endmacro() + +# vim: ts=4:sw=4:expandtab:smartindent diff --git a/cmake/modules/FindHIP/run_hipcc.cmake b/cmake/modules/FindHIP/run_hipcc.cmake new file mode 100644 index 0000000..a02806c --- /dev/null +++ b/cmake/modules/FindHIP/run_hipcc.cmake @@ -0,0 +1,188 @@ +############################################################################### +# Runs commands using HIPCC +############################################################################### + +# Copyright (c) 2008-2020 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +############################################################################### +# This file runs the hipcc commands to produce the desired output file +# along with the dependency file needed by CMake to compute dependencies. +# +# Input variables: +# +# verbose:BOOL=<> OFF: Be as quiet as possible (default) +# ON : Describe each step +# build_configuration:STRING=<> Build configuration. Defaults to Debug. +# generated_file:STRING=<> File to generate. Mandatory argument. + +if(NOT build_configuration) + set(build_configuration Debug) +endif() +if(NOT generated_file) + message(FATAL_ERROR "You must specify generated_file on the command line") +endif() + +# Set these up as variables to make reading the generated file easier +set(HIP_HIPCC_EXECUTABLE "@HIP_HIPCC_EXECUTABLE@") # path +set(HIP_HIPCONFIG_EXECUTABLE "@HIP_HIPCONFIG_EXECUTABLE@") #path +set(HIP_HOST_COMPILER "@HIP_HOST_COMPILER@") # path +set(CMAKE_COMMAND "@CMAKE_COMMAND@") # path +set(HIP_run_make2cmake "@HIP_run_make2cmake@") # path +set(HCC_HOME "@HCC_HOME@") #path + +@HIP_HOST_FLAGS@ +@_HIP_HIPCC_FLAGS@ +@_HIP_HCC_FLAGS@ +@_HIP_NVCC_FLAGS@ +set(HIP_HIPCC_INCLUDE_ARGS "@HIP_HIPCC_INCLUDE_ARGS@") # list (needs to be in quotes to handle spaces properly) + +set(cmake_dependency_file "@cmake_dependency_file@") # path +set(source_file "@source_file@") # path +set(host_flag "@host_flag@") # bool + +# Determine compiler and compiler flags +execute_process(COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --platform OUTPUT_VARIABLE HIP_PLATFORM OUTPUT_STRIP_TRAILING_WHITESPACE) +if(NOT host_flag) + set(__CC ${HIP_HIPCC_EXECUTABLE}) + if(HIP_PLATFORM STREQUAL "hcc") + if(NOT "x${HCC_HOME}" STREQUAL "x") + set(ENV{HCC_HOME} ${HCC_HOME}) + endif() + set(__CC_FLAGS ${HIP_HIPCC_FLAGS} ${HIP_HCC_FLAGS} ${HIP_HIPCC_FLAGS_${build_configuration}} ${HIP_HCC_FLAGS_${build_configuration}}) + else() + set(__CC_FLAGS ${HIP_HIPCC_FLAGS} ${HIP_NVCC_FLAGS} ${HIP_HIPCC_FLAGS_${build_configuration}} ${HIP_NVCC_FLAGS_${build_configuration}}) + endif() +else() + set(__CC ${HIP_HOST_COMPILER}) + set(__CC_FLAGS ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}}) +endif() +set(__CC_INCLUDES ${HIP_HIPCC_INCLUDE_ARGS}) + +# hip_execute_process - Executes a command with optional command echo and status message. +# status - Status message to print if verbose is true +# command - COMMAND argument from the usual execute_process argument structure +# ARGN - Remaining arguments are the command with arguments +# HIP_result - Return value from running the command +macro(hip_execute_process status command) + set(_command ${command}) + if(NOT "x${_command}" STREQUAL "xCOMMAND") + message(FATAL_ERROR "Malformed call to hip_execute_process. Missing COMMAND as second argument. (command = ${command})") + endif() + if(verbose) + execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status}) + # Build command string to print + set(hip_execute_process_string) + foreach(arg ${ARGN}) + # Escape quotes if any + string(REPLACE "\"" "\\\"" arg ${arg}) + # Surround args with spaces with quotes + if(arg MATCHES " ") + list(APPEND hip_execute_process_string "\"${arg}\"") + else() + list(APPEND hip_execute_process_string ${arg}) + endif() + endforeach() + # Echo the command + execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${hip_execute_process_string}) + endif() + # Run the command + execute_process(COMMAND ${ARGN} RESULT_VARIABLE HIP_result) +endmacro() + +# Delete the target file +hip_execute_process( + "Removing ${generated_file}" + COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}" + ) + +# Generate the dependency file +hip_execute_process( + "Generating dependency file: ${cmake_dependency_file}.pre" + COMMAND "${__CC}" + -M + "${source_file}" + -o "${cmake_dependency_file}.pre" + ${__CC_FLAGS} + ${__CC_INCLUDES} + ) + +if(HIP_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Generate the cmake readable dependency file to a temp file +hip_execute_process( + "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp" + COMMAND "${CMAKE_COMMAND}" + -D "input_file:FILEPATH=${cmake_dependency_file}.pre" + -D "output_file:FILEPATH=${cmake_dependency_file}.tmp" + -D "verbose=${verbose}" + -P "${HIP_run_make2cmake}" + ) + +if(HIP_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Copy the file if it is different +hip_execute_process( + "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}" + COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}" + ) + +if(HIP_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Delete the temporary file +hip_execute_process( + "Removing ${cmake_dependency_file}.tmp and ${cmake_dependency_file}.pre" + COMMAND "${CMAKE_COMMAND}" -E remove "${cmake_dependency_file}.tmp" "${cmake_dependency_file}.pre" + ) + +if(HIP_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Generate the output file +hip_execute_process( + "Generating ${generated_file}" + COMMAND "${__CC}" + -c + "${source_file}" + -o "${generated_file}" + ${__CC_FLAGS} + ${__CC_INCLUDES} + ) + +if(HIP_result) + # Make sure that we delete the output file + hip_execute_process( + "Removing ${generated_file}" + COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}" + ) + message(FATAL_ERROR "Error generating file ${generated_file}") +else() + if(verbose) + message("Generated ${generated_file} successfully.") + endif() +endif() +# vim: ts=4:sw=4:expandtab:smartindent diff --git a/cmake/modules/FindHIP/run_make2cmake.cmake b/cmake/modules/FindHIP/run_make2cmake.cmake new file mode 100644 index 0000000..e7b179a --- /dev/null +++ b/cmake/modules/FindHIP/run_make2cmake.cmake @@ -0,0 +1,70 @@ +############################################################################### +# Computes dependencies using HIPCC +############################################################################### + +# Copyright (c) 2008-2020 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +############################################################################### +# This file converts dependency files generated using hipcc to a format that +# cmake can understand. + +# Input variables: +# +# input_file:STRING=<> Dependency file to parse. Required argument +# output_file:STRING=<> Output file to generate. Required argument + +if(NOT input_file OR NOT output_file) + message(FATAL_ERROR "You must specify input_file and output_file on the command line") +endif() + +file(READ ${input_file} depend_text) + +if (NOT "${depend_text}" STREQUAL "") + string(REPLACE " /" "\n/" depend_text ${depend_text}) + string(REGEX REPLACE "^.*:" "" depend_text ${depend_text}) + string(REGEX REPLACE "[ \\\\]*\n" ";" depend_text ${depend_text}) + + set(dependency_list "") + + foreach(file ${depend_text}) + string(REGEX REPLACE "^ +" "" file ${file}) + if(NOT EXISTS "${file}") + message(WARNING " Removing non-existent dependency file: ${file}") + set(file "") + endif() + + if(NOT IS_DIRECTORY "${file}") + get_filename_component(file_absolute "${file}" ABSOLUTE) + list(APPEND dependency_list "${file_absolute}") + endif() + endforeach() +endif() + +# Remove the duplicate entries and sort them. +list(REMOVE_DUPLICATES dependency_list) +list(SORT dependency_list) + +foreach(file ${dependency_list}) + set(hip_hipcc_depend "${hip_hipcc_depend} \"${file}\"\n") +endforeach() + +file(WRITE ${output_file} "# Generated by: FindHIP.cmake. Do not edit.\nSET(HIP_HIPCC_DEPEND\n ${hip_hipcc_depend})\n\n") +# vim: ts=4:sw=4:expandtab:smartindent diff --git a/cmake/modules/FindHIPLIBS.cmake b/cmake/modules/FindHIPLIBS.cmake new file mode 100644 index 0000000..5a2ce1f --- /dev/null +++ b/cmake/modules/FindHIPLIBS.cmake @@ -0,0 +1,115 @@ +# Copyright (c) 2019 ETH Zurich +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + + +#.rst: +# FindHIPLIBS +# ----------- +# +# This module searches for the fftw3 library. +# +# The following variables are set +# +# :: +# +# HIPLIBS_FOUND - True if hiplibs is found +# HIPLIBS_LIBRARIES - The required libraries +# HIPLIBS_INCLUDE_DIRS - The required include directory +# +# The following import target is created +# +# :: +# +# HIPLIBS::hiplibs + +#set paths to look for library from ROOT variables.If new policy is set, find_library() automatically uses them. +if(NOT POLICY CMP0074) + set(_HIPLIBS_PATHS ${HIPLIBS_ROOT} $ENV{HIPLIBS_ROOT}) +endif() + +if(NOT _HIPLIBS_PATHS) + set(_HIPLIBS_PATHS /opt/rocm $ENV{ROCM_HOME}) +endif() + +find_path( + HIPLIBS_HIP_INCLUDE_DIRS + NAMES "hip/hip_runtime_api.h" + HINTS ${_HIPLIBS_PATHS} + PATH_SUFFIXES "hip/include" "include" +) +find_library( + HIPLIBS_HIP_LIBRARY + NAMES "amdhip64" "hip_hcc" + HINTS ${_HIPLIBS_PATHS} + PATH_SUFFIXES "hip/lib" "lib" "lib64" +) +find_library( + HIPLIBS_HSA_LIBRARY + NAMES "hsa-runtime64" + HINTS ${_HIPLIBS_PATHS} + PATH_SUFFIXES "hsa/lib" "lib" "lib64" +) +find_library( + HIPLIBS_THUNK_LIBRARY + NAMES "hsakmt" + HINTS ${_HIPLIBS_PATHS} + PATH_SUFFIXES "hsa/lib" "lib" "lib64" +) +find_path( + HIPLIBS_HSA_INCLUDE_DIRS + NAMES "hsa/hsa.h" + HINTS ${_HIPLIBS_PATHS} + PATH_SUFFIXES "hip/include" "include" +) + +# check if found +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(HIPLIBS REQUIRED_VARS HIPLIBS_HIP_INCLUDE_DIRS HIPLIBS_HIP_LIBRARY HIPLIBS_HSA_LIBRARY HIPLIBS_THUNK_LIBRARY HIPLIBS_HSA_INCLUDE_DIRS) + + +if(HIPLIBS_HIP_LIBRARY AND HIPLIBS_HSA_LIBRARY AND HIPLIBS_THUNK_LIBRARY) + set(HIPLIBS_LIBRARIES ${HIPLIBS_HIP_LIBRARY} ${HIPLIBS_HSA_LIBRARY} ${HIPLIBS_THUNK_LIBRARY} CACHE STRING "Path to libraries.") +else() + set(HIPLIBS_LIBRARIES HIPLIBS_LIBRARIES-NOTFOUND CACHE STRING "Path to libraries.") +endif() + +if(HIPLIBS_HIP_INCLUDE_DIRS AND HIPLIBS_HSA_INCLUDE_DIRS) + set(HIPLIBS_INCLUDE_DIRS ${HIPLIBS_HIP_INCLUDE_DIRS} ${HIPLIBS_HSA_INCLUDE_DIRS} CACHE STRING "Path to files.") +else() + set(HIPLIBS_INCLUDE_DIRS HIPLIBS_INCLUDE_DIRS-NOTFOUND CACHE STRING "Path to files.") +endif() + +# add target to link against +if(HIPLIBS_FOUND) + if(NOT TARGET HIPLIBS::hiplibs) + add_library(HIPLIBS::hiplibs INTERFACE IMPORTED) + endif() + set_property(TARGET HIPLIBS::hiplibs PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${HIPLIBS_INCLUDE_DIRS}) + set_property(TARGET HIPLIBS::hiplibs PROPERTY INTERFACE_LINK_LIBRARIES ${HIPLIBS_LIBRARIES}) +endif() + +# prevent clutter in cache +MARK_AS_ADVANCED(HIPLIBS_FOUND HIPLIBS_LIBRARIES HIPLIBS_INCLUDE_DIRS HIPLIBS_HIP_INCLUDE_DIRS HIPLIBS_HIP_LIBRARY HIPLIBS_HSA_LIBRARY HIPLIBS_THUNK_LIBRARY HIPLIBS_HSA_INCLUDE_DIRS) diff --git a/cmake/modules/FindROCFFT.cmake b/cmake/modules/FindROCFFT.cmake new file mode 100644 index 0000000..4754c4c --- /dev/null +++ b/cmake/modules/FindROCFFT.cmake @@ -0,0 +1,84 @@ +# Copyright (c) 2019 ETH Zurich +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + + +#.rst: +# FindROCFFT +# ----------- +# +# This module searches for the fftw3 library. +# +# The following variables are set +# +# :: +# +# ROCFFT_FOUND - True if rocfft is found +# ROCFFT_LIBRARIES - The required libraries +# ROCFFT_INCLUDE_DIRS - The required include directory +# +# The following import target is created +# +# :: +# +# ROCFFT::rocfft + +#set paths to look for library from ROOT variables.If new policy is set, find_library() automatically uses them. +if(NOT POLICY CMP0074) + set(_ROCFFT_PATHS ${ROCFFT_ROOT} $ENV{ROCFFT_ROOT}) +endif() + +if(NOT _ROCFFT_PATHS) + set(_ROCFFT_PATHS /opt/rocm $ENV{ROCM_HOME}) +endif() + +find_library( + ROCFFT_LIBRARIES + NAMES "rocfft" + HINTS ${_ROCFFT_PATHS} + PATH_SUFFIXES "rocfft/lib" "rocfft" +) +find_path( + ROCFFT_INCLUDE_DIRS + NAMES "rocfft.h" + HINTS ${_ROCFFT_PATHS} + PATH_SUFFIXES "rocfft/include" "include" +) + +# check if found +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(ROCFFT REQUIRED_VARS ROCFFT_INCLUDE_DIRS ROCFFT_LIBRARIES ) + +# add target to link against +if(ROCFFT_FOUND) + if(NOT TARGET ROCFFT::rocfft) + add_library(ROCFFT::rocfft INTERFACE IMPORTED) + endif() + set_property(TARGET ROCFFT::rocfft PROPERTY INTERFACE_LINK_LIBRARIES ${ROCFFT_LIBRARIES}) + set_property(TARGET ROCFFT::rocfft PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${ROCFFT_INCLUDE_DIRS}) +endif() + +# prevent clutter in cache +MARK_AS_ADVANCED(ROCFFT_FOUND ROCFFT_LIBRARIES ROCFFT_INCLUDE_DIRS) diff --git a/cmake/modules/FindROCM.cmake b/cmake/modules/FindROCM.cmake deleted file mode 100644 index 15dfce2..0000000 --- a/cmake/modules/FindROCM.cmake +++ /dev/null @@ -1,542 +0,0 @@ -# - Find the ROCM library -# -# Usage: -# find_package(ROCM [REQUIRED] [QUIET] COMPONENTS [components ...] ) -# -# Compnents available: -# - hipblas -# - hipsparse -# - rocfft -# - rocblas -# - rocsparse -# -# Commands made available: -# rocm_hip_add_library( [STATIC | SHARED] [FLAGS] [OUTPUT_DIR] [INCLUDE_DIRS] ) -# --- Compiles source files into an imported library with hipcc. No global defitions or include directories are taken into account. -# -# The following variables can be set for compilation: -# ROCM_HIPCC_FLAGS ----------------- Flags passed on to hipcc compiler -# ROCM_HIPCC_FLAGS_DEBUG ----------- Flags passed on to hipcc compiler in DEBUG mode -# ROCM_HIPCC_FLAGS_RELEASE --------- Flags passed on to hipcc compiler in RELEASE mode -# ROCM_HIPCC_FLAGS_RELWITHDEBINFO -- Flags passed on to hipcc compiler in RELWITHDEBINFO mode -# ROCM_HIPCC_FLAGS_MINSIZEREL ------ Flags passed on to hipcc compiler in MINSIZEREL mode -# -# The following variables can be set to specify a search location -# ROCM_ROOT ------------ if set, the libraries are exclusively searched under this path -# _ROOT ------ if set, search for component specific libraries at given path. Takes precedence over ROCM_ROOT -# -# The following variables are generated: -# ROCM_FOUND ------------------- true if ROCM is found on the system -# ROCM_LIBRARIES --------------- full path to ROCM -# ROCM_INCLUDE_DIRS ------------ ROCM include directories -# ROCM_DEFINITIONS ------------- ROCM definitions -# ROCM_HCC_EXECUTABLE ---------- ROCM HCC compiler -# ROCM_HCC-CONFIG_EXECUTABLE --- ROCM HCC config -# ROCM_HIPCC_EXECUTABLE -------- HIPCC compiler -# ROCM_HIPCONFIG_EXECUTABLE ---- hip config -# ROCM_HIPIFY-PERL_EXECUTABLE -- hipify -# ROCM_HIP_PLATFORM ------------ Platform identifier: "hcc" or "nvcc" -# - - -set(ROCM_HIPCC_FLAGS "" CACHE STRING "Flags for HIPCC Compiler") -set(ROCM_HIPCC_FLAGS_DEBUG "-g" CACHE STRING "Debug flags for HIPCC Compiler") -set(ROCM_HIPCC_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "Release flags for HIPCC Compiler") -set(ROCM_HIPCC_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG" CACHE STRING "Release with debug flags for HIPCC Compiler") -set(ROCM_HIPCC_FLAGS_MINSIZEREL "-Os -DNDEBUG" CACHE STRING "Minimum size flags for HIPCC Compiler") - -#If environment variable ROCM_ROOT is specified -if(NOT ROCM_ROOT AND ENV{ROCM_ROOT}) - file(TO_CMAKE_PATH "$ENV{ROCM_ROOT}" ROCM_ROOT) - set(ROCM_ROOT "${ROCM_ROOT}" CACHE PATH "Root directory for ROCM installation.") -endif() - -set(ROCM_FOUND FALSE) -set(ROCM_LIBRARIES) -set(ROCM_INCLUDE_DIRS) -set(ROCM_DEFINITIONS) -unset(ROCM_HCC_EXECUTABLE) -unset(ROCM_HCC-CONFIG_EXECUTABLE) -unset(ROCM_HIPCC_EXECUTABLE) -unset(ROCM_HIPCONFIG_EXECUTABLE) -unset(ROCM_HIPFIY-PERL-EXECUTABLE) -unset(ROCM_HIP_PLATFORM) - -include(FindPackageHandleStandardArgs) - - -# Finds libraries and include path for rocm modules -# IN: -# - module_name: name of a module (e.g. hcc) -# - following arguments: name of libraries required -# OUT: -# - ROCM_LIBRARIES: Appends to list of libraries -# - ROCM_INCLUDE_DIRS: Appends to include dirs -function(find_rcm_module module_name) - # convert module name to upper case for consistent variable naming - string(TOUPPER ${module_name} MODULE_NAME_UPPER) - - - if(DEFINED ${MODULE_NAME_UPPER}_ROOT) - set(ROOT_DIR ${${MODULE_NAME_UPPER}_ROOT}) - elseif(DEFINED ROCM_ROOT) - set(ROOT_DIR ${ROCM_ROOT}) - endif() - - # get abosolute path to avoid issues with tilde - if(ROOT_DIR) - get_filename_component(ROOT_DIR ${ROOT_DIR} ABSOLUTE) - endif() - - # remove module name from input arguments - set(LIBRARY_NAMES ${ARGV}) - list(REMOVE_AT LIBRARY_NAMES 0) - - if(${ROCM_FIND_REQUIRED}) - set(ROCM_${MODULE_NAME_UPPER}_FIND_REQUIRED TRUE) - else() - set(ROCM_${MODULE_NAME_UPPER}_FIND_REQUIRED FALSE) - endif() - if(${ROCM_FIND_QUIETLY}) - set(ROCM_${MODULE_NAME_UPPER}_FIND_QUIETLY TRUE) - else() - set(ROCM_${MODULE_NAME_UPPER}_FIND_QUIETLY FALSE) - endif() - - set(ROCM_LIBRARIES_${MODULE_NAME_UPPER}) - - if(ROOT_DIR) - # find libraries - foreach(library_name IN LISTS LIBRARY_NAMES) - find_library( - ROCM_LIBRARIES_${library_name} - NAMES ${library_name} - PATHS ${ROOT_DIR} - PATH_SUFFIXES "lib" "${module_name}/lib" - NO_DEFAULT_PATH - ) - find_package_handle_standard_args(ROCM_${MODULE_NAME_UPPER} FAIL_MESSAGE - "For ROCM module ${module_name}, library ${library_name} could not be found. Please specify ROCM_ROOT or ${MODULE_NAME_UPPER}_ROOT." - REQUIRED_VARS ROCM_LIBRARIES_${library_name}) - if(ROCM_LIBRARIES_${library_name}) - list(APPEND ROCM_LIBRARIES_${MODULE_NAME_UPPER} ${ROCM_LIBRARIES_${library_name}}) - mark_as_advanced(ROCM_LIBRARIES_${library_name}) - endif() - endforeach() - - # find include directory - find_path( - ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER} - NAMES ${module_name}/include - PATHS ${ROOT_DIR} ${ROOT_DIR}/.. - NO_DEFAULT_PATH - ) - # set include directory for module if found - if(ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER}) - set(ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER} ${ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER}}/${module_name}/include) - endif() - - else() - - foreach(library_name IN LISTS LIBRARY_NAMES) - find_library( - ROCM_LIBRARIES_${library_name} - NAMES ${library_name} - PATHS /opt/rocm - PATH_SUFFIXES "lib" "lib64" "${module_name}/lib" "rocm/${module_name}/lib" - ) - find_package_handle_standard_args(ROCM_${MODULE_NAME_UPPER} FAIL_MESSAGE - "For ROCM module ${module_name}, library ${library_name} could not be found. Please specify ROCM_ROOT or ${MODULE_NAME_UPPER}_ROOT." - REQUIRED_VARS ROCM_LIBRARIES_${library_name}) - if(ROCM_LIBRARIES_${library_name}) - list(APPEND ROCM_LIBRARIES_${MODULE_NAME_UPPER} ${ROCM_LIBRARIES_${library_name}}) - mark_as_advanced(ROCM_LIBRARIES_${library_name}) - endif() - endforeach() - - # find include directory - find_path( - ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER} - NAMES ${module_name}/include - PATHS /opt/rocm/ - ) - # set include directory for module if found - if(ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER}) - set(ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER} ${ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER}}/${module_name}/include) - endif() - endif() - - - # check if all required parts found - find_package_handle_standard_args(ROCM_${MODULE_NAME_UPPER} FAIL_MESSAGE - "ROCM module ${module_name} could not be found. Please specify ROCM_ROOT or ${MODULE_NAME_UPPER}_ROOT." - REQUIRED_VARS ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER}) - if(ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER}) - mark_as_advanced(ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER}) - endif() - - # set global variables - if(ROCM_LIBRARIES_${MODULE_NAME_UPPER}) - set(ROCM_LIBRARIES ${ROCM_LIBRARIES} ${ROCM_LIBRARIES_${MODULE_NAME_UPPER}} PARENT_SCOPE) - endif() - if(ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER}) - set(ROCM_INCLUDE_DIRS ${ROCM_INCLUDE_DIRS} ${ROCM_INCLUDE_DIRS_${MODULE_NAME_UPPER}} PARENT_SCOPE) - endif() - -endfunction() - - -# Finds executables of rocm modules -# IN: -# - module_name: name of a module (e.g. hcc) -# - executable_name: name of the executable (e.g. hcc) -# OUT: -# - ROCM_${executable_name}_EXECUTABLE: Path to executable -function(find_rocm_executable module_name executable_name) - string(TOUPPER ${module_name} MODULE_NAME_UPPER) - string(TOUPPER ${executable_name} EXECUTABLE_NAME_UPPER) - unset(ROCM_${EXECUTABLE_NAME_UPPER}_EXECUTABLE PARENT_SCOPE) - - if(DEFINED ${MODULE_NAME_UPPER}_ROOT) - set(ROOT_DIR ${${MODULE_NAME_UPPER}_ROOT}) - elseif(DEFINED ROCM_ROOT) - set(ROOT_DIR ${ROCM_ROOT}) - endif() - - # get abosolute path to avoid issues with tilde - if(ROOT_DIR) - get_filename_component(ROOT_DIR ${ROOT_DIR} ABSOLUTE) - endif() - - if(ROOT_DIR) - find_file( - ROCM_${EXECUTABLE_NAME_UPPER}_EXECUTABLE - NAMES ${executable_name} - PATHS ${ROOT_DIR} - PATH_SUFFIXES "bin" "${module_name}/bin" - NO_DEFAULT_PATH - ) - else() - find_file( - ROCM_${EXECUTABLE_NAME_UPPER}_EXECUTABLE - NAMES ${executable_name} - PATHS "/opt/rocm" - PATH_SUFFIXES "bin" "${module_name}/bin" - ) - endif() - set(ROCM_${EXECUTABLE_NAME_UPPER} ROCM_${EXECUTABLE_NAME_UPPER} PARENT_SCOPE) - - if(${ROCM_FIND_REQUIRED}) - set(ROCM_${MODULE_NAME_UPPER}_${EXECUTABLE_NAME_UPPER}_FIND_REQUIRED TRUE) - else() - set(ROCM_${MODULE_NAME_UPPER}_${EXECUTABLE_NAME_UPPER}_FIND_REQUIRED FALSE) - endif() - if(${ROCM_FIND_QUIETLY}) - set(ROCM_${MODULE_NAME_UPPER}_${EXECUTABLE_NAME_UPPER}_FIND_QUIETLY TRUE) - else() - set(ROCM_${MODULE_NAME_UPPER}_${EXECUTABLE_NAME_UPPER}_FIND_QUIETLY FALSE) - endif() - find_package_handle_standard_args(ROCM FAIL_MESSAGE - "ROCM_${MODULE_NAME_UPPER}_${EXECUTABLE_NAME_UPPER} ${executable_name} executable could not be found. Please specify ROCM_ROOT or ${MODULE_NAME_UPPER}_ROOT." - REQUIRED_VARS ROCM_${EXECUTABLE_NAME_UPPER}_EXECUTABLE) - if(ROCM_${EXECUTABLE_NAME_UPPER}_EXECUTABLE) - set(ROCM_${EXECUTABLE_NAME_UPPER}_EXECUTABLE ${ROCM_${EXECUTABLE_NAME_UPPER}_EXECUTABLE} PARENT_SCOPE) - mark_as_advanced(ROCM_${EXECUTABLE_NAME_UPPER}_EXECUTABLE) - endif() -endfunction() - - - -# find compilers -find_rocm_executable(hcc hcc) -find_rocm_executable(hip hipcc) - -if(ROCM_HIPCC_EXECUTABLE AND ROCM_HCC_EXECUTABLE) - set(ROCM_FOUND TRUE) -else() - set(ROCM_FOUND FALSE) - return() -endif() - - -# find other executables and libraries -find_rocm_executable(hcc hcc-config) -find_rocm_executable(hip hipconfig) -find_rocm_executable(hip hipify-perl) -find_rcm_module(hcc LTO mcwamp mcwamp_cpu mcwamp_hsa hc_am) -find_rcm_module(hip hip_hcc) -find_rcm_module(rocm hsa-runtime64) - - -# parse hip config -execute_process(COMMAND ${ROCM_HIPCONFIG_EXECUTABLE} -P OUTPUT_VARIABLE ROCM_HIP_PLATFORM RESULT_VARIABLE RESULT_VALUE) -if(NOT ${RESULT_VALUE} EQUAL 0) - message(FATAL_ERROR "Error parsing platform identifier from hipconfig! Code: ${RESULT_VALUE}") -endif() -if(NOT ROCM_HIP_PLATFORM) - message(FATAL_ERROR "Empty platform identifier from hipconfig!") -endif() - -# set definitions -if("${ROCM_HIP_PLATFORM}" STREQUAL "hcc") - set(ROCM_DEFINITIONS -D__HIP_PLATFORM_HCC__) -elseif("${ROCM_HIP_PLATFORM}" STREQUAL "nvcc") - set(ROCM_DEFINITIONS -D__HIP_PLATFORM_NVCC__) -else() - message(FATAL_ERROR "Could not parse platform identifier from hipconfig! Value: ${ROCM_HIP_PLATFORM}") -endif() - -# find libraries for each specified components -foreach(module_name IN LISTS ROCM_FIND_COMPONENTS) - # set required libaries for each module - if("${module_name}" STREQUAL "hipblas") - find_rcm_module(hipblas hipblas) - elseif("${module_name}" STREQUAL "hipsparse") - find_rcm_module(hipsparse hipsparse) - elseif("${module_name}" STREQUAL "rocblas") - find_rcm_module(rocblas rocblas) - elseif("${module_name}" STREQUAL "rocsparse") - find_rcm_module(rocsparse rocsparse) - elseif("${module_name}" STREQUAL "rocfft") - find_rcm_module(rocfft rocfft rocfft-device) - else() - message(FATAL_ERROR "Unrecognized component \"${module_name}\" in FindROCM module!") - endif() -endforeach() - - -# Generates library compiled with hipcc -# Usage: -# rocm_hip_add_library( [STATIC | SHARED] [FLAGS] [OUTPUT_DIR] [INCLUDE_DIRS] ) -macro(rocm_add_library) - cmake_parse_arguments( - ROCM_ARG - "SHARED;STATIC" - "OUTPUT_DIR" - "FLAGS;INCLUDE_DIRS" - ${ARGN} - ) - # allow either STATIC or SHARED - if(ROCM_ARG_SHARED AND ROCM_ARG_STATIC) - message(FATAL_ERROR "rocm_hip_add_library: library cannot by both static and shared!") - endif() - - # default to SHARED - if(NOT (ROCM_ARG_SHARED OR ROCM_ARG_STATIC)) - set(ROCM_ARG_SHARED TRUE) - endif() - - # default to current binary output directory - if(NOT ROCM_ARG_OUTPUT_DIR) - set(ROCM_ARG_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}) - endif() - - # parse positional arguments - list(LENGTH ROCM_ARG_UNPARSED_ARGUMENTS NARGS) - if(${NARGS} LESS 2) - message(FATAL_ERROR "rocm_hip_add_library: Not enough arguments!") - endif() - list(GET ROCM_ARG_UNPARSED_ARGUMENTS 0 ROCM_ARG_NAME) - list(REMOVE_AT ROCM_ARG_UNPARSED_ARGUMENTS 0) - set(ROCM_ARG_SOURCES ${ROCM_ARG_UNPARSED_ARGUMENTS}) - - # generate include flags - set(_ROCM_FULL_PATH_INCLUDE_FLAGS) - foreach(_rocm_iternal_dir IN LISTS ROCM_ARG_INCLUDE_DIRS) - if(NOT IS_ABSOLUTE ${_rocm_iternal_dir}) - get_filename_component(_rocm_iternal_dir ${_rocm_iternal_dir} ABSOLUTE) - endif() - list(APPEND _ROCM_FULL_PATH_INCLUDE_FLAGS -I${_rocm_iternal_dir}) - endforeach() - - # generate full path to source files - unset(_ROCM_SOURCES) - foreach(source IN LISTS ROCM_ARG_SOURCES) - if(NOT IS_ABSOLUTE ${source}) - get_filename_component(source ${source} ABSOLUTE) - endif() - set(_ROCM_SOURCES ${_ROCM_SOURCES} ${source}) - endforeach() - get_filename_component(ROCM_ARG_OUTPUT_DIR ${ROCM_ARG_OUTPUT_DIR} ABSOLUTE) - - # generate flags to use - set(_ROCM_STD_FLAGS ${ROCM_ARG_FLAGS} ${ROCM_HIPCC_FLAGS}) - if(_ROCM_STD_FLAGS) - list(FILTER _ROCM_STD_FLAGS INCLUDE REGEX -std=) - endif() - set(_ROCM_FLAGS ${ROCM_ARG_FLAGS}) - if(CMAKE_CXX_STANDARD AND NOT _ROCM_STD_FLAGS) - list(APPEND _ROCM_FLAGS -std=c++${CMAKE_CXX_STANDARD}) - endif() - if(CMAKE_BUILD_TYPE) - string(TOUPPER ${CMAKE_BUILD_TYPE} _ROCM_BUILD_TYPE_UPPER) - list(APPEND _ROCM_FLAGS ${ROCM_HIPCC_FLAGS_${_ROCM_BUILD_TYPE_UPPER}}) - endif() - - if(NOT ROCM_HIPCC_EXECUTABLE) - message(FATAL_ERROR "HIPCC executable not found!") - endif() - - if(ROCM_ARG_SHARED) - set(_ROCM_FLAGS ${_ROCM_FLAGS} -fPIC) - endif() - - # compile all files to .o - set(_ROCM_OBJS) - set(_ROCM_OBJ_TARGETS) - foreach(_rocm_file IN LISTS _ROCM_SOURCES) - - # create output directory for .o file - get_filename_component(_ROCM_CURRENT_DIR ${_rocm_file} DIRECTORY) - file(RELATIVE_PATH _ROCM_CURRENT_DIR "${CMAKE_CURRENT_SOURCE_DIR}" ${_ROCM_CURRENT_DIR}) - set(_ROCM_OBJ_OUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${ROCM_ARG_NAME}.dir/${_ROCM_CURRENT_DIR}") - file(MAKE_DIRECTORY ${_ROCM_OBJ_OUT_DIR}) - - # set .o name and path - get_filename_component(_ROCM_FILE_NAME_ONLY ${_rocm_file} NAME) - set(_ROCM_OBJ_FILE ${_ROCM_OBJ_OUT_DIR}/${_ROCM_FILE_NAME_ONLY}.o) - list(APPEND _ROCM_OBJS ${_ROCM_OBJ_FILE}) - list(APPEND _ROCM_OBJ_TARGETS HIP_TARGET_${_ROCM_FILE_NAME_ONLY}) - - # compile .o file - add_custom_target(HIP_TARGET_${_ROCM_FILE_NAME_ONLY} COMMAND ${ROCM_HIPCC_EXECUTABLE} -c ${_rocm_file} -o ${_ROCM_OBJ_FILE} ${_ROCM_FLAGS} ${_ROCM_FULL_PATH_INCLUDE_FLAGS} - WORKING_DIRECTORY ${_ROCM_OBJ_OUT_DIR} SOURCES ${_rocm_file}) - - endforeach() - - # compile shared library - if(ROCM_ARG_SHARED) - add_custom_target(HIP_TARGET_${ROCM_ARG_NAME} COMMAND ${ROCM_HIPCC_EXECUTABLE} ${_ROCM_OBJS} -fPIC --shared -o ${ROCM_ARG_OUTPUT_DIR}/lib${ROCM_ARG_NAME}.so - ${_ROCM_FLAGS} ${_ROCM_FULL_PATH_INCLUDE_FLAGS} - WORKING_DIRECTORY ${ROCM_ARG_OUTPUT_DIR}) - - add_library(${ROCM_ARG_NAME} INTERFACE) - target_link_libraries(${ROCM_ARG_NAME} INTERFACE ${ROCM_ARG_OUTPUT_DIR}/lib${ROCM_ARG_NAME}.so) - - # add depencies - add_dependencies(${ROCM_ARG_NAME} HIP_TARGET_${ROCM_ARG_NAME}) - foreach(_rocm_target IN LISTS _ROCM_OBJ_TARGETS) - add_dependencies(HIP_TARGET_${ROCM_ARG_NAME} ${_rocm_target}) - endforeach() - endif() - - # static library - if(ROCM_ARG_STATIC) - # create library from object files - add_library(${ROCM_ARG_NAME} ${_ROCM_OBJS}) - set_target_properties(${ROCM_ARG_NAME} PROPERTIES LINKER_LANGUAGE CXX) - set_source_files_properties( - ${_ROCM_OBJS} - PROPERTIES - EXTERNAL_OBJECT true - GENERATED true - ) - # add dependencies - foreach(_rocm_target IN LISTS _ROCM_OBJ_TARGETS) - add_dependencies(${ROCM_ARG_NAME} ${_rocm_target}) - endforeach() - endif() - -endmacro() - - -# Generates compiled objects -# Usage: -# rocm_hip_add_library( [FLAGS] [INCLUDE_DIRS] ) -macro(rocm_generate_objects) - cmake_parse_arguments( - ROCM_ARG - "" # options - "" # single value args - "FLAGS;INCLUDE_DIRS" # multi value args - ${ARGN} - ) - set(ROCM_ARG_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}) - - # parse positional arguments - list(LENGTH ROCM_ARG_UNPARSED_ARGUMENTS NARGS) - if(${NARGS} LESS 3) - message(FATAL_ERROR "rocm_hip_add_library: Not enough arguments!") - endif() - list(GET ROCM_ARG_UNPARSED_ARGUMENTS 0 ROCM_ARG_OBJECT_VAR) - list(REMOVE_AT ROCM_ARG_UNPARSED_ARGUMENTS 0) - list(GET ROCM_ARG_UNPARSED_ARGUMENTS 0 ROCM_ARG_TARGETS_VAR) - list(REMOVE_AT ROCM_ARG_UNPARSED_ARGUMENTS 0) - set(ROCM_ARG_SOURCES ${ROCM_ARG_UNPARSED_ARGUMENTS}) - - # generate include flags - set(_ROCM_FULL_PATH_INCLUDE_FLAGS) - foreach(_rocm_iternal_dir IN LISTS ROCM_ARG_INCLUDE_DIRS) - if(NOT IS_ABSOLUTE ${_rocm_iternal_dir}) - get_filename_component(_rocm_iternal_dir ${_rocm_iternal_dir} ABSOLUTE) - endif() - list(APPEND _ROCM_FULL_PATH_INCLUDE_FLAGS -I${_rocm_iternal_dir}) - endforeach() - - # generate full path to source files - unset(_ROCM_SOURCES) - foreach(source IN LISTS ROCM_ARG_SOURCES) - if(NOT IS_ABSOLUTE ${source}) - get_filename_component(source ${source} ABSOLUTE) - endif() - set(_ROCM_SOURCES ${_ROCM_SOURCES} ${source}) - endforeach() - get_filename_component(ROCM_ARG_OUTPUT_DIR ${ROCM_ARG_OUTPUT_DIR} ABSOLUTE) - - # generate flags to use - set(_ROCM_STD_FLAGS ${ROCM_ARG_FLAGS} ${ROCM_HIPCC_FLAGS}) - if(_ROCM_STD_FLAGS) - list(FILTER _ROCM_STD_FLAGS INCLUDE REGEX -std=) - endif() - set(_ROCM_FLAGS ${ROCM_ARG_FLAGS}) - if(CMAKE_CXX_STANDARD AND NOT _ROCM_STD_FLAGS) - list(APPEND _ROCM_FLAGS -std=c++${CMAKE_CXX_STANDARD}) - endif() - if(CMAKE_BUILD_TYPE) - string(TOUPPER ${CMAKE_BUILD_TYPE} _ROCM_BUILD_TYPE_UPPER) - list(APPEND _ROCM_FLAGS ${ROCM_HIPCC_FLAGS_${_ROCM_BUILD_TYPE_UPPER}}) - endif() - - if(NOT ROCM_HIPCC_EXECUTABLE) - message(FATAL_ERROR "HIPCC executable not found!") - endif() - - # compile all files to .o - set(_ROCM_OBJS) - set(_ROCM_OBJ_TARGETS) - foreach(_rocm_file IN LISTS _ROCM_SOURCES) - get_filename_component(_ROCM_FILE_NAME_ONLY ${_rocm_file} NAME) - - set(_ROCM_TARGET_NAME hip_target_${_ROCM_FILE_NAME_ONLY}) - - # create output directory for .o file - get_filename_component(_ROCM_CURRENT_DIR ${_rocm_file} DIRECTORY) - file(RELATIVE_PATH _ROCM_CURRENT_DIR "${CMAKE_CURRENT_SOURCE_DIR}" ${_ROCM_CURRENT_DIR}) - set(_ROCM_OBJ_OUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${_ROCM_TARGET_NAME}.dir") - file(MAKE_DIRECTORY ${_ROCM_OBJ_OUT_DIR}) - - # set .o name and path - set(_ROCM_OBJ_FILE ${_ROCM_OBJ_OUT_DIR}/${_ROCM_FILE_NAME_ONLY}.o) - list(APPEND _ROCM_OBJS ${_ROCM_OBJ_FILE}) - list(APPEND _ROCM_OBJ_TARGETS ${_ROCM_TARGET_NAME}) - - # compile .o file - add_custom_target(${_ROCM_TARGET_NAME} ALL - COMMAND ${ROCM_HIPCC_EXECUTABLE} -c ${_rocm_file} -o ${_ROCM_OBJ_FILE} ${_ROCM_FLAGS} ${_ROCM_FULL_PATH_INCLUDE_FLAGS} - WORKING_DIRECTORY ${_ROCM_OBJ_OUT_DIR} SOURCES ${_rocm_file}) - endforeach() - - set_source_files_properties( - ${_ROCM_OBJS} - PROPERTIES - EXTERNAL_OBJECT true - GENERATED true - ) - - # set input variable to object files - set(${ROCM_ARG_OBJECT_VAR} ${_ROCM_OBJS}) - - # set targets for dependency management - set(${ROCM_ARG_TARGETS_VAR} ${_ROCM_OBJ_TARGETS}) -endmacro() - diff --git a/docs/images/logo_cscs.png b/docs/images/logo_cscs.png new file mode 100644 index 0000000..1c6e036 Binary files /dev/null and b/docs/images/logo_cscs.png differ diff --git a/docs/images/logo_ethz.png b/docs/images/logo_ethz.png new file mode 100644 index 0000000..2d1a275 Binary files /dev/null and b/docs/images/logo_ethz.png differ diff --git a/docs/images/logo_max.png b/docs/images/logo_max.png new file mode 100644 index 0000000..2ebda13 Binary files /dev/null and b/docs/images/logo_max.png differ diff --git a/docs/source/details.rst b/docs/source/details.rst index 865571f..75cb07b 100644 --- a/docs/source/details.rst +++ b/docs/source/details.rst @@ -74,6 +74,20 @@ SPFFT_EXCH_UNBUFFERED | For both *SPFFT_EXCH_BUFFERED* and *SPFFT_EXCH_COMPACT_BUFFERED*, an exchange in single precision can be selected. With transforms in double precision, the number of bytes sent and received is halved. For execution on GPUs without GPUDirect, the data transfer between GPU and host also benefits. This option can provide a significant speedup, but incurs a slight accuracy loss. The double precision values are converted to and from single precision between the transform in z and the transform in x / y, while all actual calculations are still done in the selected precision. + +Thread-Safety +------------- +The creation of Grid and Transform objects is thread-safe only if: + +* No FFTW library calls are executed concurrently. +* In the distributed case, MPI thread support is set to *MPI_THREAD_MULTIPLE*. + + +The execution of transforms is thread-safe if + +* Each thread executes using its own Grid and associated Transform object. +* In the distributed case, MPI thread support is set to *MPI_THREAD_MULTIPLE*. + GPU --- | Saving transfer time between host and GPU is key to good performance for execution with GPUs. Ideally, both input and output is located on GPU memory. If host memory pointers are provided as input or output, it is helpful to use pinned memory through the CUDA or ROCm API. diff --git a/docs/source/grid.rst b/docs/source/grid.rst index 01a9e16..c997a8a 100644 --- a/docs/source/grid.rst +++ b/docs/source/grid.rst @@ -1,8 +1,7 @@ Grid ==== .. note:: - A Grid object can be safely destroyed after transforms have been created. The transforms hold a reference counted objtect containing the allocated memory, which will remain valid until all transforms are destroyed as well. - + A Grid object can be safely destroyed after Transform objects have been created, since internal reference counting used to prevent the release of resources while still in use. .. doxygenclass:: spfft::Grid :project: SpFFT diff --git a/docs/source/grid_c.rst b/docs/source/grid_c.rst index c59fd1c..cea81db 100644 --- a/docs/source/grid_c.rst +++ b/docs/source/grid_c.rst @@ -1,5 +1,8 @@ Grid ==== +.. note:: + A Grid handle can be safely destroyed after Transform handles have been created, since internal reference counting used to prevent the release of resources while still in use. + .. doxygenfile:: spfft/grid.h :project: SpFFT diff --git a/docs/source/grid_float.rst b/docs/source/grid_float.rst index f840c52..cf6880d 100644 --- a/docs/source/grid_float.rst +++ b/docs/source/grid_float.rst @@ -5,7 +5,8 @@ GridFloat This class is only available if single precision support is enabled, in which case the marco SPFFT_SINGLE_PRECISION is defined in config.h. .. note:: - A Grid object can be safely destroyed after transforms have been created. The transforms hold a reference counted objtect containing the allocated memory, which will remain valid until all transforms are destroyed as well. + A Grid object can be safely destroyed after Transform objects have been created, since internal reference counting used to prevent the release of resources while still in use. + .. doxygenclass:: spfft::GridFloat :project: SpFFT diff --git a/docs/source/grid_float_c.rst b/docs/source/grid_float_c.rst index e1a46f0..46b4f8b 100644 --- a/docs/source/grid_float_c.rst +++ b/docs/source/grid_float_c.rst @@ -1,5 +1,8 @@ GridFloat ========= +.. note:: + A Grid handle can be safely destroyed after Transform handles have been created, since internal reference counting used to prevent the release of resources while still in use. + .. note:: These functions are only available if single precision support is enabled, in which case the marco SPFFT_SINGLE_PRECISION is defined in config.h. diff --git a/docs/source/index.rst b/docs/source/index.rst index 7219771..1302fd9 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -6,9 +6,9 @@ SpFFT Documentation =================== -| SpFFT - A 3D FFT library for sparse frequency domain data written in C++ with support for MPI, OpenMP, CUDA and ROCm. It was originally intended for transforms of data with spherical cutoff in frequency domain, as required by some computational material science codes. +| SpFFT - A 3D FFT library for sparse frequency domain data written in C++ with support for MPI, OpenMP, CUDA and ROCm. -| For distributed computations, SpFFT uses a slab decomposition in space domain and pencil decomposition in frequency domain (all sparse data within a pencil must be on one rank). +| Inspired by the need of some computational material science applications with spherical cutoff data in frequency domain, SpFFT provides Fast Fourier Transformations of sparse frequency domain data. For distributed computations with MPI, slab decomposition in space domain and pencil decomposition in frequency domain (sparse data within a pencil / column must be on one rank) is used. .. figure:: ../images/sparse_to_dense.png :align: center @@ -22,10 +22,10 @@ Design Goals - Sparse frequency domain input - Reuse of pre-allocated memory - Support of negative indexing for frequency domain data +- Parallelization and acceleration are optional - Unified interface for calculations on CPUs and GPUs -- Support of Complex-To-Real and Real-To-Complex transforms, where the full hermitian symmetry property is utilized. +- Support of Complex-To-Real and Real-To-Complex transforms, where the full hermitian symmetry property is utilized - C++, C and Fortran interfaces -- Parallelization and acceleration are optional Interface Design ---------------- diff --git a/include/spfft/grid.h b/include/spfft/grid.h index caa8424..d62803c 100644 --- a/include/spfft/grid.h +++ b/include/spfft/grid.h @@ -66,6 +66,7 @@ SPFFT_EXPORT SpfftError spfft_grid_create(SpfftGrid* grid, int maxDimX, int maxD #ifdef SPFFT_MPI /** * Constructor for a distributed grid. + * Thread-safe if MPI thread support is set to MPI_THREAD_MULTIPLE. * * @param[out] grid Handle to grid. * @param[in] maxDimX Maximum dimension in x. diff --git a/include/spfft/grid.hpp b/include/spfft/grid.hpp index bbc1696..ee91c27 100644 --- a/include/spfft/grid.hpp +++ b/include/spfft/grid.hpp @@ -68,6 +68,7 @@ class SPFFT_EXPORT Grid { #ifdef SPFFT_MPI /** * Constructor for a distributed grid. + * Thread-safe if MPI thread support is set to MPI_THREAD_MULTIPLE. * * @param[in] maxDimX Maximum dimension in x. * @param[in] maxDimY Maximum dimension in y. @@ -116,6 +117,7 @@ class SPFFT_EXPORT Grid { /** * Creates a transform from this grid object. + * Thread-safe if no FFTW calls are executed concurrently. * * @param[in] processingUnit The processing unit type to use. Must be either SPFFT_PU_HOST or * SPFFT_PU_GPU and be supported by the grid itself. diff --git a/include/spfft/grid_float.h b/include/spfft/grid_float.h index 8a02586..a7316a6 100644 --- a/include/spfft/grid_float.h +++ b/include/spfft/grid_float.h @@ -66,6 +66,7 @@ SPFFT_EXPORT SpfftError spfft_float_grid_create(SpfftFloatGrid* grid, int maxDim #ifdef SPFFT_MPI /** * Constructor for a single precision distributed grid. + * Thread-safe if MPI thread support is set to MPI_THREAD_MULTIPLE. * * @param[out] grid Handle to grid. * @param[in] maxDimX Maximum dimension in x. diff --git a/include/spfft/grid_float.hpp b/include/spfft/grid_float.hpp index eb10f44..de340c5 100644 --- a/include/spfft/grid_float.hpp +++ b/include/spfft/grid_float.hpp @@ -69,6 +69,7 @@ class SPFFT_EXPORT GridFloat { #ifdef SPFFT_MPI /** * Constructor for a distributed grid. + * Thread-safe if MPI thread support is set to MPI_THREAD_MULTIPLE. * * @param[in] maxDimX Maximum dimension in x. * @param[in] maxDimY Maximum dimension in y. @@ -117,6 +118,7 @@ class SPFFT_EXPORT GridFloat { /** * Creates a transform from this grid object. + * Thread-safe if no FFTW calls are executed concurrently. * * @param[in] processingUnit The processing unit type to use. Must be either SPFFT_PU_HOST or * SPFFT_PU_GPU and be supported by the grid itself. diff --git a/include/spfft/transform.h b/include/spfft/transform.h index 1dc0dcf..79c23ad 100644 --- a/include/spfft/transform.h +++ b/include/spfft/transform.h @@ -47,6 +47,7 @@ typedef void* SpfftTransform; /** * Creates a transform from a grid handle. + * Thread-safe if no FFTW calls are executed concurrently. * * @param[out] transform Handle to the transform. * @param[in] grid Handle to the grid, with which the transform is created. diff --git a/include/spfft/transform_float.h b/include/spfft/transform_float.h index 449b55b..e8bca52 100644 --- a/include/spfft/transform_float.h +++ b/include/spfft/transform_float.h @@ -47,6 +47,7 @@ typedef void* SpfftFloatTransform; /** * Creates a single precision transform from a single precision grid handle. + * Thread-safe if no FFTW calls are executed concurrently. * * @param[out] transform Handle to the transform. * @param[in] grid Handle to the grid, with which the transform is created. diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 89146b5..636126b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,6 +9,7 @@ set(SPFFT_SOURCE_FILES spfft/multi_transform.cpp spfft/grid.cpp spfft/grid_internal.cpp + fft/fftw_mutex.cpp ) if(SPFFT_SINGLE_PRECISION) @@ -39,18 +40,9 @@ if(SPFFT_CUDA OR SPFFT_ROCM) transpose/transpose_mpi_unbuffered_gpu.cpp ) endif() -endif() - -if(SPFFT_CUDA) list(APPEND SPFFT_SOURCE_FILES ${SPFFT_GPU_KERNELS}) endif() -if(SPFFT_ROCM) - rocm_generate_objects(ROCM_OBJECTS ROCM_TARGETS ${SPFFT_GPU_KERNELS} INCLUDE_DIRS ${SPFFT_INCLUDE_DIRS} ${SPFFT_EXTERNAL_INCLUDE_DIRS} - FLAGS -fPIC -fno-gpu-rdc --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906) - list(APPEND SPFFT_SOURCE_FILES ${ROCM_OBJECTS}) -endif() - if(SPFFT_MPI) list(APPEND SPFFT_SOURCE_FILES transpose/transpose_mpi_buffered_host.cpp @@ -59,11 +51,26 @@ if(SPFFT_MPI) ) endif() -add_library(spfft ${SPFFT_LIBRARY_TYPE} - ${SPFFT_SOURCE_FILES} - ) +if(SPFFT_ROCM) + set(HIP_HCC_FLAGS ${HIP_HCC_FLAGS} -fno-gpu-rdc) + set(HIP_HCC_FLAGS_RELEASE ${HIP_HCC_FLAGS_RELEASE} -Wno-everything) + if(CMAKE_CXX_STANDARD) + set(HIP_HCC_FLAGS ${HIP_HCC_FLAGS} -std=gnu++${CMAKE_CXX_STANDARD}) + endif() + + # macro from FindHIP package, which compiles all .cu files with hipcc and cpp files with the set c++ compiler + HIP_ADD_LIBRARY(spfft ${SPFFT_LIBRARY_TYPE} ${SPFFT_SOURCE_FILES}) +else() + add_library(spfft ${SPFFT_LIBRARY_TYPE} ${SPFFT_SOURCE_FILES}) +endif() + set_property(TARGET spfft PROPERTY VERSION ${SPFFT_VERSION}) set_property(TARGET spfft PROPERTY SOVERSION ${SPFFT_SO_VERSION}) +# All .cu files are self-contained. Device linking can have issues with propageted linker flags of other targets like MPI. +if(SPFFT_CUDA) + set_property(TARGET spfft PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS OFF) + set_property(TARGET spfft PROPERTY CUDA_SEPARABLE_COMPILATION OFF) +endif() # build fortran module if(SPFFT_FORTRAN) @@ -79,11 +86,6 @@ if(UNIX AND NOT APPLE) endif() endif() -# add depency on ojbect generating targets for ROCM -if(SPFFT_ROCM) - add_dependencies(spfft ${ROCM_TARGETS}) -endif() - # generate export header to control symbol visibility include(GenerateExportHeader) generate_export_header(spfft) diff --git a/src/fft/fftw_mutex.cpp b/src/fft/fftw_mutex.cpp new file mode 100644 index 0000000..1f138d5 --- /dev/null +++ b/src/fft/fftw_mutex.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 ETH Zurich, Simon Frasch + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include "fft/fftw_mutex.hpp" +#include "spfft/config.h" + +namespace spfft { +auto global_fftw_mutex() -> std::mutex& { + static std::mutex globMutex; // thread safe initialization since C++11 + return globMutex; +} +} // namespace spfft + diff --git a/src/fft/fftw_mutex.hpp b/src/fft/fftw_mutex.hpp new file mode 100644 index 0000000..fe185f7 --- /dev/null +++ b/src/fft/fftw_mutex.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2019 ETH Zurich, Simon Frasch + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef SPFFT_FFTW_MUTEX_HPP +#define SPFFT_FFTW_MUTEX_HPP + +#include +#include "spfft/config.h" + +namespace spfft { + +// provides a global mutex for guarding fftw functions calls, which are not thread-safe +auto global_fftw_mutex() -> std::mutex&; + +} // namespace spfft + +#endif diff --git a/src/fft/fftw_plan_1d.hpp b/src/fft/fftw_plan_1d.hpp index 4385b4d..83a1bce 100644 --- a/src/fft/fftw_plan_1d.hpp +++ b/src/fft/fftw_plan_1d.hpp @@ -31,10 +31,12 @@ #include #include #include +#include #include "spfft/config.h" #include "spfft/exceptions.hpp" #include "util/common_types.hpp" #include "util/type_check.hpp" +#include "fft/fftw_mutex.hpp" namespace spfft { @@ -58,8 +60,12 @@ class FFTWPlan { if (input != output) { flags = flags | FFTW_DESTROY_INPUT; // allow input override for out-of-place transform } - plan_ = fftw_plan_dft_1d(size, reinterpret_cast(input), - reinterpret_cast(output), sign, flags); + + { + std::lock_guard guard(global_fftw_mutex()); + plan_ = fftw_plan_dft_1d(size, reinterpret_cast(input), + reinterpret_cast(output), sign, flags); + } if (!plan_) throw FFTWError(); } @@ -76,17 +82,24 @@ class FFTWPlan { if (input != output) { flags = flags | FFTW_DESTROY_INPUT; // allow input override for out-of-place transform } - plan_ = - fftw_plan_many_dft(rank, n, (int)howmany, reinterpret_cast(input), inembed, - (int)istride, (int)idist, reinterpret_cast(output), - onembed, (int)ostride, (int)odist, sign, flags); + + { + std::lock_guard guard(global_fftw_mutex()); + plan_ = + fftw_plan_many_dft(rank, n, (int)howmany, reinterpret_cast(input), inembed, + (int)istride, (int)idist, reinterpret_cast(output), + onembed, (int)ostride, (int)odist, sign, flags); + } if (!plan_) throw FFTWError(); } FFTWPlan(const FFTWPlan& other) = delete; FFTWPlan(FFTWPlan&& other) noexcept { - if (plan_) fftw_destroy_plan(plan_); + if (plan_) { + std::lock_guard guard(global_fftw_mutex()); + fftw_destroy_plan(plan_); + } plan_ = other.plan_; other.plan_ = nullptr; } @@ -94,7 +107,10 @@ class FFTWPlan { auto operator=(const FFTWPlan& other) -> FFTWPlan& = delete; auto operator=(FFTWPlan&& other) noexcept -> FFTWPlan& { - if (plan_) fftw_destroy_plan(plan_); + if (plan_) { + std::lock_guard guard(global_fftw_mutex()); + fftw_destroy_plan(plan_); + } plan_ = other.plan_; other.plan_ = nullptr; return *this; @@ -134,7 +150,10 @@ class FFTWPlan { } ~FFTWPlan() { - if (plan_) fftw_destroy_plan(plan_); + if (plan_) { + std::lock_guard guard(global_fftw_mutex()); + fftw_destroy_plan(plan_); + } plan_ = nullptr; } diff --git a/src/fft/fftw_real_plan_1d.hpp b/src/fft/fftw_real_plan_1d.hpp index 126f9af..ac70d22 100644 --- a/src/fft/fftw_real_plan_1d.hpp +++ b/src/fft/fftw_real_plan_1d.hpp @@ -31,10 +31,12 @@ #include #include #include +#include #include "spfft/config.h" #include "spfft/exceptions.hpp" #include "util/common_types.hpp" #include "util/type_check.hpp" +#include "fft/fftw_mutex.hpp" namespace spfft { @@ -57,9 +59,12 @@ class FFTWRealPlan { int inembed[] = {n[0]}; int onembed[] = {n[0]}; auto flags = FFTW_ESTIMATE | FFTW_DESTROY_INPUT; - plan_ = fftw_plan_many_dft_r2c(rank, n, (int)howmany, input, inembed, (int)istride, (int)idist, - reinterpret_cast(output), onembed, (int)ostride, - (int)odist, flags); + { + std::lock_guard guard(global_fftw_mutex()); + plan_ = fftw_plan_many_dft_r2c(rank, n, (int)howmany, input, inembed, (int)istride, + (int)idist, reinterpret_cast(output), onembed, + (int)ostride, (int)odist, flags); + } if (!plan_) throw FFTWError(); } @@ -74,16 +79,22 @@ class FFTWRealPlan { int inembed[] = {n[0]}; int onembed[] = {n[0]}; auto flags = FFTW_ESTIMATE | FFTW_DESTROY_INPUT; - plan_ = fftw_plan_many_dft_c2r(rank, n, (int)howmany, reinterpret_cast(input), - inembed, (int)istride, (int)idist, output, onembed, (int)ostride, - (int)odist, flags); + { + std::lock_guard guard(global_fftw_mutex()); + plan_ = fftw_plan_many_dft_c2r(rank, n, (int)howmany, reinterpret_cast(input), + inembed, (int)istride, (int)idist, output, onembed, + (int)ostride, (int)odist, flags); + } if (!plan_) throw FFTWError(); } FFTWRealPlan(const FFTWRealPlan& other) = delete; FFTWRealPlan(FFTWRealPlan&& other) noexcept { - if (plan_) fftw_destroy_plan(plan_); + if (plan_) { + std::lock_guard guard(global_fftw_mutex()); + fftw_destroy_plan(plan_); + } plan_ = other.plan_; other.plan_ = nullptr; } @@ -91,7 +102,10 @@ class FFTWRealPlan { auto operator=(const FFTWRealPlan& other) -> FFTWRealPlan& = delete; auto operator=(FFTWRealPlan&& other) noexcept -> FFTWRealPlan& { - if (plan_) fftw_destroy_plan(plan_); + if (plan_){ + std::lock_guard guard(global_fftw_mutex()); + fftw_destroy_plan(plan_); + } plan_ = other.plan_; other.plan_ = nullptr; return *this; @@ -119,7 +133,10 @@ class FFTWRealPlan { auto execute() -> void { fftw_execute(plan_); } ~FFTWRealPlan() { - if (plan_) fftw_destroy_plan(plan_); + if (plan_){ + std::lock_guard guard(global_fftw_mutex()); + fftw_destroy_plan(plan_); + } plan_ = nullptr; } @@ -146,9 +163,13 @@ class FFTWRealPlan { int inembed[] = {n[0]}; int onembed[] = {n[0]}; auto flags = FFTW_ESTIMATE | FFTW_DESTROY_INPUT; - plan_ = fftwf_plan_many_dft_r2c(rank, n, (int)howmany, input, inembed, (int)istride, (int)idist, - reinterpret_cast(output), onembed, (int)ostride, - (int)odist, flags); + + { + std::lock_guard guard(global_fftw_mutex()); + plan_ = fftwf_plan_many_dft_r2c(rank, n, (int)howmany, input, inembed, (int)istride, + (int)idist, reinterpret_cast(output), onembed, + (int)ostride, (int)odist, flags); + } if (!plan_) throw FFTWError(); } @@ -163,9 +184,12 @@ class FFTWRealPlan { int inembed[] = {n[0]}; int onembed[] = {n[0]}; auto flags = FFTW_ESTIMATE | FFTW_DESTROY_INPUT; - plan_ = fftwf_plan_many_dft_c2r(rank, n, (int)howmany, reinterpret_cast(input), - inembed, (int)istride, (int)idist, output, onembed, - (int)ostride, (int)odist, flags); + { + std::lock_guard guard(global_fftw_mutex()); + plan_ = fftwf_plan_many_dft_c2r( + rank, n, (int)howmany, reinterpret_cast(input), inembed, (int)istride, + (int)idist, output, onembed, (int)ostride, (int)odist, flags); + } if (!plan_) throw FFTWError(); } @@ -220,7 +244,10 @@ class FFTWRealPlan { } ~FFTWRealPlan() { - if (plan_) fftwf_destroy_plan(plan_); + if (plan_){ + std::lock_guard guard(global_fftw_mutex()); + fftwf_destroy_plan(plan_); + } plan_ = nullptr; }