Skip to content

Commit

Permalink
Merge modernized CUDA CMake setup
Browse files Browse the repository at this point in the history
* Set the minimum requirement for CMake to 3.16
* Set the minimum requirement for CUDA builds to CMake 3.18 to enable usage
  of the FindCUDAToolkit module and CMAKE_CUDA_ARCHITECTURES.
* Use FindCUDAToolkit instead of manually searching for the CUDA runtime,
  cuBLAS, cuSPARSE, cuRAND and cuFFT
* Set CMAKE_CUDA_ARCHITECTURES via CAS
* Set C++ and CUDA (host) compiler via environment variables

Related PR: #1368
  • Loading branch information
upsj authored Aug 10, 2023
2 parents cd1d2a2 + 5dbf0ff commit 7b4134d
Show file tree
Hide file tree
Showing 56 changed files with 120 additions and 227 deletions.
17 changes: 3 additions & 14 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ build/cuda114/nompi/gcc/cuda/debug/shared:
- .build_and_test_template
- .default_variables
- .quick_test_condition
- .use_gko_cuda114-openmpi-gnu11-llvm12
- .use_gko_cuda114-openmpi-gnu10-llvm12
variables:
BUILD_OMP: "ON"
BUILD_CUDA: "ON"
Expand Down Expand Up @@ -526,24 +526,13 @@ build/nocuda/openmpi/clang/omp/debug/static:
FAST_TESTS: "ON"
BUILD_SHARED_LIBS: "OFF"

test/nocuda/openmpi/clang/omp/debug/static:
extends:
- .build_and_test_template
- .default_variables
- .full_test_condition
- .use_gko-nocuda-openmpi-gnu9-llvm8
variables:
USE_NAME: "nocuda-openmpi-clang-${CI_PIPELINE_ID}"
dependencies: null
needs: [ "build/nocuda/openmpi/clang/omp/debug/static" ]

# nocuda with the oldest supported compiler
build/nocuda/nompi/gcc/omp/release/static:
extends:
- .build_and_test_template
- .default_variables
- .quick_test_condition
- .use_gko-nocuda-mvapich2-gnu5-llvm39-intel2018
- .use_gko-nocuda-mvapich2-gnu5-llvm39-intel2019
variables:
BUILD_OMP: "ON"
BUILD_TYPE: "Release"
Expand All @@ -554,7 +543,7 @@ build/nocuda-nomixed/nompi/clang/omp/release/static:
- .build_and_test_template
- .default_variables
- .full_test_condition
- .use_gko-nocuda-mvapich2-gnu5-llvm39-intel2018
- .use_gko-nocuda-mvapich2-gnu5-llvm39-intel2019
variables:
C_COMPILER: "clang"
CXX_COMPILER: "clang++"
Expand Down
8 changes: 4 additions & 4 deletions .gitlab/image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
- cpu
- amdci

.use_gko-nocuda-mvapich2-gnu5-llvm39-intel2018:
image: ginkgohub/cpu:mvapich2-gnu5-llvm39-intel2018
.use_gko-nocuda-mvapich2-gnu5-llvm39-intel2019:
image: ginkgohub/cpu:mvapich2-gnu5-llvm39-intel2019
tags:
- private_ci
- cpu
Expand Down Expand Up @@ -50,8 +50,8 @@
- private_ci
- horeka

.use_gko_cuda114-openmpi-gnu11-llvm12:
image: ginkgohub/cuda:114-openmpi-gnu11-llvm12
.use_gko_cuda114-openmpi-gnu10-llvm12:
image: ginkgohub/cuda:114-openmpi-gnu10-llvm12
tags:
- private_ci
- nvidia-gpu
Expand Down
19 changes: 8 additions & 11 deletions .gitlab/scripts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@
script:
- mkdir -p ${CI_JOB_NAME} && cd ${CI_JOB_NAME}
- if [ -n "${CUDA_ARCH}" ]; then
CUDA_ARCH_STR=-DGINKGO_CUDA_ARCHITECTURES=${CUDA_ARCH};
CUDA_HOST_STR=-DCMAKE_CUDA_HOST_COMPILER=$(which ${CXX_COMPILER});
export CUDA_ARCH_STR=-DGINKGO_CUDA_ARCHITECTURES=${CUDA_ARCH};
fi
- if [[ "${MPI_AS_ROOT}" == "ON" ]];then
export OMPI_ALLOW_RUN_AS_ROOT=1;
Expand All @@ -32,12 +31,12 @@
- if [[ "${BUILD_MPI}" == "ON" ]]; then
MPI_STR=-DGINKGO_MPI_EXEC_SUFFIX=${MPI_SUFFIX};
fi
- export CC=${C_COMPILER} CXX=${CXX_COMPILER} CUDAHOSTCXX=${CXX_COMPILER} CUDACXX=${CUDA_COMPILER}
- cmake ${CI_PROJECT_DIR}${CI_PROJECT_DIR_SUFFIX}
-GNinja
-DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER}
-DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE}
-DCMAKE_BUILD_TYPE=${BUILD_TYPE}
-DCMAKE_CXX_FLAGS="${CXX_FLAGS}" -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS}
${EXTRA_CMAKE_FLAGS} ${CUDA_ARCH_STR} ${CUDA_HOST_STR}
${EXTRA_CMAKE_FLAGS} ${CUDA_ARCH_STR}
-DGINKGO_COMPILER_FLAGS=${GKO_COMPILER_FLAGS}
-DGINKGO_DEVEL_TOOLS=OFF -DGINKGO_BUILD_REFERENCE=${BUILD_REFERENCE}
-DGINKGO_BUILD_OMP=${BUILD_OMP} -DGINKGO_BUILD_CUDA=${BUILD_CUDA}
Expand All @@ -64,8 +63,7 @@
script:
- mkdir -p ${CI_JOB_NAME} && cd ${CI_JOB_NAME}
- if [ -n "${CUDA_ARCH}" ]; then
CUDA_ARCH_STR=-DGINKGO_CUDA_ARCHITECTURES=${CUDA_ARCH};
CUDA_HOST_STR=-DCMAKE_CUDA_HOST_COMPILER=$(which ${CXX_COMPILER});
export CUDA_ARCH_STR=-DGINKGO_CUDA_ARCHITECTURES=${CUDA_ARCH};
fi
- if [ -n "${SYCL_DEVICE_TYPE}" ]; then export SYCL_DEVICE_TYPE; fi
- if [ -n "${SYCL_DEVICE_FILTER}" ]; then export SYCL_DEVICE_FILTER; fi
Expand All @@ -77,12 +75,11 @@
- if [[ "${BUILD_MPI}" == "ON" ]]; then
MPI_STR=-DGINKGO_MPI_EXEC_SUFFIX=${MPI_SUFFIX};
fi
- export CC=${C_COMPILER} CXX=${CXX_COMPILER} CUDAHOSTCXX=${CXX_COMPILER} CUDACXX=${CUDA_COMPILER}
- cmake ${CI_PROJECT_DIR}${CI_PROJECT_DIR_SUFFIX}
-GNinja
-DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER}
-DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE}
-GNinja -DCMAKE_BUILD_TYPE=${BUILD_TYPE}
-DCMAKE_CXX_FLAGS="${CXX_FLAGS}" -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS}
${EXTRA_CMAKE_FLAGS} ${CUDA_ARCH_STR} ${CUDA_HOST_STR}
${EXTRA_CMAKE_FLAGS} ${CUDA_ARCH_STR}
-DGINKGO_COMPILER_FLAGS=${GKO_COMPILER_FLAGS}
-DGINKGO_DEVEL_TOOLS=OFF -DGINKGO_BUILD_REFERENCE=${BUILD_REFERENCE}
-DGINKGO_BUILD_OMP=${BUILD_OMP} -DGINKGO_BUILD_CUDA=${BUILD_CUDA}
Expand Down
21 changes: 6 additions & 15 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,4 @@
cmake_minimum_required(VERSION 3.13)

# Use *_ROOT environment variables for find_package calls
cmake_policy(SET CMP0074 NEW)

# Let CAS handle the CUDA architecture flags (for now)
# Windows still gives CMP0104 warning if putting it in cuda.
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
cmake_policy(SET CMP0104 OLD)
endif()
cmake_minimum_required(VERSION 3.16)

project(Ginkgo LANGUAGES C CXX VERSION 1.7.0 DESCRIPTION "A numerical linear algebra library targeting many-core architectures")
set(Ginkgo_VERSION_TAG "develop")
Expand Down Expand Up @@ -69,7 +60,6 @@ set(GINKGO_CUDA_COMPILER_FLAGS "" CACHE STRING
"Set the required NVCC compiler flags, mainly used for warnings. Current default is an empty string")
set(GINKGO_CUDA_ARCHITECTURES "Auto" CACHE STRING
"A list of target NVIDIA GPU architectures. See README.md for more detail.")
option(GINKGO_CUDA_DEFAULT_HOST_COMPILER "Tell Ginkgo to not automatically set the CUDA host compiler" OFF)
# the details of fine/coarse grain memory and unsafe atomic are available https://docs.olcf.ornl.gov/systems/crusher_quick_start_guide.html#floating-point-fp-atomic-operations-and-coarse-fine-grained-memory-allocations
option(GINKGO_HIP_AMD_UNSAFE_ATOMIC "Compiler uses unsafe floating point atomic (only for AMD GPU and ROCM >= 5). Default is ON because we use hipMalloc, which is always on coarse grain. Must turn off when allocating memory on fine grain" ON)
set(GINKGO_HIP_COMPILER_FLAGS "" CACHE STRING
Expand Down Expand Up @@ -201,10 +191,11 @@ check_include_file_cxx(cxxabi.h GKO_HAVE_CXXABI_H)

# Automatically find PAPI and search for the required 'sde' component
set(GINKGO_HAVE_PAPI_SDE 0)
find_package(PAPI OPTIONAL_COMPONENTS sde)
if(PAPI_sde_FOUND)
set(GINKGO_HAVE_PAPI_SDE 1)
endif()
# PAPI is temporarily disabled
#find_package(PAPI OPTIONAL_COMPONENTS sde)
#if(PAPI_sde_FOUND)
# set(GINKGO_HAVE_PAPI_SDE 1)
#endif()

# Automatically find TAU
set(GINKGO_HAVE_TAU 0)
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ Prerequisites

For Ginkgo core library:

* _cmake 3.13+_
* _cmake 3.16+_
* C++14 compliant compiler, one of:
* _gcc 5.5+_
* _clang 3.9+_
Expand All @@ -47,6 +47,7 @@ For Ginkgo core library:

The Ginkgo CUDA module has the following __additional__ requirements:

* _cmake 3.18+_ (if CUDA was installed through the NVIDIA HPC SDK, _cmake 3.22+_ is required)
* _CUDA 10.1+_ or _NVHPC Package 22.7+_
* Any host compiler restrictions your version of CUDA may impose also apply
here. For the newest CUDA version, this information can be found in the
Expand Down
7 changes: 2 additions & 5 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@ function(ginkgo_benchmark_cusparse_linops type def)
endif()
# make the dependency public to catch issues
target_compile_definitions(cusparse_linops_${type} PUBLIC ${def})
target_link_libraries(cusparse_linops_${type} Ginkgo::ginkgo ${CUDA_RUNTIME_LIBS} ${CUBLAS} ${CUSPARSE})
target_include_directories(cusparse_linops_${type} SYSTEM PRIVATE ${CUDA_INCLUDE_DIRS})
target_compile_definitions(cusparse_linops_${type} PRIVATE ALLOWMP=1)
target_link_libraries(cusparse_linops_${type} Ginkgo::ginkgo CUDA::cudart CUDA::cublas CUDA::cusparse)
endfunction()

function(ginkgo_benchmark_hipsparse_linops type def)
Expand Down Expand Up @@ -122,8 +120,7 @@ if (GINKGO_BUILD_CUDA)
ginkgo_benchmark_cusparse_linops(z GKO_BENCHMARK_USE_DOUBLE_COMPLEX_PRECISION)
ginkgo_benchmark_cusparse_linops(c GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION)
add_library(cuda_timer utils/cuda_timer.cpp)
target_link_libraries(cuda_timer ginkgo ${CUDA_RUNTIME_LIBS})
target_include_directories(cuda_timer SYSTEM PRIVATE ${CUDA_INCLUDE_DIRS})
target_link_libraries(cuda_timer ginkgo CUDA::cudart)
endif()
if (GINKGO_BUILD_HIP)
ginkgo_benchmark_hipsparse_linops(d GKO_BENCHMARK_USE_DOUBLE_PRECISION)
Expand Down
2 changes: 0 additions & 2 deletions benchmark/utils/cuda_linops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -438,9 +438,7 @@ class CusparseCsrEx
trans_(CUSPARSE_OPERATION_NON_TRANSPOSE),
buffer_(exec)
{
#ifdef ALLOWMP
algmode_ = CUSPARSE_ALG_MERGE_PATH;
#endif // ALLOWMP
}

private:
Expand Down
2 changes: 1 addition & 1 deletion cmake/DownloadNonCMakeCMakeLists.txt.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.9)
cmake_minimum_required(VERSION 3.16)
project(${package_name})

include(ExternalProject)
Expand Down
21 changes: 3 additions & 18 deletions cmake/GinkgoConfig.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,8 @@ set(GINKGO_IWYU_PATH @GINKGO_IWYU_PATH@)

set(GINKGO_JACOBI_FULL_OPTIMIZATIONS @GINKGO_JACOBI_FULL_OPTIMIZATIONS@)

set(GINKGO_CUDA_ARCHITECTURES "@GINKGO_CUDA_ARCHITECTURES@")
set(GINKGO_CUDA_DEFAULT_HOST_COMPILER @GINKGO_CUDA_DEFAULT_HOST_COMPILER@)
set(GINKGO_CUDA_ARCH_FLAGS "@GINKGO_CUDA_ARCH_FLAGS@")
set(GINKGO_CUDA_ARCHITECTURES "@CMAKE_CUDA_ARCHITECTURES@")
set(GINKGO_CUDA_HOST_COMPILER "@CMAKE_CUDA_HOST_COMPILER@")

set(GINKGO_HIP_COMPILER_FLAGS "@GINKGO_HIP_COMPILER_FLAGS@")
set(GINKGO_HIP_HCC_COMPILER_FLAGS "@GINKGO_HIP_HCC_COMPILER_FLAGS@")
Expand Down Expand Up @@ -123,11 +122,6 @@ set(GINKGO_CUDA_COMPILER "@CMAKE_CUDA_COMPILER@")
set(GINKGO_CUDA_COMPILER_VERSION @CMAKE_CUDA_COMPILER_VERSION@)
set(GINKGO_CUDA_HOST_LINK_LAUNCHER "@CMAKE_CUDA_HOST_LINK_LAUNCHER@")

set(GINKGO_CUBLAS_LIBRARIES "@CUBLAS@")
set(GINKGO_CUSPARSE_LIBRARIES "@CUSPARSE@")
set(GINKGO_CUDA_LIBRARIES "@CUDA_RUNTIME_LIBS@")
set(GINKGO_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES@")

set(GINKGO_CUDA_FLAGS "@CMAKE_CUDA_FLAGS_MODIFY@")
set(GINKGO_CUDA_FLAGS_DEBUG "@CMAKE_CUDA_FLAGS_DEBUG_MODIFY@")
set(GINKGO_CUDA_FLAGS_RELEASE "@CMAKE_CUDA_FLAGS_RELEASE_MODIFY@")
Expand All @@ -148,16 +142,6 @@ set(VTune_PATH "@VTune_PATH@")
# NOTE: we do not export benchmarks, examples, tests or devel tools
# so `third_party` libraries are currently unneeded.

# propagate CUDA_HOST_COMPILER if needed
if (GINKGO_BUILD_CUDA OR (GINKGO_BUILD_HIP
AND GINKGO_HIP_PLATFORM MATCHES "${GINKGO_HIP_PLATFORM_NVIDIA_REGEX}"))
if (GINKGO_CUDA_HOST_COMPILER AND NOT CMAKE_CUDA_HOST_COMPILER
AND EXISTS "${GINKGO_CUDA_HOST_COMPILER}")
message(STATUS "Ginkgo: Setting CUDA host compiler to ${GINKGO_CUDA_HOST_COMPILER}")
set(CMAKE_CUDA_HOST_COMPILER "${GINKGO_CUDA_HOST_COMPILER}" CACHE STRING "" FORCE)
endif()
endif()

if(GINKGO_HAVE_PAPI_SDE)
find_package(PAPI REQUIRED OPTIONAL_COMPONENTS sde)
endif()
Expand All @@ -180,6 +164,7 @@ endif()
# For details, see https://gitlab.kitware.com/cmake/cmake/issues/18614
if((NOT GINKGO_BUILD_SHARED_LIBS) AND GINKGO_BUILD_CUDA)
enable_language(CUDA)
find_package(CUDAToolkit REQUIRED)
find_package(NVTX REQUIRED)
endif()

Expand Down
40 changes: 40 additions & 0 deletions cmake/Modules/CudaArchitectureSelector.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,15 @@
# The command has the same result as ``cas_target_cuda_architectures``. It does
# not add the compiler flags to the target, but stores the compiler flags in
# the variable (string).
#
# cas_variable_cmake_cuda_architectures(
# [<variable>] # variable for storing architecture list
# [<spec>] # list of architecture specifications
# )
#
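# The command prepares an architecture list supported by the CMake
# ``CUDA_ARCHITECTURES`` target property and ``CMAKE_CUDA_ARCHITECTURES``
# variable and stores it in ``<variable>``. The architecture specification
# follows the ``ARCHITECTURES`` specification list described below.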
#
#
# ``ARCHITECTURES`` specification list
Expand Down Expand Up @@ -404,3 +413,34 @@ function(cas_variable_cuda_architectures variable)
cas_get_compiler_flags(flags ${ARGN})
set(${variable} "${flags}" PARENT_SCOPE)
endfunction()


# Build a CUDA architecture list and return it through ``variable``.
#
# Arguments:
#   variable - name of the variable that is set in the caller's scope
#   ARGN     - architecture specification: ``All``, ``Auto``, or a list whose
#              entries either match ``cas_spec_regex`` or are handed to
#              ``cas_get_architectures_by_name``
#
# ``All`` selects the list reported by ``cas_get_supported_architectures``.
# ``Auto`` selects the list reported by ``cas_get_onboard_architectures`` and
# falls back to the supported list when that result evaluates to false.
# For entries matching ``cas_spec_regex``, capture group 1 is emitted with a
# ``-real`` suffix and capture group 3 with a ``-virtual`` suffix.
function(cas_variable_cmake_cuda_architectures variable)
    # Queried up front because both the ``All`` and ``Auto`` paths may need it.
    cas_get_supported_architectures(all_archs)
    if("${ARGN}" STREQUAL "Auto")
        cas_get_onboard_architectures(detected_archs)
        # Start from the supported list, override if something was detected.
        set(selected "${all_archs}")
        if(detected_archs)
            set(selected "${detected_archs}")
        endif()
    elseif("${ARGN}" STREQUAL "All")
        set(selected "${all_archs}")
    else()
        set(selected)
        foreach(spec IN LISTS ARGN)
            if(NOT spec MATCHES "${cas_spec_regex}")
                # Not a numeric spec: resolve it by name. The result is written
                # back into the loop variable, then appended element-wise.
                cas_get_architectures_by_name("${spec}" spec)
                list(APPEND selected ${spec})
            else()
                set(real_arch "${CMAKE_MATCH_1}")
                set(virtual_arch "${CMAKE_MATCH_3}")
                if(real_arch)
                    list(APPEND selected "${real_arch}-real")
                endif()
                if(virtual_arch)
                    list(APPEND selected "${virtual_arch}-virtual")
                endif()
            endif()
        endforeach()
    endif()
    set("${variable}" "${selected}" PARENT_SCOPE)
endfunction()
4 changes: 2 additions & 2 deletions cmake/Modules/FindNVTX.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
# ``NVTX_FOUND``
# If false, do not try to use the NVTX library.

find_path(NVTX3_INCLUDE_DIR NAMES nvToolsExt.h HINTS ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}/nvtx3)
find_path(NVTX_INCLUDE_DIR NAMES nvToolsExt.h HINTS ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
find_path(NVTX3_INCLUDE_DIR NAMES nvToolsExt.h HINTS ${CUDAToolkit_INCLUDE_DIRS}/nvtx3)
find_path(NVTX_INCLUDE_DIR NAMES nvToolsExt.h HINTS ${CUDAToolkit_INCLUDE_DIRS})
mark_as_advanced(NVTX3_INCLUDE_DIR)
mark_as_advanced(NVTX_INCLUDE_DIR)
include(FindPackageHandleStandardArgs)
Expand Down
1 change: 0 additions & 1 deletion cmake/create_test.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,6 @@ function(ginkgo_create_cuda_test_internal test_name filename test_target_name)
target_compile_definitions(${test_target_name} PRIVATE GKO_COMPILING_CUDA)
target_compile_options(${test_target_name}
PRIVATE
$<$<COMPILE_LANGUAGE:CUDA>:${GINKGO_CUDA_ARCH_FLAGS}>
$<$<COMPILE_LANGUAGE:CUDA>:${GINKGO_CUDA_COMPILER_FLAGS}>)
if(MSVC)
target_compile_options(${test_target_name}
Expand Down
Loading

0 comments on commit 7b4134d

Please sign in to comment.