diff --git a/.jenkins b/.jenkins index 5a859420fd3..ae3bffd92d7 100644 --- a/.jenkins +++ b/.jenkins @@ -139,7 +139,7 @@ pipeline { dockerfile { filename 'Dockerfile.hipcc' dir 'scripts/docker' - additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:5.2' + additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:5.2-complete' label 'rocm-docker ' args '-v /tmp/ccache.kokkos:/tmp/ccache --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --env HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES' } @@ -181,7 +181,7 @@ pipeline { dockerfile { filename 'Dockerfile.hipcc' dir 'scripts/docker' - additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:5.6' + additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:5.6-complete' label 'rocm-docker' args '-v /tmp/ccache.kokkos:/tmp/ccache --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --env HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES' } diff --git a/algorithms/src/CMakeLists.txt b/algorithms/src/CMakeLists.txt index 16957789472..7cb47a316b5 100644 --- a/algorithms/src/CMakeLists.txt +++ b/algorithms/src/CMakeLists.txt @@ -30,5 +30,4 @@ KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkosalgorithms ${CMAKE_CURRENT_SOURCE_DIR} ) - - +KOKKOS_LINK_TPL(kokkoscontainers PUBLIC ROCTHRUST) diff --git a/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp b/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp index d87ab09e772..4c174b5fda9 100644 --- a/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp +++ b/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp @@ -63,6 +63,11 @@ #endif +#if defined(KOKKOS_ENABLE_ROCTHRUST) +#include +#include +#endif + #if defined(KOKKOS_ENABLE_ONEDPL) #include #include @@ -184,6 +189,26 @@ void sort_cudathrust(const Cuda& space, } #endif +#if defined(KOKKOS_ENABLE_ROCTHRUST) +template +void sort_rocthrust(const HIP& space, + const Kokkos::View& view, + MaybeComparator&&... maybeComparator) { + using ViewType = Kokkos::View; + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + if (view.extent(0) <= 1) { + return; + } + const auto exec = thrust::hip::par.on(space.hip_stream()); + auto first = ::Kokkos::Experimental::begin(view); + auto last = ::Kokkos::Experimental::end(view); + thrust::sort(exec, first, last, + std::forward(maybeComparator)...); +} +#endif + #if defined(KOKKOS_ENABLE_ONEDPL) template void sort_onedpl(const Kokkos::Experimental::SYCL& space, @@ -274,6 +299,14 @@ void sort_device_view_without_comparator( } #endif +#if defined(KOKKOS_ENABLE_ROCTHRUST) +template +void sort_device_view_without_comparator( + const HIP& exec, const Kokkos::View& view) { + sort_rocthrust(exec, view); +} +#endif + #if defined(KOKKOS_ENABLE_ONEDPL) template void sort_device_view_without_comparator( @@ -320,6 +353,15 @@ void sort_device_view_with_comparator( } #endif +#if defined(KOKKOS_ENABLE_ROCTHRUST) +template +void sort_device_view_with_comparator( + const HIP& exec, const Kokkos::View& view, + const ComparatorType& comparator) { + sort_rocthrust(exec, view, comparator); +} +#endif + #if defined(KOKKOS_ENABLE_ONEDPL) template void sort_device_view_with_comparator( diff --git a/cmake/KokkosCore_config.h.in b/cmake/KokkosCore_config.h.in index 33ebfd266b4..2df0f6c5205 100644 --- a/cmake/KokkosCore_config.h.in +++ b/cmake/KokkosCore_config.h.in @@ -58,6 +58,7 @@ #cmakedefine KOKKOS_ENABLE_LIBDL #cmakedefine KOKKOS_ENABLE_LIBQUADMATH #cmakedefine KOKKOS_ENABLE_ONEDPL +#cmakedefine KOKKOS_ENABLE_ROCTHRUST #cmakedefine KOKKOS_ARCH_ARMV80 #cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX diff --git a/cmake/Modules/FindTPLROCTHRUST.cmake b/cmake/Modules/FindTPLROCTHRUST.cmake new file mode 100644 index 00000000000..ff75e3a3eeb --- /dev/null +++ b/cmake/Modules/FindTPLROCTHRUST.cmake @@ -0,0 +1,15 @@ +# ROCm 5.6 and earlier set AMDGPU_TARGETS and GPU_TARGETS to all the supported +# architectures. Therefore, we end up compiling Kokkos for all the supported +# architecture. Starting with ROCm 5.7 AMDGPU_TARGETS and GPU_TARGETS are empty. +# It is the user's job to set the variables. Since we are injecting the +# architecture flag ourselves, we can let the variables empty. To replicate the +# behavior of ROCm 5.7 and later for earlier version of ROCm we set +# AMDGPU_TARGETS and GPU_TARGETS to empty and set the values in the cache. If +# the values are not cached, FIND_PACKAGE(rocthrust) will overwrite them. +SET(AMDGPU_TARGETS "" CACHE STRING "AMD GPU targets to compile for") +SET(GPU_TARGETS "" CACHE STRING "GPU targets to compile for") +FIND_PACKAGE(rocthrust REQUIRED) +KOKKOS_CREATE_IMPORTED_TPL(ROCTHRUST INTERFACE LINK_LIBRARIES roc::rocthrust) + +# Export ROCTHRUST as a Kokkos dependency +KOKKOS_EXPORT_CMAKE_TPL(ROCTHRUST) diff --git a/cmake/kokkos_tpls.cmake b/cmake/kokkos_tpls.cmake index f80d724f7f4..c9ebcd6b94b 100644 --- a/cmake/kokkos_tpls.cmake +++ b/cmake/kokkos_tpls.cmake @@ -40,6 +40,8 @@ ELSE() SET(ROCM_DEFAULT OFF) ENDIF() KOKKOS_TPL_OPTION(ROCM ${ROCM_DEFAULT}) +KOKKOS_TPL_OPTION(ROCTHRUST ${KOKKOS_ENABLE_HIP}) + IF(KOKKOS_ENABLE_SYCL AND NOT KOKKOS_HAS_TRILINOS) SET(ONEDPL_DEFAULT ON) ELSE() @@ -83,6 +85,7 @@ IF (NOT KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE) KOKKOS_IMPORT_TPL(ONEDPL INTERFACE) ENDIF() KOKKOS_IMPORT_TPL(LIBQUADMATH) +KOKKOS_IMPORT_TPL(ROCTHRUST) IF (Kokkos_ENABLE_DESUL_ATOMICS_EXTERNAL) find_package(desul REQUIRED COMPONENTS atomics) diff --git a/core/src/HIP/Kokkos_HIP_Instance.cpp b/core/src/HIP/Kokkos_HIP_Instance.cpp index 74bab397429..22c0db047f6 100644 --- a/core/src/HIP/Kokkos_HIP_Instance.cpp +++ b/core/src/HIP/Kokkos_HIP_Instance.cpp @@ -90,6 +90,13 @@ void HIPInternal::print_configuration(std::ostream &s) const { << '\n'; #endif + s << "macro KOKKOS_ENABLE_ROCTHRUST : " +#if defined(KOKKOS_ENABLE_ROCTHRUST) + << "defined\n"; +#else + << "undefined\n"; +#endif + for (int i : get_visible_devices()) { hipDeviceProp_t hipProp; KOKKOS_IMPL_HIP_SAFE_CALL(hipGetDeviceProperties(&hipProp, i));