diff --git a/include/alpaka/test/KernelExecutionFixture.hpp b/include/alpaka/test/KernelExecutionFixture.hpp index 0e59344497e..893f85bab94 100644 --- a/include/alpaka/test/KernelExecutionFixture.hpp +++ b/include/alpaka/test/KernelExecutionFixture.hpp @@ -22,7 +22,7 @@ namespace alpaka::test { //! The fixture for executing a kernel on a given accelerator. - template + template class KernelExecutionFixture { public: @@ -80,7 +80,15 @@ namespace alpaka::test getPtrNative(bufAccResult), std::forward(args)...); - exec(m_queue, m_workDiv, kernelFnObj, getPtrNative(bufAccResult), std::forward(args)...); + if constexpr(TCooperative) + execCooperative( + m_queue, + m_workDiv, + kernelFnObj, + getPtrNative(bufAccResult), + std::forward(args)...); + else + exec(m_queue, m_workDiv, kernelFnObj, getPtrNative(bufAccResult), std::forward(args)...); // Copy the result value to the host auto bufHostResult = allocBuf(m_devHost, static_cast(1u)); diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index f3ea6672d1a..9094dbf3406 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -21,6 +21,7 @@ add_subdirectory("block/sync/") add_subdirectory("core/") add_subdirectory("dev/") add_subdirectory("event/") +add_subdirectory("grid/") add_subdirectory("idx/") add_subdirectory("intrinsic/") add_subdirectory("kernel/") diff --git a/test/unit/grid/CMakeLists.txt b/test/unit/grid/CMakeLists.txt new file mode 100644 index 00000000000..5c7bcfb8f7b --- /dev/null +++ b/test/unit/grid/CMakeLists.txt @@ -0,0 +1,20 @@ +# +# Copyright 2024 Mykhailo Varvarin +# SPDX-License-Identifier: MPL-2.0 +# + +set(_TARGET_NAME "gridSyncTest") + +append_recursive_files_add_to_src_group("src/" "src/" "cpp" _FILES_SOURCE) + +alpaka_add_executable( + ${_TARGET_NAME} + ${_FILES_SOURCE}) +target_link_libraries( + ${_TARGET_NAME} + PRIVATE common) + +set_target_properties(${_TARGET_NAME} PROPERTIES FOLDER "test/unit") +target_compile_definitions(${_TARGET_NAME} PRIVATE 
"-DTEST_UNIT_GRID_SYNC") + +add_test(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME} ${_alpaka_TEST_OPTIONS}) diff --git a/test/unit/grid/src/GridSync.cpp b/test/unit/grid/src/GridSync.cpp new file mode 100644 index 00000000000..13f5107f308 --- /dev/null +++ b/test/unit/grid/src/GridSync.cpp @@ -0,0 +1,86 @@ +/* Copyright 2024 Mykhailo Varvarin + * SPDX-License-Identifier: MPL-2.0 + */ + +#include +#include +#include + +#include +#include + +class GridSyncTestKernel +{ +public: + static constexpr std::uint8_t blockThreadExtentPerDim() + { + return 2u; + } + + ALPAKA_NO_HOST_ACC_WARNING + template + ALPAKA_FN_ACC auto operator()(TAcc const& acc, bool* success, T* array) const -> void + { + using Idx = alpaka::Idx; + + // Get the index of the current thread within the grid and the grid extent and map them to 1D. + auto const gridThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + auto const gridThreadIdx1D = alpaka::mapIdx<1u>(gridThreadIdx, gridThreadExtent)[0u]; + auto const gridThreadExtent1D = gridThreadExtent.prod(); + + + // Write the thread index into the array passed in by the caller. + array[gridThreadIdx1D] = static_cast(gridThreadIdx1D); + + // Synchronize all threads in the grid. + alpaka::syncGridThreads(acc); + + // All other threads within the grid should now have written their index into the array. 
+ for(auto i = static_cast(0u); i < gridThreadExtent1D; ++i) + { + ALPAKA_CHECK(*success, static_cast(array[i]) == i); + } + } +}; + +TEMPLATE_LIST_TEST_CASE("synchronize", "[gridSync]", alpaka::test::TestAccs) +{ + using Acc = TestType; + using Dim = alpaka::Dim; + using Idx = alpaka::Idx; + + // Select the first device available on a system, for the chosen accelerator + auto const platformAcc = alpaka::Platform{}; + auto const devAcc = getDevByIdx(platformAcc, 0u); + + + auto const blockThreadExtentMax = alpaka::getAccDevProps(devAcc).m_blockThreadExtentMax; + auto threadsPerBlock = alpaka::elementwise_min( + blockThreadExtentMax, + alpaka::Vec::all(static_cast(GridSyncTestKernel::blockThreadExtentPerDim()))); + + auto elementsPerThread = alpaka::Vec::all(1); + auto blocksPerGrid = alpaka::Vec::all(1); + blocksPerGrid[0] = 200; + + // Allocate memory on the device. + alpaka::Vec, Idx> bufferExtent{ + blocksPerGrid.prod() * threadsPerBlock.prod() * elementsPerThread.prod()}; + auto deviceMemory = alpaka::allocBuf(devAcc, bufferExtent); + + GridSyncTestKernel kernel; + int maxBlocks = alpaka::getMaxActiveBlocks( + devAcc, + kernel, + threadsPerBlock, + elementsPerThread, + alpaka::getPtrNative(deviceMemory)); + + blocksPerGrid[0] = std::min(static_cast(maxBlocks), blocksPerGrid[0]); + constexpr bool IsCooperative = true; + alpaka::test::KernelExecutionFixture fixture( + alpaka::WorkDivMembers{blocksPerGrid, threadsPerBlock, elementsPerThread}); + + REQUIRE(fixture(kernel, alpaka::getPtrNative(deviceMemory))); +}