From ac20ee567564305181296e1759b8944e6b2c8f8e Mon Sep 17 00:00:00 2001 From: Jan Stephan Date: Wed, 13 Sep 2023 10:31:53 +0200 Subject: [PATCH 01/23] Switch to 1.0.0-rc1 --- docs/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index cc5540c34306..56623cbd60fa 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -13,7 +13,7 @@ # The short X.Y version. version = u'1.0.0' # The full version, including alpha/beta/rc tags. -release = u'1.0.0-develop' +release = u'1.0.0-rc1' # The master toctree document. master_doc = 'index' From f5cf5cf65feccbbad7e330292a0e254f12de0132 Mon Sep 17 00:00:00 2001 From: Jan Stephan Date: Wed, 13 Sep 2023 11:29:16 +0200 Subject: [PATCH 02/23] Silence more nvcc warnings * Add missing ALPAKA_UNREACHABLE * Silence more warnings Co-authored-by: Bernhard Manfred Gruber --- include/alpaka/idx/MapIdx.hpp | 5 +++++ test/common/devCompileOptions.cmake | 8 +++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/include/alpaka/idx/MapIdx.hpp b/include/alpaka/idx/MapIdx.hpp index af0ccea41701..f081252878c0 100644 --- a/include/alpaka/idx/MapIdx.hpp +++ b/include/alpaka/idx/MapIdx.hpp @@ -5,6 +5,7 @@ #pragma once #include "alpaka/core/Common.hpp" +#include "alpaka/core/Unreachable.hpp" #include "alpaka/vec/Traits.hpp" #include "alpaka/vec/Vec.hpp" @@ -50,6 +51,8 @@ namespace alpaka } else static_assert(!sizeof(TElem), "Not implemented"); + + ALPAKA_UNREACHABLE({}); } //! 
Maps an N dimensional index to a N dimensional position based on the pitches of a view without padding or a @@ -89,5 +92,7 @@ namespace alpaka } else static_assert(!sizeof(TElem), "Not implemented"); + + ALPAKA_UNREACHABLE({}); } } // namespace alpaka diff --git a/test/common/devCompileOptions.cmake b/test/common/devCompileOptions.cmake index 6557c1a078af..f8b3fe36f145 100644 --- a/test/common/devCompileOptions.cmake +++ b/test/common/devCompileOptions.cmake @@ -146,9 +146,15 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Inte list(APPEND alpaka_DEV_COMPILE_OPTIONS "-Wno-extra-semi-stmt") # Silence warnings caused by nvcc-generated code and -Weverything - list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-reserved-id-macro>") + list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-missing-noreturn>") list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-missing-variable-declarations>") list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-old-style-cast>") + list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-overlength-strings>") + list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-reserved-identifier>") + list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-reserved-id-macro>") + list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-unused-macros>") + list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-used-but-marked-unused>") + list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-zero-as-null-pointer-constant>") if(CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0) list(APPEND alpaka_DEV_COMPILE_OPTIONS "-Wno-poison-system-directories") From 1b4b1c4f8c90b3841bf855209fac5177e4d557b5 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Wed, 13 Sep 2023 12:31:58 +0200 Subject: [PATCH 03/23] Disable activemask for SYCL Fixes: #2124 --- 
include/alpaka/warp/WarpGenericSycl.hpp | 26 +++--- test/unit/warp/src/Activemask.cpp | 106 ++++++++++++++---------- 2 files changed, 77 insertions(+), 55 deletions(-) diff --git a/include/alpaka/warp/WarpGenericSycl.hpp b/include/alpaka/warp/WarpGenericSycl.hpp index a9b3a56fe69f..e420b615585b 100644 --- a/include/alpaka/warp/WarpGenericSycl.hpp +++ b/include/alpaka/warp/WarpGenericSycl.hpp @@ -49,16 +49,22 @@ namespace alpaka::warp::trait // Restrict to warpSize <= 32 for now. static auto activemask(warp::WarpGenericSycl const& warp) -> std::uint32_t { - // SYCL has no way of querying this. Since sub-group functions have to be executed in convergent code - // regions anyway we return the full mask. - auto const sub_group = warp.m_item_warp.get_sub_group(); - auto const mask = sycl::ext::oneapi::group_ballot(sub_group, true); - // FIXME This should be std::uint64_t on AMD GCN architectures and on CPU, - // but the former is not targeted in alpaka and CPU case is not supported in SYCL yet. - // Restrict to warpSize <= 32 for now. - std::uint32_t bits = 0; - mask.extract_bits(bits); - return bits; + static_assert(!sizeof(warp), "activemask is not supported on SYCL"); + // SYCL does not have an API to get the activemask. It is also questionable (to me, bgruber) whether an + // "activemask" even exists on some hardware architectures, since the idea is bound to threads being + // "turned off" when they take different control flow in a warp. A SYCL implementation could run each + // thread as a SIMD lane, in which cause the "thread" is always active, but some SIMD lanes are either + // predicated off, or side-effects are masked out when writing them back. + // + // An implementation via oneAPI's sycl::ext::oneapi::group_ballot causes UB, because activemask is expected + // to be callable when less than all threads are active in a warp (CUDA). But SYCL requires all threads of + // a group to call the function. 
+ // + // Intel's CUDA -> SYCL migration tool also suggests that there is no direct equivalent and the user must + // rewrite their kernel logic. See also: + // https://oneapi-src.github.io/SYCLomatic/dev_guide/diagnostic_ref/dpct1086.html + + return ~std::uint32_t{0}; } }; diff --git a/test/unit/warp/src/Activemask.cpp b/test/unit/warp/src/Activemask.cpp index d433698a836c..223f3535f670 100644 --- a/test/unit/warp/src/Activemask.cpp +++ b/test/unit/warp/src/Activemask.cpp @@ -2,6 +2,8 @@ * SPDX-License-Identifier: MPL-2.0 */ +#include +#include #include #include #include @@ -63,63 +65,77 @@ struct alpaka::trait::WarpSize TEMPLATE_LIST_TEST_CASE("activemask", "[warp]", alpaka::test::TestAccs) { using Acc = TestType; - using Dim = alpaka::Dim; - using Idx = alpaka::Idx; - - auto const platform = alpaka::Platform{}; - auto const dev = alpaka::getDevByIdx(platform, 0); - auto const warpExtents = alpaka::getWarpSizes(dev); - for(auto const warpExtent : warpExtents) + if constexpr(alpaka::accMatchesTags< + Acc, + alpaka::TagCpuSycl, + alpaka::TagGpuSyclIntel, + alpaka::TagFpgaSyclIntel, + alpaka::TagGenericSycl>) { - auto const scalar = Dim::value == 0 || warpExtent == 1; - if(scalar) - { - alpaka::test::KernelExecutionFixture fixture(alpaka::Vec::all(4)); - CHECK(fixture(ActivemaskSingleThreadWarpTestKernel{})); - } - else + std::cout << "Test disabled for SYCL\n"; + return; + } + else + { + using Dim = alpaka::Dim; + using Idx = alpaka::Idx; + + auto const platform = alpaka::Platform{}; + auto const dev = alpaka::getDevByIdx(platform, 0); + auto const warpExtents = alpaka::getWarpSizes(dev); + for(auto const warpExtent : warpExtents) { - using ExecutionFixture = alpaka::test::KernelExecutionFixture; - auto const gridBlockExtent = alpaka::Vec::all(2); - // Enforce one warp per thread block - auto blockThreadExtent = alpaka::Vec::ones(); - blockThreadExtent[0] = static_cast(warpExtent); - auto const threadElementExtent = alpaka::Vec::ones(); - auto workDiv = typename 
ExecutionFixture::WorkDiv{gridBlockExtent, blockThreadExtent, threadElementExtent}; - auto fixture = ExecutionFixture{workDiv}; - if(warpExtent == 4) + auto const scalar = Dim::value == 0 || warpExtent == 1; + if(scalar) { - for(auto inactiveThreadIdx = 0u; inactiveThreadIdx < warpExtent; inactiveThreadIdx++) - { - CHECK(fixture(ActivemaskMultipleThreadWarpTestKernel<4>{}, inactiveThreadIdx)); - } + alpaka::test::KernelExecutionFixture fixture(alpaka::Vec::all(4)); + CHECK(fixture(ActivemaskSingleThreadWarpTestKernel{})); } - else if(warpExtent == 8) + else { - for(auto inactiveThreadIdx = 0u; inactiveThreadIdx < warpExtent; inactiveThreadIdx++) + using ExecutionFixture = alpaka::test::KernelExecutionFixture; + auto const gridBlockExtent = alpaka::Vec::all(2); + // Enforce one warp per thread block + auto blockThreadExtent = alpaka::Vec::ones(); + blockThreadExtent[0] = static_cast(warpExtent); + auto const threadElementExtent = alpaka::Vec::ones(); + auto workDiv = + typename ExecutionFixture::WorkDiv{gridBlockExtent, blockThreadExtent, threadElementExtent}; + auto fixture = ExecutionFixture{workDiv}; + if(warpExtent == 4) { - CHECK(fixture(ActivemaskMultipleThreadWarpTestKernel<8>{}, inactiveThreadIdx)); + for(auto inactiveThreadIdx = 0u; inactiveThreadIdx < warpExtent; inactiveThreadIdx++) + { + CHECK(fixture(ActivemaskMultipleThreadWarpTestKernel<4>{}, inactiveThreadIdx)); + } } - } - else if(warpExtent == 16) - { - for(auto inactiveThreadIdx = 0u; inactiveThreadIdx < warpExtent; inactiveThreadIdx++) + else if(warpExtent == 8) { - CHECK(fixture(ActivemaskMultipleThreadWarpTestKernel<16>{}, inactiveThreadIdx)); + for(auto inactiveThreadIdx = 0u; inactiveThreadIdx < warpExtent; inactiveThreadIdx++) + { + CHECK(fixture(ActivemaskMultipleThreadWarpTestKernel<8>{}, inactiveThreadIdx)); + } } - } - else if(warpExtent == 32) - { - for(auto inactiveThreadIdx = 0u; inactiveThreadIdx < warpExtent; inactiveThreadIdx++) + else if(warpExtent == 16) { - 
CHECK(fixture(ActivemaskMultipleThreadWarpTestKernel<32>{}, inactiveThreadIdx)); + for(auto inactiveThreadIdx = 0u; inactiveThreadIdx < warpExtent; inactiveThreadIdx++) + { + CHECK(fixture(ActivemaskMultipleThreadWarpTestKernel<16>{}, inactiveThreadIdx)); + } } - } - else if(warpExtent == 64) - { - for(auto inactiveThreadIdx = 0u; inactiveThreadIdx < warpExtent; inactiveThreadIdx++) + else if(warpExtent == 32) + { + for(auto inactiveThreadIdx = 0u; inactiveThreadIdx < warpExtent; inactiveThreadIdx++) + { + CHECK(fixture(ActivemaskMultipleThreadWarpTestKernel<32>{}, inactiveThreadIdx)); + } + } + else if(warpExtent == 64) { - CHECK(fixture(ActivemaskMultipleThreadWarpTestKernel<64>{}, inactiveThreadIdx)); + for(auto inactiveThreadIdx = 0u; inactiveThreadIdx < warpExtent; inactiveThreadIdx++) + { + CHECK(fixture(ActivemaskMultipleThreadWarpTestKernel<64>{}, inactiveThreadIdx)); + } } } } From fe302547c4a67320d27417e87f145dcd4a7c115f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Widera?= Date: Thu, 14 Sep 2023 13:57:18 +0200 Subject: [PATCH 04/23] refactor template order `allocMappedBufIfSupported` Move template `TPlatform` as the last template. There is no need to provide the platform template signature if we pass the platform as an instance. --- include/alpaka/mem/buf/Traits.hpp | 4 ++-- test/integ/axpy/src/axpy.cpp | 6 +++--- test/integ/hostOnlyAPI/src/hostOnlyAPI.cpp | 3 +-- test/integ/mandelbrot/src/mandelbrot.cpp | 2 +- test/integ/matMul/src/matMul.cpp | 2 +- test/integ/separableCompilation/src/main.cpp | 6 +++--- test/unit/math/src/Buffer.hpp | 5 +---- test/unit/mem/fence/src/FenceTest.cpp | 2 +- 8 files changed, 13 insertions(+), 17 deletions(-) diff --git a/include/alpaka/mem/buf/Traits.hpp b/include/alpaka/mem/buf/Traits.hpp index 5494b7fa5619..33e7c9bda7f1 100644 --- a/include/alpaka/mem/buf/Traits.hpp +++ b/include/alpaka/mem/buf/Traits.hpp @@ -164,14 +164,14 @@ namespace alpaka //! 
this function is provided for convenience in the cases where the difference is not relevant, //! and the pinned/mapped memory is only used as a performance optimisation. //! - //! \tparam TPlatform The platform from which the buffer is accessible. //! \tparam TElem The element type of the returned buffer. //! \tparam TIdx The linear index type of the buffer. //! \tparam TExtent The extent type of the buffer. + //! \tparam TPlatform The platform from which the buffer is accessible. //! \param host The host device to allocate the buffer on. //! \param extent The extent of the buffer. //! \return The newly allocated buffer. - template + template ALPAKA_FN_HOST auto allocMappedBufIfSupported( DevCpu const& host, TPlatform const& platform, diff --git a/test/integ/axpy/src/axpy.cpp b/test/integ/axpy/src/axpy.cpp index 29a82eb96b55..64789d47b596 100644 --- a/test/integ/axpy/src/axpy.cpp +++ b/test/integ/axpy/src/axpy.cpp @@ -109,9 +109,9 @@ TEMPLATE_LIST_TEST_CASE("axpy", "[axpy]", TestAccs) << std::endl; // Allocate host memory buffers in pinned memory. 
- auto memBufHostX = alpaka::allocMappedBufIfSupported(devHost, platformAcc, extent); - auto memBufHostOrigY = alpaka::allocMappedBufIfSupported(devHost, platformAcc, extent); - auto memBufHostY = alpaka::allocMappedBufIfSupported(devHost, platformAcc, extent); + auto memBufHostX = alpaka::allocMappedBufIfSupported(devHost, platformAcc, extent); + auto memBufHostOrigY = alpaka::allocMappedBufIfSupported(devHost, platformAcc, extent); + auto memBufHostY = alpaka::allocMappedBufIfSupported(devHost, platformAcc, extent); Val* const pBufHostX = alpaka::getPtrNative(memBufHostX); Val* const pBufHostOrigY = alpaka::getPtrNative(memBufHostOrigY); Val* const pBufHostY = alpaka::getPtrNative(memBufHostY); diff --git a/test/integ/hostOnlyAPI/src/hostOnlyAPI.cpp b/test/integ/hostOnlyAPI/src/hostOnlyAPI.cpp index 5214dfd1b64b..59eb0bdd7600 100644 --- a/test/integ/hostOnlyAPI/src/hostOnlyAPI.cpp +++ b/test/integ/hostOnlyAPI/src/hostOnlyAPI.cpp @@ -47,8 +47,7 @@ TEMPLATE_LIST_TEST_CASE("hostOnlyAPI", "[hostOnlyAPI]", TestAccs) HostQueue hostQueue(host); // host buffer - auto h_buffer1 - = alpaka::allocMappedBufIfSupported, int, Idx>(host, platformAcc, Vec1D{Idx{42}}); + auto h_buffer1 = alpaka::allocMappedBufIfSupported(host, platformAcc, Vec1D{Idx{42}}); INFO( "host buffer allocated at " << alpaka::getPtrNative(h_buffer1) << " with " << alpaka::getExtentProduct(h_buffer1) << " element(s)"); diff --git a/test/integ/mandelbrot/src/mandelbrot.cpp b/test/integ/mandelbrot/src/mandelbrot.cpp index b56b11ca5802..ce94e178de16 100644 --- a/test/integ/mandelbrot/src/mandelbrot.cpp +++ b/test/integ/mandelbrot/src/mandelbrot.cpp @@ -309,7 +309,7 @@ TEMPLATE_LIST_TEST_CASE("mandelbrot", "[mandelbrot]", TestAccs) << std::endl; // allocate host memory, potentially pinned for faster copy to/from the accelerator. 
- auto bufColorHost = alpaka::allocMappedBufIfSupported(devHost, platformAcc, extent); + auto bufColorHost = alpaka::allocMappedBufIfSupported(devHost, platformAcc, extent); // Allocate the buffer on the accelerator. auto bufColorAcc = alpaka::allocBuf(devAcc, extent); diff --git a/test/integ/matMul/src/matMul.cpp b/test/integ/matMul/src/matMul.cpp index bf45822e9b69..b76c3f13f4b3 100644 --- a/test/integ/matMul/src/matMul.cpp +++ b/test/integ/matMul/src/matMul.cpp @@ -217,7 +217,7 @@ TEMPLATE_LIST_TEST_CASE("matMul", "[matMul]", TestAccs) auto bufBHost = alpaka::createView(devHost, bufBHost1d.data(), extentB); // Allocate C and set it to zero. - auto bufCHost = alpaka::allocMappedBufIfSupported(devHost, platformAcc, extentC); + auto bufCHost = alpaka::allocMappedBufIfSupported(devHost, platformAcc, extentC); alpaka::memset(queueHost, bufCHost, 0u); // Allocate the buffers on the accelerator. diff --git a/test/integ/separableCompilation/src/main.cpp b/test/integ/separableCompilation/src/main.cpp index ff7b79853780..cec49acf5049 100644 --- a/test/integ/separableCompilation/src/main.cpp +++ b/test/integ/separableCompilation/src/main.cpp @@ -103,9 +103,9 @@ TEMPLATE_LIST_TEST_CASE("separableCompilation", "[separableCompilation]", TestAc << ", numElements:" << numElements << ")" << std::endl; // Allocate host memory buffers, potentially pinned for faster copy to/from the accelerator. 
- auto memBufHostA = alpaka::allocMappedBufIfSupported(devHost, platformAcc, extent); - auto memBufHostB = alpaka::allocMappedBufIfSupported(devHost, platformAcc, extent); - auto memBufHostC = alpaka::allocMappedBufIfSupported(devHost, platformAcc, extent); + auto memBufHostA = alpaka::allocMappedBufIfSupported(devHost, platformAcc, extent); + auto memBufHostB = alpaka::allocMappedBufIfSupported(devHost, platformAcc, extent); + auto memBufHostC = alpaka::allocMappedBufIfSupported(devHost, platformAcc, extent); // Initialize the host input vectors for(Idx i = 0; i < numElements; ++i) diff --git a/test/unit/math/src/Buffer.hpp b/test/unit/math/src/Buffer.hpp index 2bfc7f156ba5..ac0f002c29c0 100644 --- a/test/unit/math/src/Buffer.hpp +++ b/test/unit/math/src/Buffer.hpp @@ -62,10 +62,7 @@ namespace alpaka // Constructor needs to initialize all Buffer. Buffer(DevAcc const& devAcc) : devHost{alpaka::getDevByIdx(platformHost, 0)} - , hostBuffer{alpaka::allocMappedBufIfSupported( - devHost, - platformAcc, - Tcapacity)} + , hostBuffer{alpaka::allocMappedBufIfSupported(devHost, platformAcc, Tcapacity)} , devBuffer{alpaka::allocBuf(devAcc, Tcapacity)} , pHostBuffer{alpaka::getPtrNative(hostBuffer)} , pDevBuffer{alpaka::getPtrNative(devBuffer)} diff --git a/test/unit/mem/fence/src/FenceTest.cpp b/test/unit/mem/fence/src/FenceTest.cpp index 260d6e845ab8..4e1c0df05259 100644 --- a/test/unit/mem/fence/src/FenceTest.cpp +++ b/test/unit/mem/fence/src/FenceTest.cpp @@ -188,7 +188,7 @@ TEMPLATE_LIST_TEST_CASE("FenceTest", "[fence]", TestAccs) auto const numElements = Idx{2ul}; auto const extent = alpaka::Vec{numElements}; - auto vars_host = alpaka::allocMappedBufIfSupported(host, platformAcc, extent); + auto vars_host = alpaka::allocMappedBufIfSupported(host, platformAcc, extent); auto vars_dev = alpaka::allocBuf(dev, extent); vars_host[0] = 1; vars_host[1] = 2; From 1bc3029cdc88cd374f7f671941ea5fc18270f0f4 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Fri, 15 Sep 2023 
15:49:05 +0200 Subject: [PATCH 05/23] Remove unused aliases --- test/integ/axpy/src/axpy.cpp | 1 - test/integ/matMul/src/matMul.cpp | 2 -- 2 files changed, 3 deletions(-) diff --git a/test/integ/axpy/src/axpy.cpp b/test/integ/axpy/src/axpy.cpp index 64789d47b596..c5742814a610 100644 --- a/test/integ/axpy/src/axpy.cpp +++ b/test/integ/axpy/src/axpy.cpp @@ -76,7 +76,6 @@ TEMPLATE_LIST_TEST_CASE("axpy", "[axpy]", TestAccs) using Val = float; using DevAcc = alpaka::Dev; - using PlatformAcc = alpaka::Platform; using QueueAcc = alpaka::test::DefaultQueue; // Create the kernel function object. diff --git a/test/integ/matMul/src/matMul.cpp b/test/integ/matMul/src/matMul.cpp index b76c3f13f4b3..149d94df73c9 100644 --- a/test/integ/matMul/src/matMul.cpp +++ b/test/integ/matMul/src/matMul.cpp @@ -162,8 +162,6 @@ TEMPLATE_LIST_TEST_CASE("matMul", "[matMul]", TestAccs) using Val = std::uint32_t; using Vec2 = alpaka::Vec; - using DevAcc = alpaka::Dev; - using PlatformAcc = alpaka::Platform; using QueueAcc = alpaka::test::DefaultQueue>; using QueueHost = alpaka::QueueCpuNonBlocking; From 3d216bfaf0bc8319d2f08fdd1d6f37eb9bd5eaa5 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Thu, 14 Sep 2023 17:20:29 +0200 Subject: [PATCH 06/23] Only add clang warning flag when supported --- test/common/devCompileOptions.cmake | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/common/devCompileOptions.cmake b/test/common/devCompileOptions.cmake index f8b3fe36f145..9024c3686fb8 100644 --- a/test/common/devCompileOptions.cmake +++ b/test/common/devCompileOptions.cmake @@ -150,7 +150,9 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Inte list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-missing-variable-declarations>") list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-old-style-cast>") list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-overlength-strings>") - list(APPEND 
alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-reserved-identifier>") + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) + list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-reserved-identifier>") + endif() list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-reserved-id-macro>") list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-unused-macros>") list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-used-but-marked-unused>") From 885c27c65aa0708e1f06e144c2937dc815a93b26 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Thu, 14 Sep 2023 17:19:29 +0200 Subject: [PATCH 07/23] Fix a warning with nvcc --- include/alpaka/intrinsic/IntrinsicCpu.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/alpaka/intrinsic/IntrinsicCpu.hpp b/include/alpaka/intrinsic/IntrinsicCpu.hpp index 98bc9df0af77..5db927bd44a8 100644 --- a/include/alpaka/intrinsic/IntrinsicCpu.hpp +++ b/include/alpaka/intrinsic/IntrinsicCpu.hpp @@ -5,6 +5,7 @@ #pragma once #include "alpaka/core/BoostPredef.hpp" +#include "alpaka/core/Unreachable.hpp" #include "alpaka/intrinsic/IntrinsicFallback.hpp" #include "alpaka/intrinsic/Traits.hpp" @@ -52,6 +53,7 @@ namespace alpaka // Fallback to standard library return static_cast(std::bitset(value).count()); #endif + ALPAKA_UNREACHABLE(0); } }; @@ -79,6 +81,7 @@ namespace alpaka #else return alpaka::detail::ffsFallback(value); #endif + ALPAKA_UNREACHABLE(0); } }; } // namespace trait From 02478550236a56ab3d2e64abacc4843dd21a8b7a Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Thu, 14 Sep 2023 18:22:47 +0200 Subject: [PATCH 08/23] Workaround gcc warning on uninitialized PlatformCpu --- include/alpaka/platform/PlatformCpu.hpp | 6 ++++++ include/alpaka/test/KernelExecutionFixture.hpp | 11 ----------- include/alpaka/test/queue/QueueTestFixture.hpp | 10 ---------- test/unit/mem/copy/src/BufSlicing.cpp | 10 ---------- 4 files 
changed, 6 insertions(+), 31 deletions(-) diff --git a/include/alpaka/platform/PlatformCpu.hpp b/include/alpaka/platform/PlatformCpu.hpp index a1a2cb1137d1..c431fd418785 100644 --- a/include/alpaka/platform/PlatformCpu.hpp +++ b/include/alpaka/platform/PlatformCpu.hpp @@ -16,6 +16,12 @@ namespace alpaka //! The CPU device platform. struct PlatformCpu : concepts::Implements { +#if defined(BOOST_COMP_GNUC) && BOOST_COMP_GNUC >= BOOST_VERSION_NUMBER(11, 0, 0) \ + && BOOST_COMP_GNUC < BOOST_VERSION_NUMBER(12, 0, 0) + // This is a workaround for g++-11 bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96295 + // g++-11 complains in *all* places where a PlatformCpu is used, that it "may be used uninitialized" + char c = {}; +#endif }; namespace trait diff --git a/include/alpaka/test/KernelExecutionFixture.hpp b/include/alpaka/test/KernelExecutionFixture.hpp index 8d783be62186..6d2cf31f8176 100644 --- a/include/alpaka/test/KernelExecutionFixture.hpp +++ b/include/alpaka/test/KernelExecutionFixture.hpp @@ -25,13 +25,6 @@ namespace alpaka::test template class KernelExecutionFixture { -#if defined(BOOST_COMP_GNUC) && BOOST_COMP_GNUC >= BOOST_VERSION_NUMBER(11, 0, 0) \ - && BOOST_COMP_GNUC < BOOST_VERSION_NUMBER(12, 0, 0) -// g++-11 (wrongly) believes that m_platformHost is used in an uninitialized state. 
-# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wmaybe-uninitialized" -#endif - public: using Acc = TAcc; using Dim = alpaka::Dim; @@ -82,9 +75,5 @@ namespace alpaka::test Device m_device{getDevByIdx(m_platform, 0)}; Queue m_queue{m_device}; WorkDiv m_workDiv; -#if defined(BOOST_COMP_GNUC) && BOOST_COMP_GNUC >= BOOST_VERSION_NUMBER(11, 0, 0) \ - && BOOST_COMP_GNUC < BOOST_VERSION_NUMBER(12, 0, 0) -# pragma GCC diagnostic pop -#endif }; } // namespace alpaka::test diff --git a/include/alpaka/test/queue/QueueTestFixture.hpp b/include/alpaka/test/queue/QueueTestFixture.hpp index d55bf70df7e5..ad6f8150afc6 100644 --- a/include/alpaka/test/queue/QueueTestFixture.hpp +++ b/include/alpaka/test/queue/QueueTestFixture.hpp @@ -12,12 +12,6 @@ namespace alpaka::test template struct QueueTestFixture { -#if defined(BOOST_COMP_GNUC) && BOOST_COMP_GNUC >= BOOST_VERSION_NUMBER(11, 0, 0) \ - && BOOST_COMP_GNUC < BOOST_VERSION_NUMBER(12, 0, 0) -// g++-11 (wrongly) believes that m_platform is used in an uninitialized state. 
-# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wmaybe-uninitialized" -#endif using Dev = std::tuple_element_t<0, TDevQueue>; using Queue = std::tuple_element_t<1, TDevQueue>; using Platform = alpaka::Platform; @@ -25,9 +19,5 @@ namespace alpaka::test Platform m_platform{}; Dev m_dev{getDevByIdx(m_platform, 0)}; Queue m_queue{m_dev}; -#if defined(BOOST_COMP_GNUC) && BOOST_COMP_GNUC >= BOOST_VERSION_NUMBER(11, 0, 0) \ - && BOOST_COMP_GNUC < BOOST_VERSION_NUMBER(12, 0, 0) -# pragma GCC diagnostic pop -#endif }; } // namespace alpaka::test diff --git a/test/unit/mem/copy/src/BufSlicing.cpp b/test/unit/mem/copy/src/BufSlicing.cpp index a978562e453e..6169fdaf5ff2 100644 --- a/test/unit/mem/copy/src/BufSlicing.cpp +++ b/test/unit/mem/copy/src/BufSlicing.cpp @@ -20,12 +20,6 @@ template> struct TestContainer { -#if defined(BOOST_COMP_GNUC) && BOOST_COMP_GNUC >= BOOST_VERSION_NUMBER(11, 0, 0) \ - && BOOST_COMP_GNUC < BOOST_VERSION_NUMBER(12, 0, 0) -// g++-11 (wrongly) believes that platformHost is used in an uninitialized state. 
-# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wmaybe-uninitialized" -#endif using AccQueueProperty = alpaka::Blocking; using DevQueue = alpaka::Queue; using DevAcc = alpaka::Dev; @@ -106,10 +100,6 @@ struct TestContainer REQUIRE(ptrA[i] == Catch::Approx(ptrB[i])); } } -#if defined(BOOST_COMP_GNUC) && BOOST_COMP_GNUC >= BOOST_VERSION_NUMBER(11, 0, 0) \ - && BOOST_COMP_GNUC < BOOST_VERSION_NUMBER(12, 0, 0) -# pragma GCC diagnostic pop -#endif }; using DataTypes = std::tuple; From 1f938e083eaf6dfdd0e63c2946d72ce3f1642130 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Thu, 14 Sep 2023 18:04:33 +0200 Subject: [PATCH 09/23] Fix warnings by clang --- include/alpaka/kernel/TaskKernelGpuUniformCudaHipRt.hpp | 7 +++++++ include/alpaka/rand/Philox/MultiplyAndSplit64to32.hpp | 6 +++--- include/alpaka/rand/TinyMT/tinymt32.h | 1 + 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/include/alpaka/kernel/TaskKernelGpuUniformCudaHipRt.hpp b/include/alpaka/kernel/TaskKernelGpuUniformCudaHipRt.hpp index 7863d0173eda..e2d7ed3d1915 100644 --- a/include/alpaka/kernel/TaskKernelGpuUniformCudaHipRt.hpp +++ b/include/alpaka/kernel/TaskKernelGpuUniformCudaHipRt.hpp @@ -50,6 +50,10 @@ namespace alpaka { namespace detail { +# if BOOST_COMP_CLANG +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wunused-template" +# endif //! The GPU CUDA/HIP kernel entry point. // \NOTE: 'A __global__ function or function template cannot have a trailing return type.' 
// We have put the function into a shallow namespace and gave it a short name, so the mangled name in the @@ -74,6 +78,9 @@ namespace alpaka # endif kernelFnObj(const_cast(acc), args...); } +# if BOOST_COMP_CLANG +# pragma clang diagnostic pop +# endif } // namespace detail namespace uniform_cuda_hip diff --git a/include/alpaka/rand/Philox/MultiplyAndSplit64to32.hpp b/include/alpaka/rand/Philox/MultiplyAndSplit64to32.hpp index c9518b1b9fef..e0c036128770 100644 --- a/include/alpaka/rand/Philox/MultiplyAndSplit64to32.hpp +++ b/include/alpaka/rand/Philox/MultiplyAndSplit64to32.hpp @@ -11,13 +11,13 @@ namespace alpaka::rand { /// Get high 32 bits of a 64-bit number - ALPAKA_FN_HOST_ACC static constexpr auto high32Bits(std::uint64_t const x) -> std::uint32_t + ALPAKA_FN_HOST_ACC inline constexpr auto high32Bits(std::uint64_t const x) -> std::uint32_t { return static_cast(x >> 32); } /// Get low 32 bits of a 64-bit number - ALPAKA_FN_HOST_ACC static constexpr auto low32Bits(std::uint64_t const x) -> std::uint32_t + ALPAKA_FN_HOST_ACC inline constexpr auto low32Bits(std::uint64_t const x) -> std::uint32_t { return static_cast(x & 0xffff'ffff); } @@ -30,7 +30,7 @@ namespace alpaka::rand * @param resultLow low 32 bits of the product a*b */ // TODO: See single-instruction implementations in original Philox source code - ALPAKA_FN_HOST_ACC static constexpr void multiplyAndSplit64to32( + ALPAKA_FN_HOST_ACC inline constexpr void multiplyAndSplit64to32( std::uint64_t const a, std::uint64_t const b, std::uint32_t& resultHigh, diff --git a/include/alpaka/rand/TinyMT/tinymt32.h b/include/alpaka/rand/TinyMT/tinymt32.h index ffa7c1ca9a2b..55a946f2d435 100644 --- a/include/alpaka/rand/TinyMT/tinymt32.h +++ b/include/alpaka/rand/TinyMT/tinymt32.h @@ -37,6 +37,7 @@ #if BOOST_COMP_CLANG # pragma clang diagnostic push # pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wunused-function" #endif #if BOOST_COMP_GNUC # pragma GCC diagnostic push From 
b74f5c4813135ad4f652d75e7dd6a7de9ba8e922 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Thu, 14 Sep 2023 17:28:28 +0200 Subject: [PATCH 10/23] Suppress clang warnings in nvcc generated code --- test/common/devCompileOptions.cmake | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/test/common/devCompileOptions.cmake b/test/common/devCompileOptions.cmake index 9024c3686fb8..e31d9aa05d09 100644 --- a/test/common/devCompileOptions.cmake +++ b/test/common/devCompileOptions.cmake @@ -150,13 +150,22 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Inte list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-missing-variable-declarations>") list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-old-style-cast>") list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-overlength-strings>") - if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) - list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-reserved-identifier>") - endif() list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-reserved-id-macro>") list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-unused-macros>") list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-used-but-marked-unused>") list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-zero-as-null-pointer-constant>") + list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-unreachable-code>") + list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-extra-semi>") + list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-deprecated>") + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) + list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-reserved-identifier>") + endif() + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION 
VERSION_GREATER_EQUAL 15.0) + list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-gnu-line-marker>") + endif() + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10.0) + list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-deprecated-copy>") + endif() if(CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0) list(APPEND alpaka_DEV_COMPILE_OPTIONS "-Wno-poison-system-directories") From 728dc272d40b40028cd6b03c56a19cf19ad24acb Mon Sep 17 00:00:00 2001 From: Jan Stephan Date: Wed, 30 Aug 2023 12:54:51 +0200 Subject: [PATCH 11/23] Pass alpaka_ENABLE_WERROR from environment to CMake --- script/run_generate.sh | 2 +- test/common/devCompileOptions.cmake | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/script/run_generate.sh b/script/run_generate.sh index 96a0d516f374..435aee3a8b9d 100755 --- a/script/run_generate.sh +++ b/script/run_generate.sh @@ -78,7 +78,7 @@ mkdir -p build/ cd build/ "${ALPAKA_CI_CMAKE_EXECUTABLE}" --log-level=VERBOSE -G "${ALPAKA_CI_CMAKE_GENERATOR}" ${ALPAKA_CI_CMAKE_GENERATOR_PLATFORM}\ - -Dalpaka_BUILD_EXAMPLES=ON -DBUILD_TESTING=ON \ + -Dalpaka_BUILD_EXAMPLES=ON -DBUILD_TESTING=ON "$(env2cmake alpaka_ENABLE_WERROR)" \ "$(env2cmake BOOST_ROOT)" -DBOOST_LIBRARYDIR="${ALPAKA_CI_BOOST_LIB_DIR}/lib" -DBoost_USE_STATIC_LIBS=ON -DBoost_USE_MULTITHREADED=ON -DBoost_USE_STATIC_RUNTIME=OFF -DBoost_ARCHITECTURE="-x64" \ "$(env2cmake CMAKE_BUILD_TYPE)" "$(env2cmake CMAKE_CXX_FLAGS)" "$(env2cmake CMAKE_C_COMPILER)" "$(env2cmake CMAKE_CXX_COMPILER)" "$(env2cmake CMAKE_EXE_LINKER_FLAGS)" "$(env2cmake CMAKE_CXX_EXTENSIONS)"\ "$(env2cmake alpaka_ACC_CPU_B_SEQ_T_SEQ_ENABLE)" "$(env2cmake alpaka_ACC_CPU_B_SEQ_T_THREADS_ENABLE)" \ diff --git a/test/common/devCompileOptions.cmake b/test/common/devCompileOptions.cmake index e31d9aa05d09..afae60a8ac7c 100644 --- a/test/common/devCompileOptions.cmake +++ b/test/common/devCompileOptions.cmake 
@@ -10,7 +10,6 @@ if(alpaka_ACC_GPU_CUDA_ENABLE AND (CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")) if(alpaka_ENABLE_WERROR) list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:--Wreorder>") list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:--Wdefault-stream-launch>") - list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:--Wmissing-launch-bounds>") list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:--Wext-lambda-captures-this>") list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:--Werror all-warnings>") else() From a61287377302c4c464797f1a185195b4e0e37724 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Mon, 18 Sep 2023 15:22:19 +0200 Subject: [PATCH 12/23] Disable GCC warning in nvcc generated code Fixes the following warning: ``` /builds/hzdr/crp/alpaka/test/unit/math/src/Defines.hpp:52:35: error: left operand of comma operator has no effect [-Werror=unused-value] 52 | for(size_t i = 0; i < argsItem.arity_nr; ++i) | ~~~~~~~~~^~~~~~~~~~ ``` --- test/common/devCompileOptions.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/test/common/devCompileOptions.cmake b/test/common/devCompileOptions.cmake index afae60a8ac7c..84e892bf17d7 100644 --- a/test/common/devCompileOptions.cmake +++ b/test/common/devCompileOptions.cmake @@ -123,6 +123,7 @@ if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") list(APPEND alpaka_DEV_COMPILE_OPTIONS "-Walloc-zero") list(APPEND alpaka_DEV_COMPILE_OPTIONS "-Walloca") list(APPEND alpaka_DEV_COMPILE_OPTIONS "-Wcast-align=strict") + list(APPEND alpaka_DEV_COMPILE_OPTIONS "$<$:SHELL:-Xcompiler -Wno-unused-value>") # occurs in nvcc-generated code endif() # Clang, AppleClang, ICPX From 1ee04262f74943e676c6df4591a957ef1b45db9d Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Mon, 18 Sep 2023 18:03:44 +0200 Subject: [PATCH 13/23] Workaround gcc warning on uninitialized PlatformUniformCudaHipRt --- include/alpaka/platform/PlatformUniformCudaHipRt.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git 
a/include/alpaka/platform/PlatformUniformCudaHipRt.hpp b/include/alpaka/platform/PlatformUniformCudaHipRt.hpp index ffda2de2faab..9784f54520b2 100644 --- a/include/alpaka/platform/PlatformUniformCudaHipRt.hpp +++ b/include/alpaka/platform/PlatformUniformCudaHipRt.hpp @@ -28,6 +28,12 @@ namespace alpaka template struct PlatformUniformCudaHipRt : concepts::Implements> { +# if defined(BOOST_COMP_GNUC) && BOOST_COMP_GNUC >= BOOST_VERSION_NUMBER(11, 0, 0) \ + && BOOST_COMP_GNUC < BOOST_VERSION_NUMBER(12, 0, 0) + // This is a workaround for g++-11 bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96295 + // g++-11 complains in *all* places where a PlatformCpu is used, that it "may be used uninitialized" + char c = {}; +# endif }; namespace trait From 1846aa153825dde7bc94d85b3496908d66934443 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Tue, 19 Sep 2023 18:15:19 +0200 Subject: [PATCH 14/23] Fix OpenMP 5.1 atomics * Workaround clang not recognizing ternary expression * Implement atomicInc/atomicDec via omp critical Fixes: #2170 --- include/alpaka/atomic/AtomicOmpBuiltIn.hpp | 44 +++++++--------------- 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/include/alpaka/atomic/AtomicOmpBuiltIn.hpp b/include/alpaka/atomic/AtomicOmpBuiltIn.hpp index 6d4dc96e72ec..440b373fc460 100644 --- a/include/alpaka/atomic/AtomicOmpBuiltIn.hpp +++ b/include/alpaka/atomic/AtomicOmpBuiltIn.hpp @@ -187,7 +187,8 @@ namespace alpaka # pragma omp atomic capture compare { old = ref; - ref = (ref <= value) ? ref : value; + if(value < ref) + ref = value; } return old; } @@ -205,7 +206,8 @@ namespace alpaka # pragma omp atomic capture compare { old = ref; - ref = (ref >= value) ? 
ref : value; + if(value > ref) + ref = value; } return old; } @@ -217,21 +219,12 @@ namespace alpaka { ALPAKA_FN_HOST static auto atomicOp(AtomicOmpBuiltIn const&, T* const addr, T const& value) -> T { + // TODO(bgruber): atomic increment with wrap around is not implementable in OpenMP 5.1 T old; - auto& ref(*addr); -// atomically update ref, but capture the original value in old -# if BOOST_COMP_GNUC -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wconversion" -# endif -# pragma omp atomic capture compare +# pragma omp critical(AlpakaOmpAtomicOp) { - old = ref; - ref = ((ref >= value) ? 0 : (ref + 1)); + old = AtomicInc{}(addr, value); } -# if BOOST_COMP_GNUC -# pragma GCC diagnostic pop -# endif return old; } }; @@ -242,21 +235,12 @@ namespace alpaka { ALPAKA_FN_HOST static auto atomicOp(AtomicOmpBuiltIn const&, T* const addr, T const& value) -> T { + // TODO(bgruber): atomic decrement with wrap around is not implementable in OpenMP 5.1 T old; - auto& ref(*addr); -// atomically update ref, but capture the original value in old -# if BOOST_COMP_GNUC -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wconversion" -# endif -# pragma omp atomic capture compare +# pragma omp critical(AlpakaOmpAtomicOp) { - old = ref; - ref = ((ref == 0) || (ref > value)) ? value : (ref - 1); + old = AtomicDec{}(addr, value); } -# if BOOST_COMP_GNUC -# pragma GCC diagnostic pop -# endif return old; } }; @@ -293,8 +277,8 @@ namespace alpaka ALPAKA_FN_HOST static auto atomicOp(AtomicOmpBuiltIn const&, T* const addr, T const& value) -> T { T old; -// \TODO: Currently not only the access to the same memory location is protected by a mutex but all atomic ops on all -// threads. + // \TODO: Currently not only the access to the same memory location is protected by a mutex but all + // atomic ops on all threads. 
# pragma omp critical(AlpakaOmpAtomicOp) { old = TOp()(addr, value); @@ -309,8 +293,8 @@ namespace alpaka T const& value) -> T { T old; -// \TODO: Currently not only the access to the same memory location is protected by a mutex but all atomic ops on all -// threads. + // \TODO: Currently not only the access to the same memory location is protected by a mutex but all + // atomic ops on all threads. # pragma omp critical(AlpakaOmpAtomicOp2) { old = TOp()(addr, compare, value); From 8fc9f6f56ad7e25601bd51786d38fe34f539a8ba Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Wed, 20 Sep 2023 14:28:41 +0200 Subject: [PATCH 15/23] Add clang-17 to CI Fixes: #2169 --- script/job_generator/versions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/job_generator/versions.py b/script/job_generator/versions.py index 2f2251306d25..d45f74d4b42d 100644 --- a/script/job_generator/versions.py +++ b/script/job_generator/versions.py @@ -12,7 +12,7 @@ sw_versions: Dict[str, List[str]] = { GCC: ["9", "10", "11", "12", "13"], - CLANG: ["9", "10", "11", "12", "13", "14", "15", "16"], + CLANG: ["9", "10", "11", "12", "13", "14", "15", "16", "17"], NVCC: [ "11.0", "11.1", From 4a5bec8a2449bb576f85ea3422eda18b5cbf1ff9 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Wed, 20 Sep 2023 16:57:57 +0200 Subject: [PATCH 16/23] Rename lambda captures to workaround warnings clang warns that the names of the captures shadow the outside variables. 
E.g.: QueueUniformCudaHipRt.hpp:215:57: error: declaration shadows a local variable [-Werror,-Wshadow-uncaptured-local] 215 | auto f = queue.m_callbackThread.submit([data = std::move(data)] { data->t(); }); | ^ --- include/alpaka/mem/buf/BufUniformCudaHipRt.hpp | 4 ++-- include/alpaka/queue/cuda_hip/QueueUniformCudaHipRt.hpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/alpaka/mem/buf/BufUniformCudaHipRt.hpp b/include/alpaka/mem/buf/BufUniformCudaHipRt.hpp index 04dcbb10afd3..79340323ff01 100644 --- a/include/alpaka/mem/buf/BufUniformCudaHipRt.hpp +++ b/include/alpaka/mem/buf/BufUniformCudaHipRt.hpp @@ -310,8 +310,8 @@ namespace alpaka return { dev, reinterpret_cast(memPtr), - [queue = std::move(queue)](TElem* ptr) - { ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_NOEXCEPT(TApi::freeAsync(ptr, queue.getNativeHandle())); }, + [q = std::move(queue)](TElem* ptr) + { ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_NOEXCEPT(TApi::freeAsync(ptr, q.getNativeHandle())); }, extent, static_cast(width) * sizeof(TElem)}; } diff --git a/include/alpaka/queue/cuda_hip/QueueUniformCudaHipRt.hpp b/include/alpaka/queue/cuda_hip/QueueUniformCudaHipRt.hpp index 50d2285c1fcd..3a85fac26e41 100644 --- a/include/alpaka/queue/cuda_hip/QueueUniformCudaHipRt.hpp +++ b/include/alpaka/queue/cuda_hip/QueueUniformCudaHipRt.hpp @@ -212,7 +212,7 @@ namespace alpaka { auto data = std::unique_ptr(reinterpret_cast(arg)); auto& queue = data->q; - auto f = queue.m_callbackThread.submit([data = std::move(data)] { data->t(); }); + auto f = queue.m_callbackThread.submit([d = std::move(data)] { d->t(); }); f.wait(); } From 6b051db829f88eb3b130e6de5d96764493ca2151 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Wed, 20 Sep 2023 17:06:11 +0200 Subject: [PATCH 17/23] Exclude clang CUDA Debug builds from the CI They fail with: ptxas /tmp/randomStrategies-sm_61-768a17.s, line 14415; fatal : Parsing error near '.': syntax error ptxas fatal : Ptx assembly aborted due to errors clang++: error: 
ptxas command failed with exit code 255 (use -v to see invocation) --- script/job_generator/alpaka_filter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/job_generator/alpaka_filter.py b/script/job_generator/alpaka_filter.py index c7a74d935472..71a43c1f4b70 100644 --- a/script/job_generator/alpaka_filter.py +++ b/script/job_generator/alpaka_filter.py @@ -24,7 +24,7 @@ def alpaka_post_filter(row: List) -> bool: and row[param_map[BUILD_TYPE]][VERSION] == CMAKE_DEBUG and row_check_name(row, DEVICE_COMPILER, "==", CLANG_CUDA) ): - for clang_cuda_version in ["15", "16"]: + for clang_cuda_version in ["15", "16", "17"]: if row_check_version(row, HOST_COMPILER, "==", clang_cuda_version): return False From 921927ff7d881fa7efad7252029eb64a0c67c0da Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Wed, 20 Sep 2023 18:19:17 +0200 Subject: [PATCH 18/23] Add clang-17 to README.md CUDA/HIP/SYCL is marked as untested for now. --- README.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 229a520ca33d..db645d902aca 100644 --- a/README.md +++ b/README.md @@ -65,17 +65,17 @@ Supported Compilers This library uses C++17 (or newer when available). -| Accelerator Back-end | gcc 9.5
(Linux) | gcc 10.4 / 11.1
(Linux) | gcc 12.3
(Linux) | gcc 13.1
(Linux) | clang 9
(Linux) | clang 10 / 11
(Linux) | clang 12
(Linux) | clang 13
(Linux) | clang 14
(Linux) | clang 15
(Linux) | clang 16
(Linux) | icpx 2023.1.0 / 2023.2.0 (Linux) | Xcode 13.2.1 / 14.2 / 14.3.1
(macOS) | Visual Studio 2022
(Windows) | -|--------------------------------------------------------------------------------|-------------------------------------------------|-------------------------------------------------|---------------------------------------------|------------------------|------------------------------------------------------------|-------------------------------------------------------|-------------------------------------------------|---------------------------------------------|---------------------------------------------------|-------------------------------------------|-------------------------------------------|----------------------------------|-------------------------------------------------------|--------------------------------------| -| Serial | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | -| OpenMP 2.0+ blocks | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark:[^3] | :white_check_mark: | :white_check_mark: | -| OpenMP 2.0+ threads | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark:[^3] | :white_check_mark: | :white_check_mark: | -| std::thread | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | 
:white_check_mark: | -| TBB | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | -| CUDA (nvcc) | :white_check_mark:
(CUDA 11.0 - 12.2)[^2] | :white_check_mark:
(CUDA 11.4 - 12.0)[^2] | :white_check_mark:
(CUDA 12.0 - 12.2) | :x: | :white_check_mark:
(CUDA 11.0-11.2; 11.6 - 12.0)[^2] | :white_check_mark:
(CUDA 11.2, 11.6 - 12.0)[^2] | :white_check_mark:
(CUDA 11.6 - 12.0)[^2] | :white_check_mark:
(CUDA 11.7 - 12.0) | :white_check_mark:
(CUDA 11.8 - 12.0) | :white_check_mark:
(CUDA 12.2) | :x: | :x: | :x: | :x: | -| CUDA (clang) | - | - | - | :x: | :x: | :x: | :x: | :x: | :white_check_mark: (CUDA 11.0 - 11.5) | :white_check_mark: (CUDA 11.0 - 11.5)[^1] | :white_check_mark: (CUDA 11.0 - 11.5)[^1] | :x: | - | - | -| [HIP](https://alpaka.readthedocs.io/en/latest/install/HIP.html) (clang) | - | - | - | :x: | :x: | :x: | :x: | :x: | :white_check_mark: (HIP 5.0 - 5.2) | :white_check_mark: (HIP 5.3 - 5.4) | :white_check_mark: (HIP 5.5) | :x: | - | - | -| SYCL | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :white_check_mark:[^4] | :x: | :x: | +| Accelerator Back-end | gcc 9.5
(Linux) | gcc 10.4 / 11.1
(Linux) | gcc 12.3
(Linux) | gcc 13.1
(Linux) | clang 9
(Linux) | clang 10 / 11
(Linux) | clang 12
(Linux) | clang 13
(Linux) | clang 14
(Linux) | clang 15
(Linux) | clang 16
(Linux) | clang 17
(Linux) | icpx 2023.1.0 / 2023.2.0 (Linux) | Xcode 13.2.1 / 14.2 / 14.3.1
(macOS) | Visual Studio 2022
(Windows) | +|--------------------------------------------------------------------------------|-------------------------------------------------|-------------------------------------------------|---------------------------------------------|------------------------|------------------------------------------------------------|-------------------------------------------------------|-------------------------------------------------|---------------------------------------------|---------------------------------------------------|-------------------------------------------|-------------------------------------------|-------------------------------------------|----------------------------------|-------------------------------------------------------|--------------------------------------| +| Serial | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | +| OpenMP 2.0+ blocks | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark:[^3] | :white_check_mark: | :white_check_mark: | +| OpenMP 2.0+ threads | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark:[^3] | :white_check_mark: | :white_check_mark: | +| std::thread | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | 
:white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | +| TBB | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | +| CUDA (nvcc) | :white_check_mark:
(CUDA 11.0 - 12.2)[^2] | :white_check_mark:
(CUDA 11.4 - 12.0)[^2] | :white_check_mark:
(CUDA 12.0 - 12.2) | :x: | :white_check_mark:
(CUDA 11.0-11.2; 11.6 - 12.0)[^2] | :white_check_mark:
(CUDA 11.2, 11.6 - 12.0)[^2] | :white_check_mark:
(CUDA 11.6 - 12.0)[^2] | :white_check_mark:
(CUDA 11.7 - 12.0) | :white_check_mark:
(CUDA 11.8 - 12.0) | :white_check_mark:
(CUDA 12.2) | :x: | :x: | :x: | :x: | :x: | +| CUDA (clang) | - | - | - | :x: | :x: | :x: | :x: | :x: | :white_check_mark: (CUDA 11.0 - 11.5) | :white_check_mark: (CUDA 11.0 - 11.5)[^1] | :white_check_mark: (CUDA 11.0 - 11.5)[^1] | :white_check_mark: (CUDA 11.0 - 11.8)[^1] | :x: | - | - | +| [HIP](https://alpaka.readthedocs.io/en/latest/install/HIP.html) (clang) | - | - | - | :x: | :x: | :x: | :x: | :x: | :white_check_mark: (HIP 5.0 - 5.2) | :white_check_mark: (HIP 5.3 - 5.4) | :white_check_mark: (HIP 5.5) | :x: | :x: | - | - | +| SYCL | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :white_check_mark:[^4] | :x: | :x: | Other compilers or combinations marked with :x: in the table above may work but are not tested in CI and are therefore not explicitly supported. From 680724b6464f61eba304cde8cb2f1811a5da6093 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Tue, 26 Sep 2023 15:03:05 +0200 Subject: [PATCH 19/23] Fix clang-format version in docs (#2176) --- CONTRIBUTING.md | 8 ++++---- docs/source/dev/style.rst | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 89975c03820b..bae65b41b724 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,19 +2,19 @@ ## Formatting -Please format your code before before opening pull requests using clang-format 14 and the .clang-format file placed in the repository root. +Please format your code before before opening pull requests using clang-format 16 and the .clang-format file placed in the repository root. ### Visual Studio and CLion Suport for clang-format is built-in since Visual Studio 2017 15.7 and CLion 2019.1. The .clang-format file in the repository will be automatically detected and formatting is done as you type, or triggered when pressing the format hotkey. ### Bash -First install clang-format-14. Instructions therefore can be found on the web. +First install clang-format-16. Instructions therefore can be found on the web. 
To format your changes since branching off develop, you can run this command in bash: ``` -git clang-format-14 develop +git clang-format-16 develop ``` To format all code in your working copy, you can run this command in bash: ``` -find -iname '*.cpp' -o -iname '*.hpp' | xargs clang-format-14 -i +find -iname '*.cpp' -o -iname '*.hpp' | xargs clang-format-16 -i ``` diff --git a/docs/source/dev/style.rst b/docs/source/dev/style.rst index 3f1ed6712130..c95bfc5b51db 100644 --- a/docs/source/dev/style.rst +++ b/docs/source/dev/style.rst @@ -14,13 +14,13 @@ whitespace and braces automatically. Usage: .. code-block:: bash - clang-format-14 -i + clang-format-16 -i * If you want to format the entire code base execute the following command from alpaka's top-level directory: .. code-block:: bash - find example include test -name '*.hpp' -o -name '*.cpp' | xargs clang-format-14 -i + find example include test -name '*.hpp' -o -name '*.cpp' | xargs clang-format-16 -i Windows users should use `Visual Studio's native clang-format integration `. 
From f79717ba6d11af33ade4a71d58fb09a5cd24ab9a Mon Sep 17 00:00:00 2001 From: Jan Stephan Date: Wed, 27 Sep 2023 15:17:46 +0200 Subject: [PATCH 20/23] Remove unnecessary -fintelfpga flag --- cmake/alpakaCommon.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cmake/alpakaCommon.cmake b/cmake/alpakaCommon.cmake index fdbc38dc0067..cad0809b3826 100644 --- a/cmake/alpakaCommon.cmake +++ b/cmake/alpakaCommon.cmake @@ -614,7 +614,6 @@ if(alpaka_ACC_SYCL_ENABLE) if(alpaka_SYCL_ONEAPI_FPGA) target_compile_definitions(alpaka INTERFACE "ALPAKA_SYCL_ONEAPI_FPGA") - alpaka_set_compiler_options(DEVICE target alpaka "-fintelfpga") if(alpaka_SYCL_ONEAPI_FPGA_MODE STREQUAL "emulation") target_compile_definitions(alpaka INTERFACE "ALPAKA_FPGA_EMULATION") From fc753dcf0e2eb4879c30984b03424207806c6f95 Mon Sep 17 00:00:00 2001 From: Jan Stephan Date: Thu, 31 Aug 2023 00:43:18 +0200 Subject: [PATCH 21/23] Add changelog for alpaka v1.0.0 --- CHANGELOG.md | 279 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 278 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4907b99e1ff..3ab0153c56a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,284 @@ # Changelog All notable changes to this project will be documented in this file. -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
+ +## [1.0.0] - 2023-10-13 + +### Added + +- `g++`: + - Added support for `g++-13` #1967 + - Added support for `g++-12` #1721 #1754 #1765 #1867 +- `clang++`: + - Added support for `clang-17` #2171 #2174 + - Added support for `clang-16` #1971 #2006 + - Added support for `clang-15` #1898 + - Added support for `clang-14` #1766 + - Added support for `clang-13` #1756 +- `icpx`: + - Added support for the Intel® oneAPI DPC++/C++ Compiler (`icpx`) #1700 #1706 #1884 #2064 #2081 +- Xcode: + - Added support for Xcode 14.3.1 #1973 + - Added support for Xcode 14.2 #1899 +- CUDA: + - Added support for CUDA 12.2 #2043 + - Added support for CUDA 12.1 #1957 + - Added support for CUDA 11.{6,7,8} and 12.0 #1890 +- ROCm: + - Added support for ROCm 5.5 #1961 + - Added support for ROCm 5.4 #1915 + - Added support for ROCm 5.3 #1838 + - Added support for ROCm 5.2.3 #1812 +- `alpaka::math`: + - Added `alpaka::math::copysign` function #2050 + - Added `alpaka::math::log2` and `alpaka::math::log10` functions #2029 + - Added `alpaka::math::fma` functions #2015 + - Added hyperbolic functions #1828 #2030 + - Added `constants` namespace which contains constants such as π, e, etc. 
#1710 +- `alpaka::Vec`: + - Added generator constructor #2085 + - Added `front` and `back` methods #2085 + - Added `elementwise_{min,max}` methods #1805 + - `Vec` now features a deduction guide for easier construction #1610 +- Documentation: + - Added example illustrating typical data-parallel patterns with alpaka #1712 + - Added documentation about the behaviour of `constexpr` functions in kernel code #1699 + - Added documentation about CUDA function attributes #1697 + - Added documentation about setting the C++ standard library for clang #1695 +- Test cases: + - Added test for `alpaka::ViewSubView` #2095 + - Added queue test which checks that a task is destroyed after execution #2047 + - Added test for `alpaka::getValidWorkDiv` with `Idx` type #1830 + - Added tests for `alpaka::subDivideGridElements` #1829 +- CI: + - Run test cases with `-Werror` #2163 + - Added UBSan CI job #2059 + - Added CI job to create amalgamated `alpaka.hpp` #1956 #1965 #1972 + - Made GitLab CI jobs interruptible #1904 + - Updated used Boost and CMake versions #1903 #1969 + - Added `agc-manager` support #1871 #1921 + - Added TSan CI job #1851 #2103 #2137 + - GitLab CI jobs are now automatically generated #1785 #1889 #1896 #1951 #1952 #2005 #2041 +- Upgraded to `clang-format-16` #2147 +- Added `alpaka::getPitchesInBytes` function which returns all pitches for a given view as an `alpaka::Vec` #2092 #2093 #2116 #2125 +- Added `alpaka::get{Extents,Offsets}` functions which return all extents/offsets for a given view as an `alpaka::Vec` #2080 +- Added `alpaka_DISABLE_VENDOR_RNG` CMake flag and its corresponding preprocessor macro `ALPAKA_DISABLE_VENDOR_RNG` to optionally disable vendor RNG libraries #2036 +- Added alpaka port of BabelStream #1846 #1934 +- Added utility functions `alpaka::core::{divCeil,intPow,nthRootFloor}` #1830 +- Added `operator==` for `alpaka::WorkDivMembers` #1829 +- Added `alpaka::is{Accelerator,Device,Platform,Queue}` variable templates #1818 +- Added accelerator tags 
which allow for accelerator-specific code paths without enabling the corresponding back-end #1804 #1814 +- Added experimental support for `std::mdspan` #1788 #2048 #2052 #2053 +- Added `alpaka::ViewConst` which wraps another view but prevents modifying accesses #1746 +- `alpaka::{memcpy,memset}` now support temporary destination views #1743 +- Host memory alignment can now be specified by using the `ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT` macro #1686 +- Added `alpaka::allocMappedBuf` for allocating device-accessible pinned host memory #1685 #1782 #2162 + - Added related trait `alpaka::trait::hasMappedBufSupport` to query the host CPU for device-accessible pinned memory support #1782 + - Added related utility function `alpaka::allocMappedBufIfSupported` to allocate device-accessible pinned memory, if supported, and regular memory otherwise #1782 #2120 +- Relocatable device code can now be enabled using the `alpaka_RELOCATABLE_DEVICE_CODE` CMake option #1467 + +### Changed + +- API changes: + - **Breaking change**: `alpaka::get{Width,Height,Depth}` now always return `1` for unavailable dimensions instead of `static_assert`ing #2148 + - **Breaking change**: alpaka platforms have been renamed from `alpaka::Pltf*` to `alpaka::Platform*` #2024 #2032 + - **Breaking change**: alpaka platforms are now full objects instead of types #1988 #2051 #2165 + - `operator<<(std::ostream&, WorkDivMembers const&)` is now a `friend` of `alpaka::WorkDivMembers` instead of a method #1829 + - **Potentially breaking change**: Switched several view-related methods from `ALPAKA_FN_HOST_ACC` to `ALPAKA_FN_HOST` #1826 + - Accelerators' copy/move constructors and assignment operators are now explicitly `delete`d #1825 + - `alpaka::test::allocAsyncBufIfSupported` was moved into the general `namespace alpaka` #1782 + - Removed unnecessary attribute `ALPAKA_FN_HOST_ACC` from defaulted functions #1761 + - The `UniformCudaHip` types are now templated on traits-like `struct`s which encapsulate the CUDA 
or HIP API #1665 +- General behavioural changes: + - Improved handling of CMake generator expressions #2146 + - Improved detection of C++20 features #2138 + - Simplified internals of `alpaka_add_{executable,library}` #2072 #2082 + - **Breaking change**: Removed dummy atomics from memory fence implementations. Users now need to guarantee correctness themselves #2071 + - In debug mode MSVC will use the `/Od` optimization level #1977 + - In debug mode clang-based compilers will explicitly use the `-O0` optimization level #1977 + - In debug mode `g++` will use the `-Og` optimization level #1977 + - `-Werror` and its MSVC equivalent `/WX` are no longer enabled by default when `BUILD_TESTING` is set to `ON` #1977 + - A platform's internal `std::vector` containing the `alpaka::Device`s now reserves the necessary memory before initialization #1926 + - **Potentially breaking change**: `ALPAKA_FN_INLINE` now enforces inlining for platforms other than CUDA and HIP #1918 + - Replaced `alpaka::core::ConcurrentExecPool` with `alpaka::core::CallbackThread` in all queue implementations #1870 + - If no back-end is enabled, alpaka automatically selects the serial back-end for examples and test cases #1843 + - On Linux platforms, the free global memory is now determined by a call to `sysconf(_SC_AVPHYS_PAGES)` instead of querying `/proc/sysinfo` #1776 + - **Potentially breaking change**: Changed CMake's look-up of MSVC's runtime libraries (see [here](https://cmake.org/cmake/help/v3.22/policy/CMP0091.html) for an in-depth explanation) #1751 + - Unified `alpaka::{memcpy,memset}`'s internal `static_assert`s #1748 + - `alpaka::core::aligned{Alloc,Free}` now internally use aligned `new`/`delete` instead of OS-specific APIs #1689 +- CUDA/HIP back-end changes: + - `nvcc` now makes correct use of `--Werror` and more CUDA-related warnings #2135 + - Unified `ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK` macros #2090 + - Made some internal constants `constexpr` #2063 + - The CUDA/HIP back-ends will now 
always use `std::size_t` for internal pitch calculations #2056 + - **Breaking change**: clang as CUDA compiler will only work in `Release` build mode #2027 + - **Potentially breaking change**: In debug mode `nvcc` will now use the `-G` flag which enables device-side debug symbols #1977 + - Starting from HIP 5.2.0, the HIP back-end includes `` instead of `` #1914 + - Starting from HIP 5.2.0, the HIP back-end makes use of `hip{Malloc,Free}Async` #1894 + - If clang is used as CUDA compiler together with CUDA 11.3 a warning will be printed #1890 + - Starting from HIP 5.4.0, the HIP back-end internally uses `hipLaunchHostFunc` instead of a work-around #1883 + - Adapted to API changes in CUDA 11.7's stream memory operations #1878 #1919 + - Shortened mangled CUDA kernel names #1795 + - CUDA runtime version checks are now based upon `CUDART_VERSION` instead of `BOOST_LANG_CUDA` #1777 + - Because of a HIP performance regression the HIP back-end now uses the emulated `atomicAdd(float)` on the `Threads` hierarchy level #1771 + - Changed look-up of built-in and emulated atomic functions for the CUDA and HIP back-ends #1768 + - The HIP back-end now uses the built-in `atomicAdd(double)` #1767 + - CUDA/HIP queues now internally make use of callback threads #1719 #1735 #1976 #2011 +- SYCL back-end changes: + - Removed unnecessary `-fintelfpga` flag from CMake build system when compiling the SYCL back-end for Intel FPGAs #2179 + - **Breaking change**: Support for the `activemask` intrinsic is disabled for the SYCL back-end #2161 + - Updated `README_SYCL.md` #2140 + - **Breaking change**: Reworked CMake handling for SYCL targets #1970 #2066 + - **Breaking change**: The SYCL back-end now accepts SYCL USM pointers as kernel parameters #1845 #2042 + - **Breaking change**: The SYCL CPU selector was generalized to both Intel and non-Intel CPUs and therefore renamed #1845 + - **Breaking change**: The SYCL back-end replaced `sycl::stream` with `printf` for device side printing #1845 #2045 
+ - The SYCL back-end now features a kernel trait which allows to set the SYCL sub-group (= warp) size #1845 + - The SYCL back-end now supports RNG through the Intel oneAPI libraries #1845 + - The SYCL back-end is now based upon the SYCL 2020 specification #1845 #1981 +- RNG changes: + - **Breaking change**: Philox RNG is now counter-based and stateless #1792 + - Philox random engines are now trivially copyable #1778 +- Documentation: + - Improved documentation of `ALPAKA_FN_INLINE` #2091 + - Reduced example work sizes #2084 + - Improved documentation of `alpaka::QueueCpuOmp2Collective` #2025 + - Clarified kernel and kernel argument requirements #1944 + - Replaced license headers with SPDX license identifiers #1917 + - Collapsed compiler support matrix in `README.md` #1860 +- Refactorings: + - Refactored test classes #2156 #2158 + - Use nested namespace specifiers #2152 + - Removed unnecessary member initialization calls #2151 + - Avoid unnecessary indentions #2149 + - Renamed internal variables of `ViewSubViewTest.cpp` and `ViewPlainPtrTest.cpp` to prevent name shadowing #2144 + - Refactored the internals of `alpaka::{mapIdx,mapIdxPitchBytes}` #2136 + - Replaced Codeplay's STLTuple implementation with `std::tuple` #2106 + - Replaced `ALPAKA_DECAY_T` macro with `std::decay_t` #2104 + - Refactored `alpaka::internal::ViewAccessOps` #2094 + - **Breaking change**: Replaced `alpaka::createVecFromIndexedFn` family of functions with `alpaka::Vec`'s new generator constructor #2085 + - Refactored `alpaka::QueueCpuOmp2Collective` #2013 + - Refactored `alpaka::meta::ndLoop` #1999 + - Refactored `alpaka::TaskKernelCpuThreads` #1998 + - Refactored `alpaka::core::ConcurrentExecPool` and related classes #1852 #2000 + - Refactored `alpaka::subDivideGridElements` #1830 + - Refactored includes inside `alpaka/dev/cpu/SysInfo.hpp` #1776 +- Test changes: + - Catch2 is no longer built with fast math enabled when using `icpx` as compiler #2128 + - `-pedantic` is no longer added when 
compiling CUDA code #2096 + - Reduced noise from `helloWorld`, `helloWorldLambda` and `TestTemplate` #2076 + - Renamed `fenceTest` to `FenceTest` #2037 + - The `Any` intrinsic unit test now assumes a sub-group size of `4` #2017 + - The `NativeHandleTest` no longer assumes that a native handle is an `int` #2008 + - Test cases are now compiled with MSVC's two phase lookup enabled #1986 + - Kernel names in the test cases are now demangled #1983 + - CUDA/HIP/SYCL atomic tests are now restricted to explicitly supported types #1980 + - Test cases are no longer executed for zero-dimensional SYCL accelerators #1979 + - Tests are disabled by default when using alpaka via CMake's `add_subdirectory` #1912 +- CI changes: + - Removed unused sanitizer blacklists #2154 + - Simplified CI oneTBB installation #2145 + - The GitLab CI now features runtime tests built with `g++` and `clang++` #2131 #2141 + - Upgraded ASan CI job to `clang-16` #2057 + - Upgraded special CUDA jobs to newer versions #2055 + - Re-enabled `g++-9` + CUDA jobs #2040 + - Updated Read the Docs configuration to v2 #2010 + - For ROCm versions <= 5.3 certain warnings are ignored #1932 + - Split compile and runtime CI runners into separate GitLab pipelines #1908 + - Switched more CI runners to C++20 mode #1902 + - LLVM sanitizer libraries are explicitly installed #1900 + - Re-enabled CUDA + `gcc-10` jobs #1890 + - Moved all GitHub jobs from `ubuntu-latest` to `ubuntu-20.04` #1872 + - More jobs are only compiling the test cases but no longer execute them #1869 + - CUDA CI runners no longer manually install the GPU driver #1853 + - Change ROCm CI node #1844 + - Reworked Xcode OpenMP installation #1840 #1922 + - Upgraded to GitHub checkout action v3 #1832 + - Upgraded test infrastructure to Catch2 v3 #1749 #1815 #1861 #1911 + - Upgraded headercheck CI run to clang-13 and CUDA 11.2 #1803 + - Simplified CI clang installation #1763 + - Running CI workflows are now automatically cancelled when their corresponding PRs are 
updated #1717 + +### Deprecated + +- **Breaking change**: deprecated `alpaka::getPitchBytes[Vec]` functions in favour of new `alpaka::getPitchesInBytes` function #2092 #2116 +- **Breaking change**: deprecated `alpaka::get{Extent,Offset}[Vec]` functions in favour of new `alpaka::get{Extents,Offsets}` functions #2080 #2139 + +### Removed + +- `g++`: + - Dropped support for `g++-{7,8}` #1872 +- `clang++`: + - Removed work-around for very old clang versions #1916 + - Dropped support for clang as CUDA compiler for all versions before `clang-14` #1890 + - Dropped support for `clang-{6,7,8,9}` #1872 + - Dropped support for `clang-5` #1750 +- `icpc`: + - Dropped support for the Intel® C++ Compiler Classic (`icpc`) #1702 +- MSVC: + - Temporarily dropped support for MSVC + CUDA due to a nvcc bug #1958 + - Dropped support for MSVC 2019 #1887 +- Xcode: + - Dropped support for Xcode 12.4.0 #1759 +- CUDA: + - Dropped support for CUDA 10 #1872 + - Dropped support for CUDA 9.2 #1855 +- ROCm: + - Dropped support for ROCm 4 #1886 +- SYCL: + - Removed Xilinx platform support #1970 +- Removed floating point contractions for math test cases #2155 +- Removed `alpaka::set{Extent,Offset}` functions #2087 +- Removed alpaka's experimental accessors #2054 #2062 +- Catch2 is no longer compiled with `CATCH_CONFIG_FAST_COMPILE` set to `ON` #1978 +- Removed OpenMP 5 back-end #1947 +- Removed OpenACC back-end #1941 +- Removed warning for Boost 1.73 since alpaka requires Boost >= 1.74 #1849 +- Removed previously deprecated `alpaka::time` functionality #1841 +- Removed `alpaka::{map,unmap,pin,unpin,isPinned,prepareForAsyncCopy}()` free functions #1790 +- Removed unused `alpaka::ConceptUniformCudaHip` #1736 +- Removed Boost.fiber back-end #1718 + +### Fixed + +- Fixed warnings uncovered by `nvcc` + `clang++ -Werror` #2157 #2159 #2164 #2167 +- Removed useless semicolon #2129 +- Fixed debug information for SYCL zero-dimensional buffer allocations #2127 +- Fixed missing `[[maybe_unused]]` inside 
`extent/Traits.hpp` #2122 +- Fixed several minor issues with the documentation #2121 #2176 +- Fixed unsigned integer conversion inside `ViewAccessOps.hpp` #2119 +- Fixed several warnings issued by `nvcc` #2118 +- Fixed compiler explorer link #2117 +- `alpaka::core::detail::ThreadPool` now handles a task's `noexcept` specifier correctly #2115 +- Fixed missing `` include in `BlockSyncBarrierOmp.hpp` #2114 +- Fixed integer conversions inside `memViewTest` #2113 +- Fixed `alpaka::BufUniformCudaHipRt` declarations sometimes being a `struct` and sometimes a `class` #2109 +- Fixed `alpaka::wait()` behaviour for events and devices #2108 +- Fixed `alpaka::ViewPlainPtr` not being copyable and moveable #2105 +- **Potentially breaking change**: Fixed `alpaka::core::{CallbackThread,ThreadPool}` not propagating exceptions #2067 +- Fixed missing `ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK` calls in debug mode #2034 +- Worked around Catch2 macros not being thread-safe #2022 +- Fixed `alpaka::test::KernelExecutionFixture`'s delegating constructor #2021 +- Fixed missing `` include in `alpaka/rand/Traits.hpp` #1977 +- Fixed ill-formed spelling of `alpaka::EventUniformCudaHipRt`'s constructor in C++20 mode #1968 +- Fixed typo in memory fence documentation #1944 +- Fixed compilation issues for CPU-only jobs running on GPU CI runners #1939 +- Fixed clang-specific warning suppression occurring for other compilers in HIP back-end #1914 +- Fixed CI clang installation #1907 +- Fixed CUDA async / mapped memory allocation bug #1868 +- Fixed several bugs related to thread safety #1850 #1975 #1987 #1989 #2026 #2057 +- Fixed `alpaka::createView` for containers without a size argument #1847 +- Fixed behaviour of `alpaka::detail::nextDivisorLowerOrEqual` #1829 +- Fixed missing `final` keyword for accelerator inheritance #1816 +- Fixed missing template parameters in `alpaka::allocBuf(host, extent)` #1777 +- Fixed look-up of `atomic*_block()` functions for the CUDA back-end when clang is the device compiler 
#1773 +- Fixed mixed-type and mixed-precision `alpaka::math::pow` implementation #1733 +- Fixed `alpaka::QueueGenericThreadsNonBlocking` not completing running tasks upon its destruction #1728 +- Fixed host memory allocation / pinning on OpenPOWER platforms #1725 +- Fixed `alpaka::ffs` CPU intrinsic in C++20 mode #1716 +- Fixed typo in cheatsheet example for `alpaka::getWorkDiv` #1711 +- Fixed missing braces around aggregate initializers #1704 +- Fixed CI installation of CUDA apt repository keys #1703 ## [0.9.0] - 2022-04-21 ### Compatibility Changes: From 9a51f4f874765ef6d14cae1d1e1724d625c4a140 Mon Sep 17 00:00:00 2001 From: Jan Stephan Date: Thu, 12 Oct 2023 18:27:06 +0200 Subject: [PATCH 22/23] Update author lists Co-authored-by: Bernhard Manfred Gruber --- .zenodo.json | 56 +++++++++++++++++++++++++++++++++++++++++----------- README.md | 14 +++++++++---- 2 files changed, 54 insertions(+), 16 deletions(-) diff --git a/.zenodo.json b/.zenodo.json index f7a2af2a8684..e2988c35a4bd 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -7,6 +7,11 @@ "affiliation": "Helmholtz-Zentrum Dresden-Rossendorf", "orcid": "0000-0003-3396-6154" }, + { + "name": "Bastrakova, Kseniia", + "affiliation": "Helmholtz-Zentrum Dresden-Rossendorf", + "orcid": "0000-0001-8970-5098" + }, { "name": "Bocci, Andrea", "affiliation": "CERN", @@ -22,15 +27,18 @@ "affiliation": "Helmholtz-Zentrum Dresden-Rossendorf", "orcid": "0000-0002-8218-3116" }, + { + "name": "Ferragina, Luca", + "affiliation": "CERN" + }, { "name": "Gruber, Bernhard Manfred", "affiliation": "CASUS, Helmholtz-Zentrum Dresden-Rossendorf, CERN", "orcid": "0000-0001-7848-1690" }, { - "name": "Huebl, Axel", - "affiliation": "Lawrence Berkeley National Laboratory", - "orcid": "0000-0003-1943-7141" + "name": "Kaever, Christian", + "affiliation": "Helmholtz-Zentrum Dresden-Rossendorf" }, { "name": "Kelling, Jeffrey", @@ -38,28 +46,29 @@ "orcid": "0000-0003-1761-2591" }, { - "name": "Pantaleo, Felice", + "name": "Martin-Haugh, Stewart", 
+ "affiliation": "STFC Rutherford Appleton Laboratory", + "orcid": "0000-0001-9457-1928" + }, + { + "name": "Perego, Aurora", "affiliation": "CERN", - "orcid": "0000-0003-3266-4357" + "orcid": "0000-0003-1576-6757" }, { "name": "Stephan, Jan", "affiliation": "CASUS, Helmholtz-Zentrum Dresden-Rossendorf", "orcid": "0000-0001-7839-4386" }, - { - "name": "Vyskočil, Jiří", - "affiliation":"CASUS, Helmholtz-Zentrum Dresden-Rossendorf", - "orcid": "0000-0001-8822-0929" - }, { "name": "Widera, René", "affiliation": "Helmholtz-Zentrum Dresden-Rossendorf", "orcid": "0000-0003-1642-0459" }, { - "name": "Worpitz, Benjamin", - "affiliation": "LogMeIn Inc." + "name": "Young, Jeffrey", + "affiliation": "Georgia Institute of Technology", + "orcid": "0000-0001-9841-4057" } ], "contributors": [ @@ -73,6 +82,12 @@ "affiliation": "TU Dresden", "type": "Other" }, + { + "name": "Hübl, Axel", + "affiliation": "Lawrence Berkeley National Laboratory", + "orcid": "0000-0003-1943-7141", + "type": "Other" + }, { "name": "Knespel, Maximilian", "affiliation": "Helmholtz-Zentrum Dresden-Rossendorf", @@ -99,6 +114,12 @@ "affiliation": "JetBrains", "type": "Other" }, + { + "name": "Pantaleo, Felice", + "affiliation": "CERN", + "orcid": "0000-0003-3266-4357", + "type": "Other" + }, { "name": "Rogers, David M.", "affiliation": "Oak Ridge National Laboratory", @@ -120,6 +141,12 @@ "affiliation": "Deutsches Zentrum für Luft- und Raumfahrt e.V.", "type": "Other" }, + { + "name": "Vyskočil, Jiří", + "affiliation": "CASUS, Helmholtz-Zentrum Dresden-Rossendorf", + "orcid": "0000-0001-8822-0929", + "type": "Other" + }, { "name": "Werner, Matthias", "affiliation": "Helmholtz-Zentrum Dresden-Rossendorf", @@ -130,6 +157,11 @@ "affiliation":"TU Dresden", "type": "Other" }, + { + "name": "Worpitz, Benjamin", + "affiliation": "LogMeIn Inc.", + "type": "Other" + }, { "name": "Zacharias, Malte", "affiliation": "Helmholtz-Zentrum Dresden-Rossendorf", diff --git a/README.md b/README.md index 
db645d902aca..8fd6b25f94ec 100644 --- a/README.md +++ b/README.md @@ -223,31 +223,37 @@ Authors - Benjamin Worpitz* (original author) - Dr. Sergei Bastrakov* -- Dr. Andrea Bocci +- Kseniia Bastrakova +- Dr. Andrea Bocci* - Dr. Antonio Di Pilato - Simeon Ehrig +- Luca Ferragina - Bernhard Manfred Gruber* -- Dr. Axel Huebl +- Christian Kaever - Dr. Jeffrey Kelling -- Dr. Felice Pantaleo +- Dr. Stewart Martin-Haugh +- Aurora Perego - Jan Stephan* -- Dr. Jiří Vyskočil - René Widera* +- Dr. Jeffrey Young ### Former Members, Contributions and Thanks - Dr. Michael Bussmann - Mat Colgrove - Valentin Gehrke +- Dr. Axel Hübl - Maximilian Knespel - Jakob Krude - Alexander Matthes - Hauke Mewes - Phil Nash +- Dr. Felice Pantaleo - Dr. David M. Rogers - Mutsuo Saito - Jonas Schenke - Daniel Vollmer +- Dr. Jiří Vyskočil - Matthias Werner - Bert Wesarg - Malte Zacharias From 0c3e8eab9af37098417a9b10fa41534c5f10e20b Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Mon, 13 Nov 2023 13:47:36 +0100 Subject: [PATCH 23/23] Set release date in changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ab0153c56a3..0d4ee9864d91 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). -## [1.0.0] - 2023-10-13 +## [1.0.0] - 2023-11-14 ### Added