alpaka-group · psychocoderHPC · Oct 2, 2024 · Oct 2, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -154,7 +154,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
   - Starting from HIP 5.4.0, the HIP back-end internally uses `hipLaunchHostFunc` instead of a work-around #1883
   - Adapted to API changes in CUDA 11.7's stream memory operations #1878 #1919 
   - Shortened mangled CUDA kernel names #1795
-  - CUDA runtime versions checks are now based upon `CUDART_VERSION` instead of `BOOST_LANG_CUDA` #1777
+  - CUDA runtime versions checks are now based upon `CUDART_VERSION` instead of `ALPAKA_LANG_CUDA` #1777
   - Because of a HIP performance regression the HIP back-end now uses the emulated `atomicAdd(float)` on the `Threads` hierarchy level #1771
   - Changed look-up of built-in and emulated atomic functions for the CUDA and HIP back-ends #1768
   - The HIP back-end now uses the built-in `atomicAdd(double)` #1767
@@ -396,7 +396,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 - Renamed namespace `traits` to `trait` #1651
 - alpaka now enforces that kernel functions are trivially copyable #1654
 - Replaced the internal `hipLaunchKernelGGL()` call with a `kernel<<<...>>>()` call #1663
-- `BOOST_LANG_HIP` will now report a (somewhat) correct version number (for internal consumption) #1664
+- `ALPAKA_LANG_HIP` will now report a (somewhat) correct version number (for internal consumption) #1664
 - Refactored `Queue` implementation for CUDA and HIP to reduce code duplication #1667
 - `core/CudaHipMath.hpp` was merged back into `math/MathUniformCudaHipBuiltIn.hpp` #1668
 - The OpenMP 5 memory fence no longer explicitly sets the `acq_rel` memory order clause since it is the default #1673
@@ -615,7 +615,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 - fix Intel compiler detection #1070
 - CMake: build type CXX flag not passed to nvcc #1073
 - work around Intel ICE (Internal Compiler Error) when using std::decay on empty template parameter packs #1074
-- BoostPredef.hpp: Add redefinition of BOOST_COMP_PGI #1082
+- BoostPredef.hpp: Add redefinition of ALPAKA_COMP_PGI #1082
 - fix min/max return type deduction #1085
 - CMake: fix boost fiber linking #1088
 - fix HIP-clang compile #1107
@@ -800,7 +800,7 @@ The script only works if you used the full namespace `alpaka::*` for alpaka func
   - This use case can now be handled with the support for external CPU queues as can bee seen in the example QueueCpuOmp2CollectiveImpl
 - previously it was possible to have kernels return values even though they were always ignored. Now kernels are checked to always return void
 - renamed all files with *Stl suffix to *StdLib
-- renamed BOOST_ARCH_CUDA_DEVICE to BOOST_ARCH_PTX
+- renamed ALPAKA_ARCH_CUDA_DEVICE to ALPAKA_ARCH_PTX
 - executors have been renamed due to the upcoming standard C++ feature with a different meaning. All files within alpaka/exec/ have been moved to alpaka/kernel/ and the files and classes have been renamed from Exec* to TaskKernel*. This should not affect users of alpaka but will affect extensions.
 
 ## [0.3.6] - 2020-01-06

diff --git a/README.md b/README.md
@@ -85,9 +85,9 @@ Other compilers or combinations marked with :x: in the table above may work but
 Dependencies
 ------------
 
-[Boost](https://boost.org/) 1.74.0+ is the only mandatory external dependency.
+[Boost](https://boost.org/) 1.74.0+ is an optional dependency. 
+Boost is used to demangle C++ object names and to provide faster atomic operations on CPU back-ends when the C++20 feature `std::atomic_ref` is not supported by the compiler.
 The **alpaka** library itself just requires header-only libraries.
-However some of the accelerator back-end implementations require different boost libraries to be built.
 
 When an accelerator back-end using *CUDA* is enabled, version *11.2* (with nvcc as CUDA compiler) or version *11.2* (with clang as CUDA compiler) of the *CUDA SDK* is the minimum requirement.
 *NOTE*: When using clang as a native *CUDA* compiler, the *CUDA accelerator back-end* can not be enabled together with any *OpenMP accelerator back-end* because this combination is currently unsupported.

diff --git a/cmake/alpakaCommon.cmake b/cmake/alpakaCommon.cmake
@@ -207,17 +207,23 @@ endif()
 
 #-------------------------------------------------------------------------------
 # Find Boost.
+# Boost is optional. It is only needed for class name demangling and, if std::atomic_ref is not available,
+# for fast atomics on the CPU side.
 set(_alpaka_BOOST_MIN_VER "1.74.0")
 
 if(${alpaka_DEBUG} GREATER 1)
     SET(Boost_DEBUG ON)
     SET(Boost_DETAILED_FAILURE_MSG ON)
 endif()
 
-find_package(Boost ${_alpaka_BOOST_MIN_VER} REQUIRED
-        OPTIONAL_COMPONENTS atomic)
+find_package(Boost ${_alpaka_BOOST_MIN_VER} OPTIONAL_COMPONENTS atomic)
 
-target_link_libraries(alpaka INTERFACE Boost::headers)
+if(Boost_FOUND)
+    target_link_libraries(alpaka INTERFACE Boost::headers)
+    target_compile_definitions(alpaka INTERFACE ALPAKA_HAS_BOOST_HEADERS)
+else()
+    message(STATUS "Boost not available, class name demangling is not supported.")
+endif()
 
 if(alpaka_ACC_CPU_B_SEQ_T_SEQ_ENABLE OR
    alpaka_ACC_CPU_B_SEQ_T_THREADS_ENABLE OR
@@ -242,68 +248,22 @@ if(alpaka_ACC_CPU_B_SEQ_T_SEQ_ENABLE OR
             endif()
         endif()
 
-        if(Boost_ATOMIC_FOUND AND (NOT alpaka_HAS_STD_ATOMIC_REF))
-            message(STATUS "boost::atomic_ref<T> found")
-            target_link_libraries(alpaka INTERFACE Boost::atomic)
+        if(NOT alpaka_HAS_STD_ATOMIC_REF)
+            if(Boost_ATOMIC_FOUND)
+                message(STATUS "boost::atomic_ref<T> found")
+                target_link_libraries(alpaka INTERFACE Boost::atomic)
+            else()
+                message(STATUS "boost::atomic_ref<T> NOT found")
+            endif()
         endif()
     endif()
 
     if(alpaka_ACC_CPU_DISABLE_ATOMIC_REF OR ((NOT alpaka_HAS_STD_ATOMIC_REF) AND (NOT Boost_ATOMIC_FOUND)))
-        message(STATUS "atomic_ref<T> was not found or manually disabled. Falling back to lock-based CPU atomics.")
+        message(STATUS "atomic_ref<T> or boost::atomic_ref<T> was not found or manually disabled. Falling back to lock-based CPU atomics.")
         target_compile_definitions(alpaka INTERFACE ALPAKA_DISABLE_ATOMIC_ATOMICREF)
     endif()
 endif()
 
-if(${alpaka_DEBUG} GREATER 1)
-    message(STATUS "Boost in:")
-    cmake_print_variables(BOOST_ROOT)
-    cmake_print_variables(BOOSTROOT)
-    cmake_print_variables(BOOST_INCLUDEDIR)
-    cmake_print_variables(BOOST_LIBRARYDIR)
-    cmake_print_variables(Boost_NO_SYSTEM_PATHS)
-    cmake_print_variables(Boost_ADDITIONAL_VERSIONS)
-    cmake_print_variables(Boost_USE_MULTITHREADED)
-    cmake_print_variables(Boost_USE_STATIC_LIBS)
-    cmake_print_variables(Boost_USE_STATIC_RUNTIME)
-    cmake_print_variables(Boost_USE_DEBUG_RUNTIME)
-    cmake_print_variables(Boost_USE_DEBUG_PYTHON)
-    cmake_print_variables(Boost_USE_STLPORT)
-    cmake_print_variables(Boost_USE_STLPORT_DEPRECATED_NATIVE_IOSTREAMS)
-    cmake_print_variables(Boost_COMPILER)
-    cmake_print_variables(Boost_THREADAPI)
-    cmake_print_variables(Boost_NAMESPACE)
-    cmake_print_variables(Boost_DEBUG)
-    cmake_print_variables(Boost_DETAILED_FAILURE_MSG)
-    cmake_print_variables(Boost_REALPATH)
-    cmake_print_variables(Boost_NO_BOOST_CMAKE)
-    message(STATUS "Boost out:")
-    cmake_print_variables(Boost_FOUND)
-    cmake_print_variables(Boost_INCLUDE_DIRS)
-    cmake_print_variables(Boost_LIBRARY_DIRS)
-    cmake_print_variables(Boost_LIBRARIES)
-    cmake_print_variables(Boost_CONTEXT_FOUND)
-    cmake_print_variables(Boost_CONTEXT_LIBRARY)
-    cmake_print_variables(Boost_SYSTEM_FOUND)
-    cmake_print_variables(Boost_SYSTEM_LIBRARY)
-    cmake_print_variables(Boost_THREAD_FOUND)
-    cmake_print_variables(Boost_THREAD_LIBRARY)
-    cmake_print_variables(Boost_ATOMIC_FOUND)
-    cmake_print_variables(Boost_ATOMIC_LIBRARY)
-    cmake_print_variables(Boost_CHRONO_FOUND)
-    cmake_print_variables(Boost_CHRONO_LIBRARY)
-    cmake_print_variables(Boost_DATE_TIME_FOUND)
-    cmake_print_variables(Boost_DATE_TIME_LIBRARY)
-    cmake_print_variables(Boost_VERSION)
-    cmake_print_variables(Boost_LIB_VERSION)
-    cmake_print_variables(Boost_MAJOR_VERSION)
-    cmake_print_variables(Boost_MINOR_VERSION)
-    cmake_print_variables(Boost_SUBMINOR_VERSION)
-    cmake_print_variables(Boost_LIB_DIAGNOSTIC_DEFINITIONS)
-    message(STATUS "Boost cached:")
-    cmake_print_variables(Boost_INCLUDE_DIR)
-    cmake_print_variables(Boost_LIBRARY_DIR)
-endif()
-
 #-------------------------------------------------------------------------------
 # If available, use C++20 math constants. Otherwise, fall back to M_PI etc.
 if(${alpaka_CXX_STANDARD} VERSION_LESS "20")

diff --git a/include/alpaka/atomic/AtomicAtomicRef.hpp b/include/alpaka/atomic/AtomicAtomicRef.hpp
@@ -219,12 +219,12 @@ namespace alpaka
                 T result;
                 do
                 {
-#    if BOOST_COMP_GNUC || BOOST_COMP_CLANG
+#    if ALPAKA_COMP_GNUC || ALPAKA_COMP_CLANG
 #        pragma GCC diagnostic push
 #        pragma GCC diagnostic ignored "-Wfloat-equal"
 #    endif
                     result = ((old == compare) ? value : old);
-#    if BOOST_COMP_GNUC || BOOST_COMP_CLANG
+#    if ALPAKA_COMP_GNUC || ALPAKA_COMP_CLANG
 #        pragma GCC diagnostic pop
 #    endif
                 } while(!ref.compare_exchange_weak(old, result));

diff --git a/include/alpaka/atomic/AtomicCpu.hpp b/include/alpaka/atomic/AtomicCpu.hpp
@@ -7,14 +7,14 @@
 #include "alpaka/core/BoostPredef.hpp"
 
 // clang 9/10/11 together with nvcc<11.6.0 as host compiler fails at compile time when using boost::atomic_ref
-#ifdef BOOST_COMP_CLANG_AVAILABLE
-#    if(BOOST_COMP_CLANG < BOOST_VERSION_NUMBER(12, 0, 0) && BOOST_COMP_NVCC                                          \
-        && BOOST_COMP_NVCC < BOOST_VERSION_NUMBER(11, 6, 0))
+#ifdef ALPAKA_COMP_CLANG_AVAILABLE
+#    if(ALPAKA_COMP_CLANG < ALPAKA_VERSION_NUMBER(12, 0, 0) && ALPAKA_COMP_NVCC                                       \
+        && ALPAKA_COMP_NVCC < ALPAKA_VERSION_NUMBER(11, 6, 0))
 #        if !defined(ALPAKA_DISABLE_ATOMIC_ATOMICREF)
 #            define ALPAKA_DISABLE_ATOMIC_ATOMICREF
 #        endif
 #    endif
-#endif // BOOST_COMP_CLANG_AVAILABLE
+#endif // ALPAKA_COMP_CLANG_AVAILABLE
 
 #include "alpaka/atomic/AtomicAtomicRef.hpp"
 #include "alpaka/atomic/AtomicStdLibLock.hpp"

diff --git a/include/alpaka/atomic/AtomicOmpBuiltIn.hpp b/include/alpaka/atomic/AtomicOmpBuiltIn.hpp
@@ -35,7 +35,7 @@ namespace alpaka
                 T old;
                 auto& ref(*addr);
 // atomically update ref, but capture the original value in old
-#        if BOOST_COMP_GNUC
+#        if ALPAKA_COMP_GNUC
 #            pragma GCC diagnostic push
 #            pragma GCC diagnostic ignored "-Wconversion"
 #        endif
@@ -44,7 +44,7 @@ namespace alpaka
                     old = ref;
                     ref += value;
                 }
-#        if BOOST_COMP_GNUC
+#        if ALPAKA_COMP_GNUC
 #            pragma GCC diagnostic pop
 #        endif
                 return old;
@@ -60,7 +60,7 @@ namespace alpaka
                 T old;
                 auto& ref(*addr);
 // atomically update ref, but capture the original value in old
-#        if BOOST_COMP_GNUC
+#        if ALPAKA_COMP_GNUC
 #            pragma GCC diagnostic push
 #            pragma GCC diagnostic ignored "-Wconversion"
 #        endif
@@ -69,7 +69,7 @@ namespace alpaka
                     old = ref;
                     ref -= value;
                 }
-#        if BOOST_COMP_GNUC
+#        if ALPAKA_COMP_GNUC
 #            pragma GCC diagnostic pop
 #        endif
                 return old;
@@ -103,7 +103,7 @@ namespace alpaka
                 T old;
                 auto& ref(*addr);
 // atomically update ref, but capture the original value in old
-#        if BOOST_COMP_GNUC
+#        if ALPAKA_COMP_GNUC
 #            pragma GCC diagnostic push
 #            pragma GCC diagnostic ignored "-Wconversion"
 #        endif
@@ -112,7 +112,7 @@ namespace alpaka
                     old = ref;
                     ref &= value;
                 }
-#        if BOOST_COMP_GNUC
+#        if ALPAKA_COMP_GNUC
 #            pragma GCC diagnostic pop
 #        endif
                 return old;
@@ -128,7 +128,7 @@ namespace alpaka
                 T old;
                 auto& ref(*addr);
 // atomically update ref, but capture the original value in old
-#        if BOOST_COMP_GNUC
+#        if ALPAKA_COMP_GNUC
 #            pragma GCC diagnostic push
 #            pragma GCC diagnostic ignored "-Wconversion"
 #        endif
@@ -137,7 +137,7 @@ namespace alpaka
                     old = ref;
                     ref |= value;
                 }
-#        if BOOST_COMP_GNUC
+#        if ALPAKA_COMP_GNUC
 #            pragma GCC diagnostic pop
 #        endif
                 return old;
@@ -153,7 +153,7 @@ namespace alpaka
                 T old;
                 auto& ref(*addr);
 // atomically update ref, but capture the original value in old
-#        if BOOST_COMP_GNUC
+#        if ALPAKA_COMP_GNUC
 #            pragma GCC diagnostic push
 #            pragma GCC diagnostic ignored "-Wconversion"
 #        endif
@@ -162,7 +162,7 @@ namespace alpaka
                     old = ref;
                     ref ^= value;
                 }
-#        if BOOST_COMP_GNUC
+#        if ALPAKA_COMP_GNUC
 #            pragma GCC diagnostic pop
 #        endif
                 return old;

diff --git a/include/alpaka/atomic/AtomicStdLibLock.hpp b/include/alpaka/atomic/AtomicStdLibLock.hpp
@@ -57,15 +57,15 @@ namespace alpaka
             constexpr size_t hashTableSize = THashTableSize == 0u ? 1u : nextPowerOf2(THashTableSize);
 
             size_t const hashedAddr = hash(ptr) & (hashTableSize - 1u);
-#    if BOOST_COMP_CLANG
+#    if ALPAKA_COMP_CLANG
 #        pragma clang diagnostic push
 #        pragma clang diagnostic ignored "-Wexit-time-destructors"
 #    endif
             static std::array<
                 std::mutex,
                 hashTableSize>
                 m_mtxAtomic; //!< The mutex protecting access for an atomic operation.
-#    if BOOST_COMP_CLANG
+#    if ALPAKA_COMP_CLANG
 #        pragma clang diagnostic pop
 #    endif
             return m_mtxAtomic[hashedAddr];

diff --git a/include/alpaka/atomic/AtomicUniformCudaHip.hpp b/include/alpaka/atomic/AtomicUniformCudaHip.hpp
@@ -26,17 +26,17 @@ namespace alpaka
 
 #    if !defined(ALPAKA_HOST_ONLY)
 
-#        if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA
+#        if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !ALPAKA_LANG_CUDA
 #            error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
 #        endif
 
-#        if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP
+#        if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !ALPAKA_LANG_HIP
 #            error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
 #        endif
 
 //! clang is providing a builtin for different atomic functions even if these is not supported for architectures < 6.0
 #        define CLANG_CUDA_PTX_WORKAROUND                                                                             \
-            (BOOST_COMP_CLANG && BOOST_LANG_CUDA && BOOST_ARCH_PTX < BOOST_VERSION_NUMBER(6, 0, 0))
+            (ALPAKA_COMP_CLANG && ALPAKA_LANG_CUDA && ALPAKA_ARCH_PTX < ALPAKA_VERSION_NUMBER(6, 0, 0))
 
 //! These types must be in the global namespace for checking existence of respective functions in global namespace via
 //! SFINAE, so we use inline namespace.
@@ -136,7 +136,7 @@ inline namespace alpakaGlobal
     };
 #        endif
 
-#        if(BOOST_LANG_HIP)
+#        if(ALPAKA_LANG_HIP)
     // HIP shows bad performance with builtin atomicAdd(float*,float) for the hierarchy threads therefore we do not
     // call the buildin method and instead use the atomicCAS emulation. For details see:
     // https://github.com/alpaka-group/alpaka/issues/1657
@@ -210,7 +210,7 @@ inline namespace alpakaGlobal
 #        endif
 
 // disable HIP atomicMin: see https://github.com/ROCm-Developer-Tools/hipamd/pull/40
-#        if(BOOST_LANG_HIP)
+#        if(ALPAKA_LANG_HIP)
     template<typename THierarchy>
     struct AlpakaBuiltInAtomic<alpaka::AtomicMin, float, THierarchy> : std::false_type
     {
@@ -277,7 +277,7 @@ inline namespace alpakaGlobal
 #        endif
 
     // disable HIP atomicMax: see https://github.com/ROCm-Developer-Tools/hipamd/pull/40
-#        if(BOOST_LANG_HIP)
+#        if(ALPAKA_LANG_HIP)
     template<typename THierarchy>
     struct AlpakaBuiltInAtomic<alpaka::AtomicMax, float, THierarchy> : std::false_type
     {

diff --git a/include/alpaka/atomic/AtomicUniformCudaHipBuiltIn.hpp b/include/alpaka/atomic/AtomicUniformCudaHipBuiltIn.hpp
@@ -18,11 +18,11 @@
 
 #    if !defined(ALPAKA_HOST_ONLY)
 
-#        if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA
+#        if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !ALPAKA_LANG_CUDA
 #            error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
 #        endif
 
-#        if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP
+#        if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !ALPAKA_LANG_HIP
 #            error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
 #        endif
 
@@ -79,7 +79,7 @@ namespace alpaka::trait
 
                 // Emulating atomics with atomicCAS is mentioned in the programming guide too.
                 // http://docs.nvidia.com/cuda/cuda-c-programming-guide/#atomic-functions
-#        if BOOST_LANG_HIP
+#        if ALPAKA_LANG_HIP
 #            if __has_builtin(__hip_atomic_load)
                 EmulatedType old{__hip_atomic_load(addressAsIntegralType, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT)};
 #            else