Annotate all public APIs with LLAMA_EXPORT

alpaka-group · Oct 4, 2023 · de6724e · de6724e
1 parent 8f7f517
commit de6724e
Show file tree

Hide file tree

Showing 36 changed files with 320 additions and 4 deletions.
diff --git a/include/llama/Accessors.hpp b/include/llama/Accessors.hpp
@@ -14,6 +14,7 @@
 namespace llama::accessor
 {
     /// Default accessor. Passes through the given reference.
+    LLAMA_EXPORT
     struct Default
     {
         template<typename Reference>
@@ -24,6 +25,7 @@ namespace llama::accessor
     };
 
     /// Allows only read access and returns values instead of references to memory.
+    LLAMA_EXPORT
     struct ByValue
     {
         template<typename Reference>
@@ -38,6 +40,7 @@ namespace llama::accessor
     };
 
     /// Allows only read access by qualifying the references to memory with const.
+    LLAMA_EXPORT
     struct Const
     {
         // for l-value references
@@ -93,6 +96,7 @@ namespace llama::accessor
     };
 
     /// Qualifies references to memory with __restrict. Only works on l-value references.
+    LLAMA_EXPORT
     struct Restrict
     {
         template<typename T>
@@ -104,6 +108,7 @@ namespace llama::accessor
 
 #ifdef __cpp_lib_atomic_ref
     /// Accessor wrapping a reference into a std::atomic_ref. Can only wrap l-value references.
+    LLAMA_EXPORT
     struct Atomic
     {
         template<typename T>
@@ -115,6 +120,7 @@ namespace llama::accessor
 #endif
 
     /// Locks a mutex during each access to the data structure.
+    LLAMA_EXPORT
     template<typename Mutex = std::mutex>
     struct Locked
     {
@@ -177,11 +183,13 @@ namespace llama::accessor
 
     /// Accessor combining multiple other accessors. The contained accessors are applied in left to right order to the
     /// memory location when forming the reference returned from a view.
     /// This primary template is only selected for an empty accessor list (lists with one or more accessors are
     /// handled by the partial specialization) and derives from internal::StackedLeave<0, Default>.
+    LLAMA_EXPORT
     template<typename... Accessors>
     struct Stacked : internal::StackedLeave<0, Default>
     {
     };
 
+    LLAMA_EXPORT
     template<typename FirstAccessor, typename... MoreAccessors>
     struct Stacked<FirstAccessor, MoreAccessors...>
         : internal::StackedLeave<1 + sizeof...(MoreAccessors), FirstAccessor>

diff --git a/include/llama/Array.hpp b/include/llama/Array.hpp
@@ -13,6 +13,7 @@ namespace llama
     /// Array class like `std::array` but suitable for use with offloading devices like GPUs.
     /// \tparam T type of array elements.
     /// \tparam N rank of the array.
+    LLAMA_EXPORT
     template<typename T, std::size_t N>
     // NOLINTNEXTLINE(bugprone-reserved-identifier,cert-dcl37-c,cert-dcl51-cpp,readability-identifier-naming)
     struct Array
@@ -126,6 +127,7 @@ namespace llama
         }
     };
 
+    LLAMA_EXPORT
     template<typename T>
     struct Array<T, 0>
     {
@@ -237,9 +239,11 @@ namespace llama
         }
     };
 
+    LLAMA_EXPORT
     // Deduction guide: the element type is the type of the first argument and the size is the total number of
     // arguments.
     template<typename First, typename... Args>
     Array(First, Args... args) -> Array<First, sizeof...(Args) + 1>;
 
+    LLAMA_EXPORT
     template<typename T, std::size_t N>
     auto operator<<(std::ostream& os, const Array<T, N>& a) -> std::ostream&
     {
@@ -257,6 +261,7 @@ namespace llama
         return os;
     }
 
+    LLAMA_EXPORT
     template<typename T, std::size_t N>
     LLAMA_FN_HOST_ACC_INLINE constexpr auto pushFront([[maybe_unused]] Array<T, N> a, T v) -> Array<T, N + 1>
     {
@@ -268,6 +273,7 @@ namespace llama
         return r;
     }
 
+    LLAMA_EXPORT
     template<typename T, std::size_t N>
     LLAMA_FN_HOST_ACC_INLINE constexpr auto pushBack([[maybe_unused]] Array<T, N> a, T v) -> Array<T, N + 1>
     {
@@ -279,6 +285,7 @@ namespace llama
         return r;
     }
 
+    LLAMA_EXPORT
     template<typename T, std::size_t N>
     LLAMA_FN_HOST_ACC_INLINE constexpr auto popBack([[maybe_unused]] Array<T, N> a)
     {
@@ -290,6 +297,7 @@ namespace llama
         return r;
     }
 
+    LLAMA_EXPORT
     template<typename T, std::size_t N>
     LLAMA_FN_HOST_ACC_INLINE constexpr auto popFront([[maybe_unused]] Array<T, N> a)
     {
@@ -301,6 +309,7 @@ namespace llama
         return r;
     }
 
+    LLAMA_EXPORT
     template<typename T, std::size_t N>
     LLAMA_FN_HOST_ACC_INLINE constexpr auto product(Array<T, N> a) -> T
     {
@@ -310,6 +319,7 @@ namespace llama
         return prod;
     }
 
+    LLAMA_EXPORT
     template<typename T, std::size_t N>
     LLAMA_FN_HOST_ACC_INLINE constexpr auto dot([[maybe_unused]] Array<T, N> a, [[maybe_unused]] Array<T, N> b) -> T
     {
@@ -321,11 +331,13 @@ namespace llama
     }
 } // namespace llama
 
+LLAMA_EXPORT
 // std::tuple_size specialization for llama::Array<T, N>: reports N.
 template<typename T, size_t N>
 struct std::tuple_size<llama::Array<T, N>> : std::integral_constant<size_t, N> // NOLINT(cert-dcl58-cpp)
 {
 };
 
+LLAMA_EXPORT
 template<size_t I, typename T, size_t N>
 struct std::tuple_element<I, llama::Array<T, N>> // NOLINT(cert-dcl58-cpp)
 {

diff --git a/include/llama/ArrayExtents.hpp b/include/llama/ArrayExtents.hpp
@@ -14,13 +14,15 @@ namespace llama
     // TODO(bgruber): make this an alias in C++20, when we have CTAD for aliases
     /// Represents a run-time index into the array dimensions.
     /// \tparam T Type of each index component; also the element type of the Array base class.
     /// \tparam Dim Compile-time number of dimensions.
+    LLAMA_EXPORT
     template<typename T, std::size_t Dim>
     struct ArrayIndex : Array<T, Dim>
     {
         /// Number of dimensions, i.e. the Dim template argument.
         static constexpr std::size_t rank = Dim;
     };
 
     // allow comparing ArrayIndex with different size types:
+    LLAMA_EXPORT
     template<std::size_t Dim, typename TA, typename TB>
     LLAMA_FN_HOST_ACC_INLINE constexpr auto operator==(ArrayIndex<TA, Dim> a, ArrayIndex<TB, Dim> b) -> bool
     {
@@ -30,6 +32,7 @@ namespace llama
         return true;
     }
 
+    LLAMA_EXPORT
     template<std::size_t Dim, typename TA, typename TB>
     LLAMA_FN_HOST_ACC_INLINE constexpr auto operator!=(ArrayIndex<TA, Dim> a, ArrayIndex<TB, Dim> b) -> bool
     {
@@ -61,16 +64,19 @@ namespace llama
         };
     } // namespace internal
 
+    LLAMA_EXPORT
     // Deduction guide: the number of dimensions is the number of arguments; the value type is
     // internal::IndexTypeFromArgs<std::size_t, Args...>::type.
     template<typename... Args>
     ArrayIndex(Args...)
         -> ArrayIndex<typename internal::IndexTypeFromArgs<std::size_t, Args...>::type, sizeof...(Args)>;
 } // namespace llama
 
+LLAMA_EXPORT
 // std::tuple_size specialization for llama::ArrayIndex<V, N>: reports N.
 template<typename V, size_t N>
 struct std::tuple_size<llama::ArrayIndex<V, N>> : std::integral_constant<size_t, N> // NOLINT(cert-dcl58-cpp)
 {
 };
 
+LLAMA_EXPORT
 template<size_t I, typename V, size_t N>
 struct std::tuple_element<I, llama::ArrayIndex<V, N>> // NOLINT(cert-dcl58-cpp)
 {
@@ -116,12 +122,14 @@ namespace llama
         };
     } // namespace internal
 
+    LLAMA_EXPORT
     /// Used as a template argument to \ref ArrayExtents to mark a dynamic extent.
     /// Defined as a value-initialized internal::Dyn object.
     inline constexpr auto dyn = internal::Dyn{};
 
     /// ArrayExtents holding compile and runtime indices. This is conceptually equivalent to the std::extents of
     /// std::mdspan (@see: https://wg21.link/P0009) including the changes to make the size_type controllable (@see:
     /// https://wg21.link/P2553).
+    LLAMA_EXPORT
     template<typename T = std::size_t, T... Sizes>
     struct ArrayExtents : Array<T, ((Sizes == dyn) + ... + 0)>
     {
@@ -163,6 +171,7 @@ namespace llama
         }
     };
 
+    LLAMA_EXPORT
     template<typename T>
     struct ArrayExtents<T>
     {
@@ -179,6 +188,7 @@ namespace llama
         }
     };
 
+    LLAMA_EXPORT
     template<typename... Args>
     ArrayExtents(Args...) -> ArrayExtents<
         typename internal::IndexTypeFromArgs<std::size_t, Args...>::type,
@@ -191,6 +201,7 @@ namespace llama
     static_assert(std::is_trivially_move_assignable_v<ArrayExtents<std::size_t, 1>>);
     static_assert(std::is_empty_v<ArrayExtents<std::size_t, 1>>);
 
+    LLAMA_EXPORT
     template<typename SizeTypeA, SizeTypeA... SizesA, typename SizeTypeB, SizeTypeB... SizesB>
     LLAMA_FN_HOST_ACC_INLINE constexpr auto operator==(
         ArrayExtents<SizeTypeA, SizesA...> a,
@@ -199,6 +210,7 @@ namespace llama
         return a.toArray() == b.toArray();
     }
 
+    LLAMA_EXPORT
     template<typename SizeTypeA, SizeTypeA... SizesA, typename SizeTypeB, SizeTypeB... SizesB>
     LLAMA_FN_HOST_ACC_INLINE constexpr auto operator!=(
         ArrayExtents<SizeTypeA, SizesA...> a,
@@ -207,6 +219,7 @@ namespace llama
         return !(a == b);
     }
 
+    LLAMA_EXPORT
     template<typename SizeType, SizeType... Sizes>
     LLAMA_FN_HOST_ACC_INLINE constexpr auto product(ArrayExtents<SizeType, Sizes...> e) -> SizeType
     {
@@ -222,14 +235,17 @@ namespace llama
         }
     } // namespace internal
 
+    LLAMA_EXPORT
     /// N-dimensional ArrayExtents where all N extents are Extent.
     /// The type is the return type of internal::makeArrayExtents<SizeType, Extent> called with an index sequence of
     /// length N.
     /// \tparam SizeType Type of the extent value.
     /// \tparam N Number of dimensions.
     /// \tparam Extent Value used for each of the N extents.
     template<typename SizeType, std::size_t N, SizeType Extent>
     using ArrayExtentsNCube = decltype(internal::makeArrayExtents<SizeType, Extent>(std::make_index_sequence<N>{}));
 
+    LLAMA_EXPORT
     /// N-dimensional ArrayExtents where all values are dynamic.
     /// Shorthand for ArrayExtentsNCube<SizeType, N, dyn>.
     /// \tparam SizeType First template argument passed on to ArrayExtentsNCube.
     /// \tparam N Number of dimensions.
     template<typename SizeType, std::size_t N>
     using ArrayExtentsDynamic = ArrayExtentsNCube<SizeType, N, dyn>;
 
+    LLAMA_EXPORT
     template<typename SizeType, std::size_t Dim, typename Func, typename... OuterIndices>
     LLAMA_FN_HOST_ACC_INLINE void forEachArrayIndex(
         [[maybe_unused]] const ArrayIndex<SizeType, Dim>& extents,
@@ -250,19 +266,22 @@ namespace llama
         LLAMA_END_SUPPRESS_HOST_DEVICE_WARNING
     }
 
+    LLAMA_EXPORT
     // Overload taking ArrayExtents: calls forEachArrayIndex again with the result of extents.toArray() and the
     // perfectly forwarded functor.
     template<typename SizeType, SizeType... Sizes, typename Func>
     LLAMA_FN_HOST_ACC_INLINE void forEachArrayIndex(ArrayExtents<SizeType, Sizes...> extents, Func&& func)
     {
         forEachArrayIndex(extents.toArray(), std::forward<Func>(func));
     }
 } // namespace llama
 
+LLAMA_EXPORT
 // std::tuple_size specialization for llama::ArrayExtents: reports the number of Sizes template arguments.
 template<typename SizeType, SizeType... Sizes>
 struct std::tuple_size<llama::ArrayExtents<SizeType, Sizes...>> // NOLINT(cert-dcl58-cpp)
     : std::integral_constant<std::size_t, sizeof...(Sizes)>
 {
 };
 
+LLAMA_EXPORT
 template<typename SizeType, std::size_t I, SizeType... Sizes>
 struct std::tuple_element<I, llama::ArrayExtents<SizeType, Sizes...>> // NOLINT(cert-dcl58-cpp)
 {

diff --git a/include/llama/ArrayIndexRange.hpp b/include/llama/ArrayIndexRange.hpp
@@ -17,6 +17,7 @@
 namespace llama
 {
     /// Iterator supporting \ref ArrayIndexRange.
+    LLAMA_EXPORT
     template<typename ArrayExtents>
     struct ArrayIndexIterator
     {
@@ -244,6 +245,7 @@ namespace llama
     };
 
     /// Range allowing to iterate over all indices in an \ref ArrayExtents.
+    LLAMA_EXPORT
     template<typename ArrayExtents>
     struct ArrayIndexRange
         : private ArrayExtents

diff --git a/include/llama/BlobAllocators.hpp b/include/llama/BlobAllocators.hpp
@@ -21,6 +21,7 @@ namespace llama::bloballoc
 {
     /// Allocates statically sized memory for a \ref View, which is copied each time a \ref View is copied.
     /// \tparam BytesToReserve the amount of memory to reserve.
+    LLAMA_EXPORT
     template<std::size_t BytesToReserve>
     struct Array
     {
@@ -44,6 +45,7 @@ namespace llama::bloballoc
 
     /// Allocates heap memory managed by a `std::unique_ptr` for a \ref View. This memory can only be uniquely owned by
     /// a single \ref View.
+    LLAMA_EXPORT
     struct UniquePtr
     {
         template<std::size_t Alignment>
@@ -61,6 +63,7 @@ namespace llama::bloballoc
 
     /// Allocates heap memory managed by a `std::shared_ptr` for a \ref View. This memory is shared between all copies
     /// of a \ref View.
+    LLAMA_EXPORT
     struct SharedPtr
     {
         template<std::size_t Alignment>
@@ -78,6 +81,7 @@ namespace llama::bloballoc
 #endif
 
     /// An STL compatible allocator allowing to specify alignment.
+    LLAMA_EXPORT
     template<typename T, std::size_t Alignment>
     struct AlignedAllocator
     {
@@ -119,6 +123,7 @@ namespace llama::bloballoc
 
     /// Allocates heap memory managed by a `std::vector` for a \ref View, which is copied each time a \ref View is
     /// copied.
+    LLAMA_EXPORT
     struct Vector
     {
         template<std::size_t Alignment>
@@ -135,6 +140,7 @@ namespace llama::bloballoc
     /// Allocates GPU device memory using cudaMalloc. The memory is managed by a std::unique_ptr with a deleter that
     /// calls cudaFree. If you want to use a view created with this allocator in a CUDA kernel, call \ref shallowCopy
     /// on the view before passing it to the kernel.
+    LLAMA_EXPORT
     struct CudaMalloc
     {
         inline static const auto deleter = [](void* p)
@@ -157,6 +163,7 @@ namespace llama::bloballoc
 #endif
 
 #if __has_include(<alpaka/alpaka.hpp>)
+    LLAMA_EXPORT
     template<typename Size, typename Dev>
     struct AlpakaBuf
     {