Separate record dim flattening from field permutations #782

Merged
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -127,7 +127,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- added macros `LLAMA_FORCE_INLINE` and `LLAMA_HOST_ACC` #366
- support clang as CUDA compiler #366
- `llama::mapping::SoA` and `llama::mapping::AoSoA` now support custom record dimension flatteners #371
- added the `llama::mapping::FlattenRecordDimIncreasingAlignment`, `llama::mapping::FlattenRecordDimDecreasingAlignment` and `llama::mapping::FlattenRecordDimMinimizePadding` record dimension flatteners #371
- added the `llama::mapping::PermuteFieldsIncreasingAlignment`, `llama::mapping::PermuteFieldsDecreasingAlignment` and `llama::mapping::PermuteFieldsMinimizePadding` record dimension field permuters #371
- added new mapping `llama::mapping::BitPackedIntSoA` bitpacking integers in the record dimension into SoA arrays, and added new example #372, #427, #441, #446
- added new mapping `llama::mapping::BitPackedFloatSoA` bitpacking floating-point types in the record dimension into SoA arrays, and added new example #414, #427, #446
- `LLAMA_FORCE_INLINE` views can be created on `const` blobs #375
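The renamed permuters are drop-in replacements for the old flatteners in the mapping template parameter lists. A hypothetical usage sketch follows; the record, tags and extents are made up for illustration, and the template parameter order follows the AoS declaration further down in this diff.

// Hypothetical usage sketch (illustration only).
#include <llama/llama.hpp>
#include <cstddef>

struct X{};
struct Y{};
struct Mass{};

using Particle = llama::Record<
    llama::Field<X, float>,
    llama::Field<Y, float>,
    llama::Field<Mass, double>>;

using ArrayExtents = llama::ArrayExtentsDynamic<std::size_t, 1>;

using Mapping = llama::mapping::AoS<
    ArrayExtents,
    Particle,
    llama::mapping::FieldAlignment::Align,
    llama::mapping::LinearizeArrayDimsCpp,
    llama::mapping::PermuteFieldsMinimizePadding>; // was FlattenRecordDimMinimizePadding

int main()
{
    auto view = llama::allocView(Mapping{ArrayExtents{1024}});
    view(0)(Mass{}) = 1.0; // field access is unaffected by the permutation
}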
14 changes: 7 additions & 7 deletions docs/pages/api.rst
@@ -178,15 +178,15 @@ Accessors
.. doxygenstruct:: llama::accessor::Restrict
.. doxygenstruct:: llama::accessor::Atomic
.. doxygenstruct:: llama::accessor::Stacked
RecordDim field permuters
^^^^^^^^^^^^^^^^^^^^^^^^^

RecordDim flattener
^^^^^^^^^^^^^^^^^^^

.. doxygenstruct:: llama::mapping::FlattenRecordDimInOrder
.. doxygenstruct:: llama::mapping::FlattenRecordDimSorted
.. doxygentypedef:: llama::mapping::FlattenRecordDimIncreasingAlignment
.. doxygentypedef:: llama::mapping::FlattenRecordDimDecreasingAlignment
.. doxygentypedef:: llama::mapping::FlattenRecordDimMinimizePadding
.. doxygenstruct:: llama::mapping::PermuteFieldsInOrder
.. doxygenstruct:: llama::mapping::PermuteFieldsSorted
.. doxygentypedef:: llama::mapping::PermuteFieldsIncreasingAlignment
.. doxygentypedef:: llama::mapping::PermuteFieldsDecreasingAlignment
.. doxygentypedef:: llama::mapping::PermuteFieldsMinimizePadding

Common utilities
^^^^^^^^^^^^^^^^
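The permuters documented here differ only in the order in which they lay out the record's fields. As an illustration only (not LLAMA code), here is the effect of minimizing padding on a hypothetical field set, assuming a typical 64-bit platform where alignof(double) == 8.

// Illustration only: why permuting fields can reduce a record's size.
#include <cstdint>

struct InOrder // declaration order, as PermuteFieldsInOrder keeps it
{
    std::uint8_t flag; // 1 byte + 7 bytes padding before pos
    double pos;        // 8 bytes
    std::uint16_t id;  // 2 bytes + 6 bytes tail padding
};
static_assert(sizeof(InOrder) == 24);

struct Permuted // decreasing alignment, as PermuteFieldsMinimizePadding would pick here
{
    double pos;
    std::uint16_t id;
    std::uint8_t flag; // only 5 bytes of tail padding remain
};
static_assert(sizeof(Permuted) == 16);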
8 changes: 4 additions & 4 deletions examples/cuda/pitch/pitch.cu
@@ -60,14 +60,14 @@ namespace llamaex
typename TArrayExtents,
typename TRecordDim,
bool AlignAndPad = true,
template<typename> typename FlattenRecordDim = mapping::FlattenRecordDimInOrder>
template<typename> typename PermuteFields = mapping::PermuteFieldsInOrder>
struct PitchedAoS : mapping::MappingBase<TArrayExtents, TRecordDim>
{
private:
static constexpr std::size_t dim = TArrayExtents{}.size();

using Base = mapping::MappingBase<TArrayExtents, TRecordDim>;
using Flattener = FlattenRecordDim<TRecordDim>;
using Permuter = PermuteFields<FlatRecordDim<TRecordDim>>;

Array<std::size_t, dim> pitches;

@@ -116,9 +116,9 @@ namespace llamaex
#if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ <= 6
*& // mess with nvcc compiler state to workaround bug
#endif
Flattener::template flatIndex<RecordCoords...>;
Permuter::template permute<flatRecordCoord<TRecordDim, RecordCoord<RecordCoords...>>>;
const auto offset
= dot(pitches, ai) + flatOffsetOf<typename Flattener::FlatRecordDim, flatFieldIndex, AlignAndPad>;
= dot(pitches, ai) + flatOffsetOf<typename Permuter::FlatRecordDim, flatFieldIndex, AlignAndPad>;
return {0, offset};
}
};
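The updated access function combines the pitched array offset with the permuted field's offset inside the flat record. A plain-C++ analogue of that arithmetic is sketched below; the pixel record and pitches are made up, and offsetof stands in for flatOffsetOf with AlignAndPad.

// Conceptual analogue of the offset computation above (not LLAMA code).
#include <cstddef>

struct Pixel { float r, g, b; };

// 2D pitched AoS: the row pitch may be larger than width * sizeof(Pixel)
std::size_t pitchedOffsetOfG(std::size_t y, std::size_t x, std::size_t rowPitchBytes)
{
    const std::size_t pitches[2] = {rowPitchBytes, sizeof(Pixel)};
    const std::size_t ai[2] = {y, x};
    std::size_t offset = pitches[0] * ai[0] + pitches[1] * ai[1]; // dot(pitches, ai)
    offset += offsetof(Pixel, g); // stands in for flatOffsetOf<..., flatFieldIndex, AlignAndPad>
    return offset;
}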
2 changes: 1 addition & 1 deletion examples/memmap/memmap.cpp
@@ -64,7 +64,7 @@ auto main(int argc, const char* argv[]) -> int
Triangle,
llama::mapping::FieldAlignment::Pack,
llama::mapping::LinearizeArrayDimsCpp,
llama::mapping::FlattenRecordDimInOrder>{{n}};
llama::mapping::PermuteFieldsInOrder>{{n}};
if(size != 80u + 4u + mapping.blobSize(0))
{
std::cout << "File size (" << size << ") != 80 + 4 + mapping size: (" << mapping.blobSize(0) << ")\n";
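The packed AoS mapping with PermuteFieldsInOrder is presumably chosen here so that the mapped layout matches the file's byte layout; alignment padding or a field permutation would break that. A sketch of the size check, assuming the usual binary STL layout (80-byte header, 4-byte triangle count, tightly packed triangle records):

// Assumption: standard binary STL file layout.
#include <cstddef>
#include <cstdint>

constexpr std::size_t expectedFileSize(std::size_t packedTriangleBytes, std::uint32_t n)
{
    return 80 + 4 + n * packedTriangleBytes; // header + count + mapping.blobSize(0)
}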
4 changes: 2 additions & 2 deletions include/llama/Simd.hpp
@@ -195,7 +195,7 @@ namespace llama
{
static_assert(mapping::isAoS<Mapping>);
static constexpr auto srcStride = flatSizeOf<
typename Mapping::Flattener::FlatRecordDim,
typename Mapping::Permuter::FlatRecordDim,
Mapping::fieldAlignment == llama::mapping::FieldAlignment::Align>;
const auto* srcBaseAddr = reinterpret_cast<const std::byte*>(&srcRef(rc));
ElementSimd elemSimd; // g++-12 really needs the intermediate elemSimd and memcpy
@@ -235,7 +235,7 @@
else if constexpr(mapping::isAoS<Mapping>)
{
static constexpr auto stride = flatSizeOf<
typename Mapping::Flattener::FlatRecordDim,
typename Mapping::Permuter::FlatRecordDim,
Mapping::fieldAlignment == llama::mapping::FieldAlignment::Align>;
auto* dstBaseAddr = reinterpret_cast<std::byte*>(&dstRef(rc));
const ElementSimd elemSimd = srcSimd(rc);
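In both branches above, the AoS stride is the byte size of one (permuted, possibly padded) record, and a single field is gathered or scattered at that stride. A standalone sketch of such a strided gather follows; it is not the actual LLAMA code, and the struct and element count are made up.

// Standalone sketch of a strided gather like the AoS branch above.
#include <cstddef>
#include <cstring>

struct Particle { float x, y, z; };

template<std::size_t N>
void gatherY(const Particle* aos, float (&out)[N])
{
    constexpr std::size_t stride = sizeof(Particle); // flatSizeOf<Permuter::FlatRecordDim, Align>
    const auto* base = reinterpret_cast<const std::byte*>(&aos[0].y);
    for(std::size_t i = 0; i < N; i++)
        std::memcpy(&out[i], base + i * stride, sizeof(float)); // memcpy, as in the code above
}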
31 changes: 13 additions & 18 deletions include/llama/mapping/AoS.hpp
@@ -12,15 +12,15 @@ namespace llama::mapping
/// If Pack, struct members are tightly packed.
/// \tparam TLinearizeArrayDimsFunctor Defines how the array dimensions should be mapped into linear numbers and
/// how big the linear domain gets.
/// \tparam FlattenRecordDim Defines how the record dimension's fields should be flattened. See \ref
/// FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref FlattenRecordDimDecreasingAlignment and
/// \ref FlattenRecordDimMinimizePadding.
/// \tparam PermuteFields Defines how the record dimension's fields should be permuted. See \ref
/// PermuteFieldsInOrder, \ref PermuteFieldsIncreasingAlignment, \ref PermuteFieldsDecreasingAlignment and
/// \ref PermuteFieldsMinimizePadding.
template<
typename TArrayExtents,
typename TRecordDim,
FieldAlignment TFieldAlignment = FieldAlignment::Align,
typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp,
template<typename> typename FlattenRecordDim = FlattenRecordDimInOrder>
template<typename> typename PermuteFields = PermuteFieldsInOrder>
struct AoS : MappingBase<TArrayExtents, TRecordDim>
{
private:
@@ -30,15 +30,15 @@
public:
inline static constexpr FieldAlignment fieldAlignment = TFieldAlignment;
using LinearizeArrayDimsFunctor = TLinearizeArrayDimsFunctor;
using Flattener = FlattenRecordDim<TRecordDim>;
using Permuter = PermuteFields<FlatRecordDim<TRecordDim>>;
inline static constexpr std::size_t blobCount = 1;

using Base::Base;

LLAMA_FN_HOST_ACC_INLINE constexpr auto blobSize(size_type) const -> size_type
{
return LinearizeArrayDimsFunctor{}.size(Base::extents())
* flatSizeOf<typename Flattener::FlatRecordDim, fieldAlignment == FieldAlignment::Align>;
* flatSizeOf<typename Permuter::FlatRecordDim, fieldAlignment == FieldAlignment::Align>;
}

template<std::size_t... RecordCoords>
@@ -50,13 +50,13 @@
#if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ <= 6
*& // mess with nvcc compiler state to workaround bug
#endif
Flattener::template flatIndex<RecordCoords...>;
Permuter::template permute<flatRecordCoord<TRecordDim, RecordCoord<RecordCoords...>>>;
const auto offset
= LinearizeArrayDimsFunctor{}(ai, Base::extents())
* static_cast<size_type>(
flatSizeOf<typename Flattener::FlatRecordDim, fieldAlignment == FieldAlignment::Align>)
flatSizeOf<typename Permuter::FlatRecordDim, fieldAlignment == FieldAlignment::Align>)
+ static_cast<size_type>(flatOffsetOf<
typename Flattener::FlatRecordDim,
typename Permuter::FlatRecordDim,
flatFieldIndex,
fieldAlignment == FieldAlignment::Align>);
return {size_type{0}, offset};
@@ -75,12 +75,8 @@
/// Array of struct mapping preserving the alignment of the field types by inserting padding and permuting the
/// field order to minimize this padding. \see AoS
template<typename ArrayExtents, typename RecordDim, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp>
using MinAlignedAoS = AoS<
ArrayExtents,
RecordDim,
FieldAlignment::Align,
LinearizeArrayDimsFunctor,
FlattenRecordDimMinimizePadding>;
using MinAlignedAoS
= AoS<ArrayExtents, RecordDim, FieldAlignment::Align, LinearizeArrayDimsFunctor, PermuteFieldsMinimizePadding>;

/// Array of struct mapping packing the field types tightly, violating the type's alignment requirements.
/// \see AoS
@@ -107,8 +103,7 @@
FieldAlignment FieldAlignment,
typename LinearizeArrayDimsFunctor,
template<typename>
typename FlattenRecordDim>
inline constexpr bool
isAoS<AoS<ArrayExtents, RecordDim, FieldAlignment, LinearizeArrayDimsFunctor, FlattenRecordDim>>
typename PermuteFields>
inline constexpr bool isAoS<AoS<ArrayExtents, RecordDim, FieldAlignment, LinearizeArrayDimsFunctor, PermuteFields>>
= true;
} // namespace llama::mapping
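After the rename, blobNrAndOffset() first permutes the flat record coordinate and then computes the offset as linearized array index times flatSizeOf of the permuted record plus flatOffsetOf of the permuted field. A plain-C++ analogue follows, with a made-up struct standing in for the already permuted, padded Permuter::FlatRecordDim (illustration only, not LLAMA code).

// Plain-C++ analogue of the AoS offset formula above.
#include <cstddef>
#include <cstdint>

struct Flat { double pos; float vel; std::int32_t id; }; // hypothetical permuted layout

std::size_t aosOffsetOfVel(std::size_t linearArrayIndex)
{
    // linearized array index * size of one padded record + offset of the field inside it
    return linearArrayIndex * sizeof(Flat) + offsetof(Flat, vel);
}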
15 changes: 7 additions & 8 deletions include/llama/mapping/AoSoA.hpp
@@ -26,15 +26,15 @@ namespace llama::mapping

/// Array of struct of arrays mapping. Used to create a \ref View via \ref allocView.
/// \tparam Lanes The size of the inner arrays of this array of struct of arrays.
/// \tparam FlattenRecordDim Defines how the record dimension's fields should be flattened. See \ref
/// FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref FlattenRecordDimDecreasingAlignment and
/// \ref FlattenRecordDimMinimizePadding.
/// \tparam PermuteFields Defines how the record dimension's fields should be permuted. See \ref
/// PermuteFieldsInOrder, \ref PermuteFieldsIncreasingAlignment, \ref PermuteFieldsDecreasingAlignment and
/// \ref PermuteFieldsMinimizePadding.
template<
typename TArrayExtents,
typename TRecordDim,
typename TArrayExtents::value_type Lanes,
typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp,
template<typename> typename FlattenRecordDim = FlattenRecordDimInOrder>
template<typename> typename PermuteFields = PermuteFieldsInOrder>
struct AoSoA : MappingBase<TArrayExtents, TRecordDim>
{
private:
@@ -44,7 +44,7 @@
public:
inline static constexpr typename TArrayExtents::value_type lanes = Lanes;
using LinearizeArrayDimsFunctor = TLinearizeArrayDimsFunctor;
using Flattener = FlattenRecordDim<TRecordDim>;
using Permuter = PermuteFields<FlatRecordDim<TRecordDim>>;
inline static constexpr std::size_t blobCount = 1;

#if defined(__NVCC__) && __CUDACC_VER_MAJOR__ >= 12
@@ -72,13 +72,12 @@
#if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ <= 6
*& // mess with nvcc compiler state to workaround bug
#endif
Flattener::template flatIndex<RecordCoords...>;
Permuter::template permute<flatRecordCoord<TRecordDim, RecordCoord<RecordCoords...>>>;
const auto flatArrayIndex = LinearizeArrayDimsFunctor{}(ai, Base::extents());
const auto blockIndex = flatArrayIndex / Lanes;
const auto laneIndex = flatArrayIndex % Lanes;
const auto offset = static_cast<size_type>(sizeOf<TRecordDim> * Lanes) * blockIndex
+ static_cast<size_type>(flatOffsetOf<typename Flattener::FlatRecordDim, flatFieldIndex, false>)
* Lanes
+ static_cast<size_type>(flatOffsetOf<typename Permuter::FlatRecordDim, flatFieldIndex, false>) * Lanes
+ static_cast<size_type>(sizeof(GetType<TRecordDim, RecordCoord<RecordCoords...>>)) * laneIndex;
return {0, offset};
}
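The AoSoA offset above splits the linearized array index into a block index and a lane index, then skips whole blocks, then the preceding sub-arrays within the block, then the preceding lanes. A worked analogue with made-up field sizes (illustration only, not LLAMA code):

// Worked analogue of the AoSoA offset computation above.
// Sizes assume a hypothetical packed record {float x; float y; double m;}.
#include <cstddef>

std::size_t aosoaOffsetOfY(std::size_t flatArrayIndex, std::size_t lanes)
{
    constexpr std::size_t recordSize = 16; // sizeOf<TRecordDim>: 4 + 4 + 8, packed
    constexpr std::size_t yOffset = 4;     // flatOffsetOf<..., flatFieldIndex, false>
    constexpr std::size_t ySize = 4;       // sizeof(GetType<TRecordDim, RecordCoord<...>>)

    const std::size_t blockIndex = flatArrayIndex / lanes;
    const std::size_t laneIndex = flatArrayIndex % lanes;
    return recordSize * lanes * blockIndex // skip whole AoSoA blocks
        + yOffset * lanes                  // skip the x sub-array inside the block
        + ySize * laneIndex;               // index into the y sub-array
}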
15 changes: 8 additions & 7 deletions include/llama/mapping/BitPackedFloat.hpp
@@ -315,7 +315,7 @@ namespace llama::mapping
typename ExponentBits = typename TArrayExtents::value_type,
typename MantissaBits = ExponentBits,
typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp,
template<typename> typename FlattenRecordDim = FlattenRecordDimInOrder,
template<typename> typename PermuteFields = PermuteFieldsInOrder,
typename TStoredIntegral = internal::StoredIntegralFor<TRecordDim>>
struct LLAMA_DECLSPEC_EMPTY_BASES BitPackedFloatAoS
: MappingBase<TArrayExtents, TRecordDim>
Expand All @@ -332,7 +332,7 @@ namespace llama::mapping
using LinearizeArrayDimsFunctor = TLinearizeArrayDimsFunctor;
using StoredIntegral = TStoredIntegral;

using Flattener = FlattenRecordDim<TRecordDim>;
using Permuter = PermuteFields<FlatRecordDim<TRecordDim>>;
static constexpr std::size_t blobCount = 1;

LLAMA_FN_HOST_ACC_INLINE
@@ -382,7 +382,8 @@
RecordCoord<RecordCoords...>,
Blobs& blobs) const
{
constexpr auto flatFieldIndex = static_cast<size_type>(Flattener::template flatIndex<RecordCoords...>);
constexpr auto flatFieldIndex = static_cast<size_type>(
Permuter::template permute<flatRecordCoord<TRecordDim, RecordCoord<RecordCoords...>>>);
const auto bitOffset = ((TLinearizeArrayDimsFunctor{}(ai, Base::extents())
* static_cast<size_type>(flatFieldCount<TRecordDim>))
+ flatFieldIndex)
@@ -404,7 +405,7 @@
typename ExponentBits = unsigned,
typename MantissaBits = ExponentBits,
typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp,
template<typename> typename FlattenRecordDim = FlattenRecordDimInOrder,
template<typename> typename PermuteFields = PermuteFieldsInOrder,
typename StoredIntegral = void>
struct BindBitPackedFloatAoS
{
@@ -415,7 +416,7 @@
ExponentBits,
MantissaBits,
LinearizeArrayDimsFunctor,
FlattenRecordDim,
PermuteFields,
std::conditional_t<
!std::is_void_v<StoredIntegral>,
StoredIntegral,
@@ -432,15 +433,15 @@
typename MantissaBits,
typename LinearizeArrayDimsFunctor,
template<typename>
typename FlattenRecordDim,
typename PermuteFields,
typename StoredIntegral>
inline constexpr bool isBitPackedFloatAoS<BitPackedFloatAoS<
ArrayExtents,
RecordDim,
ExponentBits,
MantissaBits,
LinearizeArrayDimsFunctor,
FlattenRecordDim,
PermuteFields,
StoredIntegral>>
= true;
} // namespace llama::mapping
21 changes: 11 additions & 10 deletions include/llama/mapping/BitPackedInt.hpp
@@ -449,9 +449,9 @@ namespace llama::mapping
/// numbers will be read back positive.
/// \tparam TLinearizeArrayDimsFunctor Defines how the array dimensions should be mapped into linear numbers and
/// how big the linear domain gets.
/// \tparam FlattenRecordDim Defines how the record dimension's fields should be flattened. See \ref
// FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref FlattenRecordDimDecreasingAlignment and
// \ref FlattenRecordDimMinimizePadding.
/// \tparam PermuteFields Defines how the record dimension's fields should be permuted. See \ref
// PermuteFieldsInOrder, \ref PermuteFieldsIncreasingAlignment, \ref PermuteFieldsDecreasingAlignment and
// \ref PermuteFieldsMinimizePadding.
/// \tparam TStoredIntegral Integral type used as storage of reduced precision integers. Must be std::uint32_t or
/// std::uint64_t.
template<
Expand All @@ -460,7 +460,7 @@ namespace llama::mapping
typename Bits = typename TArrayExtents::value_type,
SignBit SignBit = SignBit::Keep,
typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp,
template<typename> typename FlattenRecordDim = FlattenRecordDimInOrder,
template<typename> typename PermuteFields = PermuteFieldsInOrder,
typename TStoredIntegral = internal::StoredUnsignedFor<TRecordDim>>
struct BitPackedIntAoS
: internal::
@@ -475,7 +475,7 @@
using typename Base::size_type;
using VHBits = typename Base::VHBits; // use plain using declaration with nvcc >= 11.8

using Flattener = FlattenRecordDim<TRecordDim>;
using Permuter = PermuteFields<TRecordDim>;
static constexpr std::size_t blobCount = 1;

LLAMA_FN_HOST_ACC_INLINE
@@ -493,7 +493,8 @@
RecordCoord<RecordCoords...>,
Blobs& blobs) const
{
constexpr auto flatFieldIndex = static_cast<size_type>(Flattener::template flatIndex<RecordCoords...>);
constexpr auto flatFieldIndex = static_cast<size_type>(
Permuter::template permute<flatRecordCoord<TRecordDim, RecordCoord<RecordCoords...>>>);
const auto bitOffset = ((TLinearizeArrayDimsFunctor{}(ai, Base::extents())
* static_cast<size_type>(flatFieldCount<TRecordDim>))
+ flatFieldIndex)
@@ -516,7 +517,7 @@
typename Bits = void,
SignBit SignBit = SignBit::Keep,
typename LinearizeArrayDimsFunctor = mapping::LinearizeArrayDimsCpp,
template<typename> typename FlattenRecordDim = FlattenRecordDimInOrder,
template<typename> typename PermuteFields = PermuteFieldsInOrder,
typename StoredIntegral = void>
struct BindBitPackedIntAoS
{
@@ -527,7 +528,7 @@
std::conditional_t<!std::is_void_v<Bits>, Bits, typename ArrayExtents::value_type>,
SignBit,
LinearizeArrayDimsFunctor,
FlattenRecordDim,
PermuteFields,
std::conditional_t<
!std::is_void_v<StoredIntegral>,
StoredIntegral,
@@ -544,15 +545,15 @@
SignBit SignBit,
typename LinearizeArrayDimsFunctor,
template<typename>
typename FlattenRecordDim,
typename PermuteFields,
typename StoredIntegral>
inline constexpr bool isBitPackedIntAoS<BitPackedIntAoS<
ArrayExtents,
RecordDim,
Bits,
SignBit,
LinearizeArrayDimsFunctor,
FlattenRecordDim,
PermuteFields,
StoredIntegral>>
= true;
} // namespace llama::mapping
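Both bit-packed AoS mappings above compute the same bit offset: ((linearized array index times the number of record fields) plus the permuted field index) times the bits per value. A minimal sketch of that arithmetic (illustration only; the real compute() returns a proxy reference into the blob storage):

// Minimal sketch of the bit-offset arithmetic shared by BitPackedIntAoS and
// BitPackedFloatAoS above.
#include <cstddef>

std::size_t bitOffset(
    std::size_t linearArrayIndex, // TLinearizeArrayDimsFunctor{}(ai, extents)
    std::size_t flatFieldIndex,   // permuted index of the field in the record
    std::size_t fieldCount,       // flatFieldCount<TRecordDim>
    std::size_t bitsPerValue)     // e.g. VHBits::value()
{
    return (linearArrayIndex * fieldCount + flatFieldIndex) * bitsPerValue;
}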