From 3dc36650dff9f2e698ae5ff40e89ceed423f4c4e Mon Sep 17 00:00:00 2001
From: Bernhard Manfred Gruber <bernhardmgruber@gmail.com>
Date: Fri, 29 Sep 2023 19:31:01 +0200
Subject: [PATCH] Separate record dim flattening from field permutations

---
 CHANGELOG.md                             |  2 +-
 docs/pages/api.rst                       | 14 ++---
 examples/cuda/pitch/pitch.cu             |  8 +--
 examples/memmap/memmap.cpp               |  2 +-
 include/llama/Simd.hpp                   |  4 +-
 include/llama/mapping/AoS.hpp            | 31 ++++------
 include/llama/mapping/AoSoA.hpp          | 15 +++--
 include/llama/mapping/BitPackedFloat.hpp | 15 ++---
 include/llama/mapping/BitPackedInt.hpp   | 21 ++++---
 include/llama/mapping/Common.hpp         | 46 ++++++--------
 include/llama/mapping/One.hpp            | 30 ++++-----
 include/llama/mapping/SoA.hpp            | 18 +++---
 tests/mapping.BitPackedInt.cpp           |  2 +-
 tests/mapping.cpp                        | 78 ++++++++++++------------
 14 files changed, 138 insertions(+), 148 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 13ae6ced27..e7fa889ba2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -127,7 +127,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 - added macros `LLAMA_FORCE_INLINE` and `LLAMA_HOST_ACC` #366
 - support clang as CUDA compiler #366
 - `llama::mapping::SoA` and `llama::mapping::AoSoA` now support custom record dimension flatteners #371
-- added the `llama::mapping::FlattenRecordDimIncreasingAlignment`, `llama::mapping::FlattenRecordDimDecreasingAlignment` and `llama::mapping::FlattenRecordDimMinimizePadding` record dimension flatteners #371
+- added the `llama::mapping::PermuteFieldsIncreasingAlignment`, `llama::mapping::PermuteFieldsDecreasingAlignment` and `llama::mapping::PermuteFieldsMinimizePadding` record dimension field permuters #371
 - added new mapping `llama::mapping::BitPackedIntSoA` bitpacking integers in the record dimension into SoA arrays, and added new example #372, #427, #441, #446
 - added new mapping `llama::mapping::BitPackedFloatSoA` bitpacking floating-point types in the record dimension into SoA arrays, and added new example #414, #427, #446
 - `LLAMA_FORCE_INLINE` views can be created on `const` blobs #375
diff --git a/docs/pages/api.rst b/docs/pages/api.rst
index d3b0719853..749b035f4c 100644
--- a/docs/pages/api.rst
+++ b/docs/pages/api.rst
@@ -178,15 +178,15 @@ Acessors
 .. doxygenstruct:: llama::accessor::Restrict
 .. doxygenstruct:: llama::accessor::Atomic
 .. doxygenstruct:: llama::accessor::Stacked
 
-RecordDim flattener
-^^^^^^^^^^^^^^^^^^^
+RecordDim field permuters
+^^^^^^^^^^^^^^^^^^^^^^^^^
 
-.. doxygenstruct:: llama::mapping::FlattenRecordDimInOrder
-.. doxygenstruct:: llama::mapping::FlattenRecordDimSorted
-.. doxygentypedef:: llama::mapping::FlattenRecordDimIncreasingAlignment
-.. doxygentypedef:: llama::mapping::FlattenRecordDimDecreasingAlignment
-.. doxygentypedef:: llama::mapping::FlattenRecordDimMinimizePadding
+.. doxygenstruct:: llama::mapping::PermuteFieldsInOrder
+.. doxygenstruct:: llama::mapping::PermuteFieldsSorted
+.. doxygentypedef:: llama::mapping::PermuteFieldsIncreasingAlignment
+.. doxygentypedef:: llama::mapping::PermuteFieldsDecreasingAlignment
+.. doxygentypedef:: llama::mapping::PermuteFieldsMinimizePadding
 
 Common utilities
 ^^^^^^^^^^^^^^^^
diff --git a/examples/cuda/pitch/pitch.cu b/examples/cuda/pitch/pitch.cu
index 76868a5e05..b7d9a33be8 100644
--- a/examples/cuda/pitch/pitch.cu
+++ b/examples/cuda/pitch/pitch.cu
@@ -60,14 +60,14 @@ namespace llamaex
         typename TArrayExtents,
         typename TRecordDim,
         bool AlignAndPad = true,
-        template<typename> typename FlattenRecordDim = mapping::FlattenRecordDimInOrder>
+        template<typename> typename PermuteFields = mapping::PermuteFieldsInOrder>
     struct PitchedAoS : mapping::MappingBase<TArrayExtents, TRecordDim>
     {
     private:
         static constexpr std::size_t dim = TArrayExtents{}.size();
 
         using Base = mapping::MappingBase<TArrayExtents, TRecordDim>;
-        using Flattener = FlattenRecordDim<TRecordDim>;
+        using Permuter = PermuteFields<FlatRecordDim<TRecordDim>>;
 
         Array<std::size_t, dim> pitches;
 
@@ -116,9 +116,9 @@ namespace llamaex
 #if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ <= 6
                 *& // mess with nvcc compiler state to workaround bug
 #endif
-                 Flattener::template flatIndex<RecordCoords...>;
+                 Permuter::template permute<flatRecordCoord<TRecordDim, RecordCoord<RecordCoords...>>>;
             const auto offset
-                = dot(pitches, ai) + flatOffsetOf<typename Flattener::FlatRecordDim, flatFieldIndex, AlignAndPad>;
+                = dot(pitches, ai) + flatOffsetOf<typename Permuter::FlatRecordDim, flatFieldIndex, AlignAndPad>;
             return {0, offset};
         }
     };
diff --git a/examples/memmap/memmap.cpp b/examples/memmap/memmap.cpp
index a001df1d27..d6e5859410 100644
--- a/examples/memmap/memmap.cpp
+++ b/examples/memmap/memmap.cpp
@@ -64,7 +64,7 @@ auto main(int argc, const char* argv[]) -> int
         Triangle,
         llama::mapping::FieldAlignment::Pack,
         llama::mapping::LinearizeArrayDimsCpp,
-        llama::mapping::FlattenRecordDimInOrder>{{n}};
+        llama::mapping::PermuteFieldsInOrder>{{n}};
     if(size != 80u + 4u + mapping.blobSize(0))
     {
         std::cout << "File size (" << size << ") != 80 + 4 + mapping size: (" << mapping.blobSize(0) << ")\n";
diff --git a/include/llama/Simd.hpp b/include/llama/Simd.hpp
index d40f303f97..7fbf906ed8 100644
--- a/include/llama/Simd.hpp
+++ b/include/llama/Simd.hpp
@@ -195,7 +195,7 @@ namespace llama
             {
                 static_assert(mapping::isAoS<Mapping>);
                 static constexpr auto srcStride = flatSizeOf<
-                    typename Mapping::Flattener::FlatRecordDim,
+                    typename Mapping::Permuter::FlatRecordDim,
                     Mapping::fieldAlignment == llama::mapping::FieldAlignment::Align>;
                 const auto* srcBaseAddr = reinterpret_cast<const std::byte*>(&srcRef(rc));
                 ElementSimd elemSimd; // g++-12 really needs the intermediate elemSimd and memcpy
@@ -235,7 +235,7 @@ namespace llama
             else if constexpr(mapping::isAoS<Mapping>)
             {
                 static constexpr auto stride = flatSizeOf<
-                    typename Mapping::Flattener::FlatRecordDim,
+                    typename Mapping::Permuter::FlatRecordDim,
                     Mapping::fieldAlignment == llama::mapping::FieldAlignment::Align>;
                 auto* dstBaseAddr = reinterpret_cast<std::byte*>(&dstRef(rc));
                 const ElementSimd elemSimd = srcSimd(rc);
diff --git a/include/llama/mapping/AoS.hpp b/include/llama/mapping/AoS.hpp
index 5dd85f1be0..072d1f2c6d 100644
--- a/include/llama/mapping/AoS.hpp
+++ b/include/llama/mapping/AoS.hpp
@@ -12,15 +12,15 @@ namespace llama::mapping
     /// If Pack, struct members are tightly packed.
     /// \tparam TLinearizeArrayDimsFunctor Defines how the array dimensions should be mapped into linear numbers and
     /// how big the linear domain gets.
-    /// \tparam FlattenRecordDim Defines how the record dimension's fields should be flattened. See \ref
-    /// FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref FlattenRecordDimDecreasingAlignment and
-    /// \ref FlattenRecordDimMinimizePadding.
+    /// \tparam PermuteFields Defines how the record dimension's fields should be permuted. See \ref
+    /// PermuteFieldsInOrder, \ref PermuteFieldsIncreasingAlignment, \ref PermuteFieldsDecreasingAlignment and
+    /// \ref PermuteFieldsMinimizePadding.
     template<
         typename TArrayExtents,
         typename TRecordDim,
         FieldAlignment TFieldAlignment = FieldAlignment::Align,
         typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp,
-        template<typename> typename FlattenRecordDim = FlattenRecordDimInOrder>
+        template<typename> typename PermuteFields = PermuteFieldsInOrder>
     struct AoS : MappingBase<TArrayExtents, TRecordDim>
     {
     private:
@@ -30,7 +30,7 @@ namespace llama::mapping
     public:
         inline static constexpr FieldAlignment fieldAlignment = TFieldAlignment;
         using LinearizeArrayDimsFunctor = TLinearizeArrayDimsFunctor;
-        using Flattener = FlattenRecordDim<TRecordDim>;
+        using Permuter = PermuteFields<FlatRecordDim<TRecordDim>>;
         inline static constexpr std::size_t blobCount = 1;
 
         using Base::Base;
@@ -38,7 +38,7 @@ namespace llama::mapping
         LLAMA_FN_HOST_ACC_INLINE constexpr auto blobSize(size_type) const -> size_type
         {
             return LinearizeArrayDimsFunctor{}.size(Base::extents())
-                * flatSizeOf<typename Flattener::FlatRecordDim, fieldAlignment == FieldAlignment::Align>;
+                * flatSizeOf<typename Permuter::FlatRecordDim, fieldAlignment == FieldAlignment::Align>;
         }
 
         template<std::size_t... RecordCoords>
@@ -50,13 +50,13 @@ namespace llama::mapping
 #if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ <= 6
                 *& // mess with nvcc compiler state to workaround bug
 #endif
-                 Flattener::template flatIndex<RecordCoords...>;
+                 Permuter::template permute<flatRecordCoord<TRecordDim, RecordCoord<RecordCoords...>>>;
             const auto offset
                 = LinearizeArrayDimsFunctor{}(ai, Base::extents())
                     * static_cast<size_type>(
-                        flatSizeOf<typename Flattener::FlatRecordDim, fieldAlignment == FieldAlignment::Align>)
+                        flatSizeOf<typename Permuter::FlatRecordDim, fieldAlignment == FieldAlignment::Align>)
                 + static_cast<size_type>(flatOffsetOf<
-                                         typename Flattener::FlatRecordDim,
+                                         typename Permuter::FlatRecordDim,
                                          flatFieldIndex,
                                          fieldAlignment == FieldAlignment::Align>);
             return {size_type{0}, offset};
@@ -75,12 +75,8 @@ namespace llama::mapping
     /// Array of struct mapping preserving the alignment of the field types by inserting padding and permuting the
     /// field order to minimize this padding. \see AoS
     template<typename ArrayExtents, typename RecordDim, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp>
-    using MinAlignedAoS = AoS<
-        ArrayExtents,
-        RecordDim,
-        FieldAlignment::Align,
-        LinearizeArrayDimsFunctor,
-        FlattenRecordDimMinimizePadding>;
+    using MinAlignedAoS
+        = AoS<ArrayExtents, RecordDim, FieldAlignment::Align, LinearizeArrayDimsFunctor, PermuteFieldsMinimizePadding>;
 
     /// Array of struct mapping packing the field types tightly, violating the type's alignment requirements.
     /// \see AoS
@@ -107,8 +103,7 @@ namespace llama::mapping
         FieldAlignment FieldAlignment,
         typename LinearizeArrayDimsFunctor,
         template<typename>
-        typename FlattenRecordDim>
-    inline constexpr bool
-        isAoS<AoS<ArrayExtents, RecordDim, FieldAlignment, LinearizeArrayDimsFunctor, FlattenRecordDim>>
+        typename PermuteFields>
+    inline constexpr bool isAoS<AoS<ArrayExtents, RecordDim, FieldAlignment, LinearizeArrayDimsFunctor, PermuteFields>>
         = true;
 } // namespace llama::mapping
diff --git a/include/llama/mapping/AoSoA.hpp b/include/llama/mapping/AoSoA.hpp
index 417918ae8e..f1f8258579 100644
--- a/include/llama/mapping/AoSoA.hpp
+++ b/include/llama/mapping/AoSoA.hpp
@@ -26,15 +26,15 @@ namespace llama::mapping
 
     /// Array of struct of arrays mapping. Used to create a \ref View via \ref allocView.
     /// \tparam Lanes The size of the inner arrays of this array of struct of arrays.
-    /// \tparam FlattenRecordDim Defines how the record dimension's fields should be flattened. See \ref
-    /// FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref FlattenRecordDimDecreasingAlignment and
-    /// \ref FlattenRecordDimMinimizePadding.
+    /// \tparam PermuteFields Defines how the record dimension's fields should be permuted. See \ref
+    /// PermuteFieldsInOrder, \ref PermuteFieldsIncreasingAlignment, \ref PermuteFieldsDecreasingAlignment and
+    /// \ref PermuteFieldsMinimizePadding.
     template<
         typename TArrayExtents,
         typename TRecordDim,
         typename TArrayExtents::value_type Lanes,
         typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp,
-        template<typename> typename FlattenRecordDim = FlattenRecordDimInOrder>
+        template<typename> typename PermuteFields = PermuteFieldsInOrder>
     struct AoSoA : MappingBase<TArrayExtents, TRecordDim>
     {
     private:
@@ -44,7 +44,7 @@ namespace llama::mapping
     public:
         inline static constexpr typename TArrayExtents::value_type lanes = Lanes;
         using LinearizeArrayDimsFunctor = TLinearizeArrayDimsFunctor;
-        using Flattener = FlattenRecordDim<TRecordDim>;
+        using Permuter = PermuteFields<FlatRecordDim<TRecordDim>>;
         inline static constexpr std::size_t blobCount = 1;
 
 #if defined(__NVCC__) && __CUDACC_VER_MAJOR__ >= 12
@@ -72,13 +72,12 @@ namespace llama::mapping
 #if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ <= 6
                 *& // mess with nvcc compiler state to workaround bug
 #endif
-                 Flattener::template flatIndex<RecordCoords...>;
+                 Permuter::template permute<flatRecordCoord<TRecordDim, RecordCoord<RecordCoords...>>>;
             const auto flatArrayIndex = LinearizeArrayDimsFunctor{}(ai, Base::extents());
             const auto blockIndex = flatArrayIndex / Lanes;
             const auto laneIndex = flatArrayIndex % Lanes;
             const auto offset = static_cast<size_type>(sizeOf<TRecordDim> * Lanes) * blockIndex
-                + static_cast<size_type>(flatOffsetOf<typename Flattener::FlatRecordDim, flatFieldIndex, false>)
-                    * Lanes
+                + static_cast<size_type>(flatOffsetOf<typename Permuter::FlatRecordDim, flatFieldIndex, false>) * Lanes
                 + static_cast<size_type>(sizeof(GetType<TRecordDim, RecordCoord<RecordCoords...>>)) * laneIndex;
             return {0, offset};
         }
diff --git a/include/llama/mapping/BitPackedFloat.hpp b/include/llama/mapping/BitPackedFloat.hpp
index 9fce3e3405..578360e4ef 100644
--- a/include/llama/mapping/BitPackedFloat.hpp
+++ b/include/llama/mapping/BitPackedFloat.hpp
@@ -315,7 +315,7 @@ namespace llama::mapping
         typename ExponentBits = typename TArrayExtents::value_type,
         typename MantissaBits = ExponentBits,
         typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp,
-        template<typename> typename FlattenRecordDim = FlattenRecordDimInOrder,
+        template<typename> typename PermuteFields = PermuteFieldsInOrder,
         typename TStoredIntegral = internal::StoredIntegralFor<TRecordDim>>
     struct LLAMA_DECLSPEC_EMPTY_BASES BitPackedFloatAoS
         : MappingBase<TArrayExtents, TRecordDim>
@@ -332,7 +332,7 @@ namespace llama::mapping
         using LinearizeArrayDimsFunctor = TLinearizeArrayDimsFunctor;
         using StoredIntegral = TStoredIntegral;
 
-        using Flattener = FlattenRecordDim<TRecordDim>;
+        using Permuter = PermuteFields<FlatRecordDim<TRecordDim>>;
         static constexpr std::size_t blobCount = 1;
 
         LLAMA_FN_HOST_ACC_INLINE
@@ -382,7 +382,8 @@ namespace llama::mapping
             RecordCoord<RecordCoords...>,
             Blobs& blobs) const
         {
-            constexpr auto flatFieldIndex = static_cast<size_type>(Flattener::template flatIndex<RecordCoords...>);
+            constexpr auto flatFieldIndex = static_cast<size_type>(
+                Permuter::template permute<flatRecordCoord<TRecordDim, RecordCoord<RecordCoords...>>>);
             const auto bitOffset = ((TLinearizeArrayDimsFunctor{}(ai, Base::extents())
                                      * static_cast<size_type>(flatFieldCount<TRecordDim>))
                                     + flatFieldIndex)
@@ -404,7 +405,7 @@ namespace llama::mapping
         typename ExponentBits = unsigned,
         typename MantissaBits = ExponentBits,
         typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp,
-        template<typename> typename FlattenRecordDim = FlattenRecordDimInOrder,
+        template<typename> typename PermuteFields = PermuteFieldsInOrder,
         typename StoredIntegral = void>
     struct BindBitPackedFloatAoS
     {
@@ -415,7 +416,7 @@ namespace llama::mapping
             ExponentBits,
             MantissaBits,
             LinearizeArrayDimsFunctor,
-            FlattenRecordDim,
+            PermuteFields,
             std::conditional_t<
                 !std::is_void_v<StoredIntegral>,
                 StoredIntegral,
@@ -432,7 +433,7 @@ namespace llama::mapping
         typename MantissaBits,
         typename LinearizeArrayDimsFunctor,
         template<typename>
-        typename FlattenRecordDim,
+        typename PermuteFields,
         typename StoredIntegral>
     inline constexpr bool isBitPackedFloatAoS<BitPackedFloatAoS<
         ArrayExtents,
@@ -440,7 +441,7 @@ namespace llama::mapping
         ExponentBits,
         MantissaBits,
         LinearizeArrayDimsFunctor,
-        FlattenRecordDim,
+        PermuteFields,
         StoredIntegral>>
         = true;
 } // namespace llama::mapping
diff --git a/include/llama/mapping/BitPackedInt.hpp b/include/llama/mapping/BitPackedInt.hpp
index 6680034bcb..d982d5160a 100644
--- a/include/llama/mapping/BitPackedInt.hpp
+++ b/include/llama/mapping/BitPackedInt.hpp
@@ -449,9 +449,9 @@ namespace llama::mapping
     /// numbers will be read back positive.
     /// \tparam TLinearizeArrayDimsFunctor Defines how the array dimensions should be mapped into linear numbers and
     /// how big the linear domain gets.
-    /// \tparam FlattenRecordDim Defines how the record dimension's fields should be flattened. See \ref
-    //  FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref FlattenRecordDimDecreasingAlignment and
-    //  \ref FlattenRecordDimMinimizePadding.
+    /// \tparam PermuteFields Defines how the record dimension's fields should be permuted. See \ref
+    /// PermuteFieldsInOrder, \ref PermuteFieldsIncreasingAlignment, \ref PermuteFieldsDecreasingAlignment and
+    /// \ref PermuteFieldsMinimizePadding.
     /// \tparam TStoredIntegral Integral type used as storage of reduced precision integers. Must be std::uint32_t or
     /// std::uint64_t.
     template<
@@ -460,7 +460,7 @@ namespace llama::mapping
         typename Bits = typename TArrayExtents::value_type,
         SignBit SignBit = SignBit::Keep,
         typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp,
-        template<typename> typename FlattenRecordDim = FlattenRecordDimInOrder,
+        template<typename> typename PermuteFields = PermuteFieldsInOrder,
         typename TStoredIntegral = internal::StoredUnsignedFor<TRecordDim>>
     struct BitPackedIntAoS
         : internal::
@@ -475,7 +475,7 @@ namespace llama::mapping
         using typename Base::size_type;
         using VHBits = typename Base::VHBits; // use plain using declaration with nvcc >= 11.8
 
-        using Flattener = FlattenRecordDim<TRecordDim>;
+        using Permuter = PermuteFields<FlatRecordDim<TRecordDim>>; // permuters operate on the flat record dim
         static constexpr std::size_t blobCount = 1;
 
         LLAMA_FN_HOST_ACC_INLINE
@@ -493,7 +493,8 @@ namespace llama::mapping
             RecordCoord<RecordCoords...>,
             Blobs& blobs) const
         {
-            constexpr auto flatFieldIndex = static_cast<size_type>(Flattener::template flatIndex<RecordCoords...>);
+            constexpr auto flatFieldIndex = static_cast<size_type>(
+                Permuter::template permute<flatRecordCoord<TRecordDim, RecordCoord<RecordCoords...>>>);
             const auto bitOffset = ((TLinearizeArrayDimsFunctor{}(ai, Base::extents())
                                      * static_cast<size_type>(flatFieldCount<TRecordDim>))
                                     + flatFieldIndex)
@@ -516,7 +517,7 @@ namespace llama::mapping
         typename Bits = void,
         SignBit SignBit = SignBit::Keep,
         typename LinearizeArrayDimsFunctor = mapping::LinearizeArrayDimsCpp,
-        template<typename> typename FlattenRecordDim = FlattenRecordDimInOrder,
+        template<typename> typename PermuteFields = PermuteFieldsInOrder,
         typename StoredIntegral = void>
     struct BindBitPackedIntAoS
     {
@@ -527,7 +528,7 @@ namespace llama::mapping
             std::conditional_t<!std::is_void_v<Bits>, Bits, typename ArrayExtents::value_type>,
             SignBit,
             LinearizeArrayDimsFunctor,
-            FlattenRecordDim,
+            PermuteFields,
             std::conditional_t<
                 !std::is_void_v<StoredIntegral>,
                 StoredIntegral,
@@ -544,7 +545,7 @@ namespace llama::mapping
         SignBit SignBit,
         typename LinearizeArrayDimsFunctor,
         template<typename>
-        typename FlattenRecordDim,
+        typename PermuteFields,
         typename StoredIntegral>
     inline constexpr bool isBitPackedIntAoS<BitPackedIntAoS<
         ArrayExtents,
@@ -552,7 +553,7 @@ namespace llama::mapping
         Bits,
         SignBit,
         LinearizeArrayDimsFunctor,
-        FlattenRecordDim,
+        PermuteFields,
         StoredIntegral>>
         = true;
 } // namespace llama::mapping
diff --git a/include/llama/mapping/Common.hpp b/include/llama/mapping/Common.hpp
index e64f98d974..4a67ed1108 100644
--- a/include/llama/mapping/Common.hpp
+++ b/include/llama/mapping/Common.hpp
@@ -165,24 +165,23 @@ namespace llama::mapping
         }
     };
 
-    /// Flattens the record dimension in the order fields are written.
-    template<typename RecordDim>
-    struct FlattenRecordDimInOrder
+    /// Retains the order of the record dimension's fields.
+    template<typename TFlatRecordDim>
+    struct PermuteFieldsInOrder
     {
-        using FlatRecordDim = llama::FlatRecordDim<RecordDim>;
+        using FlatRecordDim = TFlatRecordDim;
 
-        template<std::size_t... RecordCoords>
-        static constexpr std::size_t flatIndex = flatRecordCoord<RecordDim, RecordCoord<RecordCoords...>>;
+        template<std::size_t FlatRecordCoord>
+        static constexpr std::size_t permute = FlatRecordCoord;
     };
 
-    /// Flattens the record dimension by sorting the fields according to a given predicate on the field types.
+    /// Sorts the record dimension's fields according to a given predicate on the field types.
     /// @tparam Less A binary predicate accepting two field types, which exposes a member value. Value must be true if
     /// the first field type is less than the second one, otherwise false.
-    template<typename RecordDim, template<typename, typename> typename Less>
-    struct FlattenRecordDimSorted
+    template<typename FlatOrigRecordDim, template<typename, typename> typename Less>
+    struct PermuteFieldsSorted
     {
     private:
-        using FlatOrigRecordDim = llama::FlatRecordDim<RecordDim>;
         using FlatSortedRecordDim = mp_sort<FlatOrigRecordDim, Less>;
 
         template<typename A, typename B>
@@ -201,13 +200,8 @@ namespace llama::mapping
     public:
         using FlatRecordDim = FlatSortedRecordDim;
 
-        template<std::size_t... RecordCoords>
-        static constexpr std::size_t flatIndex = []() constexpr
-        {
-            constexpr auto indexBefore = flatRecordCoord<RecordDim, RecordCoord<RecordCoords...>>;
-            constexpr auto indexAfter = mp_at_c<InversePermutedIndices, indexBefore>::value;
-            return indexAfter;
-        }();
+        template<std::size_t FlatRecordCoord>
+        static constexpr std::size_t permute = mp_at_c<InversePermutedIndices, FlatRecordCoord>::value;
     };
 
     namespace internal
@@ -219,17 +213,17 @@ namespace llama::mapping
         using MoreAlignment = std::bool_constant<(alignof(A) > alignof(B))>;
     } // namespace internal
 
-    /// Flattens and sorts the record dimension by increasing alignment of its fields.
-    template<typename RecordDim>
-    using FlattenRecordDimIncreasingAlignment = FlattenRecordDimSorted<RecordDim, internal::LessAlignment>;
+    /// Sorts the record dimension fields by increasing alignment of its fields.
+    template<typename FlatRecordDim>
+    using PermuteFieldsIncreasingAlignment = PermuteFieldsSorted<FlatRecordDim, internal::LessAlignment>;
 
-    /// Flattens and sorts the record dimension by decreasing alignment of its fields.
-    template<typename RecordDim>
-    using FlattenRecordDimDecreasingAlignment = FlattenRecordDimSorted<RecordDim, internal::MoreAlignment>;
+    /// Sorts the record dimension fields by decreasing alignment of its fields.
+    template<typename FlatRecordDim>
+    using PermuteFieldsDecreasingAlignment = PermuteFieldsSorted<FlatRecordDim, internal::MoreAlignment>;
 
-    /// Flattens and sorts the record dimension by the alignment of its fields to minimize padding.
-    template<typename RecordDim>
-    using FlattenRecordDimMinimizePadding = FlattenRecordDimIncreasingAlignment<RecordDim>;
+    /// Sorts the record dimension fields by the alignment of its fields to minimize padding.
+    template<typename FlatRecordDim>
+    using PermuteFieldsMinimizePadding = PermuteFieldsIncreasingAlignment<FlatRecordDim>;
 
     namespace internal
     {
diff --git a/include/llama/mapping/One.hpp b/include/llama/mapping/One.hpp
index 046df5c516..a9e6aa98e8 100644
--- a/include/llama/mapping/One.hpp
+++ b/include/llama/mapping/One.hpp
@@ -12,14 +12,14 @@ namespace llama::mapping
     /// used for temporary, single element views.
     /// \tparam TFieldAlignment If Align, padding bytes are inserted to guarantee that struct members are properly
     /// aligned. If false, struct members are tightly packed.
-    /// \tparam FlattenRecordDim Defines how the record dimension's fields should be flattened. See \ref
-    /// FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref FlattenRecordDimDecreasingAlignment and
-    /// \ref FlattenRecordDimMinimizePadding.
+    /// \tparam PermuteFields Defines how the record dimension's fields should be permuted. See \ref
+    /// PermuteFieldsInOrder, \ref PermuteFieldsIncreasingAlignment, \ref PermuteFieldsDecreasingAlignment and
+    /// \ref PermuteFieldsMinimizePadding.
     template<
         typename TArrayExtents,
         typename TRecordDim,
         FieldAlignment TFieldAlignment = FieldAlignment::Align,
-        template<typename> typename FlattenRecordDim = FlattenRecordDimMinimizePadding>
+        template<typename> typename PermuteFields = PermuteFieldsMinimizePadding>
     struct One : MappingBase<TArrayExtents, TRecordDim>
     {
     private:
@@ -28,7 +28,7 @@ namespace llama::mapping
 
     public:
         inline static constexpr FieldAlignment fieldAlignment = TFieldAlignment;
-        using Flattener = FlattenRecordDim<TRecordDim>;
+        using Permuter = PermuteFields<FlatRecordDim<TRecordDim>>;
         static constexpr std::size_t blobCount = 1;
 
 #if defined(__NVCC__) && __CUDACC_VER_MAJOR__ >= 12
@@ -44,7 +44,7 @@ namespace llama::mapping
         LLAMA_FN_HOST_ACC_INLINE constexpr auto blobSize(size_type) const -> size_type
         {
             return flatSizeOf<
-                typename Flattener::FlatRecordDim,
+                typename Permuter::FlatRecordDim,
                 fieldAlignment == FieldAlignment::Align,
                 false>; // no tail padding
         }
@@ -58,9 +58,9 @@ namespace llama::mapping
 #if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ <= 6
                 *& // mess with nvcc compiler state to workaround bug
 #endif
-                 Flattener::template flatIndex<RecordCoords...>;
+                 Permuter::template permute<flatRecordCoord<TRecordDim, RecordCoord<RecordCoords...>>>;
             constexpr auto offset = static_cast<size_type>(flatOffsetOf<
-                                                           typename Flattener::FlatRecordDim,
+                                                           typename Permuter::FlatRecordDim,
                                                            flatFieldIndex,
                                                            fieldAlignment == FieldAlignment::Align>);
             return {size_type{0}, offset};
@@ -70,28 +70,28 @@ namespace llama::mapping
     /// One mapping preserving the alignment of the field types by inserting padding.
     /// \see One
     template<typename ArrayExtents, typename RecordDim>
-    using AlignedOne = One<ArrayExtents, RecordDim, FieldAlignment::Align, FlattenRecordDimInOrder>;
+    using AlignedOne = One<ArrayExtents, RecordDim, FieldAlignment::Align, PermuteFieldsInOrder>;
 
     /// One mapping preserving the alignment of the field types by inserting padding and permuting the field order to
     /// minimize this padding.
     /// \see One
     template<typename ArrayExtents, typename RecordDim>
-    using MinAlignedOne = One<ArrayExtents, RecordDim, FieldAlignment::Align, FlattenRecordDimMinimizePadding>;
+    using MinAlignedOne = One<ArrayExtents, RecordDim, FieldAlignment::Align, PermuteFieldsMinimizePadding>;
 
     /// One mapping packing the field types tightly, violating the types' alignment requirements.
     /// \see One
     template<typename ArrayExtents, typename RecordDim>
-    using PackedOne = One<ArrayExtents, RecordDim, FieldAlignment::Pack, FlattenRecordDimInOrder>;
+    using PackedOne = One<ArrayExtents, RecordDim, FieldAlignment::Pack, PermuteFieldsInOrder>;
 
     /// Binds parameters to a \ref One mapping except for array and record dimension, producing a quoted
     /// meta function accepting the latter two. Useful to to prepare this mapping for a meta mapping.
     template<
         FieldAlignment FieldAlignment = FieldAlignment::Align,
-        template<typename> typename FlattenRecordDim = FlattenRecordDimMinimizePadding>
+        template<typename> typename PermuteFields = PermuteFieldsMinimizePadding>
     struct BindOne
     {
         template<typename ArrayExtents, typename RecordDim>
-        using fn = One<ArrayExtents, RecordDim, FieldAlignment, FlattenRecordDim>;
+        using fn = One<ArrayExtents, RecordDim, FieldAlignment, PermuteFields>;
     };
 
     template<typename Mapping>
@@ -102,6 +102,6 @@ namespace llama::mapping
         typename RecordDim,
         FieldAlignment FieldAlignment,
         template<typename>
-        typename FlattenRecordDim>
-    inline constexpr bool isOne<One<ArrayExtents, RecordDim, FieldAlignment, FlattenRecordDim>> = true;
+        typename PermuteFields>
+    inline constexpr bool isOne<One<ArrayExtents, RecordDim, FieldAlignment, PermuteFields>> = true;
 } // namespace llama::mapping
diff --git a/include/llama/mapping/SoA.hpp b/include/llama/mapping/SoA.hpp
index b051bb7e36..f04fc44ac5 100644
--- a/include/llama/mapping/SoA.hpp
+++ b/include/llama/mapping/SoA.hpp
@@ -29,9 +29,9 @@ namespace llama::mapping
     /// overhead to the mapping logic.
     /// \tparam TLinearizeArrayDimsFunctor Defines how the array dimensions should be mapped into linear numbers and
     /// how big the linear domain gets.
-    /// \tparam FlattenRecordDimSingleBlob Defines how the record dimension's fields should be flattened if Blobs is
-    /// Single. See \ref FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref
-    /// FlattenRecordDimDecreasingAlignment and \ref FlattenRecordDimMinimizePadding.
+    /// \tparam PermuteFieldsSingleBlob Defines how the record dimension's fields should be permuted if Blobs is
+    /// Single. See \ref PermuteFieldsInOrder, \ref PermuteFieldsIncreasingAlignment, \ref
+    /// PermuteFieldsDecreasingAlignment and \ref PermuteFieldsMinimizePadding.
     template<
         typename TArrayExtents,
         typename TRecordDim,
@@ -39,7 +39,7 @@ namespace llama::mapping
         SubArrayAlignment TSubArrayAlignment
         = TBlobs == Blobs::Single ? SubArrayAlignment::Align : SubArrayAlignment::Pack,
         typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp,
-        template<typename> typename FlattenRecordDimSingleBlob = FlattenRecordDimInOrder>
+        template<typename> typename PermuteFieldsSingleBlob = PermuteFieldsInOrder>
     struct SoA : MappingBase<TArrayExtents, TRecordDim>
     {
     private:
@@ -50,7 +50,7 @@ namespace llama::mapping
         inline static constexpr Blobs blobs = TBlobs;
         inline static constexpr SubArrayAlignment subArrayAlignment = TSubArrayAlignment;
         using LinearizeArrayDimsFunctor = TLinearizeArrayDimsFunctor;
-        using Flattener = FlattenRecordDimSingleBlob<TRecordDim>;
+        using Permuter = PermuteFieldsSingleBlob<FlatRecordDim<TRecordDim>>;
         inline static constexpr std::size_t blobCount
             = blobs == Blobs::OnePerField ? mp_size<FlatRecordDim<TRecordDim>>::value : 1;
 
@@ -82,7 +82,7 @@ namespace llama::mapping
             else if constexpr(subArrayAlignment == SubArrayAlignment::Align)
             {
                 size_type size = 0;
-                using FRD = typename Flattener::FlatRecordDim;
+                using FRD = typename Permuter::FlatRecordDim;
                 mp_for_each<mp_transform<mp_identity, FRD>>(
                     [&](auto ti) LLAMA_LAMBDA_INLINE
                     {
@@ -101,7 +101,7 @@ namespace llama::mapping
     private:
         static LLAMA_CONSTEVAL auto computeSubArrayOffsets()
         {
-            using FRD = typename Flattener::FlatRecordDim;
+            using FRD = typename Permuter::FlatRecordDim;
             constexpr auto staticFlatSize = LinearizeArrayDimsFunctor{}.size(TArrayExtents{});
             constexpr auto subArrays = mp_size<FRD>::value;
             Array<size_type, subArrays> r{};
@@ -138,9 +138,9 @@ namespace llama::mapping
 #if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ <= 6
                     *& // mess with nvcc compiler state to workaround bug
 #endif
-                     Flattener::template flatIndex<RecordCoords...>;
+                     Permuter::template permute<flatRecordCoord<TRecordDim, RecordCoord<RecordCoords...>>>;
                 const size_type flatSize = LinearizeArrayDimsFunctor{}.size(Base::extents());
-                using FRD = typename Flattener::FlatRecordDim;
+                using FRD = typename Permuter::FlatRecordDim;
                 if constexpr(subArrayAlignment == SubArrayAlignment::Align)
                 {
                     if constexpr(TArrayExtents::rankStatic == TArrayExtents::rank)
diff --git a/tests/mapping.BitPackedInt.cpp b/tests/mapping.BitPackedInt.cpp
index 21fa7003c3..7aaf9014fe 100644
--- a/tests/mapping.BitPackedInt.cpp
+++ b/tests/mapping.BitPackedInt.cpp
@@ -326,7 +326,7 @@ TEMPLATE_TEST_CASE(
         unsigned,
         llama::mapping::SignBit::Keep,
         llama::mapping::LinearizeArrayDimsCpp,
-        llama::mapping::FlattenRecordDimInOrder,
+        llama::mapping::PermuteFieldsInOrder,
         std::uint32_t>) )
 {
     CHECK_THROWS(TestType{{}, 40});
diff --git a/tests/mapping.cpp b/tests/mapping.cpp
index 27680d2a2c..bcf5e9fe2f 100644
--- a/tests/mapping.cpp
+++ b/tests/mapping.cpp
@@ -349,59 +349,59 @@ TEST_CASE("mapping.LinearizeArrayDimsMorton")
     CHECK(lin(llama::ArrayIndex{3, 3}, extents) == 15);
 }
 
-TEST_CASE("mapping.FlattenRecordDimInOrder")
+TEST_CASE("mapping.PermuteFieldsInOrder")
 {
-    using F = llama::mapping::FlattenRecordDimInOrder<Particle>;
+    using F = llama::mapping::PermuteFieldsInOrder<llama::FlatRecordDim<Particle>>;
     STATIC_REQUIRE(std::is_same_v<
                    F::FlatRecordDim,
                    mp_list<double, double, double, float, double, double, double, bool, bool, bool, bool>>);
-    STATIC_REQUIRE(F::flatIndex<0, 0> == 0);
-    STATIC_REQUIRE(F::flatIndex<0, 1> == 1);
-    STATIC_REQUIRE(F::flatIndex<0, 2> == 2);
-    STATIC_REQUIRE(F::flatIndex<1> == 3);
-    STATIC_REQUIRE(F::flatIndex<2, 0> == 4);
-    STATIC_REQUIRE(F::flatIndex<2, 1> == 5);
-    STATIC_REQUIRE(F::flatIndex<2, 2> == 6);
-    STATIC_REQUIRE(F::flatIndex<3, 0> == 7);
-    STATIC_REQUIRE(F::flatIndex<3, 1> == 8);
-    STATIC_REQUIRE(F::flatIndex<3, 2> == 9);
-    STATIC_REQUIRE(F::flatIndex<3, 3> == 10);
+    STATIC_REQUIRE(F::permute<0> == 0);
+    STATIC_REQUIRE(F::permute<1> == 1);
+    STATIC_REQUIRE(F::permute<2> == 2);
+    STATIC_REQUIRE(F::permute<3> == 3);
+    STATIC_REQUIRE(F::permute<4> == 4);
+    STATIC_REQUIRE(F::permute<5> == 5);
+    STATIC_REQUIRE(F::permute<6> == 6);
+    STATIC_REQUIRE(F::permute<7> == 7);
+    STATIC_REQUIRE(F::permute<8> == 8);
+    STATIC_REQUIRE(F::permute<9> == 9);
+    STATIC_REQUIRE(F::permute<10> == 10);
 }
 
-TEST_CASE("mapping.FlattenRecordDimIncreasingAlignment")
+TEST_CASE("mapping.PermuteFieldsIncreasingAlignment")
 {
-    using F = llama::mapping::FlattenRecordDimIncreasingAlignment<Particle>;
+    using F = llama::mapping::PermuteFieldsIncreasingAlignment<llama::FlatRecordDim<Particle>>;
     STATIC_REQUIRE(std::is_same_v<
                    F::FlatRecordDim,
                    mp_list<bool, bool, bool, bool, float, double, double, double, double, double, double>>);
-    STATIC_REQUIRE(F::flatIndex<0, 0> == 5);
-    STATIC_REQUIRE(F::flatIndex<0, 1> == 6);
-    STATIC_REQUIRE(F::flatIndex<0, 2> == 7);
-    STATIC_REQUIRE(F::flatIndex<1> == 4);
-    STATIC_REQUIRE(F::flatIndex<2, 0> == 8);
-    STATIC_REQUIRE(F::flatIndex<2, 1> == 9);
-    STATIC_REQUIRE(F::flatIndex<2, 2> == 10);
-    STATIC_REQUIRE(F::flatIndex<3, 0> == 0);
-    STATIC_REQUIRE(F::flatIndex<3, 1> == 1);
-    STATIC_REQUIRE(F::flatIndex<3, 2> == 2);
-    STATIC_REQUIRE(F::flatIndex<3, 3> == 3);
+    STATIC_REQUIRE(F::permute<0> == 5);
+    STATIC_REQUIRE(F::permute<1> == 6);
+    STATIC_REQUIRE(F::permute<2> == 7);
+    STATIC_REQUIRE(F::permute<3> == 4);
+    STATIC_REQUIRE(F::permute<4> == 8);
+    STATIC_REQUIRE(F::permute<5> == 9);
+    STATIC_REQUIRE(F::permute<6> == 10);
+    STATIC_REQUIRE(F::permute<7> == 0);
+    STATIC_REQUIRE(F::permute<8> == 1);
+    STATIC_REQUIRE(F::permute<9> == 2);
+    STATIC_REQUIRE(F::permute<10> == 3);
 }
 
-TEST_CASE("mapping.FlattenRecordDimDecreasingAlignment")
+TEST_CASE("mapping.PermuteFieldsDecreasingAlignment")
 {
-    using F = llama::mapping::FlattenRecordDimDecreasingAlignment<Particle>;
+    using F = llama::mapping::PermuteFieldsDecreasingAlignment<llama::FlatRecordDim<Particle>>;
     STATIC_REQUIRE(std::is_same_v<
                    F::FlatRecordDim,
                    mp_list<double, double, double, double, double, double, float, bool, bool, bool, bool>>);
-    STATIC_REQUIRE(F::flatIndex<0, 0> == 0);
-    STATIC_REQUIRE(F::flatIndex<0, 1> == 1);
-    STATIC_REQUIRE(F::flatIndex<0, 2> == 2);
-    STATIC_REQUIRE(F::flatIndex<1> == 6);
-    STATIC_REQUIRE(F::flatIndex<2, 0> == 3);
-    STATIC_REQUIRE(F::flatIndex<2, 1> == 4);
-    STATIC_REQUIRE(F::flatIndex<2, 2> == 5);
-    STATIC_REQUIRE(F::flatIndex<3, 0> == 7);
-    STATIC_REQUIRE(F::flatIndex<3, 1> == 8);
-    STATIC_REQUIRE(F::flatIndex<3, 2> == 9);
-    STATIC_REQUIRE(F::flatIndex<3, 3> == 10);
+    STATIC_REQUIRE(F::permute<0> == 0);
+    STATIC_REQUIRE(F::permute<1> == 1);
+    STATIC_REQUIRE(F::permute<2> == 2);
+    STATIC_REQUIRE(F::permute<3> == 6);
+    STATIC_REQUIRE(F::permute<4> == 3);
+    STATIC_REQUIRE(F::permute<5> == 4);
+    STATIC_REQUIRE(F::permute<6> == 5);
+    STATIC_REQUIRE(F::permute<7> == 7);
+    STATIC_REQUIRE(F::permute<8> == 8);
+    STATIC_REQUIRE(F::permute<9> == 9);
+    STATIC_REQUIRE(F::permute<10> == 10);
 }