From 3dc36650dff9f2e698ae5ff40e89ceed423f4c4e Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Fri, 29 Sep 2023 19:31:01 +0200 Subject: [PATCH] Separate record dim flattening from field permutations --- CHANGELOG.md | 2 +- docs/pages/api.rst | 14 ++--- examples/cuda/pitch/pitch.cu | 8 +-- examples/memmap/memmap.cpp | 2 +- include/llama/Simd.hpp | 4 +- include/llama/mapping/AoS.hpp | 31 ++++------ include/llama/mapping/AoSoA.hpp | 15 +++-- include/llama/mapping/BitPackedFloat.hpp | 15 ++--- include/llama/mapping/BitPackedInt.hpp | 21 ++++--- include/llama/mapping/Common.hpp | 46 ++++++-------- include/llama/mapping/One.hpp | 30 ++++----- include/llama/mapping/SoA.hpp | 18 +++--- tests/mapping.BitPackedInt.cpp | 2 +- tests/mapping.cpp | 78 ++++++++++++------------ 14 files changed, 138 insertions(+), 148 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 13ae6ced27..e7fa889ba2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -127,7 +127,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
- added macros `LLAMA_FORCE_INLINE` and `LLAMA_HOST_ACC` #366 - support clang as CUDA compiler #366 - `llama::mapping::SoA` and `llama::mapping::AoSoA` now support custom record dimension flatteners #371 -- added the `llama::mapping::FlattenRecordDimIncreasingAlignment`, `llama::mapping::FlattenRecordDimDecreasingAlignment` and `llama::mapping::FlattenRecordDimMinimizePadding` record dimension flatteners #371 +- added the `llama::mapping::PermuteFieldsIncreasingAlignment`, `llama::mapping::PermuteFieldsDecreasingAlignment` and `llama::mapping::PermuteFieldsMinimizePadding` record dimension flatteners #371 - added new mapping `llama::mapping::BitPackedIntSoA` bitpacking integers in the record dimension into SoA arrays, and added new example #372, #427, #441, #446 - added new mapping `llama::mapping::BitPackedFloatSoA` bitpacking floating-point types in the record dimension into SoA arrays, and added new example #414, #427, #446 - `LLAMA_FORCE_INLINE` views can be created on `const` blobs #375 diff --git a/docs/pages/api.rst b/docs/pages/api.rst index d3b0719853..749b035f4c 100644 --- a/docs/pages/api.rst +++ b/docs/pages/api.rst @@ -178,15 +178,15 @@ Acessors .. doxygenstruct:: llama::accessor::Restrict .. doxygenstruct:: llama::accessor::Atomic .. doxygenstruct:: llama::accessor::Stacked +RecordDim field permuters +^^^^^^^^^^^^^^^^^^^^^^^^^ -RecordDim flattener -^^^^^^^^^^^^^^^^^^^ -.. doxygenstruct:: llama::mapping::FlattenRecordDimInOrder -.. doxygenstruct:: llama::mapping::FlattenRecordDimSorted -.. doxygentypedef:: llama::mapping::FlattenRecordDimIncreasingAlignment -.. doxygentypedef:: llama::mapping::FlattenRecordDimDecreasingAlignment -.. doxygentypedef:: llama::mapping::FlattenRecordDimMinimizePadding +.. doxygenstruct:: llama::mapping::PermuteFieldsInOrder +.. doxygenstruct:: llama::mapping::PermuteFieldsSorted +.. doxygentypedef:: llama::mapping::PermuteFieldsIncreasingAlignment +.. doxygentypedef:: llama::mapping::PermuteFieldsDecreasingAlignment +.. 
doxygentypedef:: llama::mapping::PermuteFieldsMinimizePadding Common utilities ^^^^^^^^^^^^^^^^ diff --git a/examples/cuda/pitch/pitch.cu b/examples/cuda/pitch/pitch.cu index 76868a5e05..b7d9a33be8 100644 --- a/examples/cuda/pitch/pitch.cu +++ b/examples/cuda/pitch/pitch.cu @@ -60,14 +60,14 @@ namespace llamaex typename TArrayExtents, typename TRecordDim, bool AlignAndPad = true, - template typename FlattenRecordDim = mapping::FlattenRecordDimInOrder> + template typename PermuteFields = mapping::PermuteFieldsInOrder> struct PitchedAoS : mapping::MappingBase { private: static constexpr std::size_t dim = TArrayExtents{}.size(); using Base = mapping::MappingBase; - using Flattener = FlattenRecordDim; + using Permuter = PermuteFields>; Array pitches; @@ -116,9 +116,9 @@ namespace llamaex #if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ <= 6 *& // mess with nvcc compiler state to workaround bug #endif - Flattener::template flatIndex; + Permuter::template permute>>; const auto offset - = dot(pitches, ai) + flatOffsetOf; + = dot(pitches, ai) + flatOffsetOf; return {0, offset}; } }; diff --git a/examples/memmap/memmap.cpp b/examples/memmap/memmap.cpp index a001df1d27..d6e5859410 100644 --- a/examples/memmap/memmap.cpp +++ b/examples/memmap/memmap.cpp @@ -64,7 +64,7 @@ auto main(int argc, const char* argv[]) -> int Triangle, llama::mapping::FieldAlignment::Pack, llama::mapping::LinearizeArrayDimsCpp, - llama::mapping::FlattenRecordDimInOrder>{{n}}; + llama::mapping::PermuteFieldsInOrder>{{n}}; if(size != 80u + 4u + mapping.blobSize(0)) { std::cout << "File size (" << size << ") != 80 + 4 + mapping size: (" << mapping.blobSize(0) << ")\n"; diff --git a/include/llama/Simd.hpp b/include/llama/Simd.hpp index d40f303f97..7fbf906ed8 100644 --- a/include/llama/Simd.hpp +++ b/include/llama/Simd.hpp @@ -195,7 +195,7 @@ namespace llama { static_assert(mapping::isAoS); static constexpr auto srcStride = flatSizeOf< - typename 
Mapping::Flattener::FlatRecordDim, + typename Mapping::Permuter::FlatRecordDim, Mapping::fieldAlignment == llama::mapping::FieldAlignment::Align>; const auto* srcBaseAddr = reinterpret_cast(&srcRef(rc)); ElementSimd elemSimd; // g++-12 really needs the intermediate elemSimd and memcpy @@ -235,7 +235,7 @@ namespace llama else if constexpr(mapping::isAoS) { static constexpr auto stride = flatSizeOf< - typename Mapping::Flattener::FlatRecordDim, + typename Mapping::Permuter::FlatRecordDim, Mapping::fieldAlignment == llama::mapping::FieldAlignment::Align>; auto* dstBaseAddr = reinterpret_cast(&dstRef(rc)); const ElementSimd elemSimd = srcSimd(rc); diff --git a/include/llama/mapping/AoS.hpp b/include/llama/mapping/AoS.hpp index 5dd85f1be0..072d1f2c6d 100644 --- a/include/llama/mapping/AoS.hpp +++ b/include/llama/mapping/AoS.hpp @@ -12,15 +12,15 @@ namespace llama::mapping /// If Pack, struct members are tightly packed. /// \tparam TLinearizeArrayDimsFunctor Defines how the array dimensions should be mapped into linear numbers and /// how big the linear domain gets. - /// \tparam FlattenRecordDim Defines how the record dimension's fields should be flattened. See \ref - /// FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref FlattenRecordDimDecreasingAlignment and - /// \ref FlattenRecordDimMinimizePadding. + /// \tparam PermuteFields Defines how the record dimension's fields should be permuted. See \ref + /// PermuteFieldsInOrder, \ref PermuteFieldsIncreasingAlignment, \ref PermuteFieldsDecreasingAlignment and + /// \ref PermuteFieldsMinimizePadding. 
template< typename TArrayExtents, typename TRecordDim, FieldAlignment TFieldAlignment = FieldAlignment::Align, typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp, - template typename FlattenRecordDim = FlattenRecordDimInOrder> + template typename PermuteFields = PermuteFieldsInOrder> struct AoS : MappingBase { private: @@ -30,7 +30,7 @@ namespace llama::mapping public: inline static constexpr FieldAlignment fieldAlignment = TFieldAlignment; using LinearizeArrayDimsFunctor = TLinearizeArrayDimsFunctor; - using Flattener = FlattenRecordDim; + using Permuter = PermuteFields>; inline static constexpr std::size_t blobCount = 1; using Base::Base; @@ -38,7 +38,7 @@ namespace llama::mapping LLAMA_FN_HOST_ACC_INLINE constexpr auto blobSize(size_type) const -> size_type { return LinearizeArrayDimsFunctor{}.size(Base::extents()) - * flatSizeOf; + * flatSizeOf; } template @@ -50,13 +50,13 @@ namespace llama::mapping #if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ <= 6 *& // mess with nvcc compiler state to workaround bug #endif - Flattener::template flatIndex; + Permuter::template permute>>; const auto offset = LinearizeArrayDimsFunctor{}(ai, Base::extents()) * static_cast( - flatSizeOf) + flatSizeOf) + static_cast(flatOffsetOf< - typename Flattener::FlatRecordDim, + typename Permuter::FlatRecordDim, flatFieldIndex, fieldAlignment == FieldAlignment::Align>); return {size_type{0}, offset}; @@ -75,12 +75,8 @@ namespace llama::mapping /// Array of struct mapping preserving the alignment of the field types by inserting padding and permuting the /// field order to minimize this padding. \see AoS template - using MinAlignedAoS = AoS< - ArrayExtents, - RecordDim, - FieldAlignment::Align, - LinearizeArrayDimsFunctor, - FlattenRecordDimMinimizePadding>; + using MinAlignedAoS + = AoS; /// Array of struct mapping packing the field types tightly, violating the type's alignment requirements. 
/// \see AoS @@ -107,8 +103,7 @@ namespace llama::mapping FieldAlignment FieldAlignment, typename LinearizeArrayDimsFunctor, template - typename FlattenRecordDim> - inline constexpr bool - isAoS> + typename PermuteFields> + inline constexpr bool isAoS> = true; } // namespace llama::mapping diff --git a/include/llama/mapping/AoSoA.hpp b/include/llama/mapping/AoSoA.hpp index 417918ae8e..f1f8258579 100644 --- a/include/llama/mapping/AoSoA.hpp +++ b/include/llama/mapping/AoSoA.hpp @@ -26,15 +26,15 @@ namespace llama::mapping /// Array of struct of arrays mapping. Used to create a \ref View via \ref allocView. /// \tparam Lanes The size of the inner arrays of this array of struct of arrays. - /// \tparam FlattenRecordDim Defines how the record dimension's fields should be flattened. See \ref - /// FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref FlattenRecordDimDecreasingAlignment and - /// \ref FlattenRecordDimMinimizePadding. + /// \tparam PermuteFields Defines how the record dimension's fields should be permuted. See \ref + /// PermuteFieldsInOrder, \ref PermuteFieldsIncreasingAlignment, \ref PermuteFieldsDecreasingAlignment and + /// \ref PermuteFieldsMinimizePadding. 
template< typename TArrayExtents, typename TRecordDim, typename TArrayExtents::value_type Lanes, typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp, - template typename FlattenRecordDim = FlattenRecordDimInOrder> + template typename PermuteFields = PermuteFieldsInOrder> struct AoSoA : MappingBase { private: @@ -44,7 +44,7 @@ namespace llama::mapping public: inline static constexpr typename TArrayExtents::value_type lanes = Lanes; using LinearizeArrayDimsFunctor = TLinearizeArrayDimsFunctor; - using Flattener = FlattenRecordDim; + using Permuter = PermuteFields>; inline static constexpr std::size_t blobCount = 1; #if defined(__NVCC__) && __CUDACC_VER_MAJOR__ >= 12 @@ -72,13 +72,12 @@ namespace llama::mapping #if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ <= 6 *& // mess with nvcc compiler state to workaround bug #endif - Flattener::template flatIndex; + Permuter::template permute>>; const auto flatArrayIndex = LinearizeArrayDimsFunctor{}(ai, Base::extents()); const auto blockIndex = flatArrayIndex / Lanes; const auto laneIndex = flatArrayIndex % Lanes; const auto offset = static_cast(sizeOf * Lanes) * blockIndex - + static_cast(flatOffsetOf) - * Lanes + + static_cast(flatOffsetOf) * Lanes + static_cast(sizeof(GetType>)) * laneIndex; return {0, offset}; } diff --git a/include/llama/mapping/BitPackedFloat.hpp b/include/llama/mapping/BitPackedFloat.hpp index 9fce3e3405..578360e4ef 100644 --- a/include/llama/mapping/BitPackedFloat.hpp +++ b/include/llama/mapping/BitPackedFloat.hpp @@ -315,7 +315,7 @@ namespace llama::mapping typename ExponentBits = typename TArrayExtents::value_type, typename MantissaBits = ExponentBits, typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp, - template typename FlattenRecordDim = FlattenRecordDimInOrder, + template typename PermuteFields = PermuteFieldsInOrder, typename TStoredIntegral = internal::StoredIntegralFor> struct LLAMA_DECLSPEC_EMPTY_BASES BitPackedFloatAoS : MappingBase @@ -332,7 
+332,7 @@ namespace llama::mapping using LinearizeArrayDimsFunctor = TLinearizeArrayDimsFunctor; using StoredIntegral = TStoredIntegral; - using Flattener = FlattenRecordDim; + using Permuter = PermuteFields>; static constexpr std::size_t blobCount = 1; LLAMA_FN_HOST_ACC_INLINE @@ -382,7 +382,8 @@ namespace llama::mapping RecordCoord, Blobs& blobs) const { - constexpr auto flatFieldIndex = static_cast(Flattener::template flatIndex); + constexpr auto flatFieldIndex = static_cast( + Permuter::template permute>>); const auto bitOffset = ((TLinearizeArrayDimsFunctor{}(ai, Base::extents()) * static_cast(flatFieldCount)) + flatFieldIndex) @@ -404,7 +405,7 @@ namespace llama::mapping typename ExponentBits = unsigned, typename MantissaBits = ExponentBits, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp, - template typename FlattenRecordDim = FlattenRecordDimInOrder, + template typename PermuteFields = PermuteFieldsInOrder, typename StoredIntegral = void> struct BindBitPackedFloatAoS { @@ -415,7 +416,7 @@ namespace llama::mapping ExponentBits, MantissaBits, LinearizeArrayDimsFunctor, - FlattenRecordDim, + PermuteFields, std::conditional_t< !std::is_void_v, StoredIntegral, @@ -432,7 +433,7 @@ namespace llama::mapping typename MantissaBits, typename LinearizeArrayDimsFunctor, template - typename FlattenRecordDim, + typename PermuteFields, typename StoredIntegral> inline constexpr bool isBitPackedFloatAoS> = true; } // namespace llama::mapping diff --git a/include/llama/mapping/BitPackedInt.hpp b/include/llama/mapping/BitPackedInt.hpp index 6680034bcb..d982d5160a 100644 --- a/include/llama/mapping/BitPackedInt.hpp +++ b/include/llama/mapping/BitPackedInt.hpp @@ -449,9 +449,9 @@ namespace llama::mapping /// numbers will be read back positive. /// \tparam TLinearizeArrayDimsFunctor Defines how the array dimensions should be mapped into linear numbers and /// how big the linear domain gets. 
- /// \tparam FlattenRecordDim Defines how the record dimension's fields should be flattened. See \ref - // FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref FlattenRecordDimDecreasingAlignment and - // \ref FlattenRecordDimMinimizePadding. + /// \tparam PermuteFields Defines how the record dimension's fields should be permuted. See \ref + /// PermuteFieldsInOrder, \ref PermuteFieldsIncreasingAlignment, \ref PermuteFieldsDecreasingAlignment and + /// \ref PermuteFieldsMinimizePadding. /// \tparam TStoredIntegral Integral type used as storage of reduced precision integers. Must be std::uint32_t or /// std::uint64_t. template< @@ -460,7 +460,7 @@ namespace llama::mapping typename Bits = typename TArrayExtents::value_type, SignBit SignBit = SignBit::Keep, typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp, - template typename FlattenRecordDim = FlattenRecordDimInOrder, + template typename PermuteFields = PermuteFieldsInOrder, typename TStoredIntegral = internal::StoredUnsignedFor> struct BitPackedIntAoS : internal:: @@ -475,7 +475,7 @@ namespace llama::mapping using typename Base::size_type; using VHBits = typename Base::VHBits; // use plain using declaration with nvcc >= 11.8 - using Flattener = FlattenRecordDim; + using Permuter = PermuteFields; static constexpr std::size_t blobCount = 1; LLAMA_FN_HOST_ACC_INLINE @@ -493,7 +493,8 @@ namespace llama::mapping RecordCoord, Blobs& blobs) const { - constexpr auto flatFieldIndex = static_cast(Flattener::template flatIndex); + constexpr auto flatFieldIndex = static_cast( + Permuter::template permute>>); const auto bitOffset = ((TLinearizeArrayDimsFunctor{}(ai, Base::extents()) * static_cast(flatFieldCount)) + flatFieldIndex) @@ -516,7 +517,7 @@ namespace llama::mapping typename Bits = void, SignBit SignBit = SignBit::Keep, typename LinearizeArrayDimsFunctor = mapping::LinearizeArrayDimsCpp, - template typename FlattenRecordDim = FlattenRecordDimInOrder, + template typename PermuteFields = 
PermuteFieldsInOrder, typename StoredIntegral = void> struct BindBitPackedIntAoS { @@ -527,7 +528,7 @@ namespace llama::mapping std::conditional_t, Bits, typename ArrayExtents::value_type>, SignBit, LinearizeArrayDimsFunctor, - FlattenRecordDim, + PermuteFields, std::conditional_t< !std::is_void_v, StoredIntegral, @@ -544,7 +545,7 @@ namespace llama::mapping SignBit SignBit, typename LinearizeArrayDimsFunctor, template - typename FlattenRecordDim, + typename PermuteFields, typename StoredIntegral> inline constexpr bool isBitPackedIntAoS> = true; } // namespace llama::mapping diff --git a/include/llama/mapping/Common.hpp b/include/llama/mapping/Common.hpp index e64f98d974..4a67ed1108 100644 --- a/include/llama/mapping/Common.hpp +++ b/include/llama/mapping/Common.hpp @@ -165,24 +165,23 @@ namespace llama::mapping } }; - /// Flattens the record dimension in the order fields are written. - template - struct FlattenRecordDimInOrder + /// Retains the order of the record dimension's fields. + template + struct PermuteFieldsInOrder { - using FlatRecordDim = llama::FlatRecordDim; + using FlatRecordDim = TFlatRecordDim; - template - static constexpr std::size_t flatIndex = flatRecordCoord>; + template + static constexpr std::size_t permute = FlatRecordCoord; }; - /// Flattens the record dimension by sorting the fields according to a given predicate on the field types. + /// Sorts the record dimension's fields according to a given predicate on the field types. /// @tparam Less A binary predicate accepting two field types, which exposes a member value. Value must be true if /// the first field type is less than the second one, otherwise false. 
- template typename Less> - struct FlattenRecordDimSorted + template typename Less> + struct PermuteFieldsSorted { private: - using FlatOrigRecordDim = llama::FlatRecordDim; using FlatSortedRecordDim = mp_sort; template @@ -201,13 +200,8 @@ namespace llama::mapping public: using FlatRecordDim = FlatSortedRecordDim; - template - static constexpr std::size_t flatIndex = []() constexpr - { - constexpr auto indexBefore = flatRecordCoord>; - constexpr auto indexAfter = mp_at_c::value; - return indexAfter; - }(); + template + static constexpr std::size_t permute = mp_at_c::value; }; namespace internal @@ -219,17 +213,17 @@ namespace llama::mapping using MoreAlignment = std::bool_constant<(alignof(A) > alignof(B))>; } // namespace internal - /// Flattens and sorts the record dimension by increasing alignment of its fields. - template - using FlattenRecordDimIncreasingAlignment = FlattenRecordDimSorted; + /// Sorts the record dimension fields by increasing alignment of its fields. + template + using PermuteFieldsIncreasingAlignment = PermuteFieldsSorted; - /// Flattens and sorts the record dimension by decreasing alignment of its fields. - template - using FlattenRecordDimDecreasingAlignment = FlattenRecordDimSorted; + /// Sorts the record dimension fields by decreasing alignment of its fields. + template + using PermuteFieldsDecreasingAlignment = PermuteFieldsSorted; - /// Flattens and sorts the record dimension by the alignment of its fields to minimize padding. - template - using FlattenRecordDimMinimizePadding = FlattenRecordDimIncreasingAlignment; + /// Sorts the record dimension fields by the alignment of its fields to minimize padding. 
+ template + using PermuteFieldsMinimizePadding = PermuteFieldsIncreasingAlignment; namespace internal { diff --git a/include/llama/mapping/One.hpp b/include/llama/mapping/One.hpp index 046df5c516..a9e6aa98e8 100644 --- a/include/llama/mapping/One.hpp +++ b/include/llama/mapping/One.hpp @@ -12,14 +12,14 @@ namespace llama::mapping /// used for temporary, single element views. /// \tparam TFieldAlignment If Align, padding bytes are inserted to guarantee that struct members are properly /// aligned. If false, struct members are tightly packed. - /// \tparam FlattenRecordDim Defines how the record dimension's fields should be flattened. See \ref - /// FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref FlattenRecordDimDecreasingAlignment and - /// \ref FlattenRecordDimMinimizePadding. + /// \tparam PermuteFields Defines how the record dimension's fields should be permuted. See \ref + /// PermuteFieldsInOrder, \ref PermuteFieldsIncreasingAlignment, \ref PermuteFieldsDecreasingAlignment and + /// \ref PermuteFieldsMinimizePadding. 
template< typename TArrayExtents, typename TRecordDim, FieldAlignment TFieldAlignment = FieldAlignment::Align, - template typename FlattenRecordDim = FlattenRecordDimMinimizePadding> + template typename PermuteFields = PermuteFieldsMinimizePadding> struct One : MappingBase { private: @@ -28,7 +28,7 @@ namespace llama::mapping public: inline static constexpr FieldAlignment fieldAlignment = TFieldAlignment; - using Flattener = FlattenRecordDim; + using Permuter = PermuteFields>; static constexpr std::size_t blobCount = 1; #if defined(__NVCC__) && __CUDACC_VER_MAJOR__ >= 12 @@ -44,7 +44,7 @@ namespace llama::mapping LLAMA_FN_HOST_ACC_INLINE constexpr auto blobSize(size_type) const -> size_type { return flatSizeOf< - typename Flattener::FlatRecordDim, + typename Permuter::FlatRecordDim, fieldAlignment == FieldAlignment::Align, false>; // no tail padding } @@ -58,9 +58,9 @@ namespace llama::mapping #if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ <= 6 *& // mess with nvcc compiler state to workaround bug #endif - Flattener::template flatIndex; + Permuter::template permute>>; constexpr auto offset = static_cast(flatOffsetOf< - typename Flattener::FlatRecordDim, + typename Permuter::FlatRecordDim, flatFieldIndex, fieldAlignment == FieldAlignment::Align>); return {size_type{0}, offset}; @@ -70,28 +70,28 @@ namespace llama::mapping /// One mapping preserving the alignment of the field types by inserting padding. /// \see One template - using AlignedOne = One; + using AlignedOne = One; /// One mapping preserving the alignment of the field types by inserting padding and permuting the field order to /// minimize this padding. /// \see One template - using MinAlignedOne = One; + using MinAlignedOne = One; /// One mapping packing the field types tightly, violating the types' alignment requirements. 
/// \see One template - using PackedOne = One; + using PackedOne = One; /// Binds parameters to a \ref One mapping except for array and record dimension, producing a quoted /// meta function accepting the latter two. Useful to to prepare this mapping for a meta mapping. template< FieldAlignment FieldAlignment = FieldAlignment::Align, - template typename FlattenRecordDim = FlattenRecordDimMinimizePadding> + template typename PermuteFields = PermuteFieldsMinimizePadding> struct BindOne { template - using fn = One; + using fn = One; }; template @@ -102,6 +102,6 @@ namespace llama::mapping typename RecordDim, FieldAlignment FieldAlignment, template - typename FlattenRecordDim> - inline constexpr bool isOne> = true; + typename PermuteFields> + inline constexpr bool isOne> = true; } // namespace llama::mapping diff --git a/include/llama/mapping/SoA.hpp b/include/llama/mapping/SoA.hpp index b051bb7e36..f04fc44ac5 100644 --- a/include/llama/mapping/SoA.hpp +++ b/include/llama/mapping/SoA.hpp @@ -29,9 +29,9 @@ namespace llama::mapping /// overhead to the mapping logic. /// \tparam TLinearizeArrayDimsFunctor Defines how the array dimensions should be mapped into linear numbers and /// how big the linear domain gets. - /// \tparam FlattenRecordDimSingleBlob Defines how the record dimension's fields should be flattened if Blobs is - /// Single. See \ref FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref - /// FlattenRecordDimDecreasingAlignment and \ref FlattenRecordDimMinimizePadding. + /// \tparam PermuteFieldsSingleBlob Defines how the record dimension's fields should be permuted if Blobs is + /// Single. See \ref PermuteFieldsInOrder, \ref PermuteFieldsIncreasingAlignment, \ref + /// PermuteFieldsDecreasingAlignment and \ref PermuteFieldsMinimizePadding. template< typename TArrayExtents, typename TRecordDim, @@ -39,7 +39,7 @@ namespace llama::mapping SubArrayAlignment TSubArrayAlignment = TBlobs == Blobs::Single ? 
SubArrayAlignment::Align : SubArrayAlignment::Pack, typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp, - template typename FlattenRecordDimSingleBlob = FlattenRecordDimInOrder> + template typename PermuteFieldsSingleBlob = PermuteFieldsInOrder> struct SoA : MappingBase { private: @@ -50,7 +50,7 @@ namespace llama::mapping inline static constexpr Blobs blobs = TBlobs; inline static constexpr SubArrayAlignment subArrayAlignment = TSubArrayAlignment; using LinearizeArrayDimsFunctor = TLinearizeArrayDimsFunctor; - using Flattener = FlattenRecordDimSingleBlob; + using Permuter = PermuteFieldsSingleBlob>; inline static constexpr std::size_t blobCount = blobs == Blobs::OnePerField ? mp_size>::value : 1; @@ -82,7 +82,7 @@ namespace llama::mapping else if constexpr(subArrayAlignment == SubArrayAlignment::Align) { size_type size = 0; - using FRD = typename Flattener::FlatRecordDim; + using FRD = typename Permuter::FlatRecordDim; mp_for_each>( [&](auto ti) LLAMA_LAMBDA_INLINE { @@ -101,7 +101,7 @@ namespace llama::mapping private: static LLAMA_CONSTEVAL auto computeSubArrayOffsets() { - using FRD = typename Flattener::FlatRecordDim; + using FRD = typename Permuter::FlatRecordDim; constexpr auto staticFlatSize = LinearizeArrayDimsFunctor{}.size(TArrayExtents{}); constexpr auto subArrays = mp_size::value; Array r{}; @@ -138,9 +138,9 @@ namespace llama::mapping #if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ <= 6 *& // mess with nvcc compiler state to workaround bug #endif - Flattener::template flatIndex; + Permuter::template permute>>; const size_type flatSize = LinearizeArrayDimsFunctor{}.size(Base::extents()); - using FRD = typename Flattener::FlatRecordDim; + using FRD = typename Permuter::FlatRecordDim; if constexpr(subArrayAlignment == SubArrayAlignment::Align) { if constexpr(TArrayExtents::rankStatic == TArrayExtents::rank) diff --git a/tests/mapping.BitPackedInt.cpp b/tests/mapping.BitPackedInt.cpp index 21fa7003c3..7aaf9014fe 100644 
--- a/tests/mapping.BitPackedInt.cpp +++ b/tests/mapping.BitPackedInt.cpp @@ -326,7 +326,7 @@ TEMPLATE_TEST_CASE( unsigned, llama::mapping::SignBit::Keep, llama::mapping::LinearizeArrayDimsCpp, - llama::mapping::FlattenRecordDimInOrder, + llama::mapping::PermuteFieldsInOrder, std::uint32_t>) ) { CHECK_THROWS(TestType{{}, 40}); diff --git a/tests/mapping.cpp b/tests/mapping.cpp index 27680d2a2c..bcf5e9fe2f 100644 --- a/tests/mapping.cpp +++ b/tests/mapping.cpp @@ -349,59 +349,59 @@ TEST_CASE("mapping.LinearizeArrayDimsMorton") CHECK(lin(llama::ArrayIndex{3, 3}, extents) == 15); } -TEST_CASE("mapping.FlattenRecordDimInOrder") +TEST_CASE("mapping.PermuteFieldsInOrder") { - using F = llama::mapping::FlattenRecordDimInOrder; + using F = llama::mapping::PermuteFieldsInOrder>; STATIC_REQUIRE(std::is_same_v< F::FlatRecordDim, mp_list>); - STATIC_REQUIRE(F::flatIndex<0, 0> == 0); - STATIC_REQUIRE(F::flatIndex<0, 1> == 1); - STATIC_REQUIRE(F::flatIndex<0, 2> == 2); - STATIC_REQUIRE(F::flatIndex<1> == 3); - STATIC_REQUIRE(F::flatIndex<2, 0> == 4); - STATIC_REQUIRE(F::flatIndex<2, 1> == 5); - STATIC_REQUIRE(F::flatIndex<2, 2> == 6); - STATIC_REQUIRE(F::flatIndex<3, 0> == 7); - STATIC_REQUIRE(F::flatIndex<3, 1> == 8); - STATIC_REQUIRE(F::flatIndex<3, 2> == 9); - STATIC_REQUIRE(F::flatIndex<3, 3> == 10); + STATIC_REQUIRE(F::permute<0> == 0); + STATIC_REQUIRE(F::permute<1> == 1); + STATIC_REQUIRE(F::permute<2> == 2); + STATIC_REQUIRE(F::permute<3> == 3); + STATIC_REQUIRE(F::permute<4> == 4); + STATIC_REQUIRE(F::permute<5> == 5); + STATIC_REQUIRE(F::permute<6> == 6); + STATIC_REQUIRE(F::permute<7> == 7); + STATIC_REQUIRE(F::permute<8> == 8); + STATIC_REQUIRE(F::permute<9> == 9); + STATIC_REQUIRE(F::permute<10> == 10); } -TEST_CASE("mapping.FlattenRecordDimIncreasingAlignment") +TEST_CASE("mapping.PermuteFieldsIncreasingAlignment") { - using F = llama::mapping::FlattenRecordDimIncreasingAlignment; + using F = llama::mapping::PermuteFieldsIncreasingAlignment>; 
STATIC_REQUIRE(std::is_same_v< F::FlatRecordDim, mp_list>); - STATIC_REQUIRE(F::flatIndex<0, 0> == 5); - STATIC_REQUIRE(F::flatIndex<0, 1> == 6); - STATIC_REQUIRE(F::flatIndex<0, 2> == 7); - STATIC_REQUIRE(F::flatIndex<1> == 4); - STATIC_REQUIRE(F::flatIndex<2, 0> == 8); - STATIC_REQUIRE(F::flatIndex<2, 1> == 9); - STATIC_REQUIRE(F::flatIndex<2, 2> == 10); - STATIC_REQUIRE(F::flatIndex<3, 0> == 0); - STATIC_REQUIRE(F::flatIndex<3, 1> == 1); - STATIC_REQUIRE(F::flatIndex<3, 2> == 2); - STATIC_REQUIRE(F::flatIndex<3, 3> == 3); + STATIC_REQUIRE(F::permute<0> == 5); + STATIC_REQUIRE(F::permute<1> == 6); + STATIC_REQUIRE(F::permute<2> == 7); + STATIC_REQUIRE(F::permute<3> == 4); + STATIC_REQUIRE(F::permute<4> == 8); + STATIC_REQUIRE(F::permute<5> == 9); + STATIC_REQUIRE(F::permute<6> == 10); + STATIC_REQUIRE(F::permute<7> == 0); + STATIC_REQUIRE(F::permute<8> == 1); + STATIC_REQUIRE(F::permute<9> == 2); + STATIC_REQUIRE(F::permute<10> == 3); } -TEST_CASE("mapping.FlattenRecordDimDecreasingAlignment") +TEST_CASE("mapping.PermuteFieldsDecreasingAlignment") { - using F = llama::mapping::FlattenRecordDimDecreasingAlignment; + using F = llama::mapping::PermuteFieldsDecreasingAlignment>; STATIC_REQUIRE(std::is_same_v< F::FlatRecordDim, mp_list>); - STATIC_REQUIRE(F::flatIndex<0, 0> == 0); - STATIC_REQUIRE(F::flatIndex<0, 1> == 1); - STATIC_REQUIRE(F::flatIndex<0, 2> == 2); - STATIC_REQUIRE(F::flatIndex<1> == 6); - STATIC_REQUIRE(F::flatIndex<2, 0> == 3); - STATIC_REQUIRE(F::flatIndex<2, 1> == 4); - STATIC_REQUIRE(F::flatIndex<2, 2> == 5); - STATIC_REQUIRE(F::flatIndex<3, 0> == 7); - STATIC_REQUIRE(F::flatIndex<3, 1> == 8); - STATIC_REQUIRE(F::flatIndex<3, 2> == 9); - STATIC_REQUIRE(F::flatIndex<3, 3> == 10); + STATIC_REQUIRE(F::permute<0> == 0); + STATIC_REQUIRE(F::permute<1> == 1); + STATIC_REQUIRE(F::permute<2> == 2); + STATIC_REQUIRE(F::permute<3> == 6); + STATIC_REQUIRE(F::permute<4> == 3); + STATIC_REQUIRE(F::permute<5> == 4); + STATIC_REQUIRE(F::permute<6> == 5); + 
STATIC_REQUIRE(F::permute<7> == 7); + STATIC_REQUIRE(F::permute<8> == 8); + STATIC_REQUIRE(F::permute<9> == 9); + STATIC_REQUIRE(F::permute<10> == 10); }