From 46f82709103cd98e7fd319c7db671b162fa32c28 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Mon, 10 Jun 2024 11:52:19 -0600 Subject: [PATCH] Rework of submdspan for layout_left/right (#337) Reworked layout_left/right submdspan_mapping to return padded layouts where required by C++26. This does not generate static padding values yet, which will come later. Co-authored-by: Mark Hoemmen --- .../__p2630_bits/submdspan_mapping.hpp | 526 ++++++++++-------- .../__p2642_bits/layout_padded.hpp | 11 + tests/foo_customizations.hpp | 39 +- tests/test_submdspan.cpp | 12 +- tests/test_submdspan_static_slice.cpp | 28 +- 5 files changed, 363 insertions(+), 253 deletions(-) diff --git a/include/experimental/__p2630_bits/submdspan_mapping.hpp b/include/experimental/__p2630_bits/submdspan_mapping.hpp index 543a0919..cf1bdd1e 100644 --- a/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -17,10 +17,30 @@ #pragma once #include -#include #include +#include #include // index_sequence +// Suppress spurious warning with NVCC about no return statement. +// This is a known issue in NVCC and NVC++ +// Depending on the CUDA and GCC version we need both the builtin +// and the diagnostic push. I tried really hard to find something shorter +// but no luck ... 
+#if defined __NVCC__ +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic push +#pragma nv_diag_suppress = implicit_return_from_non_void_function +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic push +#pragma diag_suppress implicit_return_from_non_void_function +#endif +#endif +#elif defined __NVCOMPILER +#pragma diagnostic push +#pragma diag_suppress = implicit_return_from_non_void_function +#endif + namespace MDSPAN_IMPL_STANDARD_NAMESPACE { //****************************************** // Return type of submdspan_mapping overloads @@ -31,60 +51,68 @@ template struct submdspan_mapping_result { }; namespace detail { - // We use const Slice& and not Slice&& because the various // submdspan_mapping_impl overloads use their slices arguments // multiple times. This makes perfect forwarding not useful, but we // still don't want to pass those (possibly of size 64 x 3 bits) // objects by value. -template -MDSPAN_INLINE_FUNCTION -constexpr bool -one_slice_out_of_bounds(const IndexType& ext, const Slice& slice) -{ - using common_t = std::common_type_t; - return static_cast(detail::first_of(slice)) == static_cast(ext); +template +MDSPAN_INLINE_FUNCTION constexpr bool +one_slice_out_of_bounds(const IndexType &ext, const Slice &slice) { + using common_t = + std::common_type_t; + return static_cast(detail::first_of(slice)) == + static_cast(ext); } -template -MDSPAN_INLINE_FUNCTION -constexpr bool +template +MDSPAN_INLINE_FUNCTION constexpr bool any_slice_out_of_bounds_helper(std::index_sequence, - const extents& exts, - const Slices& ... slices) -{ + const extents &exts, + const Slices &... slices) { return _MDSPAN_FOLD_OR( - (one_slice_out_of_bounds(exts.extent(RankIndices), slices)) - ); + (one_slice_out_of_bounds(exts.extent(RankIndices), slices))); } -template -MDSPAN_INLINE_FUNCTION -constexpr bool -any_slice_out_of_bounds(const extents& exts, - const Slices& ... 
slices) -{ +template +MDSPAN_INLINE_FUNCTION constexpr bool +any_slice_out_of_bounds(const extents &exts, + const Slices &... slices) { return any_slice_out_of_bounds_helper( - std::make_index_sequence(), - exts, slices...); + std::make_index_sequence(), exts, slices...); } - + // constructs sub strides template -MDSPAN_INLINE_FUNCTION -constexpr auto -construct_sub_strides(const SrcMapping &src_mapping, - std::index_sequence, - const std::tuple &slices_stride_factor) { +MDSPAN_INLINE_FUNCTION constexpr auto construct_sub_strides( + const SrcMapping &src_mapping, std::index_sequence, + const std::tuple &slices_stride_factor) { using index_type = typename SrcMapping::index_type; return std::array{ (static_cast(src_mapping.stride(InvMapIdxs)) * static_cast(std::get(slices_stride_factor)))...}; } + +template +struct is_range_slice { + constexpr static bool value = + std::is_same_v || + std::is_convertible_v>; +}; + +template +constexpr bool is_range_slice_v = is_range_slice::value; + +template +struct is_index_slice { + constexpr static bool value = std::is_convertible_v; +}; + +template +constexpr bool is_index_slice_v = is_index_slice::value; + } // namespace detail //********************************** @@ -93,52 +121,75 @@ construct_sub_strides(const SrcMapping &src_mapping, namespace detail { // Figure out whether to preserve layout_left -template -struct preserve_layout_left_mapping; +template +struct deduce_layout_left_submapping; -template -struct preserve_layout_left_mapping, SubRank, - SliceSpecifiers...> { - constexpr static bool value = - // Preserve layout for rank 0 - (SubRank == 0) || - ( - // Slice specifiers up to subrank need to be full_extent_t - except - // for the last one which could also be tuple but not a strided index - // range slice specifiers after subrank are integrals - ((Idx > SubRank - 1) || // these are only integral slice specifiers - (std::is_same_v) || - ((Idx == SubRank - 1) && - std::is_convertible_v>)) && - ...); +template +struct 
deduce_layout_left_submapping< + IndexType, SubRank, std::index_sequence, SliceSpecifiers...> { + + using count_range = index_sequence_scan_impl< + 0, (is_index_slice_v ? 0 : 1)...>; + + constexpr static int gap_len = + (((Idx > 0 && count_range::get(Idx) == 1 && + is_index_slice_v) + ? 1 + : 0) + + ... + 0); + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_left_value() { + // Use layout_left for rank 0 + if constexpr (SubRank == 0) { + return true; + // Use layout_left for rank 1 result if leftmost slice specifier is range like + } else if constexpr (SubRank == 1) { + return ((Idx > 0 || is_range_slice_v)&&...); + } else { + // Preserve if leftmost SubRank-1 slices are full_extent_t and + // the slice at idx SubRank - 1 is a range and + // for idx > SubRank - 1 the slice is an index + return ((((Idx < SubRank - 1) && std::is_same_v) || + ((Idx == SubRank - 1) && is_range_slice_v) || + ((Idx > SubRank - 1) && is_index_slice_v)) && ...); + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif + } + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_left_padded_value() { + // Technically could also keep layout_left_padded for SubRank==0 + // and SubRank==1 with leftmost slice specifier being a contiguous range + // but we intercept these cases separately + + // In all other cases: + // leftmost slice must be range + // then there can be a gap with index slices + // then SubRank - 2 full_extent slices + // then another range slice + // then more index slices + // e.g. R I I I F F F R I I for obtaining a rank-5 from a rank-10 + return ((((Idx == 0) && is_range_slice_v) || + ((Idx > 0 && Idx <= gap_len) && is_index_slice_v) || + ((Idx > gap_len && Idx < gap_len + SubRank - 1) && std::is_same_v) || + ((Idx == gap_len + SubRank - 1) && is_range_slice_v) || + ((Idx > gap_len + SubRank - 1) && is_index_slice_v)) && ... 
); + } }; + } // namespace detail -// Suppress spurious warning with NVCC about no return statement. -// This is a known issue in NVCC and NVC++ -// Depending on the CUDA and GCC version we need both the builtin -// and the diagnostic push. I tried really hard to find something shorter -// but no luck ... -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic push - #pragma nv_diag_suppress = implicit_return_from_non_void_function - #else - #ifdef __CUDA_ARCH__ - #pragma diagnostic push - #pragma diag_suppress implicit_return_from_non_void_function - #endif - #endif -#elif defined __NVCOMPILER - #pragma diagnostic push - #pragma diag_suppress = implicit_return_from_non_void_function -#endif // Actual submdspan mapping call template template -MDSPAN_INLINE_FUNCTION -constexpr auto -layout_left::mapping::submdspan_mapping_impl(SliceSpecifiers... slices) const { +MDSPAN_INLINE_FUNCTION constexpr auto +layout_left::mapping::submdspan_mapping_impl( + SliceSpecifiers... slices) const { // compute sub extents using src_ext_t = Extents; @@ -146,60 +197,61 @@ layout_left::mapping::submdspan_mapping_impl(SliceSpecifiers... slices) using dst_ext_t = decltype(dst_ext); // figure out sub layout type - constexpr bool preserve_layout = detail::preserve_layout_left_mapping< - decltype(std::make_index_sequence()), dst_ext_t::rank(), - SliceSpecifiers...>::value; - using dst_layout_t = - std::conditional_t; + using deduce_layout = detail::deduce_layout_left_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), + std::make_index_sequence, + SliceSpecifiers...>; + + using dst_layout_t = std::conditional_t< + deduce_layout::layout_left_value(), layout_left, + std::conditional_t< + deduce_layout::layout_left_padded_value(), + MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded, + layout_stride>>; using dst_mapping_t = typename dst_layout_t::template mapping; // Figure out if any slice's lower bound equals the corresponding extent. 
// If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. const bool out_of_bounds = - detail::any_slice_out_of_bounds(this->extents(), slices...); + detail::any_slice_out_of_bounds(this->extents(), slices...); auto offset = static_cast( - out_of_bounds ? - this->required_span_size() : - this->operator()(detail::first_of(slices)...) - ); + out_of_bounds ? this->required_span_size() + : this->operator()(detail::first_of(slices)...)); if constexpr (std::is_same_v) { // layout_left case - return submdspan_mapping_result{dst_mapping_t(dst_ext), offset}; + return submdspan_mapping_result{dst_mapping_t(dst_ext), + offset}; + } else if constexpr (std::is_same_v>) { + return submdspan_mapping_result{ + dst_mapping_t(dst_ext, stride(1 + deduce_layout::gap_len)), offset}; } else { // layout_stride case - auto inv_map = detail::inv_map_rank( - std::integral_constant(), - std::index_sequence<>(), - slices...); - return submdspan_mapping_result{ - dst_mapping_t(dst_ext, detail::construct_sub_strides( - *this, inv_map, - // HIP needs deduction guides to have markups so we need to be explicit - // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have the issue - // But Clang-CUDA also doesn't accept the use of deduction guide so disable it for CUDA alltogether - #if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) - std::tuple{detail::stride_of(slices)...})), - #else - std::tuple{detail::stride_of(slices)...})), - #endif - offset}; + auto inv_map = detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); + return submdspan_mapping_result { + dst_mapping_t(dst_ext, + detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue. But Clang-CUDA also doesn't accept the use of deduction guides, so +// disable them for CUDA altogether +#if 
defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + std::tuple{ + detail::stride_of(slices)...})), +#else + std::tuple{detail::stride_of(slices)...})), +#endif + offset + }; } #if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) __builtin_unreachable(); #endif } -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic pop - #else - #ifdef __CUDA_ARCH__ - #pragma diagnostic pop - #endif - #endif -#elif defined __NVCOMPILER - #pragma diagnostic pop -#endif //********************************** // layout_right submdspan_mapping @@ -207,154 +259,194 @@ layout_left::mapping::submdspan_mapping_impl(SliceSpecifiers... slices) namespace detail { // Figure out whether to preserve layout_right -template -struct preserve_layout_right_mapping; +template +struct deduce_layout_right_submapping; -template -struct preserve_layout_right_mapping, SubRank, - SliceSpecifiers...> { - constexpr static size_t SrcRank = sizeof...(SliceSpecifiers); - constexpr static bool value = - // Preserve layout for rank 0 - (SubRank == 0) || - ( - // The last subrank slice specifiers need to be full_extent_t - except - // for the srcrank-subrank one which could also be tuple but not a - // strided index range slice specifiers before srcrank-subrank are - // integrals - ((Idx < - SrcRank - SubRank) || // these are only integral slice specifiers - (std::is_same_v) || - ((Idx == SrcRank - SubRank) && - std::is_convertible_v>)) && - ...); +template +struct deduce_layout_right_submapping< + IndexType, SubRank, std::index_sequence, SliceSpecifiers...> { + + static constexpr size_t Rank = sizeof...(Idx); + using count_range = index_sequence_scan_impl< + 0, (std::is_convertible_v ? 0 : 1)...>; + //__static_partial_sums...>; + constexpr static int gap_len = + (((Idx < Rank - 1 && count_range::get(Idx) == SubRank - 1 && + std::is_convertible_v) + ? 1 + : 0) + + ... 
+ 0); + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_right_value() { + // Use layout_right for rank 0 + if constexpr (SubRank == 0) { + return true; + // Use layout_right for rank 1 result if rightmost slice specifier is range like + } else if constexpr (SubRank == 1) { + return ((Idx < Rank - 1 || is_range_slice_v)&&...); + } else { + // Preserve if rightmost SubRank-1 slices are full_extent_t and + // the slice at idx Rank-Subrank is a range and + // for idx < Rank - SubRank the slice is an index + return ((((Idx >= Rank - SubRank) && std::is_same_v) || + ((Idx == Rank - SubRank) && is_range_slice_v) || + ((Idx < Rank - SubRank) && is_index_slice_v)) && ...); + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif + } + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_right_padded_value() { + // Technically could also keep layout_right_padded for SubRank==0 + // and SubRank==1 with rightmost slice specifier being a contiguous range + // but we intercept these cases separately + + // In all other cases: + // rightmost slice must be range + // then there can be a gap with index slices + // then SubRank - 2 full_extent slices + // then another range slice + // then more index slices + // e.g. I I R F F F I I I R for obtaining a rank-5 from a rank-10 + return ((((Idx == Rank - 1) && is_range_slice_v) || + ((Idx >= Rank - gap_len - 1 && Idx < Rank - 1) && is_index_slice_v) || + ((Idx > Rank - gap_len - SubRank && Idx < Rank - gap_len - 1) && std::is_same_v) || + ((Idx == Rank - gap_len - SubRank) && is_range_slice_v) || + ((Idx < Rank - gap_len - SubRank) && is_index_slice_v)) && ... ); + } }; + } // namespace detail -// Suppress spurious warning with NVCC about no return statement. -// This is a known issue in NVCC and NVC++ -// Depending on the CUDA and GCC version we need both the builtin -// and the diagnostic push. I tried really hard to find something shorter -// but no luck ... 
-#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic push - #pragma nv_diag_suppress = implicit_return_from_non_void_function - #else - #ifdef __CUDA_ARCH__ - #pragma diagnostic push - #pragma diag_suppress implicit_return_from_non_void_function - #endif - #endif -#elif defined __NVCOMPILER - #pragma diagnostic push - #pragma diag_suppress = implicit_return_from_non_void_function -#endif +// Actual submdspan mapping call template template -MDSPAN_INLINE_FUNCTION -constexpr auto +MDSPAN_INLINE_FUNCTION constexpr auto layout_right::mapping::submdspan_mapping_impl( - SliceSpecifiers... slices) const { - // get sub extents + SliceSpecifiers... slices) const { + + // compute sub extents using src_ext_t = Extents; auto dst_ext = submdspan_extents(extents(), slices...); using dst_ext_t = decltype(dst_ext); - // determine new layout type - constexpr bool preserve_layout = detail::preserve_layout_right_mapping< - decltype(std::make_index_sequence()), dst_ext_t::rank(), - SliceSpecifiers...>::value; - using dst_layout_t = - std::conditional_t; + // figure out sub layout type + using deduce_layout = detail::deduce_layout_right_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), + std::make_index_sequence, + SliceSpecifiers...>; + + using dst_layout_t = std::conditional_t< + deduce_layout::layout_right_value(), layout_right, + std::conditional_t< + deduce_layout::layout_right_padded_value(), + MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded, + layout_stride>>; using dst_mapping_t = typename dst_layout_t::template mapping; // Figure out if any slice's lower bound equals the corresponding extent. // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. const bool out_of_bounds = - detail::any_slice_out_of_bounds(this->extents(), slices...); + detail::any_slice_out_of_bounds(this->extents(), slices...); auto offset = static_cast( - out_of_bounds ? 
- this->required_span_size() : - this->operator()(detail::first_of(slices)...) - ); - + out_of_bounds ? this->required_span_size() + : this->operator()(detail::first_of(slices)...)); + if constexpr (std::is_same_v) { // layout_right case - return submdspan_mapping_result{dst_mapping_t(dst_ext), offset}; - } else { - // layout_stride case - auto inv_map = detail::inv_map_rank( - std::integral_constant(), - std::index_sequence<>(), - slices...); + return submdspan_mapping_result{dst_mapping_t(dst_ext), + offset}; + } else if constexpr (std::is_same_v< + dst_layout_t, + MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded< + dynamic_extent>>) { return submdspan_mapping_result{ - dst_mapping_t(dst_ext, detail::construct_sub_strides( - *this, inv_map, - // HIP needs deduction guides to have markups so we need to be explicit - // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have the issue - // But Clang-CUDA also doesn't accept the use of deduction guide so disable it for CUDA alltogether - #if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) - std::tuple{detail::stride_of(slices)...})), - #else - std::tuple{detail::stride_of(slices)...})), - #endif + dst_mapping_t(dst_ext, + stride(src_ext_t::rank() - 2 - deduce_layout::gap_len)), offset}; + } else { + // layout_stride case + auto inv_map = detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); + return submdspan_mapping_result { + dst_mapping_t(dst_ext, + detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue. But Clang-CUDA also doesn't accept the use of deduction guides, so +// disable them for CUDA altogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + std::tuple{ + detail::stride_of(slices)...})), +#else + std::tuple{detail::stride_of(slices)...})), +#endif + offset + }; } #if 
defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) __builtin_unreachable(); #endif } -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic pop - #else - #ifdef __CUDA_ARCH__ - #pragma diagnostic pop - #endif - #endif -#elif defined __NVCOMPILER - #pragma diagnostic pop -#endif //********************************** // layout_stride submdspan_mapping //********************************* template template -MDSPAN_INLINE_FUNCTION -constexpr auto +MDSPAN_INLINE_FUNCTION constexpr auto layout_stride::mapping::submdspan_mapping_impl( - SliceSpecifiers... slices) const { + SliceSpecifiers... slices) const { auto dst_ext = submdspan_extents(extents(), slices...); using dst_ext_t = decltype(dst_ext); - auto inv_map = detail::inv_map_rank( - std::integral_constant(), - std::index_sequence<>(), - slices...); + auto inv_map = detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); using dst_mapping_t = typename layout_stride::template mapping; // Figure out if any slice's lower bound equals the corresponding extent. // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. const bool out_of_bounds = - detail::any_slice_out_of_bounds(this->extents(), slices...); + detail::any_slice_out_of_bounds(this->extents(), slices...); auto offset = static_cast( - out_of_bounds ? - this->required_span_size() : - this->operator()(detail::first_of(slices)...) - ); - - return submdspan_mapping_result{ - dst_mapping_t(dst_ext, detail::construct_sub_strides( - *this, inv_map, - // HIP needs deduction guides to have markups so we need to be explicit - // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have the issue - #if defined(_MDSPAN_HAS_HIP) || (defined(__NVCC__) && (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10) < 1120) - std::tuple(detail::stride_of(slices)...))), + out_of_bounds ? 
this->required_span_size() + : this->operator()(detail::first_of(slices)...)); + + return submdspan_mapping_result { + dst_mapping_t(dst_ext, + detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue +#if defined(_MDSPAN_HAS_HIP) || \ + (defined(__NVCC__) && \ + (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10) < 1120) + std::tuple( + detail::stride_of(slices)...))), #else - std::tuple(detail::stride_of(slices)...))), + std::tuple(detail::stride_of(slices)...))), #endif - offset}; + offset + }; } } // namespace MDSPAN_IMPL_STANDARD_NAMESPACE + +#if defined __NVCC__ +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic pop +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic pop +#endif +#endif +#elif defined __NVCOMPILER +#pragma diagnostic pop +#endif diff --git a/include/experimental/__p2642_bits/layout_padded.hpp b/include/experimental/__p2642_bits/layout_padded.hpp index 1b02233f..99e24fa4 100644 --- a/include/experimental/__p2642_bits/layout_padded.hpp +++ b/include/experimental/__p2642_bits/layout_padded.hpp @@ -494,6 +494,17 @@ class layout_left_padded::mapping { return !(left == right); } #endif + + // [mdspan.submdspan.mapping], submdspan mapping specialization + template + constexpr auto submdspan_mapping_impl( + SliceSpecifiers... slices) const; + + template + friend constexpr auto submdspan_mapping( + const mapping& src, SliceSpecifiers... slices) { + return src.submdspan_mapping_impl(slices...); + } }; template diff --git a/tests/foo_customizations.hpp b/tests/foo_customizations.hpp index e04bb44e..381b1ad9 100644 --- a/tests/foo_customizations.hpp +++ b/tests/foo_customizations.hpp @@ -225,31 +225,30 @@ class layout_foo::mapping { } #endif +#if MDSPAN_HAS_CXX_17 + template + friend constexpr auto submdspan_mapping( + const mapping& src, SliceSpecifiers... 
slices) { + // use the fact that layout_foo is layout_right with rank 1 or rank 2 + // i.e. we don't need to implement everything here, we just reuse submdspan_mapping for layout_right + Kokkos::layout_right::mapping compatible_mapping(src.extents()); + auto sub_right = submdspan_mapping(compatible_mapping, slices...); + if constexpr (std::is_same_v) { + // NVCC does not like deduction here, so get the extents type explicitly + using sub_ext_t = std::remove_const_t>; + auto sub_mapping = layout_foo::mapping(sub_right.mapping.extents()); + return Kokkos::submdspan_mapping_result{sub_mapping, sub_right.offset}; + } else { + return sub_right; + } + } +#endif + private: _MDSPAN_NO_UNIQUE_ADDRESS extents_type __extents{}; }; -#if MDSPAN_HAS_CXX_17 -template -MDSPAN_INLINE_FUNCTION -constexpr auto -submdspan_mapping(const layout_foo::mapping &src_mapping, - SliceSpecifiers... slices) { - // use the fact that layout_foo is layout_right with rank 1 or rank 2 - // i.e. we don't need to implement everything here, we just reuse submdspan_mapping for layout_right - Kokkos::layout_right::mapping compatible_mapping(src_mapping.extents()); - auto sub_right = submdspan_mapping(compatible_mapping, slices...); - if constexpr (std::is_same_v) { - // NVCC does not like deduction here, so get the extents type explicitly - using sub_ext_t = std::remove_const_t>; - auto sub_mapping = layout_foo::mapping(sub_right.mapping.extents()); - return Kokkos::submdspan_mapping_result{sub_mapping, sub_right.offset}; - } else { - return sub_right; - } -} -#endif } #endif diff --git a/tests/test_submdspan.cpp b/tests/test_submdspan.cpp index c565056c..7ab0ceba 100644 --- a/tests/test_submdspan.cpp +++ b/tests/test_submdspan.cpp @@ -145,20 +145,28 @@ using submdspan_test_types = , std::tuple, args_t<6,4,5,6,7,8>, Kokkos::extents, int, int, int, std::pair, Kokkos::full_extent_t, Kokkos::full_extent_t> , std::tuple, args_t<6,4,5,6,7,8>, Kokkos::extents, int, int, int, int, std::pair, 
Kokkos::full_extent_t> , std::tuple, args_t<6,4,5,6,7,8>, Kokkos::extents, int, int, int, int, int, Kokkos::full_extent_t> + // LayoutLeft to layout_left_padded + , std::tuple, Kokkos::dextents, args_t<10,20>, Kokkos::dextents, std::pair, Kokkos::full_extent_t> + , std::tuple, Kokkos::dextents, args_t<10,20,30>, Kokkos::dextents, std::pair, int, Kokkos::full_extent_t> + , std::tuple, Kokkos::dextents, args_t<10,20,30,40>, Kokkos::dextents, std::pair, int, Kokkos::full_extent_t, std::pair> + , std::tuple, Kokkos::dextents, args_t<10,20,30,40,50>, Kokkos::dextents, std::pair, int, Kokkos::full_extent_t, std::pair, int> // LayoutLeft to LayoutStride , std::tuple, args_t<10>, Kokkos::dextents, Kokkos::strided_slice> , std::tuple, args_t<10,20>, Kokkos::dextents, Kokkos::strided_slice, int> - , std::tuple, args_t<10,20>, Kokkos::dextents, std::pair, Kokkos::full_extent_t> , std::tuple, args_t<10,20>, Kokkos::dextents, std::pair, Kokkos::strided_slice> , std::tuple, args_t<10,20>, Kokkos::dextents, Kokkos::strided_slice, std::pair> , std::tuple, args_t<10,20>, Kokkos::dextents, Kokkos::strided_slice, Kokkos::strided_slice> , std::tuple, args_t<6,4,5,6,7,8>, Kokkos::extents, Kokkos::full_extent_t, int, std::pair, int, int, Kokkos::full_extent_t> , std::tuple, args_t<6,4,5,6,7,8>, Kokkos::extents, int, Kokkos::full_extent_t, std::pair, int, Kokkos::full_extent_t, int> + // layout_right to layout_right_padded + , std::tuple, Kokkos::dextents, args_t<10,20>, Kokkos::dextents, Kokkos::full_extent_t, std::pair> + , std::tuple, Kokkos::dextents, args_t<10,20,30>, Kokkos::dextents, Kokkos::full_extent_t, int, std::pair> + , std::tuple, Kokkos::dextents, args_t<10,20,30,40>, Kokkos::dextents, std::pair, Kokkos::full_extent_t, int, std::pair> + , std::tuple, Kokkos::dextents, args_t<10,20,30,40,50>, Kokkos::dextents, int, std::pair, Kokkos::full_extent_t, int, std::pair> // layout_right to layout_stride , std::tuple, args_t<10>, Kokkos::dextents, Kokkos::strided_slice> , 
std::tuple, args_t<10>, Kokkos::extents, Kokkos::strided_slice,std::integral_constant>> , std::tuple, args_t<10,20>, Kokkos::dextents, Kokkos::strided_slice, int> - , std::tuple, args_t<10,20>, Kokkos::dextents, Kokkos::full_extent_t, std::pair> , std::tuple, args_t<10,20>, Kokkos::dextents, std::pair, Kokkos::strided_slice> , std::tuple, args_t<10,20>, Kokkos::dextents, Kokkos::strided_slice, std::pair> , std::tuple, args_t<10,20>, Kokkos::dextents, Kokkos::strided_slice, Kokkos::strided_slice> diff --git a/tests/test_submdspan_static_slice.cpp b/tests/test_submdspan_static_slice.cpp index d0567a89..e7427e49 100644 --- a/tests/test_submdspan_static_slice.cpp +++ b/tests/test_submdspan_static_slice.cpp @@ -175,7 +175,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Left_iddd_FullIndexFull) { { using expected_extents_type = Kokkos::dextents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_left_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&] (auto integralConstant) { @@ -202,7 +202,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Left_i345_FullIndexFull) { { using expected_extents_type = Kokkos::extents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_left_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&] (auto integralConstant) { @@ -382,7 +382,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Left_iddd_TupleFullTuple) { { using expected_extents_type = Kokkos::dextents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_left_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&] (auto sliceSpec0, auto sliceSpec1) { @@ -396,7 +396,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Left_iddd_TupleFullTuple) { } { using expected_extents_type = Kokkos::extents; - using expected_layout_type = Kokkos::layout_stride; + using 
expected_layout_type = Kokkos::Experimental::layout_left_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&] (auto sliceSpec0, auto sliceSpec1) { @@ -424,7 +424,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Left_i345_TupleFullTuple) { { using expected_extents_type = Kokkos::extents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_left_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&] (auto sliceSpec0, auto sliceSpec1) { @@ -438,7 +438,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Left_i345_TupleFullTuple) { } { using expected_extents_type = Kokkos::extents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_left_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&] (auto sliceSpec0, auto sliceSpec1) { @@ -466,7 +466,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Right_iddd_TupleFullTuple) { { using expected_extents_type = Kokkos::dextents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_right_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&] (auto sliceSpec0, auto sliceSpec1) { @@ -480,7 +480,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Right_iddd_TupleFullTuple) { } { using expected_extents_type = Kokkos::extents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_right_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&] (auto sliceSpec0, auto sliceSpec1) { @@ -508,7 +508,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Right_i345_TupleFullTuple) { { using expected_extents_type = Kokkos::extents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_right_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&] 
(auto sliceSpec0, auto sliceSpec1) { @@ -522,7 +522,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Right_i345_TupleFullTuple) { } { using expected_extents_type = Kokkos::extents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_right_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&] (auto sliceSpec0, auto sliceSpec1) { @@ -598,7 +598,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Right_idd_FullTuple) { input_extents_type input_extents{3, 4}; { using expected_extents_type = Kokkos::dextents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_right_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&](auto sliceSpec) { @@ -611,7 +611,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Right_idd_FullTuple) { } { using expected_extents_type = Kokkos::extents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_right_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&](auto sliceSpec) { @@ -633,7 +633,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Left_idd_TupleFull) { input_extents_type input_extents{3, 4}; { using expected_extents_type = Kokkos::dextents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_left_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&](auto sliceSpec) { @@ -646,7 +646,7 @@ TEST(TestMdspan, SubmdspanStaticSlice_Left_idd_TupleFull) { } { using expected_extents_type = Kokkos::extents; - using expected_layout_type = Kokkos::layout_stride; + using expected_layout_type = Kokkos::Experimental::layout_left_padded; using expected_output_mdspan_type = Kokkos::mdspan; auto runTest = [&](auto sliceSpec) {