From 51664bb2ac476132731eb1a241ce63fc7fb253cd Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Mon, 9 Oct 2023 07:10:22 +0000 Subject: [PATCH] Sync DDC --- vendor/ddc/include/ddc/chunk_span.hpp | 10 +-- .../ddc/include/ddc/detail/tagged_vector.hpp | 2 + vendor/ddc/include/ddc/detail/type_seq.hpp | 55 ++++++++++++++++ vendor/ddc/include/ddc/discrete_domain.hpp | 40 +++++++++++- vendor/ddc/include/ddc/for_each.hpp | 32 +++++++++ vendor/ddc/include/ddc/kernels/fft.hpp | 9 ++- vendor/ddc/include/ddc/transform_reduce.hpp | 65 +++++++++++++++++-- vendor/ddc/tests/discrete_domain.cpp | 23 ++++++- vendor/ddc/tests/for_each.cpp | 37 +++++++++++ vendor/ddc/tests/transform_reduce.cpp | 64 +++++++++++++++++- vendor/ddc/tests/type_seq.cpp | 12 ++++ 11 files changed, 332 insertions(+), 17 deletions(-) diff --git a/vendor/ddc/include/ddc/chunk_span.hpp b/vendor/ddc/include/ddc/chunk_span.hpp index 198b4a79a..2b537f755 100644 --- a/vendor/ddc/include/ddc/chunk_span.hpp +++ b/vendor/ddc/include/ddc/chunk_span.hpp @@ -89,7 +89,7 @@ class ChunkSpan, LayoutStridedPolicy, Memo protected: template - auto get_slicer_for(DiscreteElement const& c) const + constexpr auto get_slicer_for(DiscreteElement const& c) const { DDC_IF_NVCC_THEN_PUSH_AND_SUPPRESS(implicit_return_from_non_void_function) if constexpr (in_tags_v>) { @@ -101,7 +101,7 @@ class ChunkSpan, LayoutStridedPolicy, Memo } template - auto get_slicer_for(DiscreteDomain const& c) const + constexpr auto get_slicer_for(DiscreteDomain const& c) const { DDC_IF_NVCC_THEN_PUSH_AND_SUPPRESS(implicit_return_from_non_void_function) if constexpr (in_tags_v>) { @@ -176,7 +176,9 @@ class ChunkSpan, LayoutStridedPolicy, Memo * @param allocation_mdspan the allocation mdspan to the data * @param domain the domain that sustains the view */ - constexpr ChunkSpan(allocation_mdspan_type allocation_mdspan, mdomain_type const& domain) + KOKKOS_FUNCTION constexpr ChunkSpan( + allocation_mdspan_type allocation_mdspan, + mdomain_type const& domain) { namespace stdex = std::experimental; extents_type extents_s((front(domain) + extents(domain)).uid()...); @@ -229,7 +231,7 @@ class ChunkSpan, LayoutStridedPolicy, Memo memory_space>(subview, select_by_type_seq(this->m_domain)); } - /** Slice out some dimensions + /** Restrict to a subdomain */ template constexpr auto operator[](DiscreteDomain const& odomain) const diff --git a/vendor/ddc/include/ddc/detail/tagged_vector.hpp b/vendor/ddc/include/ddc/detail/tagged_vector.hpp index dace14d5b..0f86dcbe1 100644 --- a/vendor/ddc/include/ddc/detail/tagged_vector.hpp +++ b/vendor/ddc/include/ddc/detail/tagged_vector.hpp @@ -242,6 +242,8 @@ class TaggedVector : public ConversionOperators m_values; public: + using value_type = ElementType; + static constexpr std::size_t size() noexcept { return sizeof...(Tags); diff --git a/vendor/ddc/include/ddc/detail/type_seq.hpp b/vendor/ddc/include/ddc/detail/type_seq.hpp index 9c139e885..470a89006 100644 --- a/vendor/ddc/include/ddc/detail/type_seq.hpp +++ b/vendor/ddc/include/ddc/detail/type_seq.hpp @@ -101,6 +101,58 @@ struct TypeSeqMerge, TypeSeq, TypeSeq { }; +/// A is replaced by element of C at same position than the first element of B equal to A. +/// Remark : It may not be usefull in its own, it is an helper for TypeSeqReplace +template +struct TypeSeqReplaceSingle; + +template +struct TypeSeqReplaceSingle, TypeSeq<>> +{ + using type = TagA; +}; + +template +struct TypeSeqReplaceSingle< + TagA, + TypeSeq, + TypeSeq> + : std::conditional_t< + std::is_same_v, + TypeSeqReplaceSingle, TypeSeq<>>, + TypeSeqReplaceSingle, TypeSeq>> +{ +}; + +/// R contains all elements of A except those of B which are replaced by those of C. +/// Remark : This operation preserves the orders. +template +struct TypeSeqReplace; + +template +struct TypeSeqReplace, TypeSeq, TypeSeq, TypeSeq> +{ + using type = TypeSeq; +}; + +template +struct TypeSeqReplace< + TypeSeq, + TypeSeq, + TypeSeq, + TypeSeq> + : TypeSeqReplace< + TypeSeq, + TypeSeq, + TypeSeq, + TypeSeq, + TypeSeq>::type>> +{ +}; + } // namespace detail template @@ -137,4 +189,7 @@ using type_seq_remove_t = typename detail::TypeSeqRemove using type_seq_merge_t = typename detail::TypeSeqMerge::type; +template +using type_seq_replace_t = + typename detail::TypeSeqReplace>::type; } // namespace ddc diff --git a/vendor/ddc/include/ddc/discrete_domain.hpp b/vendor/ddc/include/ddc/discrete_domain.hpp index f235a7df5..f323e9967 100644 --- a/vendor/ddc/include/ddc/discrete_domain.hpp +++ b/vendor/ddc/include/ddc/discrete_domain.hpp @@ -95,7 +95,7 @@ class DiscreteDomain } #endif - std::size_t size() const + constexpr std::size_t size() const { return (1ul * ... * (uid(m_element_end) - uid(m_element_begin))); } @@ -405,6 +405,44 @@ constexpr auto remove_dims_of( return detail::convert_type_seq_to_discrete_domain(DDom_a); } + +// Checks if dimension of DDom_a is DDim1. If not, returns restriction to DDim2 of DDom_b. May not be usefull in its own, it helps for replace_dim_of +template +constexpr std::conditional_t< + std::is_same_v, + ddc::DiscreteDomain, + ddc::DiscreteDomain> +replace_dim_of_1d( + DiscreteDomain const& DDom_a, + [[maybe_unused]] DiscreteDomain const& DDom_b) noexcept +{ + if constexpr (std::is_same_v) { + return ddc::select(DDom_b); + } else { + return DDom_a; + } +} + +// Replace in DDom_a the dimension Dim1 by the dimension Dim2 of DDom_b +template +constexpr auto replace_dim_of( + DiscreteDomain const& DDom_a, + [[maybe_unused]] DiscreteDomain const& DDom_b) noexcept +{ + // TODO : static_asserts + using TagSeqA = detail::TypeSeq; + using TagSeqB = detail::TypeSeq; + using TagSeqC = detail::TypeSeq; + + using type_seq_r = ddc::type_seq_replace_t; + return ddc::detail::convert_type_seq_to_discrete_domain( + replace_dim_of_1d< + DDim1, + DDim2, + DDimsA, + DDimsB...>(ddc::select(DDom_a), DDom_b)...); +} + template constexpr DiscreteVector extents(DiscreteDomain const& domain) noexcept { diff --git a/vendor/ddc/include/ddc/for_each.hpp b/vendor/ddc/include/ddc/for_each.hpp index 41c66562a..cf27f4352 100644 --- a/vendor/ddc/include/ddc/for_each.hpp +++ b/vendor/ddc/include/ddc/for_each.hpp @@ -30,11 +30,27 @@ class ForEachKokkosLambdaAdapter public: ForEachKokkosLambdaAdapter(F const& f) : m_f(f) {} + template = true> + KOKKOS_IMPL_FORCEINLINE void operator()([[maybe_unused]] index_type unused_id) const + { + m_f(DiscreteElement<>()); + } + + template = true> + KOKKOS_FORCEINLINE_FUNCTION void operator()( + use_annotated_operator, + [[maybe_unused]] index_type unused_id) const + { + m_f(DiscreteElement<>()); + } + + template 0), bool> = true> KOKKOS_IMPL_FORCEINLINE void operator()(index_type... ids) const { m_f(DiscreteElement(ids...)); } + template 0), bool> = true> KOKKOS_FORCEINLINE_FUNCTION void operator()(use_annotated_operator, index_type... ids) const { @@ -42,6 +58,22 @@ class ForEachKokkosLambdaAdapter } }; +template +inline void for_each_kokkos( + [[maybe_unused]] DiscreteDomain<> const& domain, + Functor const& f) noexcept +{ + if constexpr (need_annotated_operator()) { + Kokkos::parallel_for( + Kokkos::RangePolicy(0, 1), + ForEachKokkosLambdaAdapter(f)); + } else { + Kokkos::parallel_for( + Kokkos::RangePolicy(0, 1), + ForEachKokkosLambdaAdapter(f)); + } +} + template inline void for_each_kokkos(DiscreteDomain const& domain, Functor const& f) noexcept { diff --git a/vendor/ddc/include/ddc/kernels/fft.hpp b/vendor/ddc/include/ddc/kernels/fft.hpp index 9d73b3725..9807bd550 100644 --- a/vendor/ddc/include/ddc/kernels/fft.hpp +++ b/vendor/ddc/include/ddc/kernels/fft.hpp @@ -153,7 +153,10 @@ using _fftw_plan = std::conditional_t, float>, fft // _fftw_plan_many_dft : templated function working for all types of transformation template -_fftw_plan _fftw_plan_many_dft(PenultArg penultArg, LastArg lastArg, Args... args) +_fftw_plan _fftw_plan_many_dft( + [[maybe_unused]] PenultArg penultArg, + LastArg lastArg, + Args... args) { // Ugly, penultArg and lastArg are passed before the rest because of a limitation of C++ (parameter packs must be last arguments) const TransformType transformType = transform_type_v; if constexpr (transformType == TransformType::R2C && std::is_same_v) @@ -216,7 +219,7 @@ constexpr auto cufft_transform_type() // cufftExec : argument passed in the cufftMakePlan function // _fftw_plan_many_dft : templated function working for all types of transformation template -cufftResult _cufftExec(LastArg lastArg, Args... args) +cufftResult _cufftExec([[maybe_unused]] LastArg lastArg, Args... args) { // Ugly for same reason as fftw const TransformType transformType = transform_type_v; if constexpr (transformType == TransformType::R2C && std::is_same_v) @@ -280,7 +283,7 @@ constexpr auto hipfft_transform_type() // hipfftExec : argument passed in the hipfftMakePlan function // _fftw_plan_many_dft : templated function working for all types of transformation template -hipfftResult _hipfftExec(LastArg lastArg, Args... args) +hipfftResult _hipfftExec([[maybe_unused]] LastArg lastArg, Args... args) { const TransformType transformType = transform_type_v; if constexpr (transformType == TransformType::R2C && std::is_same_v) diff --git a/vendor/ddc/include/ddc/transform_reduce.hpp b/vendor/ddc/include/ddc/transform_reduce.hpp index 90848289b..aa9708126 100644 --- a/vendor/ddc/include/ddc/transform_reduce.hpp +++ b/vendor/ddc/include/ddc/transform_reduce.hpp @@ -138,6 +138,25 @@ class TransformReducerKokkosLambdaAdapter { } + template = true> + KOKKOS_IMPL_FORCEINLINE void operator()( + [[maybe_unused]] index_type unused_id, + typename Reducer::value_type& a) const + { + a = reducer(a, functor(DiscreteElement<>())); + } + + template = true> + KOKKOS_FORCEINLINE_FUNCTION void operator()( + use_annotated_operator, + [[maybe_unused]] index_type unused_id, + typename Reducer::value_type& a) const + + { + a = reducer(a, functor(DiscreteElement<>())); + } + + template 0), bool> = true> KOKKOS_IMPL_FORCEINLINE void operator()( index_type... ids, typename Reducer::value_type& a) const @@ -145,6 +164,8 @@ class TransformReducerKokkosLambdaAdapter a = reducer(a, functor(DiscreteElement(ids...))); } + + template 0), bool> = true> KOKKOS_FORCEINLINE_FUNCTION void operator()( use_annotated_operator, index_type... ids, @@ -154,6 +175,40 @@ class TransformReducerKokkosLambdaAdapter } }; +/** A parallel reduction over a nD domain using the default Kokkos execution space + * @param[in] domain the range over which to apply the algorithm + * @param[in] neutral the neutral element of the reduction operation + * @param[in] reduce a binary FunctionObject that will be applied in unspecified order to the + * results of transform, the results of other reduce and neutral. + * @param[in] transform a unary FunctionObject that will be applied to each element of the input + * range. The return type must be acceptable as input to reduce + */ +template +inline T transform_reduce_kokkos( + [[maybe_unused]] DiscreteDomain<> const& domain, + T neutral, + BinaryReductionOp const& reduce, + UnaryTransformOp const& transform) noexcept +{ + T result = neutral; + if constexpr (need_annotated_operator()) { + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, 1), + TransformReducerKokkosLambdaAdapter< + BinaryReductionOp, + UnaryTransformOp>(reduce, transform), + ddc_to_kokkos_reducer_t(result)); + } else { + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, 1), + TransformReducerKokkosLambdaAdapter< + BinaryReductionOp, + UnaryTransformOp>(reduce, transform), + ddc_to_kokkos_reducer_t(result)); + } + return result; +} + /** A parallel reduction over a nD domain using the default Kokkos execution space * @param[in] domain the range over which to apply the algorithm * @param[in] neutral the neutral element of the reduction operation @@ -172,9 +227,9 @@ inline T transform_reduce_kokkos( T result = neutral; if constexpr (need_annotated_operator()) { Kokkos::parallel_reduce( - Kokkos::RangePolicy( - select(domain).front().uid(), - select(domain).back().uid() + 1), + Kokkos::RangePolicy< + ExecSpace, + use_annotated_operator>(domain.front().uid(), domain.back().uid() + 1), TransformReducerKokkosLambdaAdapter< BinaryReductionOp, UnaryTransformOp, @@ -182,9 +237,7 @@ inline T transform_reduce_kokkos( ddc_to_kokkos_reducer_t(result)); } else { Kokkos::parallel_reduce( - Kokkos::RangePolicy( - select(domain).front().uid(), - select(domain).back().uid() + 1), + Kokkos::RangePolicy(domain.front().uid(), domain.back().uid() + 1), TransformReducerKokkosLambdaAdapter< BinaryReductionOp, UnaryTransformOp, diff --git a/vendor/ddc/tests/discrete_domain.cpp b/vendor/ddc/tests/discrete_domain.cpp index 1ee421378..63655eaad 100644 --- a/vendor/ddc/tests/discrete_domain.cpp +++ b/vendor/ddc/tests/discrete_domain.cpp @@ -35,6 +35,10 @@ using DElemYX = ddc::DiscreteElement; using DVectYX = ddc::DiscreteVector; using DDomYX = ddc::DiscreteDomain; +using DElemXZ = ddc::DiscreteElement; +using DVectXZ = ddc::DiscreteVector; +using DDomXZ = ddc::DiscreteDomain; + using DElemZY = ddc::DiscreteElement; using DVectZY = ddc::DiscreteVector; using DDomZY = ddc::DiscreteDomain; @@ -44,19 +48,24 @@ using DDomZY = ddc::DiscreteDomain; static DElemX constexpr lbound_x(50); static DVectX constexpr nelems_x(3); static DElemX constexpr sentinel_x(lbound_x + nelems_x); -static DElemX constexpr ubound_x(sentinel_x - 1); //TODO: correct type +static DElemX constexpr ubound_x(sentinel_x - 1); static DElemY constexpr lbound_y(4); static DVectY constexpr nelems_y(12); static DElemY constexpr sentinel_y(lbound_y + nelems_y); -static DElemY constexpr ubound_y(sentinel_y - 1); //TODO: correct type +static DElemY constexpr ubound_y(sentinel_y - 1); +static DElemZ constexpr lbound_z(7); +static DVectZ constexpr nelems_z(15); static DElemXY constexpr lbound_x_y(lbound_x, lbound_y); static DVectXY constexpr nelems_x_y(nelems_x, nelems_y); static DElemXY constexpr ubound_x_y(ubound_x, ubound_y); +static DElemXZ constexpr lbound_x_z(lbound_x, lbound_z); +static DVectXZ constexpr nelems_x_z(nelems_x, nelems_z); + } // namespace TEST(ProductMDomainTest, Constructor) @@ -129,6 +138,16 @@ TEST(ProductMDomainTest, Diff) EXPECT_EQ(subdomain, dom_x); } +TEST(ProductMDomainTest, Replace) +{ + DDomXY const dom_x_y = DDomXY(lbound_x_y, nelems_x_y); + DDomZ const dom_z = DDomZ(lbound_z, nelems_z); + DDomXZ const dom_x_z = DDomXZ(lbound_x_z, nelems_x_z); + auto const subdomain = ddc::replace_dim_of(dom_x_y, dom_z); + EXPECT_EQ(subdomain, dom_x_z); +} + + TEST(ProductMDomainTest, TakeFirst) { DDomXY const dom_x_y(lbound_x_y, nelems_x_y); diff --git a/vendor/ddc/tests/for_each.cpp b/vendor/ddc/tests/for_each.cpp index 04144f4a5..7056576c9 100644 --- a/vendor/ddc/tests/for_each.cpp +++ b/vendor/ddc/tests/for_each.cpp @@ -72,6 +72,15 @@ TEST(ForEachSerialHost, TwoDimensions) EXPECT_EQ(std::count(storage.begin(), storage.end(), 1), dom.size()); } +TEST(ForEachParallelHost, ZeroDimension) +{ + DDom0D const dom; + std::vector storage(dom.size(), 0); + ddc::ChunkSpan view(storage.data(), dom); + ddc::for_each(ddc::policies::parallel_host, dom, [=](DElem0D const i) { view(i) += 1; }); + EXPECT_EQ(std::count(storage.begin(), storage.end(), 1), dom.size()); +} + TEST(ForEachParallelHost, OneDimension) { DDomX const dom(lbound_x, nelems_x); @@ -92,6 +101,34 @@ TEST(ForEachParallelHost, TwoDimensions) namespace { +void TestForEachParallelDeviceZeroDimension() +{ + DDom0D const dom; + ddc::Chunk> storage(dom); + Kokkos::deep_copy(storage.allocation_kokkos_view(), 0); + ddc::ChunkSpan view(storage.span_view()); + ddc::for_each( + ddc::policies::parallel_device, + dom, + DDC_LAMBDA(DElem0D const i) { view(i) += 1; }); + int const* const ptr = storage.data_handle(); + int sum; + Kokkos::parallel_reduce( + dom.size(), + KOKKOS_LAMBDA(std::size_t i, int& local_sum) { local_sum += ptr[i]; }, + Kokkos::Sum(sum)); + EXPECT_EQ(sum, dom.size()); +} + +} // namespace + +TEST(ForEachParallelDevice, ZeroDimension) +{ + TestForEachParallelDeviceZeroDimension(); +} + +namespace { + void TestForEachParallelDeviceOneDimension() { DDomX const dom(lbound_x, nelems_x); diff --git a/vendor/ddc/tests/transform_reduce.cpp b/vendor/ddc/tests/transform_reduce.cpp index 2d44e0ef1..900f6d4e0 100644 --- a/vendor/ddc/tests/transform_reduce.cpp +++ b/vendor/ddc/tests/transform_reduce.cpp @@ -6,6 +6,10 @@ #include +using DElem0D = ddc::DiscreteElement<>; +using DVect0D = ddc::DiscreteVector<>; +using DDom0D = ddc::DiscreteDomain<>; + struct DDimX; using DElemX = ddc::DiscreteElement; using DVectX = ddc::DiscreteVector; @@ -29,6 +33,23 @@ static DVectY constexpr nelems_y(12); static DElemXY constexpr lbound_x_y(lbound_x, lbound_y); static DVectXY constexpr nelems_x_y(nelems_x, nelems_y); +TEST(TransformReduceSerialHost, ZeroDimension) +{ + DDom0D const dom; + std::vector storage(dom.size(), 0); + ddc::ChunkSpan chunk(storage.data(), dom); + int count = 0; + ddc::for_each(dom, [&](DElem0D const i) { chunk(i) = count++; }); + EXPECT_EQ( + ddc::transform_reduce( + ddc::policies::serial_host, + dom, + 0, + ddc::reducer::sum(), + [&](DElem0D const i) { return chunk(i); }), + dom.size() * (dom.size() - 1) / 2); +} + TEST(TransformReduceSerialHost, OneDimension) { DDomX const dom(lbound_x, nelems_x); @@ -62,7 +83,22 @@ TEST(TransformReduceSerialHost, TwoDimensions) [&](DElemXY const ixy) { return chunk(ixy); }), dom.size() * (dom.size() - 1) / 2); } - +TEST(TransformReduceParallelHost, ZeroDimension) +{ + DDom0D const dom; + std::vector storage(dom.size(), 0); + ddc::ChunkSpan chunk(storage.data(), dom); + int count = 0; + ddc::for_each(dom, [&](DElem0D const i) { chunk(i) = count++; }); + EXPECT_EQ( + ddc::transform_reduce( + ddc::policies::parallel_host, + dom, + 0, + ddc::reducer::sum(), + [&](DElem0D const i) { return chunk(i); }), + dom.size() * (dom.size() - 1) / 2); +} TEST(TransformReduceParallelHost, OneDimension) { DDomX const dom(lbound_x, nelems_x); @@ -97,6 +133,32 @@ TEST(TransformReduceParallelHost, TwoDimensions) dom.size() * (dom.size() - 1) / 2); } +static void TestTransformReduceParallelDeviceZeroDimension() +{ + DDom0D const dom; + ddc::Chunk> storage(dom); + ddc::ChunkSpan const chunk(storage.span_view()); + Kokkos::View count("count"); + Kokkos::deep_copy(count, 0); + ddc::for_each( + ddc::policies::parallel_device, + dom, + DDC_LAMBDA(DElem0D const i) { chunk(i) = Kokkos::atomic_fetch_add(&count(), 1); }); + EXPECT_EQ( + ddc::transform_reduce( + ddc::policies::parallel_device, + dom, + 0, + ddc::reducer::sum(), + DDC_LAMBDA(DElem0D const i) { return chunk(i); }), + dom.size() * (dom.size() - 1) / 2); +} + +TEST(TransformReduceParallelDevice, ZeroDimension) +{ + TestTransformReduceParallelDeviceZeroDimension(); +} + static void TestTransformReduceParallelDeviceOneDimension() { DDomX const dom(lbound_x, nelems_x); diff --git a/vendor/ddc/tests/type_seq.cpp b/vendor/ddc/tests/type_seq.cpp index 054e27761..9a8d650b7 100644 --- a/vendor/ddc/tests/type_seq.cpp +++ b/vendor/ddc/tests/type_seq.cpp @@ -11,6 +11,8 @@ namespace { struct a; struct b; struct c; +struct d; +struct e; struct y; struct z; @@ -62,3 +64,13 @@ TEST(TypeSeqTest, Merge) using ExpectedR = ddc::detail::TypeSeq; EXPECT_TRUE((ddc::type_seq_same_v)); } + +TEST(TypeSeqTest, Replace) +{ + using A = ddc::detail::TypeSeq; + using B = ddc::detail::TypeSeq; + using C = ddc::detail::TypeSeq; + using R = ddc::type_seq_replace_t; + using ExpectedR = ddc::detail::TypeSeq; + EXPECT_TRUE((ddc::type_seq_same_v)); +}