Skip to content

Commit

Permalink
Make ReduceData::value safer
Browse files Browse the repository at this point in the history
Although ReduceData::value is meant to be called only once, the user might
call it multiple times. This will result in the wrong result for sum
reduction because of how the value function is implemented. In this PR, we
fix this defect by allowing the function be called multiple times.
  • Loading branch information
WeiqunZhang committed Jul 13, 2023
1 parent 5117d21 commit 66c1e69
Showing 1 changed file with 22 additions and 8 deletions.
30 changes: 22 additions & 8 deletions Src/Base/AMReX_Reduce.H
Original file line number Diff line number Diff line change
Expand Up @@ -666,25 +666,29 @@ public:
template <typename D>
typename D::Type value (D & reduce_data)
{
auto hp = reduce_data.hostPtr();

if (m_result_is_ready) {
return *hp;
}

using ReduceTuple = typename D::Type;
auto const& stream = Gpu::gpuStream();
auto hp = reduce_data.hostPtr();
auto dp = reduce_data.devicePtr();
auto const& nblocks = reduce_data.nBlocks();
#if defined(AMREX_USE_SYCL)
if (reduce_data.maxStreamIndex() == 0 && nblocks[0] <= 4096) {
const int N = nblocks[0];
if (N == 0) {
Reduce::detail::for_each_init<0, ReduceTuple, Ps...>(*hp);
return *hp;
} else {
Gpu::PinnedVector<ReduceTuple> tmp(N);
Gpu::dtoh_memcpy_async(tmp.data(), dp, sizeof(ReduceTuple)*N);
Gpu::streamSynchronize();
for (int i = 1; i < N; ++i) {
Reduce::detail::for_each_local<0, ReduceTuple, Ps...>(tmp[0], tmp[i]);
}
return tmp[0];
*hp = tmp[0];
}
} else
#endif
Expand Down Expand Up @@ -738,9 +742,14 @@ public:
});
#endif
Gpu::streamSynchronize();
return *hp;
}

m_result_is_ready = true;
return *hp;
}

private:
bool m_result_is_ready = false;
};

namespace Reduce {
Expand Down Expand Up @@ -1135,15 +1144,20 @@ public:
template <typename D>
typename D::Type value (D & reduce_data)
{
using ReduceTuple = typename D::Type;
auto& rrv = reduce_data.reference();
if (rrv.size() > 1) {
for (int i = 1, N = rrv.size(); i < N; ++i) {
Reduce::detail::for_each_local<0, ReduceTuple, Ps...>(rrv[0], rrv[i]);
if (! m_result_is_ready) {
using ReduceTuple = typename D::Type;
if (rrv.size() > 1) {
for (int i = 1, N = rrv.size(); i < N; ++i) {
Reduce::detail::for_each_local<0, ReduceTuple, Ps...>(rrv[0], rrv[i]);
}
}
m_result_is_ready = true;
}
return rrv[0];
}

bool m_result_is_ready = false;
};

namespace Reduce {
Expand Down

0 comments on commit 66c1e69

Please sign in to comment.