Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert to val-op reductions #485

Closed
wants to merge 8 commits (the author and the base/head branch names are missing from this capture)
1 change: 1 addition & 0 deletions src/algorithm/ATOMIC.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class ATOMIC : public KernelBase
void setOpenMPTuningDefinitions(VariantID vid);
void setCudaTuningDefinitions(VariantID vid);
void setHipTuningDefinitions(VariantID vid);
void setOpenMPTargetTuningDefinitions(VariantID vid);

template < size_t replication >
void runSeqVariantReplicate(VariantID vid);
Expand Down
3 changes: 2 additions & 1 deletion src/algorithm/REDUCE_SUM-Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,8 @@ void REDUCE_SUM::runCudaVariantRAJANewReduce(VariantID vid)
RAJA::forall<exec_policy>( res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tsum),
[=] __device__ (Index_type i, Real_type& sum) {
[=] __device__ (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& sum) {
REDUCE_SUM_BODY;
}
);
Expand Down
3 changes: 2 additions & 1 deletion src/algorithm/REDUCE_SUM-Hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,8 @@ void REDUCE_SUM::runHipVariantRAJANewReduce(VariantID vid)
RAJA::forall<exec_policy>( res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tsum),
[=] __device__ (Index_type i, Real_type& sum) {
[=] __device__ (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& sum) {
REDUCE_SUM_BODY;
}
);
Expand Down
9 changes: 6 additions & 3 deletions src/algorithm/REDUCE_SUM-OMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,16 @@ void REDUCE_SUM::runOpenMPVariant(VariantID vid, size_t tune_idx)

case RAJA_OpenMP : {

RAJA::resources::Host res;

if (tune_idx == 0) {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::ReduceSum<RAJA::omp_reduce, Real_type> sum(m_sum_init);

RAJA::forall<RAJA::omp_parallel_for_exec>(
RAJA::forall<RAJA::omp_parallel_for_exec>(res,
RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
REDUCE_SUM_BODY;
Expand All @@ -101,10 +103,11 @@ void REDUCE_SUM::runOpenMPVariant(VariantID vid, size_t tune_idx)

Real_type tsum = m_sum_init;

RAJA::forall<RAJA::omp_parallel_for_exec>(
RAJA::forall<RAJA::omp_parallel_for_exec>(res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tsum),
[=] (Index_type i, Real_type& sum) {
[=] (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& sum) {
REDUCE_SUM_BODY;
}
);
Expand Down
3 changes: 2 additions & 1 deletion src/algorithm/REDUCE_SUM-OMPTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ void REDUCE_SUM::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_AR
RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>(
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tsum),
[=] (Index_type i, Real_type& sum) {
[=] (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& sum) {
REDUCE_SUM_BODY;
}
);
Expand Down
12 changes: 8 additions & 4 deletions src/algorithm/REDUCE_SUM-Seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,17 @@ void REDUCE_SUM::runSeqVariant(VariantID vid, size_t tune_idx)

case RAJA_Seq : {

RAJA::resources::Host res;

if (tune_idx == 0) {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::ReduceSum<RAJA::seq_reduce, Real_type> sum(m_sum_init);

RAJA::forall<RAJA::seq_exec>( RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
RAJA::forall<RAJA::seq_exec>(res,
RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
REDUCE_SUM_BODY;
});

Expand All @@ -100,9 +102,11 @@ void REDUCE_SUM::runSeqVariant(VariantID vid, size_t tune_idx)

Real_type tsum = m_sum_init;

RAJA::forall<RAJA::seq_exec>( RAJA::RangeSegment(ibegin, iend),
RAJA::forall<RAJA::seq_exec>(res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tsum),
[=] (Index_type i, Real_type& sum) {
[=] (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& sum) {
REDUCE_SUM_BODY;
}
);
Expand Down
4 changes: 3 additions & 1 deletion src/algorithm/REDUCE_SUM-Sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,13 @@ void REDUCE_SUM::runSyclVariantImpl(VariantID vid)
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

Real_type tsum = m_sum_init;

RAJA::forall< RAJA::sycl_exec<work_group_size, true /*async*/> >(
res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tsum),
[=] (Index_type i, Real_type& sum) {
[=] (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& sum) {
REDUCE_SUM_BODY;
}
);
Expand Down
3 changes: 2 additions & 1 deletion src/basic/PI_REDUCE-Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,8 @@ void PI_REDUCE::runCudaVariantRAJANewReduce(VariantID vid)
RAJA::forall< exec_policy >( res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tpi),
[=] __device__ (Index_type i, Real_type& pi) {
[=] __device__ (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& pi) {
PI_REDUCE_BODY;
}
);
Expand Down
3 changes: 2 additions & 1 deletion src/basic/PI_REDUCE-Hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,8 @@ void PI_REDUCE::runHipVariantRAJANewReduce(VariantID vid)
res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tpi),
[=] __device__ (Index_type i, Real_type& pi) {
[=] __device__ (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& pi) {
PI_REDUCE_BODY;
}
);
Expand Down
12 changes: 7 additions & 5 deletions src/basic/PI_REDUCE-OMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,16 +77,17 @@ void PI_REDUCE::runOpenMPVariant(VariantID vid, size_t tune_idx)

case RAJA_OpenMP : {

RAJA::resources::Host res;

if (tune_idx == 0) {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::ReduceSum<RAJA::omp_reduce, Real_type> pi(m_pi_init);

RAJA::forall<RAJA::omp_parallel_for_exec>(
RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
RAJA::forall<RAJA::omp_parallel_for_exec>(res,
RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
PI_REDUCE_BODY;
});

Expand All @@ -102,10 +103,11 @@ void PI_REDUCE::runOpenMPVariant(VariantID vid, size_t tune_idx)

Real_type tpi = m_pi_init;

RAJA::forall<RAJA::omp_parallel_for_exec>(
RAJA::forall<RAJA::omp_parallel_for_exec>(res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tpi),
[=] (Index_type i, Real_type& pi) {
[=] (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& pi) {
PI_REDUCE_BODY;
}
);
Expand Down
3 changes: 2 additions & 1 deletion src/basic/PI_REDUCE-OMPTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ void PI_REDUCE::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG
RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>(
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tpi),
[=] (Index_type i, Real_type& pi) {
[=] (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& pi) {
PI_REDUCE_BODY;
}
);
Expand Down
10 changes: 7 additions & 3 deletions src/basic/PI_REDUCE-Seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,16 @@ void PI_REDUCE::runSeqVariant(VariantID vid, size_t tune_idx)

case RAJA_Seq : {

RAJA::resources::Host res;

if (tune_idx == 0) {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::ReduceSum<RAJA::seq_reduce, Real_type> pi(m_pi_init);

RAJA::forall<RAJA::seq_exec>( RAJA::RangeSegment(ibegin, iend),
RAJA::forall<RAJA::seq_exec>(res, RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
PI_REDUCE_BODY;
});
Expand All @@ -101,9 +103,11 @@ void PI_REDUCE::runSeqVariant(VariantID vid, size_t tune_idx)

Real_type tpi = m_pi_init;

RAJA::forall<RAJA::seq_exec>( RAJA::RangeSegment(ibegin, iend),
RAJA::forall<RAJA::seq_exec>(res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tpi),
[=] (Index_type i, Real_type& pi) {
[=] (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& pi) {
PI_REDUCE_BODY;
}
);
Expand Down
3 changes: 2 additions & 1 deletion src/basic/PI_REDUCE-Sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ void PI_REDUCE::runSyclVariantImpl(VariantID vid)
res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tpi),
[=] (Index_type i, Real_type& pi) {
[=] (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& pi) {
PI_REDUCE_BODY;
}
);
Expand Down
6 changes: 4 additions & 2 deletions src/basic/REDUCE3_INT-Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,10 @@ void REDUCE3_INT::runCudaVariantRAJANewReduce(VariantID vid)
RAJA::expt::Reduce<RAJA::operators::minimum>(&tvmin),
RAJA::expt::Reduce<RAJA::operators::maximum>(&tvmax),
[=] __device__ (Index_type i,
Int_type& vsum, Int_type& vmin, Int_type& vmax) {
REDUCE3_INT_BODY;
RAJA::expt::ValOp<Int_type, RAJA::operators::plus>& vsum,
RAJA::expt::ValOp<Int_type, RAJA::operators::minimum>& vmin,
RAJA::expt::ValOp<Int_type, RAJA::operators::maximum>& vmax) {
REDUCE3_INT_BODY_RAJA;
}
);

Expand Down
6 changes: 4 additions & 2 deletions src/basic/REDUCE3_INT-Hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,10 @@ void REDUCE3_INT::runHipVariantRAJANewReduce(VariantID vid)
RAJA::expt::Reduce<RAJA::operators::minimum>(&tvmin),
RAJA::expt::Reduce<RAJA::operators::maximum>(&tvmax),
[=] __device__ (Index_type i,
Int_type& vsum, Int_type& vmin, Int_type& vmax) {
REDUCE3_INT_BODY;
RAJA::expt::ValOp<Int_type, RAJA::operators::plus>& vsum,
RAJA::expt::ValOp<Int_type, RAJA::operators::minimum>& vmin,
RAJA::expt::ValOp<Int_type, RAJA::operators::maximum>& vmax) {
REDUCE3_INT_BODY_RAJA;
}
);

Expand Down
13 changes: 9 additions & 4 deletions src/basic/REDUCE3_INT-OMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ void REDUCE3_INT::runOpenMPVariant(VariantID vid, size_t tune_idx)

case RAJA_OpenMP : {

RAJA::resources::Host res;

if (tune_idx == 0) {

startTimer();
Expand All @@ -100,7 +102,7 @@ void REDUCE3_INT::runOpenMPVariant(VariantID vid, size_t tune_idx)
RAJA::ReduceMin<RAJA::omp_reduce, Int_type> vmin(m_vmin_init);
RAJA::ReduceMax<RAJA::omp_reduce, Int_type> vmax(m_vmax_init);

RAJA::forall<RAJA::omp_parallel_for_exec>(
RAJA::forall<RAJA::omp_parallel_for_exec>(res,
RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
REDUCE3_INT_BODY_RAJA;
});
Expand All @@ -121,13 +123,16 @@ void REDUCE3_INT::runOpenMPVariant(VariantID vid, size_t tune_idx)
Int_type tvmin = m_vmin_init;
Int_type tvmax = m_vmax_init;

RAJA::forall<RAJA::omp_parallel_for_exec>(
RAJA::forall<RAJA::omp_parallel_for_exec>(res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tvsum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&tvmin),
RAJA::expt::Reduce<RAJA::operators::maximum>(&tvmax),
[=](Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) {
REDUCE3_INT_BODY;
[=](Index_type i,
RAJA::expt::ValOp<Int_type, RAJA::operators::plus>& vsum,
RAJA::expt::ValOp<Int_type, RAJA::operators::minimum>& vmin,
RAJA::expt::ValOp<Int_type, RAJA::operators::maximum>& vmax) {
REDUCE3_INT_BODY_RAJA;
}
);

Expand Down
7 changes: 5 additions & 2 deletions src/basic/REDUCE3_INT-OMPTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,11 @@ void REDUCE3_INT::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_A
RAJA::expt::Reduce<RAJA::operators::plus>(&tvsum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&tvmin),
RAJA::expt::Reduce<RAJA::operators::maximum>(&tvmax),
[=](Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) {
REDUCE3_INT_BODY;
[=](Index_type i,
RAJA::expt::ValOp<Int_type, RAJA::operators::plus>& vsum,
RAJA::expt::ValOp<Int_type, RAJA::operators::minimum>& vmin,
RAJA::expt::ValOp<Int_type, RAJA::operators::maximum>& vmax) {
REDUCE3_INT_BODY_RAJA;
}
);

Expand Down
13 changes: 9 additions & 4 deletions src/basic/REDUCE3_INT-Seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ void REDUCE3_INT::runSeqVariant(VariantID vid, size_t tune_idx)

case RAJA_Seq : {

RAJA::resources::Host res;

if (tune_idx == 0) {

startTimer();
Expand All @@ -96,7 +98,7 @@ void REDUCE3_INT::runSeqVariant(VariantID vid, size_t tune_idx)
RAJA::ReduceMin<RAJA::seq_reduce, Int_type> vmin(m_vmin_init);
RAJA::ReduceMax<RAJA::seq_reduce, Int_type> vmax(m_vmax_init);

RAJA::forall<RAJA::seq_exec>(
RAJA::forall<RAJA::seq_exec>(res,
RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
REDUCE3_INT_BODY_RAJA;
});
Expand All @@ -117,13 +119,16 @@ void REDUCE3_INT::runSeqVariant(VariantID vid, size_t tune_idx)
Int_type tvmin = m_vmin_init;
Int_type tvmax = m_vmax_init;

RAJA::forall<RAJA::seq_exec>(
RAJA::forall<RAJA::seq_exec>(res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tvsum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&tvmin),
RAJA::expt::Reduce<RAJA::operators::maximum>(&tvmax),
[=](Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) {
REDUCE3_INT_BODY;
[=](Index_type i,
RAJA::expt::ValOp<Int_type, RAJA::operators::plus>& vsum,
RAJA::expt::ValOp<Int_type, RAJA::operators::minimum>& vmin,
RAJA::expt::ValOp<Int_type, RAJA::operators::maximum>& vmax) {
REDUCE3_INT_BODY_RAJA;
}
);

Expand Down
7 changes: 5 additions & 2 deletions src/basic/REDUCE3_INT-Sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,11 @@ void REDUCE3_INT::runSyclVariantImpl(VariantID vid)
RAJA::expt::Reduce<RAJA::operators::plus>(&tvsum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&tvmin),
RAJA::expt::Reduce<RAJA::operators::maximum>(&tvmax),
[=] (Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) {
REDUCE3_INT_BODY;
[=] (Index_type i,
RAJA::expt::ValOp<Int_type, RAJA::operators::plus>& vsum,
RAJA::expt::ValOp<Int_type, RAJA::operators::minimum>& vmin,
RAJA::expt::ValOp<Int_type, RAJA::operators::maximum>& vmax) {
REDUCE3_INT_BODY_RAJA;
}
);

Expand Down
14 changes: 9 additions & 5 deletions src/basic/REDUCE_STRUCT-Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,18 +233,22 @@ void REDUCE_STRUCT::runCudaVariantRAJANewReduce(VariantID vid)
Real_type txmax = m_init_max;
Real_type tymax = m_init_max;

RAJA::forall<exec_policy>(
RAJA::forall<exec_policy>(res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&txsum),
RAJA::expt::Reduce<RAJA::operators::plus>(&tysum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&txmin),
RAJA::expt::Reduce<RAJA::operators::minimum>(&tymin),
RAJA::expt::Reduce<RAJA::operators::maximum>(&txmax),
RAJA::expt::Reduce<RAJA::operators::maximum>(&tymax),
[=] __device__ (Index_type i, Real_type& xsum, Real_type& ysum,
Real_type& xmin, Real_type& ymin,
Real_type& xmax, Real_type& ymax) {
REDUCE_STRUCT_BODY;
[=] __device__ (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& xsum,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& ysum,
RAJA::expt::ValOp<Real_type, RAJA::operators::minimum>& xmin,
RAJA::expt::ValOp<Real_type, RAJA::operators::minimum>& ymin,
RAJA::expt::ValOp<Real_type, RAJA::operators::maximum>& xmax,
RAJA::expt::ValOp<Real_type, RAJA::operators::maximum>& ymax ) {
REDUCE_STRUCT_BODY_RAJA;
}
);

Expand Down
Loading
Loading