Skip to content

Commit

Permalink
Merge branch 'task/rhornung67/new-reductions' into woptim/spack-update
Browse files: browse the repository at this point in the history
  • Loading branch information
adrienbernede committed Oct 16, 2024
2 parents 86784f1 + 161da8f commit a4e2ef1
Show file tree
Hide file tree
Showing 42 changed files with 230 additions and 131 deletions.
1 change: 1 addition & 0 deletions src/algorithm/ATOMIC.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class ATOMIC : public KernelBase
void setOpenMPTuningDefinitions(VariantID vid);
void setCudaTuningDefinitions(VariantID vid);
void setHipTuningDefinitions(VariantID vid);
void setOpenMPTargetTuningDefinitions(VariantID vid);

template < size_t replication >
void runSeqVariantReplicate(VariantID vid);
Expand Down
3 changes: 2 additions & 1 deletion src/algorithm/REDUCE_SUM-Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,8 @@ void REDUCE_SUM::runCudaVariantRAJANewReduce(VariantID vid)
RAJA::forall<exec_policy>( res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tsum),
[=] __device__ (Index_type i, Real_type& sum) {
[=] __device__ (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& sum) {
REDUCE_SUM_BODY;
}
);
Expand Down
3 changes: 2 additions & 1 deletion src/algorithm/REDUCE_SUM-Hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,8 @@ void REDUCE_SUM::runHipVariantRAJANewReduce(VariantID vid)
RAJA::forall<exec_policy>( res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tsum),
[=] __device__ (Index_type i, Real_type& sum) {
[=] __device__ (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& sum) {
REDUCE_SUM_BODY;
}
);
Expand Down
9 changes: 6 additions & 3 deletions src/algorithm/REDUCE_SUM-OMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,16 @@ void REDUCE_SUM::runOpenMPVariant(VariantID vid, size_t tune_idx)

case RAJA_OpenMP : {

RAJA::resources::Host res;

if (tune_idx == 0) {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::ReduceSum<RAJA::omp_reduce, Real_type> sum(m_sum_init);

RAJA::forall<RAJA::omp_parallel_for_exec>(
RAJA::forall<RAJA::omp_parallel_for_exec>(res,
RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
REDUCE_SUM_BODY;
Expand All @@ -101,10 +103,11 @@ void REDUCE_SUM::runOpenMPVariant(VariantID vid, size_t tune_idx)

Real_type tsum = m_sum_init;

RAJA::forall<RAJA::omp_parallel_for_exec>(
RAJA::forall<RAJA::omp_parallel_for_exec>(res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tsum),
[=] (Index_type i, Real_type& sum) {
[=] (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& sum) {
REDUCE_SUM_BODY;
}
);
Expand Down
3 changes: 2 additions & 1 deletion src/algorithm/REDUCE_SUM-OMPTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ void REDUCE_SUM::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_AR
RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>(
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tsum),
[=] (Index_type i, Real_type& sum) {
[=] (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& sum) {
REDUCE_SUM_BODY;
}
);
Expand Down
12 changes: 8 additions & 4 deletions src/algorithm/REDUCE_SUM-Seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,17 @@ void REDUCE_SUM::runSeqVariant(VariantID vid, size_t tune_idx)

case RAJA_Seq : {

RAJA::resources::Host res;

if (tune_idx == 0) {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::ReduceSum<RAJA::seq_reduce, Real_type> sum(m_sum_init);

RAJA::forall<RAJA::seq_exec>( RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
RAJA::forall<RAJA::seq_exec>(res,
RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
REDUCE_SUM_BODY;
});

Expand All @@ -100,9 +102,11 @@ void REDUCE_SUM::runSeqVariant(VariantID vid, size_t tune_idx)

Real_type tsum = m_sum_init;

RAJA::forall<RAJA::seq_exec>( RAJA::RangeSegment(ibegin, iend),
RAJA::forall<RAJA::seq_exec>(res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tsum),
[=] (Index_type i, Real_type& sum) {
[=] (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& sum) {
REDUCE_SUM_BODY;
}
);
Expand Down
4 changes: 3 additions & 1 deletion src/algorithm/REDUCE_SUM-Sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,13 @@ void REDUCE_SUM::runSyclVariantImpl(VariantID vid)
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

Real_type tsum = m_sum_init;

RAJA::forall< RAJA::sycl_exec<work_group_size, true /*async*/> >(
res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tsum),
[=] (Index_type i, Real_type& sum) {
[=] (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& sum) {
REDUCE_SUM_BODY;
}
);
Expand Down
3 changes: 2 additions & 1 deletion src/basic/PI_REDUCE-Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,8 @@ void PI_REDUCE::runCudaVariantRAJANewReduce(VariantID vid)
RAJA::forall< exec_policy >( res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tpi),
[=] __device__ (Index_type i, Real_type& pi) {
[=] __device__ (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& pi) {
PI_REDUCE_BODY;
}
);
Expand Down
3 changes: 2 additions & 1 deletion src/basic/PI_REDUCE-Hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,8 @@ void PI_REDUCE::runHipVariantRAJANewReduce(VariantID vid)
res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tpi),
[=] __device__ (Index_type i, Real_type& pi) {
[=] __device__ (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& pi) {
PI_REDUCE_BODY;
}
);
Expand Down
12 changes: 7 additions & 5 deletions src/basic/PI_REDUCE-OMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,16 +77,17 @@ void PI_REDUCE::runOpenMPVariant(VariantID vid, size_t tune_idx)

case RAJA_OpenMP : {

RAJA::resources::Host res;

if (tune_idx == 0) {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::ReduceSum<RAJA::omp_reduce, Real_type> pi(m_pi_init);

RAJA::forall<RAJA::omp_parallel_for_exec>(
RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
RAJA::forall<RAJA::omp_parallel_for_exec>(res,
RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
PI_REDUCE_BODY;
});

Expand All @@ -102,10 +103,11 @@ void PI_REDUCE::runOpenMPVariant(VariantID vid, size_t tune_idx)

Real_type tpi = m_pi_init;

RAJA::forall<RAJA::omp_parallel_for_exec>(
RAJA::forall<RAJA::omp_parallel_for_exec>(res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tpi),
[=] (Index_type i, Real_type& pi) {
[=] (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& pi) {
PI_REDUCE_BODY;
}
);
Expand Down
3 changes: 2 additions & 1 deletion src/basic/PI_REDUCE-OMPTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ void PI_REDUCE::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG
RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>(
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tpi),
[=] (Index_type i, Real_type& pi) {
[=] (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& pi) {
PI_REDUCE_BODY;
}
);
Expand Down
10 changes: 7 additions & 3 deletions src/basic/PI_REDUCE-Seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,16 @@ void PI_REDUCE::runSeqVariant(VariantID vid, size_t tune_idx)

case RAJA_Seq : {

RAJA::resources::Host res;

if (tune_idx == 0) {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::ReduceSum<RAJA::seq_reduce, Real_type> pi(m_pi_init);

RAJA::forall<RAJA::seq_exec>( RAJA::RangeSegment(ibegin, iend),
RAJA::forall<RAJA::seq_exec>(res, RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
PI_REDUCE_BODY;
});
Expand All @@ -101,9 +103,11 @@ void PI_REDUCE::runSeqVariant(VariantID vid, size_t tune_idx)

Real_type tpi = m_pi_init;

RAJA::forall<RAJA::seq_exec>( RAJA::RangeSegment(ibegin, iend),
RAJA::forall<RAJA::seq_exec>(res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tpi),
[=] (Index_type i, Real_type& pi) {
[=] (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& pi) {
PI_REDUCE_BODY;
}
);
Expand Down
3 changes: 2 additions & 1 deletion src/basic/PI_REDUCE-Sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ void PI_REDUCE::runSyclVariantImpl(VariantID vid)
res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tpi),
[=] (Index_type i, Real_type& pi) {
[=] (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& pi) {
PI_REDUCE_BODY;
}
);
Expand Down
6 changes: 4 additions & 2 deletions src/basic/REDUCE3_INT-Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,10 @@ void REDUCE3_INT::runCudaVariantRAJANewReduce(VariantID vid)
RAJA::expt::Reduce<RAJA::operators::minimum>(&tvmin),
RAJA::expt::Reduce<RAJA::operators::maximum>(&tvmax),
[=] __device__ (Index_type i,
Int_type& vsum, Int_type& vmin, Int_type& vmax) {
REDUCE3_INT_BODY;
RAJA::expt::ValOp<Int_type, RAJA::operators::plus>& vsum,
RAJA::expt::ValOp<Int_type, RAJA::operators::minimum>& vmin,
RAJA::expt::ValOp<Int_type, RAJA::operators::maximum>& vmax) {
REDUCE3_INT_BODY_RAJA;
}
);

Expand Down
6 changes: 4 additions & 2 deletions src/basic/REDUCE3_INT-Hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,10 @@ void REDUCE3_INT::runHipVariantRAJANewReduce(VariantID vid)
RAJA::expt::Reduce<RAJA::operators::minimum>(&tvmin),
RAJA::expt::Reduce<RAJA::operators::maximum>(&tvmax),
[=] __device__ (Index_type i,
Int_type& vsum, Int_type& vmin, Int_type& vmax) {
REDUCE3_INT_BODY;
RAJA::expt::ValOp<Int_type, RAJA::operators::plus>& vsum,
RAJA::expt::ValOp<Int_type, RAJA::operators::minimum>& vmin,
RAJA::expt::ValOp<Int_type, RAJA::operators::maximum>& vmax) {
REDUCE3_INT_BODY_RAJA;
}
);

Expand Down
13 changes: 9 additions & 4 deletions src/basic/REDUCE3_INT-OMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ void REDUCE3_INT::runOpenMPVariant(VariantID vid, size_t tune_idx)

case RAJA_OpenMP : {

RAJA::resources::Host res;

if (tune_idx == 0) {

startTimer();
Expand All @@ -100,7 +102,7 @@ void REDUCE3_INT::runOpenMPVariant(VariantID vid, size_t tune_idx)
RAJA::ReduceMin<RAJA::omp_reduce, Int_type> vmin(m_vmin_init);
RAJA::ReduceMax<RAJA::omp_reduce, Int_type> vmax(m_vmax_init);

RAJA::forall<RAJA::omp_parallel_for_exec>(
RAJA::forall<RAJA::omp_parallel_for_exec>(res,
RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
REDUCE3_INT_BODY_RAJA;
});
Expand All @@ -121,13 +123,16 @@ void REDUCE3_INT::runOpenMPVariant(VariantID vid, size_t tune_idx)
Int_type tvmin = m_vmin_init;
Int_type tvmax = m_vmax_init;

RAJA::forall<RAJA::omp_parallel_for_exec>(
RAJA::forall<RAJA::omp_parallel_for_exec>(res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tvsum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&tvmin),
RAJA::expt::Reduce<RAJA::operators::maximum>(&tvmax),
[=](Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) {
REDUCE3_INT_BODY;
[=](Index_type i,
RAJA::expt::ValOp<Int_type, RAJA::operators::plus>& vsum,
RAJA::expt::ValOp<Int_type, RAJA::operators::minimum>& vmin,
RAJA::expt::ValOp<Int_type, RAJA::operators::maximum>& vmax) {
REDUCE3_INT_BODY_RAJA;
}
);

Expand Down
7 changes: 5 additions & 2 deletions src/basic/REDUCE3_INT-OMPTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,11 @@ void REDUCE3_INT::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_A
RAJA::expt::Reduce<RAJA::operators::plus>(&tvsum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&tvmin),
RAJA::expt::Reduce<RAJA::operators::maximum>(&tvmax),
[=](Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) {
REDUCE3_INT_BODY;
[=](Index_type i,
RAJA::expt::ValOp<Int_type, RAJA::operators::plus>& vsum,
RAJA::expt::ValOp<Int_type, RAJA::operators::minimum>& vmin,
RAJA::expt::ValOp<Int_type, RAJA::operators::maximum>& vmax) {
REDUCE3_INT_BODY_RAJA;
}
);

Expand Down
13 changes: 9 additions & 4 deletions src/basic/REDUCE3_INT-Seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ void REDUCE3_INT::runSeqVariant(VariantID vid, size_t tune_idx)

case RAJA_Seq : {

RAJA::resources::Host res;

if (tune_idx == 0) {

startTimer();
Expand All @@ -96,7 +98,7 @@ void REDUCE3_INT::runSeqVariant(VariantID vid, size_t tune_idx)
RAJA::ReduceMin<RAJA::seq_reduce, Int_type> vmin(m_vmin_init);
RAJA::ReduceMax<RAJA::seq_reduce, Int_type> vmax(m_vmax_init);

RAJA::forall<RAJA::seq_exec>(
RAJA::forall<RAJA::seq_exec>(res,
RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
REDUCE3_INT_BODY_RAJA;
});
Expand All @@ -117,13 +119,16 @@ void REDUCE3_INT::runSeqVariant(VariantID vid, size_t tune_idx)
Int_type tvmin = m_vmin_init;
Int_type tvmax = m_vmax_init;

RAJA::forall<RAJA::seq_exec>(
RAJA::forall<RAJA::seq_exec>(res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tvsum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&tvmin),
RAJA::expt::Reduce<RAJA::operators::maximum>(&tvmax),
[=](Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) {
REDUCE3_INT_BODY;
[=](Index_type i,
RAJA::expt::ValOp<Int_type, RAJA::operators::plus>& vsum,
RAJA::expt::ValOp<Int_type, RAJA::operators::minimum>& vmin,
RAJA::expt::ValOp<Int_type, RAJA::operators::maximum>& vmax) {
REDUCE3_INT_BODY_RAJA;
}
);

Expand Down
7 changes: 5 additions & 2 deletions src/basic/REDUCE3_INT-Sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,11 @@ void REDUCE3_INT::runSyclVariantImpl(VariantID vid)
RAJA::expt::Reduce<RAJA::operators::plus>(&tvsum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&tvmin),
RAJA::expt::Reduce<RAJA::operators::maximum>(&tvmax),
[=] (Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) {
REDUCE3_INT_BODY;
[=] (Index_type i,
RAJA::expt::ValOp<Int_type, RAJA::operators::plus>& vsum,
RAJA::expt::ValOp<Int_type, RAJA::operators::minimum>& vmin,
RAJA::expt::ValOp<Int_type, RAJA::operators::maximum>& vmax) {
REDUCE3_INT_BODY_RAJA;
}
);

Expand Down
14 changes: 9 additions & 5 deletions src/basic/REDUCE_STRUCT-Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,18 +233,22 @@ void REDUCE_STRUCT::runCudaVariantRAJANewReduce(VariantID vid)
Real_type txmax = m_init_max;
Real_type tymax = m_init_max;

RAJA::forall<exec_policy>(
RAJA::forall<exec_policy>(res,
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&txsum),
RAJA::expt::Reduce<RAJA::operators::plus>(&tysum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&txmin),
RAJA::expt::Reduce<RAJA::operators::minimum>(&tymin),
RAJA::expt::Reduce<RAJA::operators::maximum>(&txmax),
RAJA::expt::Reduce<RAJA::operators::maximum>(&tymax),
[=] __device__ (Index_type i, Real_type& xsum, Real_type& ysum,
Real_type& xmin, Real_type& ymin,
Real_type& xmax, Real_type& ymax) {
REDUCE_STRUCT_BODY;
[=] __device__ (Index_type i,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& xsum,
RAJA::expt::ValOp<Real_type, RAJA::operators::plus>& ysum,
RAJA::expt::ValOp<Real_type, RAJA::operators::minimum>& xmin,
RAJA::expt::ValOp<Real_type, RAJA::operators::minimum>& ymin,
RAJA::expt::ValOp<Real_type, RAJA::operators::maximum>& xmax,
RAJA::expt::ValOp<Real_type, RAJA::operators::maximum>& ymax ) {
REDUCE_STRUCT_BODY_RAJA;
}
);

Expand Down
Loading

0 comments on commit a4e2ef1

Please sign in to comment.