Skip to content

Commit

Permalink
Only use new reductions in OMP Target variants
Browse files Browse the repository at this point in the history
  • Loading branch information
rhornung67 committed Oct 3, 2024
1 parent 186d419 commit 85d0c5a
Show file tree
Hide file tree
Showing 16 changed files with 109 additions and 355 deletions.
56 changes: 12 additions & 44 deletions src/algorithm/REDUCE_SUM-OMPTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,62 +56,30 @@ void REDUCE_SUM::runOpenMPTargetVariant(VariantID vid, size_t tune_idx)

} else if ( vid == RAJA_OpenMPTarget ) {

if (tune_idx == 0) {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::ReduceSum<RAJA::omp_target_reduce, Real_type> sum(m_sum_init);

RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>(
RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
REDUCE_SUM_BODY;
});

m_sum = sum.get();

}
stopTimer();

} else if (tune_idx == 1) {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

Real_type tsum = m_sum_init;
startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>(
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tsum),
[=] (Index_type i, Real_type& sum) {
REDUCE_SUM_BODY;
}
);
Real_type tsum = m_sum_init;

m_sum = static_cast<Real_type>(tsum);
RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>(
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tsum),
[=] (Index_type i, Real_type& sum) {
REDUCE_SUM_BODY;
}
);

}
stopTimer();
m_sum = static_cast<Real_type>(tsum);

} else {
getCout() << "\n REDUCE_SUM : Unknown OMP Target tuning index = " << tune_idx << std::endl;
}
stopTimer();

} else {
getCout() << "\n REDUCE_SUM : Unknown OMP Target variant id = " << vid << std::endl;
}

}

// Registers the tuning names available for the given OpenMP target variant.
// Every variant gets "default"; RAJA_OpenMPTarget additionally gets "new".
// NOTE(review): registration order presumably corresponds to tune_idx 0 and 1
// as dispatched in runOpenMPTargetVariant -- confirm against KernelBase.
void REDUCE_SUM::setOpenMPTargetTuningDefinitions(VariantID vid)
{
  addVariantTuningName(vid, "default");

  // Nothing more to register unless this is the RAJA variant.
  if (vid != RAJA_OpenMPTarget) {
    return;
  }

  addVariantTuningName(vid, "new");
}

} // end namespace algorithm
} // end namespace rajaperf

Expand Down
1 change: 0 additions & 1 deletion src/algorithm/REDUCE_SUM.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ class REDUCE_SUM : public KernelBase
void setOpenMPTuningDefinitions(VariantID vid);
void setCudaTuningDefinitions(VariantID vid);
void setHipTuningDefinitions(VariantID vid);
void setOpenMPTargetTuningDefinitions(VariantID vid);
void setSyclTuningDefinitions(VariantID vid);

void runCudaVariantCub(VariantID vid);
Expand Down
56 changes: 12 additions & 44 deletions src/basic/PI_REDUCE-OMPTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,62 +56,30 @@ void PI_REDUCE::runOpenMPTargetVariant(VariantID vid, size_t tune_idx)

} else if ( vid == RAJA_OpenMPTarget ) {

if (tune_idx == 0) {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::ReduceSum<RAJA::omp_target_reduce, Real_type> pi(m_pi_init);

RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>(
RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
PI_REDUCE_BODY;
});

m_pi = 4.0 * pi.get();

}
stopTimer();

} else if (tune_idx == 1) {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

Real_type tpi = m_pi_init;
startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>(
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tpi),
[=] (Index_type i, Real_type& pi) {
PI_REDUCE_BODY;
}
);
Real_type tpi = m_pi_init;

m_pi = static_cast<Real_type>(tpi) * 4.0;
RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>(
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tpi),
[=] (Index_type i, Real_type& pi) {
PI_REDUCE_BODY;
}
);

}
stopTimer();
m_pi = static_cast<Real_type>(tpi) * 4.0;

} else {
getCout() << "\n PI_REDUCE : Unknown OMP Target tuning index = " << tune_idx << std::endl;
}
stopTimer();

} else {
getCout() << "\n PI_REDUCE : Unknown OMP Target variant id = " << vid << std::endl;
}

}

// Registers the tuning names available for the given OpenMP target variant.
// Every variant gets "default"; RAJA_OpenMPTarget additionally gets "new".
// NOTE(review): registration order presumably corresponds to tune_idx 0 and 1
// as dispatched in runOpenMPTargetVariant -- confirm against KernelBase.
void PI_REDUCE::setOpenMPTargetTuningDefinitions(VariantID vid)
{
  addVariantTuningName(vid, "default");

  // Nothing more to register unless this is the RAJA variant.
  if (vid != RAJA_OpenMPTarget) {
    return;
  }

  addVariantTuningName(vid, "new");
}

} // end namespace basic
} // end namespace rajaperf

Expand Down
1 change: 0 additions & 1 deletion src/basic/PI_REDUCE.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ class PI_REDUCE : public KernelBase
void setOpenMPTuningDefinitions(VariantID vid);
void setCudaTuningDefinitions(VariantID vid);
void setHipTuningDefinitions(VariantID vid);
void setOpenMPTargetTuningDefinitions(VariantID vid);
void setSyclTuningDefinitions(VariantID vid);

template < size_t block_size, typename MappingHelper >
Expand Down
71 changes: 18 additions & 53 deletions src/basic/REDUCE3_INT-OMPTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,70 +62,35 @@ void REDUCE3_INT::runOpenMPTargetVariant(VariantID vid, size_t tune_idx)

} else if ( vid == RAJA_OpenMPTarget ) {

if (tune_idx == 0) {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::ReduceSum<RAJA::omp_target_reduce, Int_type> vsum(m_vsum_init);
RAJA::ReduceMin<RAJA::omp_target_reduce, Int_type> vmin(m_vmin_init);
RAJA::ReduceMax<RAJA::omp_target_reduce, Int_type> vmax(m_vmax_init);

RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>(
RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
REDUCE3_INT_BODY_RAJA;
});

m_vsum += static_cast<Int_type>(vsum.get());
m_vmin = RAJA_MIN(m_vmin, static_cast<Int_type>(vmin.get()));
m_vmax = RAJA_MAX(m_vmax, static_cast<Int_type>(vmax.get()));

}
stopTimer();

} else if (tune_idx == 1) {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

Int_type tvsum = m_vsum_init;
Int_type tvmin = m_vmin_init;
Int_type tvmax = m_vmax_init;
startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>(
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tvsum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&tvmin),
RAJA::expt::Reduce<RAJA::operators::maximum>(&tvmax),
[=](Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) {
REDUCE3_INT_BODY;
}
);
Int_type tvsum = m_vsum_init;
Int_type tvmin = m_vmin_init;
Int_type tvmax = m_vmax_init;

m_vsum += static_cast<Int_type>(tvsum);
m_vmin = RAJA_MIN(m_vmin, static_cast<Int_type>(tvmin));
m_vmax = RAJA_MAX(m_vmax, static_cast<Int_type>(tvmax));
RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>(
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&tvsum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&tvmin),
RAJA::expt::Reduce<RAJA::operators::maximum>(&tvmax),
[=](Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) {
REDUCE3_INT_BODY;
}
);

}
stopTimer();
m_vsum += static_cast<Int_type>(tvsum);
m_vmin = RAJA_MIN(m_vmin, static_cast<Int_type>(tvmin));
m_vmax = RAJA_MAX(m_vmax, static_cast<Int_type>(tvmax));

} else {
getCout() << "\n REDUCE3_INT : Unknown OMP Target tuning index = " << tune_idx << std::endl;
}
stopTimer();

} else {
getCout() << "\n REDUCE3_INT : Unknown OMP Target variant id = " << vid << std::endl;
}
}

// Registers the tuning names available for the given OpenMP target variant.
// Every variant gets "default"; RAJA_OpenMPTarget additionally gets "new".
// NOTE(review): registration order presumably corresponds to tune_idx 0 and 1
// as dispatched in runOpenMPTargetVariant -- confirm against KernelBase.
void REDUCE3_INT::setOpenMPTargetTuningDefinitions(VariantID vid)
{
  addVariantTuningName(vid, "default");

  // Nothing more to register unless this is the RAJA variant.
  if (vid != RAJA_OpenMPTarget) {
    return;
  }

  addVariantTuningName(vid, "new");
}

} // end namespace basic
} // end namespace rajaperf

Expand Down
1 change: 0 additions & 1 deletion src/basic/REDUCE3_INT.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ class REDUCE3_INT : public KernelBase
void setOpenMPTuningDefinitions(VariantID vid);
void setCudaTuningDefinitions(VariantID vid);
void setHipTuningDefinitions(VariantID vid);
void setOpenMPTargetTuningDefinitions(VariantID vid);
void setSyclTuningDefinitions(VariantID vid);

template < size_t block_size, typename MappingHelper >
Expand Down
107 changes: 32 additions & 75 deletions src/basic/REDUCE_STRUCT-OMPTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,76 +83,41 @@ void REDUCE_STRUCT::runOpenMPTargetVariant(VariantID vid, size_t tune_idx)

case RAJA_OpenMPTarget : {

if (tune_idx == 0) {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

RAJA::ReduceSum<RAJA::omp_target_reduce, Real_type> xsum(m_init_sum);
RAJA::ReduceSum<RAJA::omp_target_reduce, Real_type> ysum(m_init_sum);
RAJA::ReduceMin<RAJA::omp_target_reduce, Real_type> xmin(m_init_min);
RAJA::ReduceMin<RAJA::omp_target_reduce, Real_type> ymin(m_init_min);
RAJA::ReduceMax<RAJA::omp_target_reduce, Real_type> xmax(m_init_max);
RAJA::ReduceMax<RAJA::omp_target_reduce, Real_type> ymax(m_init_max);

RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>(
RAJA::RangeSegment(ibegin, iend),
[=](Index_type i) {
REDUCE_STRUCT_BODY_RAJA;
});

points.SetCenter(xsum.get()/(points.N),
ysum.get()/(points.N));
points.SetXMin(xmin.get());
points.SetXMax(xmax.get());
points.SetYMin(ymin.get());
points.SetYMax(ymax.get());
m_points = points;

}
stopTimer();

} else if (tune_idx == 1) {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

Real_type txsum = m_init_sum;
Real_type tysum = m_init_sum;
Real_type txmin = m_init_min;
Real_type tymin = m_init_min;
Real_type txmax = m_init_max;
Real_type tymax = m_init_max;

RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>(
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&txsum),
RAJA::expt::Reduce<RAJA::operators::plus>(&tysum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&txmin),
RAJA::expt::Reduce<RAJA::operators::minimum>(&tymin),
RAJA::expt::Reduce<RAJA::operators::maximum>(&txmax),
RAJA::expt::Reduce<RAJA::operators::maximum>(&tymax),
[=](Index_type i, Real_type& xsum, Real_type& ysum,
Real_type& xmin, Real_type& ymin,
Real_type& xmax, Real_type& ymax) {
REDUCE_STRUCT_BODY;
}
);

points.SetCenter(static_cast<Real_type>(txsum)/(points.N),
static_cast<Real_type>(tysum)/(points.N));
points.SetXMin(static_cast<Real_type>(txmin));
points.SetXMax(static_cast<Real_type>(txmax));
points.SetYMin(static_cast<Real_type>(tymin));
points.SetYMax(static_cast<Real_type>(tymax));
m_points = points;
startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

}
stopTimer();
Real_type txsum = m_init_sum;
Real_type tysum = m_init_sum;
Real_type txmin = m_init_min;
Real_type tymin = m_init_min;
Real_type txmax = m_init_max;
Real_type tymax = m_init_max;

RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>(
RAJA::RangeSegment(ibegin, iend),
RAJA::expt::Reduce<RAJA::operators::plus>(&txsum),
RAJA::expt::Reduce<RAJA::operators::plus>(&tysum),
RAJA::expt::Reduce<RAJA::operators::minimum>(&txmin),
RAJA::expt::Reduce<RAJA::operators::minimum>(&tymin),
RAJA::expt::Reduce<RAJA::operators::maximum>(&txmax),
RAJA::expt::Reduce<RAJA::operators::maximum>(&tymax),
[=](Index_type i, Real_type& xsum, Real_type& ysum,
Real_type& xmin, Real_type& ymin,
Real_type& xmax, Real_type& ymax) {
REDUCE_STRUCT_BODY;
}
);

points.SetCenter(static_cast<Real_type>(txsum)/(points.N),
static_cast<Real_type>(tysum)/(points.N));
points.SetXMin(static_cast<Real_type>(txmin));
points.SetXMax(static_cast<Real_type>(txmax));
points.SetYMin(static_cast<Real_type>(tymin));
points.SetYMax(static_cast<Real_type>(tymax));
m_points = points;

} else {
getCout() << "\n REDUCE_STRUCT : Unknown OMP Target tuning index = " << tune_idx << std::endl;
}
stopTimer();

break;
}
Expand All @@ -163,14 +128,6 @@ void REDUCE_STRUCT::runOpenMPTargetVariant(VariantID vid, size_t tune_idx)

}

// Registers the tuning names available for the given OpenMP target variant.
// Every variant gets "default"; RAJA_OpenMPTarget additionally gets "new".
// NOTE(review): registration order presumably corresponds to tune_idx 0 and 1
// as dispatched in runOpenMPTargetVariant -- confirm against KernelBase.
void REDUCE_STRUCT::setOpenMPTargetTuningDefinitions(VariantID vid)
{
  addVariantTuningName(vid, "default");

  // Nothing more to register unless this is the RAJA variant.
  if (vid != RAJA_OpenMPTarget) {
    return;
  }

  addVariantTuningName(vid, "new");
}

} // end namespace basic
} // end namespace rajaperf

Expand Down
1 change: 0 additions & 1 deletion src/basic/REDUCE_STRUCT.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ class REDUCE_STRUCT : public KernelBase
void setOpenMPTuningDefinitions(VariantID vid);
void setCudaTuningDefinitions(VariantID vid);
void setHipTuningDefinitions(VariantID vid);
void setOpenMPTargetTuningDefinitions(VariantID vid);

template < size_t block_size, typename MappingHelper >
void runCudaVariantBase(VariantID vid);
Expand Down
Loading

0 comments on commit 85d0c5a

Please sign in to comment.