diff --git a/.gitlab/jobs/corona.yml b/.gitlab/jobs/corona.yml index 9af5ba72b..c1fdc1c53 100644 --- a/.gitlab/jobs/corona.yml +++ b/.gitlab/jobs/corona.yml @@ -27,6 +27,9 @@ # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -# With GitLab CI, included files cannot be empty. -variables: - INCLUDED_FILE_CANNOT_BE_EMPTY: "True" +clang_19_0_0_sycl_gcc_10_3_1_rocmcc_5_7_1_hip: + variables: + SPEC: " ~shared +sycl ~openmp %clang@=19.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\" ^blt@develop" + MODULE_LIST: "rocm/5.7.1" + extends: .job_on_corona + allow_failure: true diff --git a/.gitlab/jobs/lassen.yml b/.gitlab/jobs/lassen.yml index 1b9bc0eda..ef997eb6d 100644 --- a/.gitlab/jobs/lassen.yml +++ b/.gitlab/jobs/lassen.yml @@ -18,14 +18,7 @@ # We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} So that # the comparison with the original job is easier. -# Overriding shared spec: Longer allocation + extra flags -xl_2022_08_19_gcc_8_3_1_cuda_11_2_0: - variables: - SPEC: "${PROJECT_LASSEN_VARIANTS} +cuda cxxflags==\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" %xl@=16.1.1.12.gcc.8.3.1 ^cuda@11.2.0+allow-unsupported-compilers ${PROJECT_LASSEN_DEPS}" - MODULE_LIST: "cuda/11.2.0" - LASSEN_JOB_ALLOC: "1 -W 60 -q pci" - extends: .job_on_lassen - +# No jobs overridden ############ # Extra jobs @@ -36,7 +29,7 @@ xl_2022_08_19_gcc_8_3_1_cuda_11_2_0: gcc_8_3_1: variables: - SPEC: " ~shared +openmp %gcc@=8.3.1 ${PROJECT_LASSEN_DEPS}" + SPEC: " ~shared +openmp %gcc@=8.3.1 ^blt@develop" extends: .job_on_lassen gcc_8_3_1_cuda_11_5_0_ats_disabled: @@ -69,8 +62,23 @@ clang_13_0_1_libcpp: # LSAN_OPTIONS: "suppressions=${CI_PROJECT_DIR}/tpl/RAJA/suppressions.asan" # extends: .job_on_lassen -# Activated in RAJA, but we don't use desul atomics here -#gcc_8_3_1_cuda_10_1_168_desul_atomics: -# variables: -# SPEC: "+openmp +cuda +desul %gcc@=8.3.1 cuda_arch=70 cuda_arch=70 ^cuda@10.1.243+allow-unsupported-compilers ${PROJECT_LASSEN_DEPS}" -# extends: .job_on_lassen +clang_16_0_6_ibm_omptarget: + variables: + SPEC: " ~shared +openmp +omptarget %clang@=16.0.6.ibm.gcc.8.3.1 ^blt@develop" + ON_LASSEN: "OFF" + extends: .job_on_lassen + +xl_2022_08_19_gcc_8_3_1_cuda_11_2_0: + variables: + SPEC: " ~shared +openmp cuda_arch=70 +cuda cxxflags==\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" %xl@=16.1.1.12.gcc.8.3.1 ^cuda@11.2.0+allow-unsupported-compilers ^blt@develop" + MODULE_LIST: "cuda/11.2.0" + LASSEN_JOB_ALLOC: "1 -W 60 -q pci" + extends: .job_on_lassen + +xl_2023_06_28_gcc_11_2_1_cuda_11_8_0: + variables: + SPEC: " ~shared +openmp cuda_arch=70 +cuda cxxflags==\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" %xl@=16.1.1.14.cuda.11.8.0.gcc.11.2.1 ^cuda@11.8.0+allow-unsupported-compilers ^blt@develop" + MODULE_LIST: "cuda/11.8.0" + LASSEN_JOB_ALLOC: "1 -W 60 -q pci" + extends: .job_on_lassen + diff --git a/.gitlab/jobs/poodle.yml b/.gitlab/jobs/poodle.yml index 8e86158f0..56709a184 100644 --- a/.gitlab/jobs/poodle.yml +++ b/.gitlab/jobs/poodle.yml @@ -18,27 +18,25 @@ # We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} So that # the comparison with the original job is easier. +# allow failure +intel_2023_2_1: + variables: + SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=2023.2.1 ${PROJECT_POODLE_DEPS}" + extends: .job_on_poodle + allow_failure: true + +# omptask variant clang_14_0_6: variables: SPEC: "${PROJECT_POODLE_VARIANTS} +omptask %clang@=14.0.6 ${PROJECT_POODLE_DEPS}" extends: .job_on_poodle +# omptask variant gcc_10_3_1: variables: SPEC: "${PROJECT_POODLE_VARIANTS} +omptask %gcc@=10.3.1 ${PROJECT_POODLE_DEPS}" extends: .job_on_poodle -intel_19_1_2_gcc_10_3_1: - variables: - SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=19.1.2.gcc.10.3.1 ${PROJECT_POODLE_DEPS}" - extends: .job_on_poodle - -intel_2022_1_0: - variables: - SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=2022.1.0 ${PROJECT_POODLE_DEPS}" - allow_failure: true - extends: .job_on_poodle - ############ # Extra jobs ############ @@ -49,5 +47,4 @@ intel_2022_1_0: intel_2022_1_0_mpi: variables: SPEC: "~shared +openmp +mpi %intel@=2022.1.0 ^mvapich2 ^blt@develop" - allow_failure: true extends: .job_on_poodle diff --git a/.gitlab/jobs/ruby.yml b/.gitlab/jobs/ruby.yml index c19e36d12..f3c448001 100644 --- a/.gitlab/jobs/ruby.yml +++ b/.gitlab/jobs/ruby.yml @@ -18,28 +18,26 @@ # We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} So that # the comparison with the original job is easier. +# allow failure +intel_2023_2_1: + variables: + SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=2023.2.1 ${PROJECT_RUBY_DEPS}" + extends: .job_on_ruby + allow_failure: true + +# omptask variant clang_14_0_6: variables: SPEC: "${PROJECT_RUBY_VARIANTS} +omptask %clang@=14.0.6 ${PROJECT_RUBY_DEPS}" extends: .job_on_ruby +# omptask variant gcc_10_3_1: variables: SPEC: "${PROJECT_RUBY_VARIANTS} +omptask %gcc@=10.3.1 ${PROJECT_RUBY_DEPS}" RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=60 --nodes=1" extends: .job_on_ruby -intel_19_1_2_gcc_10_3_1: - variables: - SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=19.1.2.gcc.10.3.1 ${PROJECT_RUBY_DEPS}" - RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=40 --nodes=1" - extends: .job_on_ruby - -intel_2022_1_0: - variables: - SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=2022.1.0 ${PROJECT_RUBY_DEPS}" - extends: .job_on_ruby - ############ # Extra jobs ############ diff --git a/.gitlab/jobs/tioga.yml b/.gitlab/jobs/tioga.yml index 00ed3c276..36db68790 100644 --- a/.gitlab/jobs/tioga.yml +++ b/.gitlab/jobs/tioga.yml @@ -27,13 +27,13 @@ # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -rocmcc_6_1_1_hip_openmp: +rocmcc_6_2_0_hip_openmp: variables: - SPEC: "~shared +rocm +openmp amdgpu_target=gfx90a %rocmcc@=6.1.1 ^hip@6.1.1 ^blt@develop" + SPEC: "~shared +rocm +openmp amdgpu_target=gfx90a %rocmcc@=6.2.0 ^hip@6.2.0 ^blt@develop" extends: .job_on_tioga -rocmcc_6_1_1_hip_openmp_mpi: +rocmcc_6_2_0_hip_openmp_mpi: variables: - SPEC: "~shared +rocm +openmp +mpi amdgpu_target=gfx90a %rocmcc@=6.1.1 ^hip@6.1.1 ^blt@develop" + SPEC: "~shared +rocm +openmp +mpi amdgpu_target=gfx90a %rocmcc@=6.2.0 ^hip@6.2.0 ^blt@develop" extends: .job_on_tioga allow_failure: true diff --git a/.uberenv_config.json b/.uberenv_config.json index fda595d3a..5c3fc32d8 100644 --- a/.uberenv_config.json +++ b/.uberenv_config.json @@ -1,10 +1,10 @@ { -"package_name" : "raja_perf", +"package_name" : "raja-perf", "package_version" : "develop", "package_final_phase" : "initconfig", "package_source_dir" : "../..", "spack_url": "https://github.com/spack/spack.git", -"spack_branch": "develop-2024-05-26", +"spack_branch": "develop-2024-10-06", "spack_activate" : {}, "spack_configs_path": "tpl/RAJA/scripts/radiuss-spack-configs", "spack_packages_path": "tpl/RAJA/scripts/radiuss-spack-configs/packages", diff --git a/scripts/gitlab/build_and_test.sh b/scripts/gitlab/build_and_test.sh index f2d020918..44a89a1f3 100755 --- a/scripts/gitlab/build_and_test.sh +++ b/scripts/gitlab/build_and_test.sh @@ -28,6 +28,7 @@ job_unique_id=${CI_JOB_ID:-""} use_dev_shm=${USE_DEV_SHM:-true} spack_debug=${SPACK_DEBUG:-false} debug_mode=${DEBUG_MODE:-false} +push_to_registry=${PUSH_TO_REGISTRY:-true} raja_version=${UPDATE_RAJA:-""} sys_type=${SYS_TYPE:-""} @@ -59,6 +60,7 @@ then echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" use_dev_shm=false spack_debug=true + push_to_registry=false fi if [[ -n ${module_list} ]] @@ -149,7 +151,7 @@ then timed_message "Spack build of dependencies" ${uberenv_cmd} --skip-setup-and-env --spec="${spec}" ${prefix_opt} ${upstream_opt} - if [[ -n ${ci_registry_token} && ${debug_mode} == false ]] + if [[ -n ${ci_registry_token} && ${push_to_registry} == true ]] then timed_message "Push dependencies to buildcache" ${spack_cmd} -D ${spack_env_path} buildcache push --only dependencies gitlab_ci diff --git a/src/algorithm/ATOMIC.hpp b/src/algorithm/ATOMIC.hpp index 800d3ad92..68fa4e1ef 100644 --- a/src/algorithm/ATOMIC.hpp +++ b/src/algorithm/ATOMIC.hpp @@ -74,6 +74,7 @@ class ATOMIC : public KernelBase void setOpenMPTuningDefinitions(VariantID vid); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); + void setOpenMPTargetTuningDefinitions(VariantID vid); template < size_t replication > void runSeqVariantReplicate(VariantID vid); diff --git a/src/algorithm/REDUCE_SUM-Cuda.cpp b/src/algorithm/REDUCE_SUM-Cuda.cpp index 302ab35d6..836089ab6 100644 --- a/src/algorithm/REDUCE_SUM-Cuda.cpp +++ b/src/algorithm/REDUCE_SUM-Cuda.cpp @@ -239,7 +239,8 @@ void REDUCE_SUM::runCudaVariantRAJANewReduce(VariantID vid) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsum), - [=] __device__ (Index_type i, Real_type& sum) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& sum) { REDUCE_SUM_BODY; } ); diff --git a/src/algorithm/REDUCE_SUM-Hip.cpp b/src/algorithm/REDUCE_SUM-Hip.cpp index 831978015..f7c689593 100644 --- a/src/algorithm/REDUCE_SUM-Hip.cpp +++ b/src/algorithm/REDUCE_SUM-Hip.cpp @@ -266,7 +266,8 @@ void REDUCE_SUM::runHipVariantRAJANewReduce(VariantID vid) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsum), - [=] __device__ (Index_type i, Real_type& sum) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& sum) { REDUCE_SUM_BODY; } ); diff --git a/src/algorithm/REDUCE_SUM-OMP.cpp b/src/algorithm/REDUCE_SUM-OMP.cpp index 1295887f5..6c9cd738e 100644 --- a/src/algorithm/REDUCE_SUM-OMP.cpp +++ b/src/algorithm/REDUCE_SUM-OMP.cpp @@ -76,6 +76,8 @@ void REDUCE_SUM::runOpenMPVariant(VariantID vid, size_t tune_idx) case RAJA_OpenMP : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -83,7 +85,7 @@ void REDUCE_SUM::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::ReduceSum sum(m_sum_init); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { REDUCE_SUM_BODY; @@ -101,10 +103,11 @@ void REDUCE_SUM::runOpenMPVariant(VariantID vid, size_t tune_idx) Real_type tsum = m_sum_init; - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsum), - [=] (Index_type i, Real_type& sum) { + [=] (Index_type i, + RAJA::expt::ValOp& sum) { REDUCE_SUM_BODY; } ); diff --git a/src/algorithm/REDUCE_SUM-OMPTarget.cpp b/src/algorithm/REDUCE_SUM-OMPTarget.cpp index bac174094..3b7482156 100644 --- a/src/algorithm/REDUCE_SUM-OMPTarget.cpp +++ b/src/algorithm/REDUCE_SUM-OMPTarget.cpp @@ -64,7 +64,8 @@ void REDUCE_SUM::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_AR RAJA::forall>( RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsum), - [=] (Index_type i, Real_type& sum) { + [=] (Index_type i, + RAJA::expt::ValOp& sum) { REDUCE_SUM_BODY; } ); diff --git a/src/algorithm/REDUCE_SUM-Seq.cpp b/src/algorithm/REDUCE_SUM-Seq.cpp index 8d4fdacb2..8b2006c13 100644 --- a/src/algorithm/REDUCE_SUM-Seq.cpp +++ b/src/algorithm/REDUCE_SUM-Seq.cpp @@ -76,6 +76,8 @@ void REDUCE_SUM::runSeqVariant(VariantID vid, size_t tune_idx) case RAJA_Seq : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -83,8 +85,8 @@ void REDUCE_SUM::runSeqVariant(VariantID vid, size_t tune_idx) RAJA::ReduceSum sum(m_sum_init); - RAJA::forall( RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { + RAJA::forall(res, + RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { REDUCE_SUM_BODY; }); @@ -100,9 +102,11 @@ void REDUCE_SUM::runSeqVariant(VariantID vid, size_t tune_idx) Real_type tsum = m_sum_init; - RAJA::forall( RAJA::RangeSegment(ibegin, iend), + RAJA::forall(res, + RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsum), - [=] (Index_type i, Real_type& sum) { + [=] (Index_type i, + RAJA::expt::ValOp& sum) { REDUCE_SUM_BODY; } ); diff --git a/src/algorithm/REDUCE_SUM-Sycl.cpp b/src/algorithm/REDUCE_SUM-Sycl.cpp index 516048863..810a71bf2 100644 --- a/src/algorithm/REDUCE_SUM-Sycl.cpp +++ b/src/algorithm/REDUCE_SUM-Sycl.cpp @@ -76,11 +76,13 @@ void REDUCE_SUM::runSyclVariantImpl(VariantID vid) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { Real_type tsum = m_sum_init; + RAJA::forall< RAJA::sycl_exec >( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsum), - [=] (Index_type i, Real_type& sum) { + [=] (Index_type i, + RAJA::expt::ValOp& sum) { REDUCE_SUM_BODY; } ); diff --git a/src/basic/PI_REDUCE-Cuda.cpp b/src/basic/PI_REDUCE-Cuda.cpp index 8529897c3..449c0b634 100644 --- a/src/basic/PI_REDUCE-Cuda.cpp +++ b/src/basic/PI_REDUCE-Cuda.cpp @@ -168,7 +168,8 @@ void PI_REDUCE::runCudaVariantRAJANewReduce(VariantID vid) RAJA::forall< exec_policy >( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tpi), - [=] __device__ (Index_type i, Real_type& pi) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& pi) { PI_REDUCE_BODY; } ); diff --git a/src/basic/PI_REDUCE-Hip.cpp b/src/basic/PI_REDUCE-Hip.cpp index ed2dfd8dd..2db8c8c98 100644 --- a/src/basic/PI_REDUCE-Hip.cpp +++ b/src/basic/PI_REDUCE-Hip.cpp @@ -168,7 +168,8 @@ void PI_REDUCE::runHipVariantRAJANewReduce(VariantID vid) res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tpi), - [=] __device__ (Index_type i, Real_type& pi) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& pi) { PI_REDUCE_BODY; } ); diff --git a/src/basic/PI_REDUCE-OMP.cpp b/src/basic/PI_REDUCE-OMP.cpp index 5c83aba6f..b31160f86 100644 --- a/src/basic/PI_REDUCE-OMP.cpp +++ b/src/basic/PI_REDUCE-OMP.cpp @@ -77,6 +77,8 @@ void PI_REDUCE::runOpenMPVariant(VariantID vid, size_t tune_idx) case RAJA_OpenMP : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -84,9 +86,8 @@ void PI_REDUCE::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::ReduceSum pi(m_pi_init); - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { + RAJA::forall(res, + RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { PI_REDUCE_BODY; }); @@ -102,10 +103,11 @@ void PI_REDUCE::runOpenMPVariant(VariantID vid, size_t tune_idx) Real_type tpi = m_pi_init; - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tpi), - [=] (Index_type i, Real_type& pi) { + [=] (Index_type i, + RAJA::expt::ValOp& pi) { PI_REDUCE_BODY; } ); diff --git a/src/basic/PI_REDUCE-OMPTarget.cpp b/src/basic/PI_REDUCE-OMPTarget.cpp index c74f3d551..efdef965c 100644 --- a/src/basic/PI_REDUCE-OMPTarget.cpp +++ b/src/basic/PI_REDUCE-OMPTarget.cpp @@ -64,7 +64,8 @@ void PI_REDUCE::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG RAJA::forall>( RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tpi), - [=] (Index_type i, Real_type& pi) { + [=] (Index_type i, + RAJA::expt::ValOp& pi) { PI_REDUCE_BODY; } ); diff --git a/src/basic/PI_REDUCE-Seq.cpp b/src/basic/PI_REDUCE-Seq.cpp index 4a5b28815..b139d5fc2 100644 --- a/src/basic/PI_REDUCE-Seq.cpp +++ b/src/basic/PI_REDUCE-Seq.cpp @@ -77,6 +77,8 @@ void PI_REDUCE::runSeqVariant(VariantID vid, size_t tune_idx) case RAJA_Seq : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -84,7 +86,7 @@ void PI_REDUCE::runSeqVariant(VariantID vid, size_t tune_idx) RAJA::ReduceSum pi(m_pi_init); - RAJA::forall( RAJA::RangeSegment(ibegin, iend), + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { PI_REDUCE_BODY; }); @@ -101,9 +103,11 @@ void PI_REDUCE::runSeqVariant(VariantID vid, size_t tune_idx) Real_type tpi = m_pi_init; - RAJA::forall( RAJA::RangeSegment(ibegin, iend), + RAJA::forall(res, + RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tpi), - [=] (Index_type i, Real_type& pi) { + [=] (Index_type i, + RAJA::expt::ValOp& pi) { PI_REDUCE_BODY; } ); diff --git a/src/basic/PI_REDUCE-Sycl.cpp b/src/basic/PI_REDUCE-Sycl.cpp index c95e29583..3f09ffdf7 100644 --- a/src/basic/PI_REDUCE-Sycl.cpp +++ b/src/basic/PI_REDUCE-Sycl.cpp @@ -87,7 +87,8 @@ void PI_REDUCE::runSyclVariantImpl(VariantID vid) res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tpi), - [=] (Index_type i, Real_type& pi) { + [=] (Index_type i, + RAJA::expt::ValOp& pi) { PI_REDUCE_BODY; } ); diff --git a/src/basic/REDUCE3_INT-Cuda.cpp b/src/basic/REDUCE3_INT-Cuda.cpp index a8d68b31c..cf7bb9716 100644 --- a/src/basic/REDUCE3_INT-Cuda.cpp +++ b/src/basic/REDUCE3_INT-Cuda.cpp @@ -194,8 +194,10 @@ void REDUCE3_INT::runCudaVariantRAJANewReduce(VariantID vid) RAJA::expt::Reduce(&tvmin), RAJA::expt::Reduce(&tvmax), [=] __device__ (Index_type i, - Int_type& vsum, Int_type& vmin, Int_type& vmax) { - REDUCE3_INT_BODY; + RAJA::expt::ValOp& vsum, + RAJA::expt::ValOp& vmin, + RAJA::expt::ValOp& vmax) { + REDUCE3_INT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE3_INT-Hip.cpp b/src/basic/REDUCE3_INT-Hip.cpp index 12d172de7..f28aecc5b 100644 --- a/src/basic/REDUCE3_INT-Hip.cpp +++ b/src/basic/REDUCE3_INT-Hip.cpp @@ -194,8 +194,10 @@ void REDUCE3_INT::runHipVariantRAJANewReduce(VariantID vid) RAJA::expt::Reduce(&tvmin), RAJA::expt::Reduce(&tvmax), [=] __device__ (Index_type i, - Int_type& vsum, Int_type& vmin, Int_type& vmax) { - REDUCE3_INT_BODY; + RAJA::expt::ValOp& vsum, + RAJA::expt::ValOp& vmin, + RAJA::expt::ValOp& vmax) { + REDUCE3_INT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE3_INT-OMP.cpp b/src/basic/REDUCE3_INT-OMP.cpp index c9848ac98..fedbe96a3 100644 --- a/src/basic/REDUCE3_INT-OMP.cpp +++ b/src/basic/REDUCE3_INT-OMP.cpp @@ -91,6 +91,8 @@ void REDUCE3_INT::runOpenMPVariant(VariantID vid, size_t tune_idx) case RAJA_OpenMP : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -100,7 +102,7 @@ void REDUCE3_INT::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::ReduceMin vmin(m_vmin_init); RAJA::ReduceMax vmax(m_vmax_init); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { REDUCE3_INT_BODY_RAJA; }); @@ -121,13 +123,16 @@ void REDUCE3_INT::runOpenMPVariant(VariantID vid, size_t tune_idx) Int_type tvmin = m_vmin_init; Int_type tvmax = m_vmax_init; - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tvsum), RAJA::expt::Reduce(&tvmin), RAJA::expt::Reduce(&tvmax), - [=](Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) { - REDUCE3_INT_BODY; + [=](Index_type i, + RAJA::expt::ValOp& vsum, + RAJA::expt::ValOp& vmin, + RAJA::expt::ValOp& vmax) { + REDUCE3_INT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE3_INT-OMPTarget.cpp b/src/basic/REDUCE3_INT-OMPTarget.cpp index 5cd18f176..4a1c08349 100644 --- a/src/basic/REDUCE3_INT-OMPTarget.cpp +++ b/src/basic/REDUCE3_INT-OMPTarget.cpp @@ -74,8 +74,11 @@ void REDUCE3_INT::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_A RAJA::expt::Reduce(&tvsum), RAJA::expt::Reduce(&tvmin), RAJA::expt::Reduce(&tvmax), - [=](Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) { - REDUCE3_INT_BODY; + [=](Index_type i, + RAJA::expt::ValOp& vsum, + RAJA::expt::ValOp& vmin, + RAJA::expt::ValOp& vmax) { + REDUCE3_INT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE3_INT-Seq.cpp b/src/basic/REDUCE3_INT-Seq.cpp index 32bcfbef6..68949e57f 100644 --- a/src/basic/REDUCE3_INT-Seq.cpp +++ b/src/basic/REDUCE3_INT-Seq.cpp @@ -87,6 +87,8 @@ void REDUCE3_INT::runSeqVariant(VariantID vid, size_t tune_idx) case RAJA_Seq : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -96,7 +98,7 @@ void REDUCE3_INT::runSeqVariant(VariantID vid, size_t tune_idx) RAJA::ReduceMin vmin(m_vmin_init); RAJA::ReduceMax vmax(m_vmax_init); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { REDUCE3_INT_BODY_RAJA; }); @@ -117,13 +119,16 @@ void REDUCE3_INT::runSeqVariant(VariantID vid, size_t tune_idx) Int_type tvmin = m_vmin_init; Int_type tvmax = m_vmax_init; - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tvsum), RAJA::expt::Reduce(&tvmin), RAJA::expt::Reduce(&tvmax), - [=](Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) { - REDUCE3_INT_BODY; + [=](Index_type i, + RAJA::expt::ValOp& vsum, + RAJA::expt::ValOp& vmin, + RAJA::expt::ValOp& vmax) { + REDUCE3_INT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE3_INT-Sycl.cpp b/src/basic/REDUCE3_INT-Sycl.cpp index 58ac6f082..dbf81acaa 100644 --- a/src/basic/REDUCE3_INT-Sycl.cpp +++ b/src/basic/REDUCE3_INT-Sycl.cpp @@ -110,8 +110,11 @@ void REDUCE3_INT::runSyclVariantImpl(VariantID vid) RAJA::expt::Reduce(&tvsum), RAJA::expt::Reduce(&tvmin), RAJA::expt::Reduce(&tvmax), - [=] (Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) { - REDUCE3_INT_BODY; + [=] (Index_type i, + RAJA::expt::ValOp& vsum, + RAJA::expt::ValOp& vmin, + RAJA::expt::ValOp& vmax) { + REDUCE3_INT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE_STRUCT-Cuda.cpp b/src/basic/REDUCE_STRUCT-Cuda.cpp index 898b453f0..2ac752316 100644 --- a/src/basic/REDUCE_STRUCT-Cuda.cpp +++ b/src/basic/REDUCE_STRUCT-Cuda.cpp @@ -233,7 +233,7 @@ void REDUCE_STRUCT::runCudaVariantRAJANewReduce(VariantID vid) Real_type txmax = m_init_max; Real_type tymax = m_init_max; - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&txsum), RAJA::expt::Reduce(&tysum), @@ -241,10 +241,14 @@ void REDUCE_STRUCT::runCudaVariantRAJANewReduce(VariantID vid) RAJA::expt::Reduce(&tymin), RAJA::expt::Reduce(&txmax), RAJA::expt::Reduce(&tymax), - [=] __device__ (Index_type i, Real_type& xsum, Real_type& ysum, - Real_type& xmin, Real_type& ymin, - Real_type& xmax, Real_type& ymax) { - REDUCE_STRUCT_BODY; + [=] __device__ (Index_type i, + RAJA::expt::ValOp& xsum, + RAJA::expt::ValOp& ysum, + RAJA::expt::ValOp& xmin, + RAJA::expt::ValOp& ymin, + RAJA::expt::ValOp& xmax, + RAJA::expt::ValOp& ymax ) { + REDUCE_STRUCT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE_STRUCT-Hip.cpp b/src/basic/REDUCE_STRUCT-Hip.cpp index 17fe5ad83..cac5a2989 100644 --- a/src/basic/REDUCE_STRUCT-Hip.cpp +++ b/src/basic/REDUCE_STRUCT-Hip.cpp @@ -241,10 +241,14 @@ void REDUCE_STRUCT::runHipVariantRAJANewReduce(VariantID vid) RAJA::expt::Reduce(&tymin), RAJA::expt::Reduce(&txmax), RAJA::expt::Reduce(&tymax), - [=] __device__ (Index_type i, Real_type& xsum, Real_type& ysum, - Real_type& xmin, Real_type& ymin, - Real_type& xmax, Real_type& ymax) { - REDUCE_STRUCT_BODY; + [=] __device__ (Index_type i, + RAJA::expt::ValOp& xsum, + RAJA::expt::ValOp& ysum, + RAJA::expt::ValOp& xmin, + RAJA::expt::ValOp& ymin, + RAJA::expt::ValOp& xmax, + RAJA::expt::ValOp& ymax ) { + REDUCE_STRUCT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE_STRUCT-OMP.cpp b/src/basic/REDUCE_STRUCT-OMP.cpp index 8c44d02c0..c7ef77de8 100644 --- a/src/basic/REDUCE_STRUCT-OMP.cpp +++ b/src/basic/REDUCE_STRUCT-OMP.cpp @@ -110,6 +110,8 @@ void REDUCE_STRUCT::runOpenMPVariant(VariantID vid, size_t tune_idx) case RAJA_OpenMP : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -122,7 +124,7 @@ void REDUCE_STRUCT::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::ReduceMax xmax(m_init_max); RAJA::ReduceMax ymax(m_init_max); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { REDUCE_STRUCT_BODY_RAJA; }); @@ -150,7 +152,7 @@ void REDUCE_STRUCT::runOpenMPVariant(VariantID vid, size_t tune_idx) Real_type txmax = m_init_max; Real_type tymax = m_init_max; - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&txsum), RAJA::expt::Reduce(&tysum), @@ -158,10 +160,14 @@ void REDUCE_STRUCT::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::expt::Reduce(&tymin), RAJA::expt::Reduce(&txmax), RAJA::expt::Reduce(&tymax), - [=](Index_type i, Real_type& xsum, Real_type& ysum, - Real_type& xmin, Real_type& ymin, - Real_type& xmax, Real_type& ymax) { - REDUCE_STRUCT_BODY; + [=](Index_type i, + RAJA::expt::ValOp& xsum, + RAJA::expt::ValOp& ysum, + RAJA::expt::ValOp& xmin, + RAJA::expt::ValOp& ymin, + RAJA::expt::ValOp& xmax, + RAJA::expt::ValOp& ymax ) { + REDUCE_STRUCT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE_STRUCT-OMPTarget.cpp b/src/basic/REDUCE_STRUCT-OMPTarget.cpp index f8775bf71..543e314d8 100644 --- a/src/basic/REDUCE_STRUCT-OMPTarget.cpp +++ b/src/basic/REDUCE_STRUCT-OMPTarget.cpp @@ -101,10 +101,14 @@ void REDUCE_STRUCT::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED RAJA::expt::Reduce(&tymin), RAJA::expt::Reduce(&txmax), RAJA::expt::Reduce(&tymax), - [=](Index_type i, Real_type& xsum, Real_type& ysum, - Real_type& xmin, Real_type& ymin, - Real_type& xmax, Real_type& ymax) { - REDUCE_STRUCT_BODY; + [=](Index_type i, + RAJA::expt::ValOp& xsum, + RAJA::expt::ValOp& ysum, + RAJA::expt::ValOp& xmin, + RAJA::expt::ValOp& ymin, + RAJA::expt::ValOp& xmax, + RAJA::expt::ValOp& ymax ) { + REDUCE_STRUCT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE_STRUCT-Seq.cpp b/src/basic/REDUCE_STRUCT-Seq.cpp index 1e2a68d43..06da6af92 100644 --- a/src/basic/REDUCE_STRUCT-Seq.cpp +++ b/src/basic/REDUCE_STRUCT-Seq.cpp @@ -100,6 +100,8 @@ void REDUCE_STRUCT::runSeqVariant(VariantID vid, size_t tune_idx) case RAJA_Seq : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -112,7 +114,7 @@ void REDUCE_STRUCT::runSeqVariant(VariantID vid, size_t tune_idx) RAJA::ReduceMax xmax(m_init_max); RAJA::ReduceMax ymax(m_init_max); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { REDUCE_STRUCT_BODY_RAJA; }); @@ -140,7 +142,7 @@ void REDUCE_STRUCT::runSeqVariant(VariantID vid, size_t tune_idx) Real_type txmax = m_init_max; Real_type tymax = m_init_max; - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&txsum), RAJA::expt::Reduce(&tysum), @@ -148,10 +150,14 @@ void REDUCE_STRUCT::runSeqVariant(VariantID vid, size_t tune_idx) RAJA::expt::Reduce(&tymin), RAJA::expt::Reduce(&txmax), RAJA::expt::Reduce(&tymax), - [=](Index_type i, Real_type& xsum, Real_type& ysum, - Real_type& xmin, Real_type& ymin, - Real_type& xmax, Real_type& ymax) { - REDUCE_STRUCT_BODY; + [=](Index_type i, + RAJA::expt::ValOp& xsum, + RAJA::expt::ValOp& ysum, + RAJA::expt::ValOp& xmin, + RAJA::expt::ValOp& ymin, + RAJA::expt::ValOp& xmax, + RAJA::expt::ValOp& ymax ) { + REDUCE_STRUCT_BODY_RAJA; } ); diff --git a/src/basic/TRAP_INT-Cuda.cpp b/src/basic/TRAP_INT-Cuda.cpp index e58e86923..717fef6f5 100644 --- a/src/basic/TRAP_INT-Cuda.cpp +++ b/src/basic/TRAP_INT-Cuda.cpp @@ -175,7 +175,8 @@ void TRAP_INT::runCudaVariantRAJANewReduce(VariantID vid) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsumx), - [=] __device__ (Index_type i, Real_type& sumx) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& sumx) { TRAP_INT_BODY; } ); diff --git a/src/basic/TRAP_INT-Hip.cpp b/src/basic/TRAP_INT-Hip.cpp index e60b3ccff..de3140258 100644 --- a/src/basic/TRAP_INT-Hip.cpp +++ b/src/basic/TRAP_INT-Hip.cpp @@ -176,7 +176,8 @@ void TRAP_INT::runHipVariantRAJANewReduce(VariantID vid) res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsumx), - [=] __device__ (Index_type i, Real_type& sumx) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& sumx) { TRAP_INT_BODY; } ); diff --git a/src/basic/TRAP_INT-OMP.cpp b/src/basic/TRAP_INT-OMP.cpp index f1961483a..5decf749f 100644 --- a/src/basic/TRAP_INT-OMP.cpp +++ b/src/basic/TRAP_INT-OMP.cpp @@ -79,6 +79,8 @@ void TRAP_INT::runOpenMPVariant(VariantID vid, size_t tune_idx) case RAJA_OpenMP : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -86,7 +88,7 @@ void TRAP_INT::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::ReduceSum sumx(m_sumx_init); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { TRAP_INT_BODY; }); @@ -103,10 +105,11 @@ void TRAP_INT::runOpenMPVariant(VariantID vid, size_t tune_idx) Real_type tsumx = m_sumx_init; - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsumx), - [=] (Index_type i, Real_type& sumx) { + [=] (Index_type i, + RAJA::expt::ValOp& sumx) { TRAP_INT_BODY; } ); diff --git a/src/basic/TRAP_INT-OMPTarget.cpp b/src/basic/TRAP_INT-OMPTarget.cpp index 9fde43876..3a5d76306 100644 --- a/src/basic/TRAP_INT-OMPTarget.cpp +++ b/src/basic/TRAP_INT-OMPTarget.cpp @@ -71,7 +71,8 @@ void TRAP_INT::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( RAJA::forall>( RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsumx), - [=] (Index_type i, Real_type& sumx) { + [=] (Index_type i, + RAJA::expt::ValOp& sumx) { TRAP_INT_BODY; } ); diff --git a/src/basic/TRAP_INT-Seq.cpp b/src/basic/TRAP_INT-Seq.cpp index fa74efdcf..c998ebfa7 100644 --- a/src/basic/TRAP_INT-Seq.cpp +++ b/src/basic/TRAP_INT-Seq.cpp @@ -79,6 +79,8 @@ void TRAP_INT::runSeqVariant(VariantID vid, size_t tune_idx) case RAJA_Seq : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -86,7 +88,7 @@ void TRAP_INT::runSeqVariant(VariantID vid, size_t tune_idx) RAJA::ReduceSum sumx(m_sumx_init); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { TRAP_INT_BODY; }); @@ -103,10 +105,11 @@ void TRAP_INT::runSeqVariant(VariantID vid, size_t tune_idx) Real_type tsumx = m_sumx_init; - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsumx), - [=] (Index_type i, Real_type& sumx) { + [=] (Index_type i, + RAJA::expt::ValOp& sumx) { TRAP_INT_BODY; } ); diff --git a/src/basic/TRAP_INT-Sycl.cpp b/src/basic/TRAP_INT-Sycl.cpp index a9795c77e..b1ce89d9b 100644 --- a/src/basic/TRAP_INT-Sycl.cpp +++ b/src/basic/TRAP_INT-Sycl.cpp @@ -85,7 +85,8 @@ void TRAP_INT::runSyclVariantImpl(VariantID vid) res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsumx), - [=] (Index_type i, Real_type& sumx) { + [=] (Index_type i, + RAJA::expt::ValOp& sumx) { TRAP_INT_BODY; } ); diff --git a/src/lcals/FIRST_MIN-Cuda.cpp b/src/lcals/FIRST_MIN-Cuda.cpp index 08f2ab240..11d11b46a 100644 --- a/src/lcals/FIRST_MIN-Cuda.cpp +++ b/src/lcals/FIRST_MIN-Cuda.cpp @@ -168,22 +168,23 @@ void FIRST_MIN::runCudaVariantRAJANewReduce(VariantID vid) if ( vid == RAJA_CUDA ) { - using VL_TYPE = RAJA::expt::ValLoc; - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - VL_TYPE tloc(m_xmin_init, m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), - RAJA::expt::Reduce(&tloc), - [=] __device__ (Index_type i, VL_TYPE& loc) { - loc.min(x[i], i); + RAJA::expt::Reduce(&tminloc), + [=] __device__ (Index_type i, + RAJA::expt::ValLocOp& minloc) { + minloc.minloc(x[i], i); } ); - m_minloc = static_cast(tloc.getLoc()); + m_minloc = static_cast(tminloc.getLoc()); } stopTimer(); diff --git a/src/lcals/FIRST_MIN-Hip.cpp b/src/lcals/FIRST_MIN-Hip.cpp index 3c6fd7b35..b602b4fca 100644 --- a/src/lcals/FIRST_MIN-Hip.cpp +++ b/src/lcals/FIRST_MIN-Hip.cpp @@ -168,22 +168,23 @@ void FIRST_MIN::runHipVariantRAJANewReduce(VariantID vid) if ( vid == RAJA_HIP ) { - using VL_TYPE = RAJA::expt::ValLoc; - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - VL_TYPE tloc(m_xmin_init, m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), - RAJA::expt::Reduce(&tloc), - [=] __device__ (Index_type i, VL_TYPE& loc) { - loc.min(x[i], i); + RAJA::expt::Reduce(&tminloc), + [=] __device__ (Index_type i, + RAJA::expt::ValLocOp& minloc) { + minloc.minloc(x[i], i); } ); - m_minloc = static_cast(tloc.getLoc()); + m_minloc = static_cast(tminloc.getLoc()); } stopTimer(); diff --git a/src/lcals/FIRST_MIN-OMP.cpp b/src/lcals/FIRST_MIN-OMP.cpp index a9a7f1ba1..0a90546ca 100644 --- a/src/lcals/FIRST_MIN-OMP.cpp +++ b/src/lcals/FIRST_MIN-OMP.cpp @@ -87,6 +87,8 @@ void FIRST_MIN::runOpenMPVariant(VariantID vid, size_t tune_idx) case RAJA_OpenMP : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -95,7 +97,7 @@ void FIRST_MIN::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::ReduceMinLoc loc( m_xmin_init, m_initloc); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { FIRST_MIN_BODY_RAJA; }); @@ -107,22 +109,23 @@ void FIRST_MIN::runOpenMPVariant(VariantID vid, size_t tune_idx) } else if (tune_idx == 1) { - using VL_TYPE = RAJA::expt::ValLoc; - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - VL_TYPE tloc(m_xmin_init, m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), - RAJA::expt::Reduce(&tloc), - [=](Index_type i, VL_TYPE& loc) { - loc.min(x[i], i); + RAJA::expt::Reduce(&tminloc), + [=](Index_type i, + RAJA::expt::ValLocOp& minloc) { + minloc.minloc(x[i], i); } ); - m_minloc = static_cast(tloc.getLoc()); + m_minloc = static_cast(tminloc.getLoc()); } stopTimer(); diff --git a/src/lcals/FIRST_MIN-OMPTarget.cpp b/src/lcals/FIRST_MIN-OMPTarget.cpp index 14991e1b7..906c73127 100644 --- a/src/lcals/FIRST_MIN-OMPTarget.cpp +++ b/src/lcals/FIRST_MIN-OMPTarget.cpp @@ -60,22 +60,23 @@ void FIRST_MIN::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG } else if ( vid == RAJA_OpenMPTarget ) { - using VL_TYPE = RAJA::expt::ValLoc; - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - VL_TYPE tloc(m_xmin_init, m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); RAJA::forall>( RAJA::RangeSegment(ibegin, iend), - RAJA::expt::Reduce(&tloc), - [=](Index_type i, VL_TYPE& loc) { - loc.min(x[i], i); + RAJA::expt::Reduce(&tminloc), + [=](Index_type i, + RAJA::expt::ValLocOp& minloc) { + minloc.minloc(x[i], i); } ); - m_minloc = static_cast(tloc.getLoc()); + m_minloc = static_cast(tminloc.getLoc()); } stopTimer(); diff --git a/src/lcals/FIRST_MIN-Seq.cpp b/src/lcals/FIRST_MIN-Seq.cpp index a32ed4962..89bd3c4a0 100644 --- a/src/lcals/FIRST_MIN-Seq.cpp +++ b/src/lcals/FIRST_MIN-Seq.cpp @@ -79,15 +79,17 @@ void FIRST_MIN::runSeqVariant(VariantID vid, size_t tune_idx) case RAJA_Seq : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { RAJA::ReduceMinLoc loc( - m_xmin_init, m_initloc); + m_xmin_init, m_initloc); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { FIRST_MIN_BODY_RAJA; }); @@ -99,22 +101,23 @@ void FIRST_MIN::runSeqVariant(VariantID vid, size_t tune_idx) } else if (tune_idx == 1) { - using VL_TYPE = RAJA::expt::ValLoc; - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - VL_TYPE tloc(m_xmin_init, m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), - RAJA::expt::Reduce(&tloc), - [=](Index_type i, VL_TYPE& loc) { - loc.min(x[i], i); + RAJA::expt::Reduce(&tminloc), + [=](Index_type i, + RAJA::expt::ValLocOp& minloc) { + minloc.minloc(x[i], i); } ); - m_minloc = static_cast(tloc.getLoc()); + m_minloc = static_cast(tminloc.getLoc()); } stopTimer(); diff --git a/src/lcals/FIRST_MIN-Sycl.cpp b/src/lcals/FIRST_MIN-Sycl.cpp index 616c84dcb..6cd00ea38 100644 --- a/src/lcals/FIRST_MIN-Sycl.cpp +++ b/src/lcals/FIRST_MIN-Sycl.cpp @@ -84,23 +84,24 @@ void FIRST_MIN::runSyclVariantImpl(VariantID vid) } else if ( vid == RAJA_SYCL ) { - using VL_TYPE = RAJA::expt::ValLoc; - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - VL_TYPE tloc(m_xmin_init, m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); RAJA::forall< RAJA::sycl_exec >( res, RAJA::RangeSegment(ibegin, iend), - RAJA::expt::Reduce(&tloc), - [=] (Index_type i, VL_TYPE& loc) { - loc.min(x[i], i); + RAJA::expt::Reduce(&tminloc), + [=] (Index_type i, + RAJA::expt::ValLocOp& minloc) { + minloc.minloc(x[i], i); } ); - m_minloc = static_cast(tloc.getLoc()); + m_minloc = static_cast(tminloc.getLoc()); } stopTimer(); diff --git a/src/stream/DOT-Cuda.cpp b/src/stream/DOT-Cuda.cpp index 031355a3e..c8910ee8c 100644 --- a/src/stream/DOT-Cuda.cpp +++ b/src/stream/DOT-Cuda.cpp @@ -164,7 +164,8 @@ void DOT::runCudaVariantRAJANewReduce(VariantID vid) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tdot), - [=] __device__ (Index_type i, Real_type& dot) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& dot) { DOT_BODY; } ); diff --git a/src/stream/DOT-Hip.cpp b/src/stream/DOT-Hip.cpp index 0c3c914a9..24984f300 100644 --- a/src/stream/DOT-Hip.cpp +++ b/src/stream/DOT-Hip.cpp @@ -164,7 +164,8 @@ void DOT::runHipVariantRAJANewReduce(VariantID vid) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tdot), - [=] __device__ (Index_type i, Real_type& dot) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& dot) { DOT_BODY; } ); diff --git a/src/stream/DOT-OMP.cpp b/src/stream/DOT-OMP.cpp index d7112336a..28ccdfc30 100644 --- a/src/stream/DOT-OMP.cpp +++ b/src/stream/DOT-OMP.cpp @@ -76,6 +76,8 @@ void DOT::runOpenMPVariant(VariantID vid, size_t tune_idx) case RAJA_OpenMP : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -83,7 +85,7 @@ void DOT::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::ReduceSum dot(m_dot_init); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { DOT_BODY; }); @@ -100,10 +102,11 @@ void DOT::runOpenMPVariant(VariantID vid, size_t tune_idx) Real_type tdot = m_dot_init; - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tdot), - [=] (Index_type i, Real_type& dot) { + [=] (Index_type i, + RAJA::expt::ValOp& dot) { DOT_BODY; } ); diff --git a/src/stream/DOT-OMPTarget.cpp b/src/stream/DOT-OMPTarget.cpp index fd7d02a70..10a7bfea6 100644 --- a/src/stream/DOT-OMPTarget.cpp +++ b/src/stream/DOT-OMPTarget.cpp @@ -68,7 +68,8 @@ void DOT::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ RAJA::forall>( RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tdot), - [=] (Index_type i, Real_type& dot) { + [=] (Index_type i, + RAJA::expt::ValOp& dot) { DOT_BODY; } ); diff --git a/src/stream/DOT-Seq.cpp b/src/stream/DOT-Seq.cpp index 4d359775f..8c57ac02c 100644 --- a/src/stream/DOT-Seq.cpp +++ b/src/stream/DOT-Seq.cpp @@ -76,6 +76,8 @@ void DOT::runSeqVariant(VariantID vid, size_t tune_idx) case RAJA_Seq : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -83,7 +85,7 @@ void DOT::runSeqVariant(VariantID vid, size_t tune_idx) RAJA::ReduceSum dot(m_dot_init); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { DOT_BODY; }); @@ -100,9 +102,11 @@ void DOT::runSeqVariant(VariantID vid, size_t tune_idx) Real_type tdot = m_dot_init; - RAJA::forall( RAJA::RangeSegment(ibegin, iend), + RAJA::forall(res, + RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tdot), - [=] (Index_type i, Real_type& dot) { + [=] (Index_type i, + RAJA::expt::ValOp& dot) { DOT_BODY; } ); diff --git a/src/stream/DOT-Sycl.cpp b/src/stream/DOT-Sycl.cpp index 250f0b680..4f3fb40f5 100644 --- a/src/stream/DOT-Sycl.cpp +++ b/src/stream/DOT-Sycl.cpp @@ -76,11 +76,13 @@ void DOT::runSyclVariantImpl(VariantID vid) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { Real_type tdot = m_dot_init; + RAJA::forall< RAJA::sycl_exec >( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tdot), - [=] (Index_type i, Real_type& dot) { + [=] (Index_type i, + RAJA::expt::ValOp& dot) { DOT_BODY; } ); diff --git a/tpl/RAJA b/tpl/RAJA index 378199aac..8b3c04e3d 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 378199aac342ee21c2ddfbcbb48413bd1dfac612 +Subproject commit 8b3c04e3da0cf508d30c98dc03cb4751893195db