diff --git a/include/alpaka/acc/AccGenericSycl.hpp b/include/alpaka/acc/AccGenericSycl.hpp index 2fb0b081977f..ba9b29a782b2 100644 --- a/include/alpaka/acc/AccGenericSycl.hpp +++ b/include/alpaka/acc/AccGenericSycl.hpp @@ -75,25 +75,11 @@ namespace alpaka AccGenericSycl( Vec const& threadElemExtent, - sycl::nd_item work_item, sycl::local_accessor dyn_shared_acc, sycl::local_accessor st_shared_acc) - : WorkDivGenericSycl{threadElemExtent, work_item} - , gb::IdxGbGenericSycl{work_item} - , bt::IdxBtGenericSycl{work_item} - , AtomicHierarchy{} - , math::MathGenericSycl{} + : WorkDivGenericSycl{threadElemExtent} , BlockSharedMemDynGenericSycl{dyn_shared_acc} , BlockSharedMemStGenericSycl{st_shared_acc} - , BlockSyncGenericSycl{work_item} - , IntrinsicGenericSycl{} - , MemFenceGenericSycl{} -# ifdef ALPAKA_DISABLE_VENDOR_RNG - , rand::RandDefault{} -# else - , rand::RandGenericSycl{work_item} -# endif - , warp::WarpGenericSycl{work_item} { } }; diff --git a/include/alpaka/block/sync/BlockSyncGenericSycl.hpp b/include/alpaka/block/sync/BlockSyncGenericSycl.hpp index 67e97493fee4..9461c6028e06 100644 --- a/include/alpaka/block/sync/BlockSyncGenericSycl.hpp +++ b/include/alpaka/block/sync/BlockSyncGenericSycl.hpp @@ -19,11 +19,7 @@ namespace alpaka public: using BlockSyncBase = BlockSyncGenericSycl; - BlockSyncGenericSycl(sycl::nd_item work_item) : my_item{work_item} - { - } - - sycl::nd_item my_item; + BlockSyncGenericSycl() = default; }; } // namespace alpaka @@ -32,20 +28,22 @@ namespace alpaka::trait template struct SyncBlockThreads> { - static auto syncBlockThreads(BlockSyncGenericSycl const& blockSync) -> void + static auto syncBlockThreads(BlockSyncGenericSycl const&) -> void { - blockSync.my_item.barrier(); + auto const item = sycl::ext::oneapi::experimental::this_nd_item(); + item.barrier(); } }; template struct SyncBlockThreadsPredicate> { - static auto syncBlockThreadsPredicate(BlockSyncGenericSycl const& blockSync, int predicate) -> int + static auto syncBlockThreadsPredicate(BlockSyncGenericSycl const&, int predicate) -> int { - auto const group = blockSync.my_item.get_group(); - blockSync.my_item.barrier(); + auto const item = sycl::ext::oneapi::experimental::this_nd_item(); + item.barrier(); + auto const group = item.get_group(); auto const counter = (predicate != 0) ? 1 : 0; return sycl::reduce_over_group(group, counter, sycl::plus<>{}); } @@ -54,11 +52,12 @@ namespace alpaka::trait template struct SyncBlockThreadsPredicate> { - static auto syncBlockThreadsPredicate(BlockSyncGenericSycl const& blockSync, int predicate) -> int + static auto syncBlockThreadsPredicate(BlockSyncGenericSycl const&, int predicate) -> int { - auto const group = blockSync.my_item.get_group(); - blockSync.my_item.barrier(); + auto const item = sycl::ext::oneapi::experimental::this_nd_item(); + item.barrier(); + auto const group = item.get_group(); return static_cast(sycl::all_of_group(group, static_cast(predicate))); } }; @@ -66,11 +65,12 @@ namespace alpaka::trait template struct SyncBlockThreadsPredicate> { - static auto syncBlockThreadsPredicate(BlockSyncGenericSycl const& blockSync, int predicate) -> int + static auto syncBlockThreadsPredicate(BlockSyncGenericSycl const&, int predicate) -> int { - auto const group = blockSync.my_item.get_group(); - blockSync.my_item.barrier(); + auto const item = sycl::ext::oneapi::experimental::this_nd_item(); + item.barrier(); + auto const group = item.get_group(); return static_cast(sycl::any_of_group(group, static_cast(predicate))); } }; diff --git a/include/alpaka/idx/bt/IdxBtGenericSycl.hpp b/include/alpaka/idx/bt/IdxBtGenericSycl.hpp index 54ef78014f1f..d07aeb982955 100644 --- a/include/alpaka/idx/bt/IdxBtGenericSycl.hpp +++ b/include/alpaka/idx/bt/IdxBtGenericSycl.hpp @@ -23,11 +23,7 @@ namespace alpaka::bt public: using IdxBtBase = IdxBtGenericSycl; - explicit IdxBtGenericSycl(sycl::nd_item work_item) : m_item_bt{work_item} - { - } - - sycl::nd_item m_item_bt; + IdxBtGenericSycl() = default; }; } // namespace alpaka::bt @@ -46,22 +42,24 @@ namespace alpaka::trait { //! \return The index of the current thread in the block. template - static auto getIdx(bt::IdxBtGenericSycl const& idx, TWorkDiv const&) -> Vec + static auto getIdx(bt::IdxBtGenericSycl const&, TWorkDiv const&) -> Vec { + auto const item = sycl::ext::oneapi::experimental::this_nd_item(); + if constexpr(TDim::value == 1) - return Vec{static_cast(idx.m_item_bt.get_local_id(0))}; + return Vec{static_cast(item.get_local_id(0))}; else if constexpr(TDim::value == 2) { return Vec{ - static_cast(idx.m_item_bt.get_local_id(1)), - static_cast(idx.m_item_bt.get_local_id(0))}; + static_cast(item.get_local_id(1)), + static_cast(item.get_local_id(0))}; } else { return Vec{ - static_cast(idx.m_item_bt.get_local_id(2)), - static_cast(idx.m_item_bt.get_local_id(1)), - static_cast(idx.m_item_bt.get_local_id(0))}; + static_cast(item.get_local_id(2)), + static_cast(item.get_local_id(1)), + static_cast(item.get_local_id(0))}; } } }; diff --git a/include/alpaka/idx/gb/IdxGbGenericSycl.hpp b/include/alpaka/idx/gb/IdxGbGenericSycl.hpp index 42547effd6c3..ca138be3f8a2 100644 --- a/include/alpaka/idx/gb/IdxGbGenericSycl.hpp +++ b/include/alpaka/idx/gb/IdxGbGenericSycl.hpp @@ -23,11 +23,7 @@ namespace alpaka::gb public: using IdxGbBase = IdxGbGenericSycl; - explicit IdxGbGenericSycl(sycl::nd_item work_item) : m_item_gb{work_item} - { - } - - sycl::nd_item m_item_gb; + IdxGbGenericSycl() = default; }; } // namespace alpaka::gb @@ -46,22 +42,22 @@ namespace alpaka::trait { //! \return The index of the current block in the grid. template - static auto getIdx(gb::IdxGbGenericSycl const& idx, TWorkDiv const&) + static auto getIdx(gb::IdxGbGenericSycl const&, TWorkDiv const&) { + auto const item = sycl::ext::oneapi::experimental::this_nd_item(); + if constexpr(TDim::value == 1) - return Vec(static_cast(idx.m_item_gb.get_group(0))); + return Vec(static_cast(item.get_group(0))); else if constexpr(TDim::value == 2) { - return Vec( - static_cast(idx.m_item_gb.get_group(1)), - static_cast(idx.m_item_gb.get_group(0))); + return Vec(static_cast(item.get_group(1)), static_cast(item.get_group(0))); } else { return Vec( - static_cast(idx.m_item_gb.get_group(2)), - static_cast(idx.m_item_gb.get_group(1)), - static_cast(idx.m_item_gb.get_group(0))); + static_cast(item.get_group(2)), + static_cast(item.get_group(1)), + static_cast(item.get_group(0))); } } }; diff --git a/include/alpaka/kernel/TaskKernelGenericSycl.hpp b/include/alpaka/kernel/TaskKernelGenericSycl.hpp index 9d1c572ecbc0..c788853e07de 100644 --- a/include/alpaka/kernel/TaskKernelGenericSycl.hpp +++ b/include/alpaka/kernel/TaskKernelGenericSycl.hpp @@ -40,10 +40,10 @@ # define LAUNCH_SYCL_KERNEL_IF_SUBGROUP_SIZE_IS(sub_group_size) \ cgh.parallel_for( \ sycl::nd_range{global_size, local_size}, \ - [item_elements, dyn_shared_accessor, st_shared_accessor, k_func, k_args]( \ - sycl::nd_item work_item) [[intel::reqd_sub_group_size(sub_group_size)]] \ + [item_elements, dyn_shared_accessor, st_shared_accessor, k_func, k_args](sycl::nd_item) \ + [[intel::reqd_sub_group_size(sub_group_size)]] \ { \ - auto acc = TAcc{item_elements, work_item, dyn_shared_accessor, st_shared_accessor}; \ + auto acc = TAcc{item_elements, dyn_shared_accessor, st_shared_accessor}; \ core::apply( \ [k_func, &acc](typename std::decay_t const&... args) { k_func(acc, args...); }, \ k_args); \ @@ -52,10 +52,9 @@ # define LAUNCH_SYCL_KERNEL_WITH_DEFAULT_SUBGROUP_SIZE \ cgh.parallel_for( \ sycl::nd_range{global_size, local_size}, \ - [item_elements, dyn_shared_accessor, st_shared_accessor, k_func, k_args]( \ - sycl::nd_item work_item) \ + [item_elements, dyn_shared_accessor, st_shared_accessor, k_func, k_args](sycl::nd_item) \ { \ - auto acc = TAcc{item_elements, work_item, dyn_shared_accessor, st_shared_accessor}; \ + auto acc = TAcc{item_elements, dyn_shared_accessor, st_shared_accessor}; \ core::apply( \ [k_func, &acc](typename std::decay_t const&... args) { k_func(acc, args...); }, \ k_args); \ @@ -65,8 +64,7 @@ throw sycl::exception(sycl::make_error_code(sycl::errc::kernel_not_supported)); \ cgh.parallel_for( \ sycl::nd_range{global_size, local_size}, \ - [item_elements, dyn_shared_accessor, st_shared_accessor, k_func, k_args]( \ - sycl::nd_item work_item) {}); + [item_elements, dyn_shared_accessor, st_shared_accessor, k_func, k_args](sycl::nd_item) {}); namespace alpaka { diff --git a/include/alpaka/rand/RandGenericSycl.hpp b/include/alpaka/rand/RandGenericSycl.hpp index 99329a0a52bb..accb7c959c45 100644 --- a/include/alpaka/rand/RandGenericSycl.hpp +++ b/include/alpaka/rand/RandGenericSycl.hpp @@ -40,11 +40,7 @@ namespace alpaka::rand template struct RandGenericSycl : concepts::Implements> { - explicit RandGenericSycl(sycl::nd_item my_item) : m_item_rand{my_item} - { - } - - sycl::nd_item m_item_rand; + RandGenericSycl() = default; }; # if !defined(ALPAKA_HOST_ONLY) @@ -72,7 +68,8 @@ namespace alpaka::rand Minstd(RandGenericSycl rand, std::uint32_t const& seed) { - oneapi::dpl::minstd_rand engine(seed, rand.m_item_rand.get_global_linear_id()); + auto const item = sycl::ext::oneapi::experimental::this_nd_item(); + oneapi::dpl::minstd_rand engine(seed, item.get_global_linear_id()); rng_engine = engine; } diff --git a/include/alpaka/warp/WarpGenericSycl.hpp b/include/alpaka/warp/WarpGenericSycl.hpp index a9b3a56fe69f..15e012cbe10b 100644 --- a/include/alpaka/warp/WarpGenericSycl.hpp +++ b/include/alpaka/warp/WarpGenericSycl.hpp @@ -20,11 +20,7 @@ namespace alpaka::warp class WarpGenericSycl : public concepts::Implements> { public: - WarpGenericSycl(sycl::nd_item my_item) : m_item_warp{my_item} - { - } - - sycl::nd_item m_item_warp; + WarpGenericSycl() = default; }; } // namespace alpaka::warp @@ -33,9 +29,10 @@ namespace alpaka::warp::trait template struct GetSize> { - static auto getSize(warp::WarpGenericSycl const& warp) -> std::int32_t + static auto getSize(warp::WarpGenericSycl const&) -> std::int32_t { - auto const sub_group = warp.m_item_warp.get_sub_group(); + auto const item = sycl::ext::oneapi::experimental::this_nd_item(); + auto const sub_group = item.get_sub_group(); // SYCL sub-groups are always 1D return static_cast(sub_group.get_max_local_range()[0]); } @@ -47,11 +44,12 @@ namespace alpaka::warp::trait // FIXME This should be std::uint64_t on AMD GCN architectures and on CPU, // but the former is not targeted in alpaka and CPU case is not supported in SYCL yet. // Restrict to warpSize <= 32 for now. - static auto activemask(warp::WarpGenericSycl const& warp) -> std::uint32_t + static auto activemask(warp::WarpGenericSycl const&) -> std::uint32_t { // SYCL has no way of querying this. Since sub-group functions have to be executed in convergent code // regions anyway we return the full mask. - auto const sub_group = warp.m_item_warp.get_sub_group(); + auto const item = sycl::ext::oneapi::experimental::this_nd_item(); + auto const sub_group = item.get_sub_group(); auto const mask = sycl::ext::oneapi::group_ballot(sub_group, true); // FIXME This should be std::uint64_t on AMD GCN architectures and on CPU, // but the former is not targeted in alpaka and CPU case is not supported in SYCL yet. @@ -65,9 +63,10 @@ namespace alpaka::warp::trait template struct All> { - static auto all(warp::WarpGenericSycl const& warp, std::int32_t predicate) -> std::int32_t + static auto all(warp::WarpGenericSycl const&, std::int32_t predicate) -> std::int32_t { - auto const sub_group = warp.m_item_warp.get_sub_group(); + auto const item = sycl::ext::oneapi::experimental::this_nd_item(); + auto const sub_group = item.get_sub_group(); return static_cast(sycl::all_of_group(sub_group, static_cast(predicate))); } }; @@ -75,9 +74,10 @@ namespace alpaka::warp::trait template struct Any> { - static auto any(warp::WarpGenericSycl const& warp, std::int32_t predicate) -> std::int32_t + static auto any(warp::WarpGenericSycl const&, std::int32_t predicate) -> std::int32_t { - auto const sub_group = warp.m_item_warp.get_sub_group(); + auto const item = sycl::ext::oneapi::experimental::this_nd_item(); + auto const sub_group = item.get_sub_group(); return static_cast(sycl::any_of_group(sub_group, static_cast(predicate))); } }; @@ -88,9 +88,10 @@ namespace alpaka::warp::trait // FIXME This should be std::uint64_t on AMD GCN architectures and on CPU, // but the former is not targeted in alpaka and CPU case is not supported in SYCL yet. // Restrict to warpSize <= 32 for now. - static auto ballot(warp::WarpGenericSycl const& warp, std::int32_t predicate) -> std::uint32_t + static auto ballot(warp::WarpGenericSycl const&, std::int32_t predicate) -> std::uint32_t { - auto const sub_group = warp.m_item_warp.get_sub_group(); + auto const item = sycl::ext::oneapi::experimental::this_nd_item(); + auto const sub_group = item.get_sub_group(); auto const mask = sycl::ext::oneapi::group_ballot(sub_group, static_cast(predicate)); // FIXME This should be std::uint64_t on AMD GCN architectures and on CPU, // but the former is not targeted in alpaka and CPU case is not supported in SYCL yet. @@ -105,7 +106,7 @@ namespace alpaka::warp::trait struct Shfl> { template - static auto shfl(warp::WarpGenericSycl const& warp, T value, std::int32_t srcLane, std::int32_t width) + static auto shfl(warp::WarpGenericSycl const&, T value, std::int32_t srcLane, std::int32_t width) { ALPAKA_ASSERT_OFFLOAD(width > 0); ALPAKA_ASSERT_OFFLOAD(srcLane < width); @@ -117,7 +118,8 @@ namespace alpaka::warp::trait Example: If we assume a sub-group size of 32 and a width of 16 we will receive two subdivisions: The first starts at sub-group index 0 and the second at sub-group index 16. For srcLane = 4 the first subdivision will access the value at sub-group index 4 and the second at sub-group index 20. */ - auto const actual_group = warp.m_item_warp.get_sub_group(); + auto const item = sycl::ext::oneapi::experimental::this_nd_item(); + auto const actual_group = item.get_sub_group(); auto const actual_item_id = static_cast(actual_group.get_local_linear_id()); auto const actual_group_id = actual_item_id / width; auto const actual_src_id = static_cast(srcLane + actual_group_id * width); diff --git a/include/alpaka/workdiv/WorkDivGenericSycl.hpp b/include/alpaka/workdiv/WorkDivGenericSycl.hpp index 26e00750e42d..14b6e38a8870 100644 --- a/include/alpaka/workdiv/WorkDivGenericSycl.hpp +++ b/include/alpaka/workdiv/WorkDivGenericSycl.hpp @@ -23,14 +23,11 @@ namespace alpaka public: using WorkDivBase = WorkDivGenericSycl; - WorkDivGenericSycl(Vec const& threadElemExtent, sycl::nd_item work_item) - : m_threadElemExtent{threadElemExtent} - , m_item_workdiv{work_item} + WorkDivGenericSycl(Vec const& threadElemExtent) : m_threadElemExtent{threadElemExtent} { } Vec const& m_threadElemExtent; - sycl::nd_item m_item_workdiv; }; } // namespace alpaka @@ -55,24 +52,26 @@ namespace alpaka::trait struct GetWorkDiv, origin::Grid, unit::Blocks> { //! \return The number of blocks in each dimension of the grid. - static auto getWorkDiv(WorkDivGenericSycl const& workDiv) -> Vec + static auto getWorkDiv(WorkDivGenericSycl const&) -> Vec { + auto const item = sycl::ext::oneapi::experimental::this_nd_item(); + if constexpr(TDim::value == 0) return Vec{}; else if constexpr(TDim::value == 1) - return Vec{static_cast(workDiv.m_item_workdiv.get_group_range(0))}; + return Vec{static_cast(item.get_group_range(0))}; else if constexpr(TDim::value == 2) { return Vec{ - static_cast(workDiv.m_item_workdiv.get_group_range(1)), - static_cast(workDiv.m_item_workdiv.get_group_range(0))}; + static_cast(item.get_group_range(1)), + static_cast(item.get_group_range(0))}; } else { return Vec{ - static_cast(workDiv.m_item_workdiv.get_group_range(2)), - static_cast(workDiv.m_item_workdiv.get_group_range(1)), - static_cast(workDiv.m_item_workdiv.get_group_range(0))}; + static_cast(item.get_group_range(2)), + static_cast(item.get_group_range(1)), + static_cast(item.get_group_range(0))}; } } }; @@ -82,24 +81,26 @@ namespace alpaka::trait struct GetWorkDiv, origin::Block, unit::Threads> { //! \return The number of threads in each dimension of a block. - static auto getWorkDiv(WorkDivGenericSycl const& workDiv) -> Vec + static auto getWorkDiv(WorkDivGenericSycl const&) -> Vec { + auto const item = sycl::ext::oneapi::experimental::this_nd_item(); + if constexpr(TDim::value == 0) return Vec{}; else if constexpr(TDim::value == 1) - return Vec{static_cast(workDiv.m_item_workdiv.get_local_range(0))}; + return Vec{static_cast(item.get_local_range(0))}; else if constexpr(TDim::value == 2) { return Vec{ - static_cast(workDiv.m_item_workdiv.get_local_range(1)), - static_cast(workDiv.m_item_workdiv.get_local_range(0))}; + static_cast(item.get_local_range(1)), + static_cast(item.get_local_range(0))}; } else { return Vec{ - static_cast(workDiv.m_item_workdiv.get_local_range(2)), - static_cast(workDiv.m_item_workdiv.get_local_range(1)), - static_cast(workDiv.m_item_workdiv.get_local_range(0))}; + static_cast(item.get_local_range(2)), + static_cast(item.get_local_range(1)), + static_cast(item.get_local_range(0))}; } } };