Skip to content

Commit

Permalink
Support local_accessor, group::get_local_range() inside hierarchical PF
Browse files Browse the repository at this point in the history
  • Loading branch information
fknorr committed Jan 3, 2024
1 parent cdd637e commit 22c2bcb
Show file tree
Hide file tree
Showing 7 changed files with 137 additions and 93 deletions.
1 change: 0 additions & 1 deletion include/simsycl/detail/group_operation_impl.hh
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,6 @@ struct concurrent_group {
std::vector<concurrent_nd_item *> concurrent_nd_items;
std::vector<allocation> local_memory_allocations;
group_instance instance;
size_t cur_hier_local_size = 0;
};

template<int Dimensions>
Expand Down
46 changes: 35 additions & 11 deletions include/simsycl/detail/schedule.hh
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
#pragma once

#include <cstddef>
#include <cstring>
#include <memory>
#include <vector>
#include "allocation.hh"

#include "../sycl/device.hh"
#include "../sycl/forward.hh"
Expand All @@ -14,6 +11,11 @@
#include "../sycl/nd_range.hh"
#include "../sycl/range.hh"

#include <cstddef>
#include <cstring>
#include <memory>
#include <vector>


namespace simsycl::detail {

Expand Down Expand Up @@ -85,9 +87,15 @@ template<typename WorkgroupFunctionType>
void sequential_for_work_group(sycl::range<1> num_work_groups, std::optional<sycl::range<1>> work_group_size,
const WorkgroupFunctionType &kernel_func) {
sycl::id<1> group_id;
const auto type
= work_group_size.has_value() ? group_type::hierarchical_explicit_size : group_type::hierarchical_implicit_size;
for(group_id[0] = 0; group_id[0] < num_work_groups[0]; ++group_id[0]) {
concurrent_group impl;
sycl::group<1> group = make_hierarchical_group(make_item(group_id, num_work_groups), work_group_size, &impl);
const auto group_item = make_item(group_id, num_work_groups);
const auto local_item = make_item(sycl::id(0), work_group_size.value_or(sycl::range(1)));
const auto global_item = make_item(
group_id * sycl::id(local_item.get_range()), local_item.get_range() * group_item.get_range(), sycl::id(0));
sycl::group<1> group = make_group(type, local_item, global_item, group_item, &impl);
kernel_func(group);
}
}
Expand All @@ -96,11 +104,16 @@ template<typename WorkgroupFunctionType>
void sequential_for_work_group(sycl::range<2> num_work_groups, std::optional<sycl::range<2>> work_group_size,
const WorkgroupFunctionType &kernel_func) {
sycl::id<2> group_id;
const auto type
= work_group_size.has_value() ? group_type::hierarchical_explicit_size : group_type::hierarchical_implicit_size;
for(group_id[0] = 0; group_id[0] < num_work_groups[0]; ++group_id[0]) {
for(group_id[1] = 0; group_id[1] < num_work_groups[1]; ++group_id[1]) {
concurrent_group impl;
sycl::group<2> group
= make_hierarchical_group(make_item(group_id, num_work_groups), work_group_size, &impl);
const auto group_item = make_item(group_id, num_work_groups);
const auto local_item = make_item(sycl::id(0, 0), work_group_size.value_or(sycl::range(1, 1)));
const auto global_item = make_item(group_id * sycl::id(local_item.get_range()),
local_item.get_range() * group_item.get_range(), sycl::id(0, 0));
sycl::group<2> group = make_group(type, local_item, global_item, group_item, &impl);
kernel_func(group);
}
}
Expand All @@ -110,12 +123,17 @@ template<typename WorkgroupFunctionType>
void sequential_for_work_group(sycl::range<3> num_work_groups, std::optional<sycl::range<3>> work_group_size,
const WorkgroupFunctionType &kernel_func) {
sycl::id<3> group_id;
const auto type
= work_group_size.has_value() ? group_type::hierarchical_explicit_size : group_type::hierarchical_implicit_size;
for(group_id[0] = 0; group_id[0] < num_work_groups[0]; ++group_id[0]) {
for(group_id[1] = 0; group_id[1] < num_work_groups[1]; ++group_id[1]) {
for(group_id[2] = 0; group_id[2] < num_work_groups[2]; ++group_id[2]) {
concurrent_group impl;
sycl::group<3> group
= make_hierarchical_group(make_item(group_id, num_work_groups), work_group_size, &impl);
const auto group_item = make_item(group_id, num_work_groups);
const auto local_item = make_item(sycl::id(0, 0, 0), work_group_size.value_or(sycl::range(1, 1, 1)));
const auto global_item = make_item(group_id * sycl::id(local_item.get_range()),
local_item.get_range() * group_item.get_range(), sycl::id(0, 0, 0));
sycl::group<3> group = make_group(type, local_item, global_item, group_item, &impl);
kernel_func(group);
}
}
Expand Down Expand Up @@ -197,10 +215,16 @@ void parallel_for(const sycl::device &device, sycl::nd_range<Dimensions> executi
std::index_sequence<sizeof...(Rest) - 1>());
}

template<int Dimensions>
[[nodiscard]] std::vector<allocation> prepare_hierarchical_parallel_for(const sycl::device &device,
std::optional<sycl::range<Dimensions>> work_group_size, const std::vector<local_memory_requirement> &local_memory);

template<typename KernelName, int Dimensions, typename WorkgroupFunctionType>
void parallel_for_work_group(sycl::range<Dimensions> num_work_groups,
std::optional<sycl::range<Dimensions>> work_group_size, const WorkgroupFunctionType &kernel_func) {
void parallel_for_work_group(const sycl::device &device, sycl::range<Dimensions> num_work_groups,
std::optional<sycl::range<Dimensions>> work_group_size, const std::vector<local_memory_requirement> &local_memory,
const WorkgroupFunctionType &kernel_func) {
register_kernel_on_static_construction<KernelName, WorkgroupFunctionType>();
const auto local_allocations = prepare_hierarchical_parallel_for(device, work_group_size, local_memory);
sequential_for_work_group(num_work_groups, work_group_size, kernel_func);
}

Expand Down
127 changes: 53 additions & 74 deletions include/simsycl/sycl/group.hh
Original file line number Diff line number Diff line change
Expand Up @@ -14,37 +14,39 @@

namespace simsycl::detail {

enum class group_type { nd_range, hierarchical_implicit_size, hierarchical_explicit_size };

template<int Dimensions>
sycl::group<Dimensions> make_group(const sycl::item<Dimensions, false> &local_item,
sycl::group<Dimensions> make_group(const group_type type, const sycl::item<Dimensions, false> &local_item,
const sycl::item<Dimensions, true> &global_item, const sycl::item<Dimensions, false> &group_item,
detail::concurrent_group *impl) {
return sycl::group<Dimensions>(local_item, global_item, group_item, impl);
}

template<int Dimensions>
sycl::group<Dimensions> make_hierarchical_group(const sycl::item<Dimensions, false> &group_item,
const std::optional<sycl::range<Dimensions>> &hier_local_range, detail::concurrent_group *impl) {
return sycl::group<Dimensions>(group_item, hier_local_range, impl);
return sycl::group<Dimensions>(type, local_item, global_item, group_item, impl);
}

template<int Dimensions>
bool is_hierarchical_group(const sycl::group<Dimensions> &g) {
return g.m_hierarchical;
group_type get_group_type(const sycl::group<Dimensions> &g) {
return g.m_type;
}

template<typename G>
template<typename G, int Dimensions>
class hierarchical_group_size_setter {
public:
hierarchical_group_size_setter(G &g, size_t size) : m_g(g) {
m_old_size = get_concurrent_group(m_g).cur_hier_local_size;
get_concurrent_group(m_g).cur_hier_local_size = size;
hierarchical_group_size_setter(G &g, sycl::range<Dimensions> flexible_size)
: m_g(g), m_old_local_item(g.m_local_item), m_old_global_item(g.m_global_item) {
g.m_local_item = simsycl::detail::make_item(sycl::id<Dimensions>(), flexible_size);
g.m_global_item
= simsycl::detail::make_item(sycl::id<Dimensions>(), g.m_group_item.get_range() * flexible_size);
}

~hierarchical_group_size_setter() { get_concurrent_group(m_g).cur_hier_local_size = m_old_size; }
~hierarchical_group_size_setter() {
m_g.m_local_item = m_old_local_item;
m_g.m_global_item = m_old_global_item;
}

private:
G &m_g;
size_t m_old_size;
sycl::item<Dimensions, false> m_old_local_item;
sycl::item<Dimensions, true> m_old_global_item;
};

} // namespace simsycl::detail
Expand Down Expand Up @@ -79,53 +81,35 @@ class group {

size_t get_group_id(int dimension) const { return m_group_item.get_id()[dimension]; }

SIMSYCL_DETAIL_DEPRECATED_IN_SYCL range<Dimensions> get_global_range() const {
[[deprecated("non-standard")]] range<Dimensions> get_global_range() const {
SIMSYCL_CHECK(
!m_hierarchical && "get_global_range is not supported for from within a parallel_for_work_item context");
m_global_item.get_range().size() != 0 && "get_global_range called from hierarchical group scope?");
return m_global_item.get_range();
}

size_t get_global_range(int dimension) const {
SIMSYCL_CHECK(
!m_hierarchical && "get_global_range is not supported for from within a parallel_for_work_item context");
[[deprecated("non-standard")]] size_t get_global_range(int dimension) const {
return get_global_range()[dimension];
}

id_type get_local_id() const {
SIMSYCL_CHECK(
!m_hierarchical && "get_local_id is not supported for from within a parallel_for_work_item context");
SIMSYCL_CHECK(m_type == detail::group_type::nd_range
&& "get_local_id is not supported for from within a parallel_for_work_item context");
return m_local_item.get_id();
}

size_t get_local_id(int dimension) const {
SIMSYCL_CHECK(
!m_hierarchical && "get_local_id is not supported for from within a parallel_for_work_item context");
return get_local_id()[dimension];
}
size_t get_local_id(int dimension) const { return get_local_id()[dimension]; }

size_t get_local_linear_id() const {
SIMSYCL_CHECK(
!m_hierarchical && "get_local_linear_id is not supported for from within a parallel_for_work_item context");
SIMSYCL_CHECK(m_type == detail::group_type::nd_range
&& "get_local_linear_id is not supported for from within a parallel_for_work_item context");
return m_local_item.get_linear_id();
}

range_type get_local_range() const {
SIMSYCL_CHECK(
!m_hierarchical && "get_local_range is not supported for from within a parallel_for_work_item context");
return m_local_item.get_range();
}
range_type get_local_range() const { return m_local_item.get_range(); }

size_t get_local_range(int dimension) const {
SIMSYCL_CHECK(
!m_hierarchical && "get_local_range is not supported for from within a parallel_for_work_item context");
return get_local_range()[dimension];
}
size_t get_local_range(int dimension) const { return get_local_range()[dimension]; }

size_t get_local_linear_range() const {
SIMSYCL_CHECK(
!m_hierarchical && "get_local_range is not supported for from within a parallel_for_work_item context");
return get_local_range().size();
}
size_t get_local_linear_range() const { return get_local_range().size(); }

range_type get_group_range() const { return m_group_item.get_range(); }

Expand All @@ -144,26 +128,27 @@ class group {
size_t get_group_linear_id() const { return m_group_item.get_linear_id(); }

bool leader() const {
SIMSYCL_CHECK(!m_hierarchical && "leader() is not supported for from within a parallel_for_work_item context");
SIMSYCL_CHECK(m_type == detail::group_type::nd_range
&& "leader() is not supported for from within a parallel_for_work_item context");
return (get_local_linear_id() == 0);
}

template<typename WorkItemFunctionT>
void parallel_for_work_item(WorkItemFunctionT func) const {
SIMSYCL_CHECK(m_hierarchical
SIMSYCL_CHECK(m_type != detail::group_type::nd_range
&& "parallel_for_work_item is only supported for from within a parallel_for_work_item context");
SIMSYCL_CHECK(m_hier_local_range.has_value()
SIMSYCL_CHECK(m_type != detail::group_type::hierarchical_implicit_size
&& "parallel_for_work_item(func) without a range argument is only supported in a parallel_for_work_item "
"context with a set local range");
parallel_for_work_item(m_hier_local_range.value(), func);
parallel_for_work_item(m_local_item.get_range(), func);
}

// All parallel_for_work_item calls within a given parallel_for_work_group execution must have the same dimensions
template<typename WorkItemFunctionT>
void parallel_for_work_item(range<Dimensions> flexible_range, WorkItemFunctionT func) const {
SIMSYCL_CHECK(m_hierarchical
SIMSYCL_CHECK(m_type != detail::group_type::nd_range
&& "parallel_for_work_item is only supported for from within a parallel_for_work_item context");
detail::hierarchical_group_size_setter set(*this, flexible_range.size());
detail::hierarchical_group_size_setter set(*this, flexible_range);
if constexpr(Dimensions == 1) {
for(size_t i = 0; i < flexible_range[0]; ++i) {
const auto global_id = m_group_item.get_id() * flexible_range[0] + i;
Expand Down Expand Up @@ -292,33 +277,27 @@ class group {
friend bool operator!=(const group<Dimensions> &lhs, const group<Dimensions> &rhs) { return !(lhs == rhs); }

private:
item<Dimensions, false /* WithOffset */> m_local_item;
item<Dimensions, true /* WithOffset */> m_global_item;
item<Dimensions, false /* WithOffset */> m_group_item;
detail::concurrent_group *m_concurrent_group;

bool m_hierarchical = false;
std::optional<range<Dimensions>> m_hier_local_range;
template<typename G, int D>
friend class detail::hierarchical_group_size_setter;

group(const item<Dimensions, false> &local_item, const item<Dimensions, true> &global_item,
const item<Dimensions, false> &group_item, detail::concurrent_group *impl)
: m_local_item(local_item), m_global_item(global_item), m_group_item(group_item), m_concurrent_group(impl) {}
friend group<Dimensions> detail::make_group<Dimensions>(const detail::group_type type,
const sycl::item<Dimensions, false> &local_item, const sycl::item<Dimensions, true> &global_item,
const sycl::item<Dimensions, false> &group_item, detail::concurrent_group *impl);

group(const item<Dimensions, false> &group_item, const std::optional<range<Dimensions>> &hier_local_range,
detail::concurrent_group *impl)
: m_local_item(group_item), m_global_item(group_item), m_group_item(group_item), m_concurrent_group(impl),
m_hierarchical(true), m_hier_local_range(hier_local_range) {}

friend group<Dimensions> detail::make_group<Dimensions>(const sycl::item<Dimensions, false> &local_item,
const sycl::item<Dimensions, true> &global_item, const sycl::item<Dimensions, false> &group_item,
detail::concurrent_group *impl);
friend detail::group_type detail::get_group_type(const sycl::group<Dimensions> &g);
friend detail::concurrent_group &detail::get_concurrent_group<Dimensions>(const sycl::group<Dimensions> &g);

friend group<Dimensions> detail::make_hierarchical_group<Dimensions>(
const sycl::item<Dimensions, false> &group_item, const std::optional<sycl::range<Dimensions>> &hier_local_range,
detail::concurrent_group *impl);
detail::group_type m_type;
mutable item<Dimensions, false /* WithOffset */> m_local_item; // mutable for hierarchical_group_size_setter
mutable item<Dimensions, true /* WithOffset */> m_global_item; // mutable for hierarchical_group_size_setter
item<Dimensions, false /* WithOffset */> m_group_item;
detail::concurrent_group *m_concurrent_group;

friend bool detail::is_hierarchical_group<Dimensions>(const sycl::group<Dimensions> &g);
friend detail::concurrent_group &detail::get_concurrent_group<Dimensions>(const sycl::group<Dimensions> &g);
group(const detail::group_type type, const item<Dimensions, false> &local_item,
const item<Dimensions, true> &global_item, const item<Dimensions, false> &group_item,
detail::concurrent_group *impl)
: m_type(type), m_local_item(local_item), m_global_item(global_item), m_group_item(group_item),
m_concurrent_group(impl) {}
};

template<int Dimensions>
Expand Down
5 changes: 3 additions & 2 deletions include/simsycl/sycl/handler.hh
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,14 @@ class handler {

template<typename KernelName = simsycl::detail::unnamed_kernel, typename WorkgroupFunctionType, int Dimensions>
void parallel_for_work_group(range<Dimensions> num_work_groups, const WorkgroupFunctionType &kernel_func) {
detail::parallel_for_work_group<KernelName>(num_work_groups, {}, kernel_func);
detail::parallel_for_work_group<KernelName>(m_device, num_work_groups, {}, m_local_memory, kernel_func);
}

template<typename KernelName = simsycl::detail::unnamed_kernel, typename WorkgroupFunctionType, int Dimensions>
void parallel_for_work_group(range<Dimensions> num_work_groups, range<Dimensions> work_group_size,
const WorkgroupFunctionType &kernel_func) {
detail::parallel_for_work_group<KernelName>(num_work_groups, {work_group_size}, kernel_func);
detail::parallel_for_work_group<KernelName>(
m_device, num_work_groups, {work_group_size}, m_local_memory, kernel_func);
}

void single_task(const kernel &kernel_object);
Expand Down
2 changes: 1 addition & 1 deletion include/simsycl/sycl/private_memory.hh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class private_memory {
// Construct the storage if it has not yet been constructed
T &operator()(const h_item<Dimensions> &id) {
if(m_data.empty()) {
size_t num_items = simsycl::detail::get_concurrent_group(m_group).cur_hier_local_size;
size_t num_items = m_group.get_local_linear_range();
m_data.resize(num_items);
}
return m_data[id.get_local().get_linear_id()];
Expand Down
Loading

0 comments on commit 22c2bcb

Please sign in to comment.