Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
PeterTh committed Jan 9, 2024
1 parent a70e083 commit c38ab43
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 5 deletions.
7 changes: 6 additions & 1 deletion include/simsycl/detail/parallel_for.hh
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,9 @@ void execute_parallel_for(const sycl::range<Dimensions> &range, const Offset &of
const KernelFunc &func,
Reducers &...reducers) //
{
printf("execute_parallel_for 71\n");
register_kernel_on_static_construction<KernelName, KernelFunc>();
printf("execute_parallel_for 73\n");

simple_kernel<Dimensions, with_offset_v<Offset>> kernel;
if constexpr(std::is_invocable_v<const KernelFunc, sycl::item<Dimensions, with_offset_v<Offset>>, Reducers &...,
Expand All @@ -79,7 +81,9 @@ void execute_parallel_for(const sycl::range<Dimensions> &range, const Offset &of
std::is_invocable_v<const KernelFunc, sycl::item<Dimensions, with_offset_v<Offset>>, Reducers &...>);
kernel = [&](const sycl::item<Dimensions> &item) { func(item, reducers...); };
}
sequential_for(range, offset, kernel);
printf("execute_parallel_for 84\n");
sequential_for(range, offset, kernel);
printf("execute_parallel_for 86\n");
}

template<typename KernelName, int Dimensions, typename KernelFunc, typename... Reducers>
Expand Down Expand Up @@ -137,6 +141,7 @@ void parallel_for(sycl::range<Dimensions> num_work_items, sycl::kernel_handler k
// Offset-taking parallel_for overload: forwards the range, the work item offset, the kernel
// handler and the kernel functor unchanged to execute_parallel_for<KernelName>.
template<typename KernelName, typename KernelFunc, int Dimensions>
void parallel_for(sycl::range<Dimensions> num_work_items, sycl::id<Dimensions> work_item_offset,
sycl::kernel_handler kh, const KernelFunc &kernel_func) {
// Debug trace emitted on entry; the number in the message looks like a source line tag.
printf("parallel_for 140\n");
execute_parallel_for<KernelName>(num_work_items, work_item_offset, kh, kernel_func);
}

Expand Down
1 change: 1 addition & 0 deletions include/simsycl/sycl/handler.hh
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ class handler {
// parallel_for overload taking a range plus an explicit work item offset. It is tagged with
// SIMSYCL_DETAIL_DEPRECATED_IN_SYCL and simply forwards to detail::parallel_for<KernelName>,
// passing a kernel_handler constructed from this handler.
template<typename KernelName = simsycl::detail::unnamed_kernel, typename KernelType, int Dimensions>
SIMSYCL_DETAIL_DEPRECATED_IN_SYCL void parallel_for(
range<Dimensions> num_work_items, id<Dimensions> work_item_offset, KernelType &&kernel_func) {
// Debug trace emitted on entry; the number in the message looks like a source line tag.
printf("parallel_for 94\n");
detail::parallel_for<KernelName>(num_work_items, work_item_offset, kernel_handler(this), kernel_func);
}

Expand Down
10 changes: 8 additions & 2 deletions src/simsycl/schedule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,27 +43,33 @@ namespace simsycl::detail {
// Invokes `kernel` once per linear index of `range`, in the order produced by the cooperative
// schedule. Work is processed in chunks of at most max_schedule_chunk_size indices; after each
// chunk the schedule is asked to update the order buffer. When Offset is an offset type
// (with_offset_v<Offset>), `offset` is added to every id and passed on to make_item.
template<int Dimensions, typename Offset>
void sequential_for(const sycl::range<Dimensions> &range, const Offset &offset,
const simple_kernel<Dimensions, with_offset_v<Offset>> &kernel) {
// Debug trace on entry.
printf("sequential_for 46\n");
// limit the number of work items scheduled at a time to avoid allocating huge index buffers
constexpr size_t max_schedule_chunk_size = 16 << 10;
// The order buffer never needs more entries than there are work items in the range.
const auto schedule_chunk_size = std::min(range.size(), max_schedule_chunk_size);
const auto &schedule = get_cooperative_schedule();
std::vector<size_t> order(schedule_chunk_size);
auto schedule_state = schedule.init(order);


// Debug trace before the chunk loop starts.
printf("sequential_for 54\n");
// Outer loop: one iteration per chunk. Inner loop: one iteration per slot of the order buffer.
for(size_t schedule_offset = 0; schedule_offset < range.size(); schedule_offset += max_schedule_chunk_size) {
for(size_t schedule_id = 0; schedule_id < schedule_chunk_size; ++schedule_id) {
const auto linear_id = schedule_offset + order[schedule_id];
// The last chunk may be partial, so indices past the end of the range are skipped.
if(linear_id < range.size()) {
if constexpr(with_offset_v<Offset>) {
const auto id = offset + linear_index_to_id(range, linear_id);
// NOTE(review): the kernel call appears twice in this view (here and between the two
// trace prints below). This is presumably the removed and the added side of the diff
// rather than two real invocations; confirm against the repository.
kernel(make_item(id, range, offset));
printf("sequential_for 61\n");
kernel(make_item(id, range, offset));
printf("sequential_for 63\n");
} else {
const auto id = linear_index_to_id(range, linear_id);
kernel(make_item(id, range));
}
}
}
// Debug traces around the per-chunk schedule update.
printf("sequential_for 68\n");
schedule_state = schedule.update(schedule_state, order);
printf("sequential_for 70\n");
}
}

Expand Down
25 changes: 23 additions & 2 deletions test/ambiguity_tests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,44 @@ using namespace sycl;
SIMSYCL_START_IGNORING_DEPRECATIONS

// Submits the offset-taking parallel_for overload with 1D, 2D and 3D ranges and checks inside
// each kernel that the id received equals the requested offset. The printf calls are debug
// traces marking progress through the test.
TEST_CASE("Calls to the deprecated parallel_for signature are not ambiguous", "[ambiguity][parallel_for]") {
printf("START\n");
queue q;
printf("q\n");
constexpr int offset = 7;
printf("1D\n");
SECTION("1D") {
printf("1D A\n");
q.submit([&](handler &cgh) {
// NOTE(review): this one-line call and the multi-line call that follows look like the removed
// and added sides of the diff shown together; confirm which one the file actually contains.
cgh.parallel_for(range<1>{1}, id<1>{offset}, [=](id<1> i) { CHECK(i[0] == offset); });
printf("1D B\n");
cgh.parallel_for(range<1>{1}, id<1>{offset}, [=](id<1> i) {
printf("1D C\n");
CHECK(i[0] == offset);
printf("1D D\n");
});
printf("1D E\n");
});
printf("1D F\n");
}
printf("2D\n");
SECTION("2D") {
printf("2D A\n");
q.submit([&](handler &cgh) {
// NOTE(review): same as in the 1D section, the one-line call and the multi-line call are
// presumably the two sides of the diff.
cgh.parallel_for(range<2>{1, 1}, id<2>{0, offset}, [=](id<2> i) { CHECK(i == id<2>{0, offset}); });
printf("2D B\n");
cgh.parallel_for(range<2>{1, 1}, id<2>{0, offset}, [=](id<2> i) {
printf("2D C\n");
CHECK(i == id<2>{0, offset});
printf("2D D\n");
});
// Unlike the 1D section, this trace is labelled "F" while still inside the submit lambda.
printf("2D F\n");
});
}
printf("3D\n");
// The 3D section carries no trace prints inside the submission.
SECTION("3D") {
q.submit([&](handler &cgh) {
cgh.parallel_for(range<3>{1, 1, 1}, id<3>{0, offset, 0}, [=](id<3> i) { CHECK(i == id<3>{0, offset, 0}); });
});
}
printf("END\n");
}

SIMSYCL_STOP_IGNORING_DEPRECATIONS

0 comments on commit c38ab43

Please sign in to comment.