diff --git a/include/simsycl/detail/parallel_for.hh b/include/simsycl/detail/parallel_for.hh index 2d00bf9..a356c92 100644 --- a/include/simsycl/detail/parallel_for.hh +++ b/include/simsycl/detail/parallel_for.hh @@ -68,7 +68,9 @@ void execute_parallel_for(const sycl::range &range, const Offset &of const KernelFunc &func, Reducers &...reducers) // { + printf("execute_parallel_for 71\n"); register_kernel_on_static_construction(); + printf("execute_parallel_for 73\n"); simple_kernel> kernel; if constexpr(std::is_invocable_v>, Reducers &..., @@ -79,7 +81,9 @@ void execute_parallel_for(const sycl::range &range, const Offset &of std::is_invocable_v>, Reducers &...>); kernel = [&](const sycl::item &item) { func(item, reducers...); }; } - sequential_for(range, offset, kernel); + printf("execute_parallel_for 84\n"); + sequential_for(range, offset, kernel); + printf("execute_parallel_for 86\n"); } template @@ -137,6 +141,7 @@ void parallel_for(sycl::range num_work_items, sycl::kernel_handler k template void parallel_for(sycl::range num_work_items, sycl::id work_item_offset, sycl::kernel_handler kh, const KernelFunc &kernel_func) { + printf("parallel_for 140\n"); execute_parallel_for(num_work_items, work_item_offset, kh, kernel_func); } diff --git a/include/simsycl/sycl/handler.hh b/include/simsycl/sycl/handler.hh index 668355e..06f065f 100644 --- a/include/simsycl/sycl/handler.hh +++ b/include/simsycl/sycl/handler.hh @@ -91,6 +91,7 @@ class handler { template SIMSYCL_DETAIL_DEPRECATED_IN_SYCL void parallel_for( range num_work_items, id work_item_offset, KernelType &&kernel_func) { + printf("parallel_for 94\n"); detail::parallel_for(num_work_items, work_item_offset, kernel_handler(this), kernel_func); } diff --git a/src/simsycl/schedule.cc b/src/simsycl/schedule.cc index 93e4b24..af18465 100644 --- a/src/simsycl/schedule.cc +++ b/src/simsycl/schedule.cc @@ -43,27 +43,33 @@ namespace simsycl::detail { template void sequential_for(const sycl::range &range, const Offset &offset, const simple_kernel> &kernel) { + printf("sequential_for 46\n"); // limit the number of work items scheduled at a time to avoid allocating huge index buffers constexpr size_t max_schedule_chunk_size = 16 << 10; const auto schedule_chunk_size = std::min(range.size(), max_schedule_chunk_size); const auto &schedule = get_cooperative_schedule(); std::vector order(schedule_chunk_size); auto schedule_state = schedule.init(order); - + + printf("sequential_for 54\n"); for(size_t schedule_offset = 0; schedule_offset < range.size(); schedule_offset += max_schedule_chunk_size) { for(size_t schedule_id = 0; schedule_id < schedule_chunk_size; ++schedule_id) { const auto linear_id = schedule_offset + order[schedule_id]; if(linear_id < range.size()) { if constexpr(with_offset_v) { const auto id = offset + linear_index_to_id(range, linear_id); - kernel(make_item(id, range, offset)); + printf("sequential_for 61\n"); + kernel(make_item(id, range, offset)); + printf("sequential_for 63\n"); } else { const auto id = linear_index_to_id(range, linear_id); kernel(make_item(id, range)); } } } + printf("sequential_for 68\n"); schedule_state = schedule.update(schedule_state, order); + printf("sequential_for 70\n"); } } diff --git a/test/ambiguity_tests.cc b/test/ambiguity_tests.cc index 7d7827a..f47f964 100644 --- a/test/ambiguity_tests.cc +++ b/test/ambiguity_tests.cc @@ -7,23 +7,44 @@ using namespace sycl; SIMSYCL_START_IGNORING_DEPRECATIONS TEST_CASE("Calls to the deprecated parallel_for signature are not ambiguous", "[ambiguity][parallel_for]") { + printf("START\n"); queue q; + printf("q\n"); constexpr int offset = 7; + printf("1D\n"); SECTION("1D") { + printf("1D A\n"); q.submit([&](handler &cgh) { - cgh.parallel_for(range<1>{1}, id<1>{offset}, [=](id<1> i) { CHECK(i[0] == offset); }); + printf("1D B\n"); + cgh.parallel_for(range<1>{1}, id<1>{offset}, [=](id<1> i) { + printf("1D C\n"); + CHECK(i[0] == offset); + printf("1D D\n"); + }); + printf("1D E\n"); }); + printf("1D F\n"); } + printf("2D\n"); SECTION("2D") { + printf("2D A\n"); q.submit([&](handler &cgh) { - cgh.parallel_for(range<2>{1, 1}, id<2>{0, offset}, [=](id<2> i) { CHECK(i == id<2>{0, offset}); }); + printf("2D B\n"); + cgh.parallel_for(range<2>{1, 1}, id<2>{0, offset}, [=](id<2> i) { + printf("2D C\n"); + CHECK(i == id<2>{0, offset}); + printf("2D D\n"); + }); + printf("2D F\n"); }); } + printf("3D\n"); SECTION("3D") { q.submit([&](handler &cgh) { cgh.parallel_for(range<3>{1, 1, 1}, id<3>{0, offset, 0}, [=](id<3> i) { CHECK(i == id<3>{0, offset, 0}); }); }); } + printf("END\n"); } SIMSYCL_STOP_IGNORING_DEPRECATIONS