From 47863faa9ba4f3d3598323166b5653915eacfdeb Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Thu, 17 Oct 2024 15:00:08 +0100 Subject: [PATCH 01/24] Add sycl_khr_free_function_commands extension This extension provides an alternative mechanism for submitting commands to a device via free-functions that require developers to opt-in to the creation of event objects. It also proposes alternative names for several commands (e.g., launch) and simplifies some concepts (e.g., by removing the need for the nd_range class). --- adoc/chapters/architecture.adoc | 1 + adoc/extensions/index.adoc | 4 +- .../sycl_khr_free_function_commands.adoc | 928 ++++++++++++++++++ 3 files changed, 931 insertions(+), 2 deletions(-) create mode 100644 adoc/extensions/sycl_khr_free_function_commands.adoc diff --git a/adoc/chapters/architecture.adoc b/adoc/chapters/architecture.adoc index ff3e8d3a..6f66f69b 100644 --- a/adoc/chapters/architecture.adoc +++ b/adoc/chapters/architecture.adoc @@ -1978,6 +1978,7 @@ always matches the byte order of the devices. This allows data to be copied between the host and the devices without any byte swapping. +[[subsec:example.sycl.application]] == Example SYCL application Below is a more complex example application, combining some of the features diff --git a/adoc/extensions/index.adoc b/adoc/extensions/index.adoc index 07062df6..eb41d48c 100644 --- a/adoc/extensions/index.adoc +++ b/adoc/extensions/index.adoc @@ -7,7 +7,7 @@ working group. These extensions may be promoted to core features in future versions of the SYCL specification, but their design is subject to change. -(There are currently no extensions in this appendix.) - // leveloffset=2 allows extensions to be written as standalone documents // include::sycl_khr_extension_name.adoc[leveloffset=2] + +include::sycl_khr_free_function_commands.adoc[leveloffset=2] diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc new file mode 100644 index 00000000..a404632b --- /dev/null +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -0,0 +1,928 @@ += SYCL_KHR_FREE_FUNCTION_COMMANDS + +This extension provides an alternative mechanism for submitting commands to a +device via free-functions that require developers to opt-in to the creation of +[code]#event# objects. + +== Dependencies + +This extension has no dependencies on other extensions. + +== Feature test macro + +An implementation supporting this extension must predefine the macro +[code]#SYCL_KHR_FREE_FUNCTION_COMMANDS# to one of the values defined in the +table below. + +[%header,cols="1,5"] +|=== +|Value +|Description + +|1 +|Initial version of this extension. +|=== + +== Usage example + +The example below rewrites the application from +<> to demonstrate the usage of this extension. + +[source,role=synopsis] +---- +#include +#include +using namespace sycl; // (optional) avoids need for "sycl::" before SYCL names + +// Size of the matrices +constexpr size_t N = 2000; +constexpr size_t M = 3000; + +int main() { + // Create a queue to work on + queue myQueue; + + // Create some 2D arrays of float for our matrices + float* a = malloc_shared(N * M, myQueue); + float* b = malloc_shared(N * M, myQueue); + float* c = malloc_shared(N * M, myQueue); + + // Launch an asynchronous kernel to initialize a + // Use khr::submit to get a handler, even though not required here + khr::submit(myQueue, [&](handler& cgh) { + + // Enqueue a kernel iterating on a N*M 2D iteration space + khr::launch(cgh, range<2> { N, M }, [=](id<2> index) { + size_t i = index[0]; + size_t j = index[1]; + a[i * M + j] = i * 2 + j; + }); + + }); + + // Launch an asynchronous kernel to initialize b + // Use khr::launch to enqueue a kernel without a handler + khr::launch(myQueue, range<2> { N, M }, [=](id<2> index) { + size_t i = index[0]; + size_t j = index[1]; + b[i * M + j] = i * 2014 + j * 42; + }); + + // Ensure that the two previous kernels complete before enqueueing more work + // This does not block the host, but enforces dependencies on the device + khr::command_barrier(myQueue); + + // Launch an asynchronous kernel to compute matrix addition c = a + b + // Use khr::launch_grouped to enqueue a kernel using groups without a handler + range<2> local = { 2, 2 }; + range<2> global = { N, M }; + khr::launch_grouped(myQueue, global, local, [=](khr::invocation<2> ivc) { + size_t i = ivc.id(0); + size_t j = ivc.id(1); + size_t index = i * M + j; + c[index] = a[index] + b[index]; + }); + + // Wait for all three kernels to complete before accessing the results + // This blocks the host until all previous kernels have completed + myQueue.wait(); + + std::cout << std::endl << "Result:" << std::endl; + for (size_t i = 0; i < N; i++) { + for (size_t j = 0; j < M; j++) { + size_t index = i * M + j; + // Compare the result to the analytic value + if (c[index] != i * (2 + 2014) + j * (1 + 42)) { + std::cout << "Wrong value " << c[index] << " on element " << i << " " + << j << std::endl; + exit(-1); + } + } + } + + std::cout << "Good computation!" << std::endl; + return 0; +} +---- + +== New free functions + +=== Command-groups + +.[apititle]#submit# +[source,role=synopsis,id=api:submit] +---- +namespace sycl::khr { + +template +void submit(sycl::queue q, CommandGroupFunc&& cgf); + +} +---- +_Effects_: Equivalent to [code]#q.submit(cgf)#. + +''' + +.[apititle]#submit_tracked# +[source,role=synopsis,id=api:submit_tracked] +---- +namespace sycl::khr { + +template +sycl::event submit_tracked(sycl::queue q, CommandGroupFunc&& cgf); + +} +---- +_Effects_: Equivalent to [code]#q.submit(cgf)#. + +_Returns_: A [code]#sycl::event# associated with the submitted command. + +''' + +=== Kernel launch + +.[apititle]#launch# (kernel function) +[source,role=synopsis,id=api:launch] +---- +namespace sycl::khr { + +template +void launch(sycl::handler& h, sycl::range<1> r, const KernelType& k); (1) + +template +void launch(sycl::handler& h, sycl::range<2> r, const KernelType& k); (2) + +template +void launch(sycl::handler& h, sycl::range<3> r, const KernelType& k); (3) + +template +void launch(sycl::queue q, sycl::range<1> r, const KernelType& k); (4) + +template +void launch(sycl::queue q, sycl::range<2> r, const KernelType& k); (5) + +template +void launch(sycl::queue q, sycl::range<3> r, const KernelType& k); (6) + +} +---- +_Effects (1-3)_: Equivalent to [code]#h.parallel_for(r, k)#. + +_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { launch(h, r, +k); })#. + +''' + +.[apititle]#launch# (kernel object) +[source,role=synopsis,id=api:launch-kernel] +---- +namespace sycl::khr { + +template +void launch(sycl::handler& h, sycl::range<1> r, + const sycl::kernel& k, Args&&... args); (1) + +template +void launch(sycl::handler& h, sycl::range<2> r, + const sycl::kernel& k, Args&&... args); (2) + +template +void launch(sycl::handler& h, sycl::range<3> r, + const sycl::kernel& k, Args&&... args); (3) + +template +void launch(sycl::queue q, sycl::range<1> r, + const sycl::kernel& k, Args&&... args); (4) + +template +void launch(sycl::queue q, sycl::range<2> r, + const sycl::kernel& k, Args&&... args); (5) + +template +void launch(sycl::queue q, sycl::range<3> r, + const sycl::kernel& k, Args&&... args); (6) + +} +---- +_Effects_: Enqueues a kernel object that is invoked for every work-item in the +specified [code]#sycl::range#. +The arguments in [code]#args# are passed to the kernel in the same order. + +''' + +.[apititle]#launch_reduce# (kernel function) +[source,role=synopsis,id=api:launch_reduce] +---- +namespace sycl::khr { + +template +void launch_reduce(sycl::handler& h, sycl::range<1> r, + const KernelType& k, Reductions&&... reductions); (1) + +template +void launch_reduce(sycl::handler& h, sycl::range<2> r, + const KernelType& k, Reductions&&... reductions); (2) + +template +void launch_reduce(sycl::handler& h, sycl::range<3> r, + const KernelType& k, Reductions&&... reductions); (3) + +template +void launch_reduce(sycl::queue q, sycl::range<1> r, + const KernelType& k, Reductions&&... reductions); (4) + +template +void launch_reduce(sycl::queue q, sycl::range<2> r, + const KernelType& k, Reductions&&... reductions); (5) + +template +void launch_reduce(sycl::queue q, sycl::range<3> r, + const KernelType& k, Reductions&&... reductions); (6) + +} +---- +_Constraints_: The parameter pack consists of 0 or more objects created by the +[code]#sycl::reduction# function. + +_Effects (1-3)_: Equivalent to [code]#h.parallel_for(r, reductions..., k)#. + +_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { +launch_reduce(h, r, k, reductions...); })#. + + +''' + +.[apititle]#launch_reduce# (kernel object) +[source,role=synopsis,id=api:launch_reduce-kernel] +---- +namespace sycl::khr { + +template +void launch_reduce(sycl::handler& h, sycl::range<1> r, + const sycl::kernel& k, Args&&... args); (1) + +template +void launch_reduce(sycl::handler& h, sycl::range<2> r, + const sycl::kernel& k, Args&&... args); (2) + +template +void launch_reduce(sycl::handler& h, sycl::range<3> r, + const sycl::kernel& k, Args&&... args); (3) + +template +void launch_reduce(sycl::queue q, sycl::range<1> r, + const sycl::kernel& k, Args&&... args); (4) + +template +void launch_reduce(sycl::queue q, sycl::range<2> r, + const sycl::kernel& k, Args&&... args); (5) + +template +void launch_reduce(sycl::queue q, sycl::range<3> r, + const sycl::kernel& k, Args&&... args); (6) + +} +---- +_Effects_: Enqueues a kernel object that is invoked for every work-item in the +specified [code]#sycl::range#, where each work-item contributes to one or more +reductions. +The arguments in [code]#args# are passed to the kernel in the same order. + +''' + +.[apititle]#launch_grouped# (kernel function) +[source,role=synopsis,id=api:launch_grouped] +---- +namespace sycl::khr { + +template +void launch_grouped(sycl::handler& h, sycl::range<1> r, sycl::range<1> size, + const KernelType& k); (1) + +template +void launch_grouped(sycl::handler& h, sycl::range<2> r, sycl::range<2> size, + const KernelType& k); (2) + +template +void launch_grouped(sycl::handler& h, sycl::range<3> r, sycl::range<3> size, + const KernelType& k); (3) + +template +void launch_grouped(sycl::queue q, sycl::range<1> r, sycl::range<1> size, + const KernelType& k); (4) + +template +void launch_grouped(sycl::queue q, sycl::range<2> r, sycl::range<2> size, + const KernelType& k); (5) + +template +void launch_grouped(sycl::queue q, sycl::range<3> r, sycl::range<3> size, + const KernelType& k); (6) + +} +---- +_Effects (1-3)_: Equivalent to [code]#h.parallel_for(nd_range(r, size), k)#. + +_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { +launch_grouped(h, r, size, k); })#. + +''' + +.[apititle]#launch_grouped# (kernel object) +[source,role=synopsis,id=api:launch_grouped-kernel] +---- +namespace sycl::khr { + +template +void launch_grouped(sycl::handler& h, sycl::range<1> r, sycl::range<1> size, + const sycl::kernel& k, Args&&... args); (1) + +template +void launch_grouped(sycl::handler& h, sycl::range<2> r, sycl::range<2> size, + const sycl::kernel& k, Args&&... args); (2) + +template +void launch_grouped(sycl::handler& h, sycl::range<3> r, sycl::range<3> size, + const sycl::kernel& k, Args&&... args); (3) + +template +void launch_grouped(sycl::queue q, sycl::range<1> r, sycl::range<1> size, + const sycl::kernel& k, Args&&... args); (4) + +template +void launch_grouped(sycl::queue q, sycl::range<2> r, sycl::range<2> size, + const sycl::kernel& k, Args&&... args); (5) + +template +void launch_grouped(sycl::queue q, sycl::range<3> r, sycl::range<3> size, + const sycl::kernel& k, Args&&... args); (6) + +} +---- +_Effects_: Enqueues a kernel object that is invoked for every work-item in the +specified [code]#sycl::range#. +Work-items are grouped into work-groups of size [code]#size#. +The arguments in [code]#args# are passed to the kernel in the same order. + +''' + +.[apititle]#launch_grouped_reduce# (kernel function) +[source,role=synopsis,id=api:launch_grouped_reduce] +---- +namespace sycl::khr { + +template +void launch_grouped_reduce(sycl::handler& h, sycl::range<1> r, + sycl::range<1> size, const KernelType& k, + Reductions&&... reductions); (1) + +template +void launch_grouped_reduce(sycl::handler& h, sycl::range<2> r, + sycl::range<2> size, const KernelType& k, + Reductions&&... reductions); (2) + +template +void launch_grouped_reduce(sycl::handler& h, sycl::range<3> r, + sycl::range<3> size, const KernelType& k, + Reductions&&... reductions); (3) + +template +void launch_grouped_reduce(sycl::queue q, sycl::range<1> r, + sycl::range<1> size, const KernelType& k, + Reductions&&... reductions); (4) + +template +void launch_grouped_reduce(sycl::queue q, sycl::range<2> r, + sycl::range<2> size, const KernelType& k, + Reductions&&... reductions); (5) + +template +void launch_grouped_reduce(sycl::queue q, sycl::range<3> r, + sycl::range<3> size, const KernelType& k, + Reductions&&... reductions); (6) + +} +---- +_Constraints_: The parameter pack consists of 0 or more objects created by the +[code]#sycl::reduction# function. + +_Effects (1-3)_: Equivalent to [code]#h.parallel_for(nd_range(r, size), +reductions..., k)#. + +_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { +launch_grouped_reduce(h, r, size, k, reductions...); })#. + +''' + +.[apititle]#launch_grouped_reduce# (kernel object) +[source,role=synopsis,id=api:launch_grouped_reduce-kernel] +---- +namespace sycl::khr { + +template +void launch_grouped_reduce(sycl::handler& h, sycl::range<1> r, + sycl::range<1> size, const sycl::kernel& k, + Args&&... args); (1) + +template +void launch_grouped_reduce(sycl::handler& h, sycl::range<2> r, + sycl::range<2> size, const sycl::kernel& k, + Args&&... args); (2) + +template +void launch_grouped_reduce(sycl::handler& h, sycl::range<3> r, + sycl::range<3> size, const sycl::kernel& k, + Args&&... args); (3) + +template +void launch_grouped_reduce(sycl::queue q, sycl::range<1> r, + sycl::range<1> size, const sycl::kernel& k, + Args&&... args); (4) + +template +void launch_grouped_reduce(sycl::queue q, sycl::range<2> r, + sycl::range<2> size, const sycl::kernel& k, + Args&&... args); (5) + +template +void launch_grouped_reduce(sycl::queue q, sycl::range<3> r, + sycl::range<3> size, const sycl::kernel& k, + Args&&... args); (6) + +} +---- +_Effects_: Enqueues a kernel object that is invoked for every work-item in the +specified [code]#sycl::range#, where each work-item contributes to one or more +reductions. +Work-items are grouped into work-groups of size [code]#size#. +The arguments in [code]#args# are passed to the kernel in the same order. + +''' + +.[apititle]#launch_task# (kernel function) +[source,role=synopsis,id=api:launch_task] +---- +namespace sycl::khr { + +template +void launch_task(sycl::handler& h, const KernelType& k); (1) + +template +void launch_task(sycl::queue q, const KernelType& k); (2) + +} +---- +_Effects (1)_: Equivalent to [code]#h.single_task(k)#. + +_Effects (2)_: Equivalent to [code]#h.submit([&](handler& h) { launch_task(h, +k); })#. + +''' + +.[apititle]#launch_task# (kernel object) +[source,role=synopsis,id=api:launch_task-kernel] +---- +namespace sycl::khr { + +template +void launch_task(sycl::queue q, const sycl::kernel& k, Args&&... args); (1) + +template +void launch_task(sycl::handler& h, const sycl::kernel& k, Args&&... args); (2) + +} +---- +_Effects_: Enqueues a kernel object as a device task. +The arguments in [code]#args# are passed to the kernel in the same order. + +''' + +=== Memory operations + +.[apititle]#memcpy# +[source,role=synopsis,id=api:memcpy] +---- +namespace sycl::khr { + +void memcpy(sycl::handler& h, void* dest, const void* src, size_t numBytes); (1) + +void memcpy(sycl::queue q, void* dest, const void* src, size_t numBytes); (2) + +} +---- +_Effects (1)_: Equivalent to [code]#h.memcpy(dest, src, numBytes)#. + +_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { memcpy(h, dest, +src, numBytes); })#. + +''' + +.[apititle]#copy# (USM pointers) +[source,role=synopsis,id=api:copy-pointer] +---- +namespace sycl::khr { + +template +void copy(sycl::handler& h, const T* src, T* dest, size_t count); (1) + +template +void copy(sycl::queue q, const T* src, T* dest, size_t count); (2) + +} +---- + +Copies between two USM pointers. + +_Constraints_: [code]#T# must be <>. + +_Preconditions_: [code]#src# and [code]#dest# must be host pointers or USM +pointers accessible on the device. +[code]#src# and [code]#dest# must point to allocations of at least [code]#count# +elements of type [code]#T#. + +_Effects (1)_: Equivalent to [code]#h.copy(src, dest, count)#. + +_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, +dest, count); })# + +''' + +.[apititle]#copy# (accessors, host to device) +[source,role=synopsis,id=api:copy-accessor-h2d] +---- +namespace sycl::khr { + +template +void copy(sycl::handler& h, + const SrcT* src, + sycl::accessor dest); (3) + +template +void copy(sycl::handler& h, + std::shared_ptr src, + sycl::accessor dest); (4) + +template +void copy(sycl::queue q, + const SrcT* src, + sycl::accessor dest); (5) + +template +void copy(sycl::queue q, + std::shared_ptr src, + sycl::accessor dest); (6) + +} +---- + +Copies from host to device. + +_Constraints_: [code]#SrcT# and [code]#DestT# must be <>. +[code]#DestMode# must be [code]#access_mode::write# or +[code]#access_mode::read_write#. + +_Preconditions_: [code]#src# must be a host pointer, pointing to an allocation +of at least as many bytes as the range represented by [code]#dest#. + +_Effects (3-4)_: Equivalent to [code]#h.copy(src, dest)#. + +_Effects (5-6)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, +dest) })# + +''' + +.[apititle]#copy# (accessors, device to host) +[source,role=synopsis,id=api:copy-accessor-d2h] +---- +namespace sycl::khr { + +template +void copy(sycl::handler& h, + sycl::accessor src, + DestT* dest); (7) + +template +void copy(sycl::handler& h, + sycl::accessor src, + std::shared_ptr dest); (8) + +template +void copy(sycl::queue q, + sycl::accessor src, + DestT* dest); (9) + +template +void copy(sycl::queue q, + sycl::accessor src, + std::shared_ptr dest); (10) + +} +---- + +Copies from device to host. + +_Constraints_: [code]#SrcT# and [code]#DestT# must be <>. +[code]#DestMode# must be [code]#access_mode::read# or +[code]#access_mode::read_write#. + +_Preconditions_: [code]#dest# must be a host pointer, pointing to an allocation +of at least as many bytes as the range represented by [code]#src#. + +_Effects (7-8)_: Equivalent to [code]#h.copy(src, dest)#. + +_Effects (9-10)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, +dest); })#. + +''' + +.[apititle]#copy# (accessors, device to device) +[source,role=synopsis,id=api:copy-accessor-d2d] +---- +namespace sycl::khr { + +template +void copy(sycl::queue q, + sycl::accessor src, + sycl::accessor dest); (11) + +template +void copy(sycl::queue q, + sycl::accessor src, + sycl::accessor dest); (12) + +} +---- + +Copies between two device accessors. + +_Constraints_: [code]#SrcT# and [code]#DestT# must be <>. +[code]#SrcMode# must be [code]#access_mode::read# or +[code]#access_mode::read_write#. +[code]#DestMode# must be [code]#access_mode::write# or +[code]#access_mode::read_write#. + +_Effects (11)_: Equivalent to [code]#h.copy(src, dest)#. + +_Effects (12)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, +dest); })#. + +_Throws_: A synchronous [code]#exception# with the [code]#errc::invalid# error +code if [code]#dest.get_count() < src.get_count()#. + +''' + +.[apititle]#memset# +[source,role=synopsis,id=api:memset] +---- +namespace sycl::khr { + +void memset(sycl::handler& h, void* ptr, int value, size_t numBytes); (1) + +void memset(sycl::queue q, void* ptr, int value, size_t numBytes); (2) + +} +---- +_Effects (1)_: Equivalent to [code]#h.memset(ptr, value, numBytes)#. + +_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { memset(h, value, +numBytes); })#. + +''' + +.[apititle]#fill# +[source,role=synopsis,id=api:fill] +---- +namespace sycl::khr { + +template +void fill(sycl::handler& h, T* ptr, const T& pattern, size_t count); (1) + +template +void fill(sycl::handler& h, + sycl::accessor dest, + const T& src); (2) + +template +void fill(sycl::queue q, T* ptr, const T& pattern, size_t count); (3) + +template +void fill(sycl::queue q, + sycl::accessor dest, + const T& src); (4) + +} +---- +_Effects (1)_: Equivalent to [code]#h.fill(ptr, pattern, count)#. + +_Effects (2)_: Equivalent to [code]#h.fill(dest, src)#. + +_Effects (3)_: Equivalent to [code]#q.submit([&](handler& h) { fill(h, ptr, +pattern, count); })#. + +_Effects (4)_: Equivalent to [code]#q.submit([&](handler& h) { fill(h, dest, +src); })#. + +''' + +.[apititle]#update_host# +[source,role=synopsis,id=api:update_host] +---- +namespace sycl::khr { + +template +void update_host(sycl::handler& h, accessor acc); (1) + +template +void update_host(sycl::queue q, accessor acc); (2) + +} +---- +_Constraints_: [code]#T# must be <>. + +_Effects (1)_: Equivalent to [code]#h.update_host(acc)#. + +_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { update_host(h, +acc); })#. + +''' + +.[apititle]#prefetch# +[source,role=synopsis,id=api:prefetch] +---- +namespace sycl::khr { + +void prefetch(sycl::handler& h, void* ptr, size_t numBytes); (1) + +void prefetch(sycl::queue q, void* ptr, size_t numBytes); (2) + +} +---- +_Effects (1)_: Equivalent to [code]#h.prefetch(ptr, numBytes)#. + +_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { prefetch(h, ptr, +numBytes); })#. + +''' + +.[apititle]#mem_advise# +[source,role=synopsis,id=api:mem_advise] +---- +namespace sycl::khr { + +void mem_advise(sycl::handler& h, void* ptr, size_t numBytes, int advice); (1) + +void mem_advise(sycl::queue q, void* ptr, size_t numBytes, int advice); (2) + +} +---- +_Effects (1)_: Equivalent to [code]#h.mem_advise(ptr, numBytes, advice)#. + +_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { mem_advise(h, +ptr, numBytes, advice); })#. + +''' + +=== Command and event barriers + +.[apititle]#command_barrier# +[source,role=synopsis,id=api:command_barrier] +---- +namespace sycl::khr { + +void command_barrier(sycl::handler& h); (1) + +void command_barrier(sycl::queue q); (2) + +} +---- +_Effects_: Enqueues a command barrier. +Any commands submitted after this barrier cannot begin execution until all +previously submitted commands (and any commands associated with dependendent +events) have completed. + +''' + +.[apititle]#event_barrier# +[source,role=synopsis,id=api:event_barrier] +---- +namespace sycl::khr { + +void event_barrier(sycl::handler& h, const std::vector& events); (1) + +void event_barrier(sycl::queue q, const std::vector& events); (2) + +} +---- +_Effects_: Enqueues an event barrier. +Any commands submitted after this barrier cannot begin execution until all +commands associated with [code]#events# (and any commands associated with other +dependent events) have completed. + +{note}For both overloads, if [code]#events# is empty and an event barrier has no +other dependencies (e.g., specified by [code]#handler::depends_on#), it is ot +required to wait for any commands unless the [code]#queue# is in-order.{endnote} + +''' + +== [code]#invocation# class template + +The [code]#invocation# class template identifies an invocation of a kernel +function. + +Instances of the [code]#invocation# class template are not user-constructible +and are passed as an argument to each invocation of a kernel function. + +[source,role=synopsis] +---- +namespace sycl::khr { + +template +class invocation +{ + public: + static constexpr int dimensions = Dimensions; + + id id() const noexcept; + size_t linear_id() const noexcept; + + range range() const noexcept; + + group get_work_group() const noexcept; + + sub_group get_sub_group() const noexcept; + + // Available for backwards compatibility only + operator nd_item() const noexcept; +}; + +} +---- + +.[apidef]#id# +[source,role=synopsis,id=api:khr-free-function-commands-invocation-id] +---- +id id() const noexcept; +---- +_Returns_: The index of this invocation within the kernel dispatch. + +''' + +.[apidef]#linear_id# +[source,role=synopsis,id=api:khr-free-function-commands-invocation-linear_id] +---- +size_t linear_id() const noexcept; +---- +_Returns_: The linearized index (see <>) of this +invocation within the kernel dispatch. + +''' + +.[apidef]#range# +[source,role=synopsis,id=api:khr-free-function-commands-invocation-range] +---- +range range() const noexcept; +---- + +_Returns_: An index space representing all invocations of this kernel. + +''' + +.[apidef]#get_work_group# +[source,role=synopsis,id=api:khr-free-function-commands-invocation-get_work_group] +---- +group get_work_group() const noexcept; +---- + +_Returns_: A [code]#group# representing the <> to which this +invocation belongs. + +''' + +.[apidef]#get_sub_group# +[source,role=synopsis,id=api:khr-free-function-commands-invocation-get_sub_group] +---- +sub_group get_sub_group() const noexcept; +---- + +_Returns_: A [code]#sub_group# representing the sub-group to which this +invocation belongs. + +''' + +.[apidef]#nd_item conversion operator# +[source,role=synopsis,id=api:khr-free-function-commands-invocation-nd_item-conversion-operator] +---- +operator nd_item() const noexcept; +---- + +_Returns_: An [code]#nd_item# representing this invocation. + +{note}This function exists only to provide backwards compatibility with SYCL +2020 code in order to facilitate experimentation with the new interface proposed +by this extension.{endnote} + +== Issues + +None. From ce08652764e915d0d5bb16628869a7e2f5efd657 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Thu, 5 Dec 2024 09:55:50 +0000 Subject: [PATCH 02/24] Reword khr_free_function_commands comment --- adoc/extensions/sycl_khr_free_function_commands.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index a404632b..d7264668 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -48,7 +48,7 @@ int main() { float* c = malloc_shared(N * M, myQueue); // Launch an asynchronous kernel to initialize a - // Use khr::submit to get a handler, even though not required here + // Use khr::submit to enqueue a kernel via a handler khr::submit(myQueue, [&](handler& cgh) { // Enqueue a kernel iterating on a N*M 2D iteration space From 372bb3b2d8462248c96972b5ea69396c776f006a Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Thu, 5 Dec 2024 09:58:23 +0000 Subject: [PATCH 03/24] Add periods to khr_free_function_commands comments --- .../sycl_khr_free_function_commands.adoc | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index d7264668..ed024758 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -32,26 +32,26 @@ The example below rewrites the application from ---- #include #include -using namespace sycl; // (optional) avoids need for "sycl::" before SYCL names +using namespace sycl; // (optional) avoids need for "sycl::" before SYCL names. -// Size of the matrices +// Size of the matrices. constexpr size_t N = 2000; constexpr size_t M = 3000; int main() { - // Create a queue to work on + // Create a queue to work on. queue myQueue; - // Create some 2D arrays of float for our matrices + // Create some 2D arrays of float for our matrices. float* a = malloc_shared(N * M, myQueue); float* b = malloc_shared(N * M, myQueue); float* c = malloc_shared(N * M, myQueue); - // Launch an asynchronous kernel to initialize a - // Use khr::submit to enqueue a kernel via a handler + // Launch an asynchronous kernel to initialize a. + // Use khr::submit to enqueue a kernel via a handler. khr::submit(myQueue, [&](handler& cgh) { - // Enqueue a kernel iterating on a N*M 2D iteration space + // Enqueue a kernel iterating on a N*M 2D iteration space. khr::launch(cgh, range<2> { N, M }, [=](id<2> index) { size_t i = index[0]; size_t j = index[1]; @@ -60,20 +60,20 @@ int main() { }); - // Launch an asynchronous kernel to initialize b - // Use khr::launch to enqueue a kernel without a handler + // Launch an asynchronous kernel to initialize b. + // Use khr::launch to enqueue a kernel without a handler. khr::launch(myQueue, range<2> { N, M }, [=](id<2> index) { size_t i = index[0]; size_t j = index[1]; b[i * M + j] = i * 2014 + j * 42; }); - // Ensure that the two previous kernels complete before enqueueing more work - // This does not block the host, but enforces dependencies on the device + // Ensure that the two previous kernels complete before enqueueing more work. + // This does not block the host, but enforces dependencies on the device. khr::command_barrier(myQueue); - // Launch an asynchronous kernel to compute matrix addition c = a + b - // Use khr::launch_grouped to enqueue a kernel using groups without a handler + // Launch an asynchronous kernel to compute matrix addition c = a + b. + // Use khr::launch_grouped to enqueue a kernel using groups without a handler. range<2> local = { 2, 2 }; range<2> global = { N, M }; khr::launch_grouped(myQueue, global, local, [=](khr::invocation<2> ivc) { @@ -83,15 +83,15 @@ int main() { c[index] = a[index] + b[index]; }); - // Wait for all three kernels to complete before accessing the results - // This blocks the host until all previous kernels have completed + // Wait for all three kernels to complete before accessing the results. + // This blocks the host until all previous kernels have completed. myQueue.wait(); std::cout << std::endl << "Result:" << std::endl; for (size_t i = 0; i < N; i++) { for (size_t j = 0; j < M; j++) { size_t index = i * M + j; - // Compare the result to the analytic value + // Compare the result to the analytic value. if (c[index] != i * (2 + 2014) + j * (1 + 42)) { std::cout << "Wrong value " << c[index] << " on element " << i << " " << j << std::endl; @@ -853,7 +853,7 @@ class invocation sub_group get_sub_group() const noexcept; - // Available for backwards compatibility only + // Available for backwards compatibility only. operator nd_item() const noexcept; }; From 9747f7a4ad3deecda7a422be4ba89730c458da59 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 10:15:53 +0000 Subject: [PATCH 04/24] Add + marks to code blocks containing ... --- .../sycl_khr_free_function_commands.adoc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index ed024758..6c30d3a7 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -244,10 +244,10 @@ void launch_reduce(sycl::queue q, sycl::range<3> r, _Constraints_: The parameter pack consists of 0 or more objects created by the [code]#sycl::reduction# function. -_Effects (1-3)_: Equivalent to [code]#h.parallel_for(r, reductions..., k)#. +_Effects (1-3)_: Equivalent to [code]#+h.parallel_for(r, reductions..., k)+#. -_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { -launch_reduce(h, r, k, reductions...); })#. +_Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { +launch_reduce(h, r, k, reductions...); })+#. ''' @@ -406,11 +406,11 @@ void launch_grouped_reduce(sycl::queue q, sycl::range<3> r, _Constraints_: The parameter pack consists of 0 or more objects created by the [code]#sycl::reduction# function. -_Effects (1-3)_: Equivalent to [code]#h.parallel_for(nd_range(r, size), -reductions..., k)#. +_Effects (1-3)_: Equivalent to [code]#+h.parallel_for(nd_range(r, size), +reductions..., k)+#. -_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { -launch_grouped_reduce(h, r, size, k, reductions...); })#. +_Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { +launch_grouped_reduce(h, r, size, k, reductions...); })+#. ''' From 2527e90051562e2b28d4c446d35674c2cfbf83b8 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 10:17:03 +0000 Subject: [PATCH 05/24] Require at least 1 reduction in *_reduce functions Previous "0 or more" wording only made sense when reductions could be optionally provided to functions like parallel_for; now that there are dedicated *_reduce functions, at least one reduction is required. --- adoc/extensions/sycl_khr_free_function_commands.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 6c30d3a7..6e6e1fec 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -241,7 +241,7 @@ void launch_reduce(sycl::queue q, sycl::range<3> r, } ---- -_Constraints_: The parameter pack consists of 0 or more objects created by the +_Constraints_: The parameter pack consists of 1 or more objects created by the [code]#sycl::reduction# function. _Effects (1-3)_: Equivalent to [code]#+h.parallel_for(r, reductions..., k)+#. @@ -403,7 +403,7 @@ void launch_grouped_reduce(sycl::queue q, sycl::range<3> r, } ---- -_Constraints_: The parameter pack consists of 0 or more objects created by the +_Constraints_: The parameter pack consists of 1 or more objects created by the [code]#sycl::reduction# function. _Effects (1-3)_: Equivalent to [code]#+h.parallel_for(nd_range(r, size), From f63adeb07c1f10d43ce604a82349b1da8437e72b Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 10:20:13 +0000 Subject: [PATCH 06/24] Replace "must be" with "is" in constraints "is" is more consistent with ISO C++ wording. --- .../sycl_khr_free_function_commands.adoc | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 6e6e1fec..ef276e0d 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -533,7 +533,7 @@ void copy(sycl::queue q, const T* src, T* dest, size_t count); (2) Copies between two USM pointers. -_Constraints_: [code]#T# must be <>. +_Constraints_: [code]#T# is <>. _Preconditions_: [code]#src# and [code]#dest# must be host pointers or USM pointers accessible on the device. @@ -577,8 +577,8 @@ void copy(sycl::queue q, Copies from host to device. -_Constraints_: [code]#SrcT# and [code]#DestT# must be <>. -[code]#DestMode# must be [code]#access_mode::write# or +_Constraints_: [code]#SrcT# and [code]#DestT# is <>. +[code]#DestMode# is [code]#access_mode::write# or [code]#access_mode::read_write#. _Preconditions_: [code]#src# must be a host pointer, pointing to an allocation @@ -621,8 +621,8 @@ void copy(sycl::queue q, Copies from device to host. -_Constraints_: [code]#SrcT# and [code]#DestT# must be <>. -[code]#DestMode# must be [code]#access_mode::read# or +_Constraints_: [code]#SrcT# and [code]#DestT# is <>. +[code]#DestMode# is [code]#access_mode::read# or [code]#access_mode::read_write#. _Preconditions_: [code]#dest# must be a host pointer, pointing to an allocation @@ -657,10 +657,9 @@ void copy(sycl::queue q, Copies between two device accessors. -_Constraints_: [code]#SrcT# and [code]#DestT# must be <>. -[code]#SrcMode# must be [code]#access_mode::read# or -[code]#access_mode::read_write#. -[code]#DestMode# must be [code]#access_mode::write# or +_Constraints_: [code]#SrcT# and [code]#DestT# is <>. +[code]#SrcMode# is [code]#access_mode::read# or [code]#access_mode::read_write#. +[code]#DestMode# is [code]#access_mode::write# or [code]#access_mode::read_write#. _Effects (11)_: Equivalent to [code]#h.copy(src, dest)#. @@ -739,7 +738,7 @@ void update_host(sycl::queue q, accessor acc); } ---- -_Constraints_: [code]#T# must be <>. +_Constraints_: [code]#T# is <>. _Effects (1)_: Equivalent to [code]#h.update_host(acc)#. From 47c08f8623d9be26f7116fd9da71fa42ba71aa02 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 10:27:17 +0000 Subject: [PATCH 07/24] Use bulleted list for multiple constraints --- .../sycl_khr_free_function_commands.adoc | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index ef276e0d..4e60163b 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -577,9 +577,11 @@ void copy(sycl::queue q, Copies from host to device. -_Constraints_: [code]#SrcT# and [code]#DestT# is <>. -[code]#DestMode# is [code]#access_mode::write# or -[code]#access_mode::read_write#. +_Constraints_: + +* [code]#SrcT# and [code]#DestT# is <>. +* [code]#DestMode# is [code]#access_mode::write# or + [code]#access_mode::read_write#. _Preconditions_: [code]#src# must be a host pointer, pointing to an allocation of at least as many bytes as the range represented by [code]#dest#. @@ -621,9 +623,11 @@ void copy(sycl::queue q, Copies from device to host. -_Constraints_: [code]#SrcT# and [code]#DestT# is <>. -[code]#DestMode# is [code]#access_mode::read# or -[code]#access_mode::read_write#. +_Constraints_: + +* [code]#SrcT# and [code]#DestT# is <>. +* [code]#DestMode# is [code]#access_mode::read# or + [code]#access_mode::read_write#. _Preconditions_: [code]#dest# must be a host pointer, pointing to an allocation of at least as many bytes as the range represented by [code]#src#. @@ -657,10 +661,13 @@ void copy(sycl::queue q, Copies between two device accessors. -_Constraints_: [code]#SrcT# and [code]#DestT# is <>. -[code]#SrcMode# is [code]#access_mode::read# or [code]#access_mode::read_write#. -[code]#DestMode# is [code]#access_mode::write# or -[code]#access_mode::read_write#. +_Constraints_: + +* [code]#SrcT# and [code]#DestT# is <>. +* [code]#SrcMode# is [code]#access_mode::read# or + [code]#access_mode::read_write#. +* [code]#DestMode# is [code]#access_mode::write# or + [code]#access_mode::read_write#. _Effects (11)_: Equivalent to [code]#h.copy(src, dest)#. From 15fd80ab652cb66e6239ef8a16ee15154aac1fdc Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 10:52:00 +0000 Subject: [PATCH 08/24] Rewrite preconditions for USM copy functions --- .../sycl_khr_free_function_commands.adoc | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 4e60163b..8f8e60b9 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -535,10 +535,16 @@ Copies between two USM pointers. _Constraints_: [code]#T# is <>. -_Preconditions_: [code]#src# and [code]#dest# must be host pointers or USM -pointers accessible on the device. -[code]#src# and [code]#dest# must point to allocations of at least [code]#count# -elements of type [code]#T#. +_Preconditions_: + +* [code]#src# is a host pointer or a pointer within a USM allocation that is + accessible on the device. +* [code]#dest# is a host pointer or a pointer within a USM allocation that is + accessible on the device. +* [code]#src# and [code]#dest# both point to allocations of at least + [code]#count# elements of type [code]#T#. +* If either [code]#src# or [code]#dest# is a pointer is to a USM allocation, + that allocation was created from the same context as the handler's queue. _Effects (1)_: Equivalent to [code]#h.copy(src, dest, count)#. @@ -583,8 +589,11 @@ _Constraints_: * [code]#DestMode# is [code]#access_mode::write# or [code]#access_mode::read_write#. -_Preconditions_: [code]#src# must be a host pointer, pointing to an allocation -of at least as many bytes as the range represented by [code]#dest#. +_Preconditions_: + +* [code]#src# is a host pointer. +* [code]#src# points to an allocation of at least as many bytes as the range + represented by [code]#dest#. _Effects (3-4)_: Equivalent to [code]#h.copy(src, dest)#. @@ -629,8 +638,11 @@ _Constraints_: * [code]#DestMode# is [code]#access_mode::read# or [code]#access_mode::read_write#. -_Preconditions_: [code]#dest# must be a host pointer, pointing to an allocation -of at least as many bytes as the range represented by [code]#src#. +_Preconditions_: + +* [code]#dest# is a host pointer. +* [code]#dest# points to an allocation of at least as many bytes as the range + represented by [code]#src#. _Effects (7-8)_: Equivalent to [code]#h.copy(src, dest)#. From 9c627921aa959a199a29ce246491c5b995fdef58 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 10:54:13 +0000 Subject: [PATCH 09/24] Fix typo in non-normative note --- adoc/extensions/sycl_khr_free_function_commands.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 8f8e60b9..0efb0ee2 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -839,7 +839,7 @@ commands associated with [code]#events# (and any commands associated with other dependent events) have completed. {note}For both overloads, if [code]#events# is empty and an event barrier has no -other dependencies (e.g., specified by [code]#handler::depends_on#), it is ot +other dependencies (e.g., specified by [code]#handler::depends_on#), it is not required to wait for any commands unless the [code]#queue# is in-order.{endnote} ''' From 437754958e8597148fe1479d7a73691197aa947e Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 11:06:23 +0000 Subject: [PATCH 10/24] Define kernel object overloads via equivalence --- .../sycl_khr_free_function_commands.adoc | 68 +++++++++++++------ 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 0efb0ee2..4406e187 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -204,9 +204,16 @@ void launch(sycl::queue q, sycl::range<3> r, } ---- -_Effects_: Enqueues a kernel object that is invoked for every work-item in the -specified [code]#sycl::range#. -The arguments in [code]#args# are passed to the kernel in the same order. +_Effects (1-3)_: Equivalent to: + +[source,c++] +---- +h.set_args(args...); +h.parallel_for(r, k); +---- + +_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { launch(h, r, k, +args...); })#. ''' @@ -283,10 +290,15 @@ void launch_reduce(sycl::queue q, sycl::range<3> r, } ---- -_Effects_: Enqueues a kernel object that is invoked for every work-item in the -specified [code]#sycl::range#, where each work-item contributes to one or more -reductions. -The arguments in [code]#args# are passed to the kernel in the same order. +_Effects (1-3)_: Equivalent to: +[source,c++] +---- +h.set_args(args...); +h.parallel_for(r, k); +---- + +_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { +launch_reduce(h, r, k, args...); })#. ''' @@ -359,10 +371,15 @@ void launch_grouped(sycl::queue q, sycl::range<3> r, sycl::range<3> size, } ---- -_Effects_: Enqueues a kernel object that is invoked for every work-item in the -specified [code]#sycl::range#. -Work-items are grouped into work-groups of size [code]#size#. -The arguments in [code]#args# are passed to the kernel in the same order. +_Effects (1-3)_: Equivalent to: +[source,c++] +---- +h.set_args(args...); +h.parallel_for(nd_range(r, size), k); +---- + +_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { +launch_grouped(h, r, size, k, args...); })#. ''' @@ -451,11 +468,15 @@ void launch_grouped_reduce(sycl::queue q, sycl::range<3> r, } ---- -_Effects_: Enqueues a kernel object that is invoked for every work-item in the -specified [code]#sycl::range#, where each work-item contributes to one or more -reductions. -Work-items are grouped into work-groups of size [code]#size#. -The arguments in [code]#args# are passed to the kernel in the same order. +_Effects (1-3)_: Equivalent to: +[source,c++] +---- +h.set_args(args...); +h.parallel_for(nd_range(r, size), k); +---- + +_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { +launch_grouped_reduce(h, r, size, k, args...); })#. ''' @@ -485,15 +506,22 @@ k); })#. namespace sycl::khr { template -void launch_task(sycl::queue q, const sycl::kernel& k, Args&&... args); (1) +void launch_task(sycl::handler& h, const sycl::kernel& k, Args&&... args); (1) template -void launch_task(sycl::handler& h, const sycl::kernel& k, Args&&... args); (2) +void launch_task(sycl::queue q, const sycl::kernel& k, Args&&... args); (2) } ---- -_Effects_: Enqueues a kernel object as a device task. -The arguments in [code]#args# are passed to the kernel in the same order. +_Effects (1)_: Equivalent to: +[source,c++] +---- +h.set_args(args...); +h.parallel_for(k); +---- + +_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { launch_task(h, k, +args...); })#. ''' From c24fb1303aae2d9310e6657db4b5fb955a52474c Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 11:16:47 +0000 Subject: [PATCH 11/24] Clarify dependencies for command_/event_barrier --- .../sycl_khr_free_function_commands.adoc | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 4406e187..2035280d 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -844,9 +844,11 @@ void command_barrier(sycl::queue q); (2) } ---- _Effects_: Enqueues a command barrier. -Any commands submitted after this barrier cannot begin execution until all -previously submitted commands (and any commands associated with dependendent -events) have completed. +Any commands submitted after this barrier cannot begin execution until: + +* All commands previously submitted to this queue have completed; and +* All commands associated with this command's dependencies (e.g., via + `handler::depends_on`) have completed. ''' @@ -862,9 +864,11 @@ void event_barrier(sycl::queue q, const std::vector& events); (2 } ---- _Effects_: Enqueues an event barrier. -Any commands submitted after this barrier cannot begin execution until all -commands associated with [code]#events# (and any commands associated with other -dependent events) have completed. +Any commands submitted after this barrier cannot begin execution until: + +* All commands associated with [code]#events# have completed; and +* All commands associated with this command's dependencies (e.g., via + `handler::depends_on`) have completed. {note}For both overloads, if [code]#events# is empty and an event barrier has no other dependencies (e.g., specified by [code]#handler::depends_on#), it is not From 664a9129b3130c4cc24ea7d833bed5957eeea615 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 11:20:23 +0000 Subject: [PATCH 12/24] Clarify that event_barrier can be a no-op --- adoc/extensions/sycl_khr_free_function_commands.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 2035280d..88219cb9 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -871,8 +871,8 @@ Any commands submitted after this barrier cannot begin execution until: `handler::depends_on`) have completed. {note}For both overloads, if [code]#events# is empty and an event barrier has no -other dependencies (e.g., specified by [code]#handler::depends_on#), it is not -required to wait for any commands unless the [code]#queue# is in-order.{endnote} +other dependencies (e.g., specified by [code]#handler::depends_on#), then this +operation is a no-op.{endnote} ''' From 16111b2d8a9e5ccd685d61a905dce9f158d094e8 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 11:40:39 +0000 Subject: [PATCH 13/24] Add missing invocation constructor --- .../sycl_khr_free_function_commands.adoc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 88219cb9..f577cd24 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -904,6 +904,7 @@ class invocation sub_group get_sub_group() const noexcept; // Available for backwards compatibility only. + invocation(nd_item it) noexcept; operator nd_item() const noexcept; }; @@ -961,6 +962,21 @@ invocation belongs. ''' +.[apidef]#invocation constructor# +[source,role=synopsis,id=api:khr-free-function-commands-invocation-invocation-constructor] +---- +invocation(nd_item() it) noexcept; +---- + +_Effects_: Constructs a [code]#invocation# representing the same work-item as +[code]#it#. + +{note}This function exists only to provide backwards compatibility with SYCL +2020 code in order to facilitate experimentation with the new interface proposed +by this extension.{endnote} + +''' + .[apidef]#nd_item conversion operator# [source,role=synopsis,id=api:khr-free-function-commands-invocation-nd_item-conversion-operator] ---- From 88b540aff2c79f9c2fba8b2cf0bfa917b39434dc Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 13:33:22 +0000 Subject: [PATCH 14/24] Restart numbering at 1 in each synopsis block --- .../sycl_khr_free_function_commands.adoc | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index f577cd24..0893d0df 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -589,22 +589,22 @@ namespace sycl::khr { template void copy(sycl::handler& h, const SrcT* src, - sycl::accessor dest); (3) + sycl::accessor dest); (1) template void copy(sycl::handler& h, std::shared_ptr src, - sycl::accessor dest); (4) + sycl::accessor dest); (2) template void copy(sycl::queue q, const SrcT* src, - sycl::accessor dest); (5) + sycl::accessor dest); (3) template void copy(sycl::queue q, std::shared_ptr src, - sycl::accessor dest); (6) + sycl::accessor dest); (4) } ---- @@ -623,9 +623,9 @@ _Preconditions_: * [code]#src# points to an allocation of at least as many bytes as the range represented by [code]#dest#. -_Effects (3-4)_: Equivalent to [code]#h.copy(src, dest)#. +_Effects (1-2)_: Equivalent to [code]#h.copy(src, dest)#. -_Effects (5-6)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, +_Effects (3-4)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, dest) })# ''' @@ -638,22 +638,22 @@ namespace sycl::khr { template void copy(sycl::handler& h, sycl::accessor src, - DestT* dest); (7) + DestT* dest); (1) template void copy(sycl::handler& h, sycl::accessor src, - std::shared_ptr dest); (8) + std::shared_ptr dest); (2) template void copy(sycl::queue q, sycl::accessor src, - DestT* dest); (9) + DestT* dest); (3) template void copy(sycl::queue q, sycl::accessor src, - std::shared_ptr dest); (10) + std::shared_ptr dest); (4) } ---- @@ -672,9 +672,9 @@ _Preconditions_: * [code]#dest# points to an allocation of at least as many bytes as the range represented by [code]#src#. -_Effects (7-8)_: Equivalent to [code]#h.copy(src, dest)#. +_Effects (1-2)_: Equivalent to [code]#h.copy(src, dest)#. -_Effects (9-10)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, +_Effects (3-4)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, dest); })#. ''' @@ -688,13 +688,13 @@ template void copy(sycl::queue q, sycl::accessor src, - sycl::accessor dest); (11) + sycl::accessor dest); (1) template void copy(sycl::queue q, sycl::accessor src, - sycl::accessor dest); (12) + sycl::accessor dest); (1) } ---- @@ -709,9 +709,9 @@ _Constraints_: * [code]#DestMode# is [code]#access_mode::write# or [code]#access_mode::read_write#. -_Effects (11)_: Equivalent to [code]#h.copy(src, dest)#. +_Effects (1)_: Equivalent to [code]#h.copy(src, dest)#. -_Effects (12)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, +_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { copy(h, src, dest); })#. _Throws_: A synchronous [code]#exception# with the [code]#errc::invalid# error From 2575901220de997cdc88635a60253c4b6f95b143 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 14:22:17 +0000 Subject: [PATCH 15/24] Replace backticks with [code] environment --- adoc/extensions/sycl_khr_free_function_commands.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 0893d0df..693b7dbc 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -848,7 +848,7 @@ Any commands submitted after this barrier cannot begin execution until: * All commands previously submitted to this queue have completed; and * All commands associated with this command's dependencies (e.g., via - `handler::depends_on`) have completed. + [code]#handler::depends_on#) have completed. ''' @@ -868,7 +868,7 @@ Any commands submitted after this barrier cannot begin execution until: * All commands associated with [code]#events# have completed; and * All commands associated with this command's dependencies (e.g., via - `handler::depends_on`) have completed. + [code]#handler::depends_on#) have completed. {note}For both overloads, if [code]#events# is empty and an event barrier has no other dependencies (e.g., specified by [code]#handler::depends_on#), then this From 8fa1ce202f7ae9a4790ed27f386c8299f9656ab3 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 15:47:03 +0000 Subject: [PATCH 16/24] Add + marks to code blocks containing ... again --- .../sycl_khr_free_function_commands.adoc | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 693b7dbc..36dbb91f 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -212,8 +212,8 @@ h.set_args(args...); h.parallel_for(r, k); ---- -_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { launch(h, r, k, -args...); })#. +_Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { launch(h, r, +k, args...); })+#. ''' @@ -297,8 +297,8 @@ h.set_args(args...); h.parallel_for(r, k); ---- -_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { -launch_reduce(h, r, k, args...); })#. +_Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { +launch_reduce(h, r, k, args...); })+#. ''' @@ -378,8 +378,8 @@ h.set_args(args...); h.parallel_for(nd_range(r, size), k); ---- -_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { -launch_grouped(h, r, size, k, args...); })#. +_Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { +launch_grouped(h, r, size, k, args...); })+#. ''' @@ -475,8 +475,8 @@ h.set_args(args...); h.parallel_for(nd_range(r, size), k); ---- -_Effects (4-6)_: Equivalent to [code]#q.submit([&](handler& h) { -launch_grouped_reduce(h, r, size, k, args...); })#. +_Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { +launch_grouped_reduce(h, r, size, k, args...); })+#. ''' @@ -520,8 +520,8 @@ h.set_args(args...); h.parallel_for(k); ---- -_Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { launch_task(h, k, -args...); })#. +_Effects (2)_: Equivalent to [code]#+q.submit([&](handler& h) { launch_task(h, +k, args...); })+#. ''' From 8d79af481e2e1c118c63e80980f3c4ca5f76010b Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 16:03:20 +0000 Subject: [PATCH 17/24] Fix grammar: "is" to "are" Co-authored-by: Greg Lueck --- adoc/extensions/sycl_khr_free_function_commands.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 36dbb91f..e8bf8d0c 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -613,7 +613,7 @@ Copies from host to device. _Constraints_: -* [code]#SrcT# and [code]#DestT# is <>. +* [code]#SrcT# and [code]#DestT# are <>. * [code]#DestMode# is [code]#access_mode::write# or [code]#access_mode::read_write#. From 2ba23942e349613db64e21f47f0077a52ca617f6 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 16:05:30 +0000 Subject: [PATCH 18/24] Fix formatting of bulleted lists --- .../sycl_khr_free_function_commands.adoc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index e8bf8d0c..ea96e319 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -566,11 +566,11 @@ _Constraints_: [code]#T# is <>. _Preconditions_: * [code]#src# is a host pointer or a pointer within a USM allocation that is - accessible on the device. + accessible on the device; * [code]#dest# is a host pointer or a pointer within a USM allocation that is - accessible on the device. + accessible on the device; * [code]#src# and [code]#dest# both point to allocations of at least - [code]#count# elements of type [code]#T#. + [code]#count# elements of type [code]#T#; and * If either [code]#src# or [code]#dest# is a pointer is to a USM allocation, that allocation was created from the same context as the handler's queue. @@ -613,13 +613,13 @@ Copies from host to device. _Constraints_: -* [code]#SrcT# and [code]#DestT# are <>. +* [code]#SrcT# and [code]#DestT# are <>; and * [code]#DestMode# is [code]#access_mode::write# or [code]#access_mode::read_write#. _Preconditions_: -* [code]#src# is a host pointer. +* [code]#src# is a host pointer; and * [code]#src# points to an allocation of at least as many bytes as the range represented by [code]#dest#. @@ -662,13 +662,13 @@ Copies from device to host. _Constraints_: -* [code]#SrcT# and [code]#DestT# is <>. +* [code]#SrcT# and [code]#DestT# is <>; and * [code]#DestMode# is [code]#access_mode::read# or [code]#access_mode::read_write#. _Preconditions_: -* [code]#dest# is a host pointer. +* [code]#dest# is a host pointer; and * [code]#dest# points to an allocation of at least as many bytes as the range represented by [code]#src#. @@ -703,9 +703,9 @@ Copies between two device accessors. _Constraints_: -* [code]#SrcT# and [code]#DestT# is <>. +* [code]#SrcT# and [code]#DestT# is <>; * [code]#SrcMode# is [code]#access_mode::read# or - [code]#access_mode::read_write#. + [code]#access_mode::read_write#; and * [code]#DestMode# is [code]#access_mode::write# or [code]#access_mode::read_write#. From e382dbcd7f01d88b35faa2c359f2a234483d9633 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 16:08:52 +0000 Subject: [PATCH 19/24] Fix more instances of "is" that should be "are" --- adoc/extensions/sycl_khr_free_function_commands.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index ea96e319..fbc7ff97 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -662,7 +662,7 @@ Copies from device to host. _Constraints_: -* [code]#SrcT# and [code]#DestT# is <>; and +* [code]#SrcT# and [code]#DestT# are <>; and * [code]#DestMode# is [code]#access_mode::read# or [code]#access_mode::read_write#. @@ -703,7 +703,7 @@ Copies between two device accessors. _Constraints_: -* [code]#SrcT# and [code]#DestT# is <>; +* [code]#SrcT# and [code]#DestT# are <>; * [code]#SrcMode# is [code]#access_mode::read# or [code]#access_mode::read_write#; and * [code]#DestMode# is [code]#access_mode::write# or From a9bdc102f37d54ae1cbadb103b67f4c992f7df45 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 16:10:26 +0000 Subject: [PATCH 20/24] Remove unnecessary device-copyable constraint There is no need to constrain T here because T must be device-copyable in order to construct the accessor passed as an argument. --- adoc/extensions/sycl_khr_free_function_commands.adoc | 2 -- 1 file changed, 2 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index fbc7ff97..5ac212f0 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -785,8 +785,6 @@ void update_host(sycl::queue q, accessor acc); } ---- -_Constraints_: [code]#T# is <>. - _Effects (1)_: Equivalent to [code]#h.update_host(acc)#. _Effects (2)_: Equivalent to [code]#q.submit([&](handler& h) { update_host(h, From 269706cd5c2255b77204cabdb75d6595a068baa2 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 6 Dec 2024 17:02:12 +0000 Subject: [PATCH 21/24] Remove khr::invocation from free_function_commands Renaming sycl::nd_item is not a necessary part of the API redesign for submitting work, so it should be moved to its own extension. This will also give us more time to consider the design and naming of any proposed replacement(s), including how they should interact with new functionality proposed in other KHRs. --- .../sycl_khr_free_function_commands.adoc | 119 +----------------- 1 file changed, 3 insertions(+), 116 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 5ac212f0..2e71256a 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -76,9 +76,9 @@ int main() { // Use khr::launch_grouped to enqueue a kernel using groups without a handler. range<2> local = { 2, 2 }; range<2> global = { N, M }; - khr::launch_grouped(myQueue, global, local, [=](khr::invocation<2> ivc) { - size_t i = ivc.id(0); - size_t j = ivc.id(1); + khr::launch_grouped(myQueue, global, local, [=](sycl::nd_item<2> it) { + size_t i = it.get_global_id(0); + size_t j = it.get_global_id(1); size_t index = i * M + j; c[index] = a[index] + b[index]; }); @@ -874,119 +874,6 @@ operation is a no-op.{endnote} ''' -== [code]#invocation# class template - -The [code]#invocation# class template identifies an invocation of a kernel -function. - -Instances of the [code]#invocation# class template are not user-constructible -and are passed as an argument to each invocation of a kernel function. - -[source,role=synopsis] ----- -namespace sycl::khr { - -template -class invocation -{ - public: - static constexpr int dimensions = Dimensions; - - id id() const noexcept; - size_t linear_id() const noexcept; - - range range() const noexcept; - - group get_work_group() const noexcept; - - sub_group get_sub_group() const noexcept; - - // Available for backwards compatibility only. - invocation(nd_item it) noexcept; - operator nd_item() const noexcept; -}; - -} ----- - -.[apidef]#id# -[source,role=synopsis,id=api:khr-free-function-commands-invocation-id] ----- -id id() const noexcept; ----- -_Returns_: The index of this invocation within the kernel dispatch. - -''' - -.[apidef]#linear_id# -[source,role=synopsis,id=api:khr-free-function-commands-invocation-linear_id] ----- -size_t linear_id() const noexcept; ----- -_Returns_: The linearized index (see <>) of this -invocation within the kernel dispatch. - -''' - -.[apidef]#range# -[source,role=synopsis,id=api:khr-free-function-commands-invocation-range] ----- -range range() const noexcept; ----- - -_Returns_: An index space representing all invocations of this kernel. - -''' - -.[apidef]#get_work_group# -[source,role=synopsis,id=api:khr-free-function-commands-invocation-get_work_group] ----- -group get_work_group() const noexcept; ----- - -_Returns_: A [code]#group# representing the <> to which this -invocation belongs. - -''' - -.[apidef]#get_sub_group# -[source,role=synopsis,id=api:khr-free-function-commands-invocation-get_sub_group] ----- -sub_group get_sub_group() const noexcept; ----- - -_Returns_: A [code]#sub_group# representing the sub-group to which this -invocation belongs. - -''' - -.[apidef]#invocation constructor# -[source,role=synopsis,id=api:khr-free-function-commands-invocation-invocation-constructor] ----- -invocation(nd_item() it) noexcept; ----- - -_Effects_: Constructs a [code]#invocation# representing the same work-item as -[code]#it#. - -{note}This function exists only to provide backwards compatibility with SYCL -2020 code in order to facilitate experimentation with the new interface proposed -by this extension.{endnote} - -''' - -.[apidef]#nd_item conversion operator# -[source,role=synopsis,id=api:khr-free-function-commands-invocation-nd_item-conversion-operator] ----- -operator nd_item() const noexcept; ----- - -_Returns_: An [code]#nd_item# representing this invocation. - -{note}This function exists only to provide backwards compatibility with SYCL -2020 code in order to facilitate experimentation with the new interface proposed -by this extension.{endnote} - == Issues None. From fa8a8f6d60e5d3df80c29974b4083a4ea4aa9445 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Mon, 9 Dec 2024 16:46:30 +0000 Subject: [PATCH 22/24] Remove empty issues section --- adoc/extensions/sycl_khr_free_function_commands.adoc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index 2e71256a..ecf2b1a6 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -873,7 +873,3 @@ other dependencies (e.g., specified by [code]#handler::depends_on#), then this operation is a no-op.{endnote} ''' - -== Issues - -None. From db380b4e051510952e31cd6a4bf7231d3954ec76 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Mon, 9 Dec 2024 16:53:39 +0000 Subject: [PATCH 23/24] Add missing constraints to fill overloads --- adoc/extensions/sycl_khr_free_function_commands.adoc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index ecf2b1a6..b410df5f 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -760,6 +760,9 @@ void fill(sycl::queue q, } ---- + +_Constraints (1, 3)_: [code]#T# is <>. + _Effects (1)_: Equivalent to [code]#h.fill(ptr, pattern, count)#. _Effects (2)_: Equivalent to [code]#h.fill(dest, src)#. From d26831a0415669dc985a815e3fc89b6f1d34cabb Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Thu, 12 Dec 2024 15:26:40 +0000 Subject: [PATCH 24/24] Remove *_reduce functions for kernel objects There are currently no backends that define interop for reductions, so we can remove these functions for now. If we decide later that these functions are necessary, we can release a revision of the KHR. --- .../sycl_khr_free_function_commands.adoc | 92 ------------------- 1 file changed, 92 deletions(-) diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc index b410df5f..a373e565 100644 --- a/adoc/extensions/sycl_khr_free_function_commands.adoc +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -257,49 +257,6 @@ _Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { launch_reduce(h, r, k, reductions...); })+#. -''' - -.[apititle]#launch_reduce# (kernel object) -[source,role=synopsis,id=api:launch_reduce-kernel] ----- -namespace sycl::khr { - -template -void launch_reduce(sycl::handler& h, sycl::range<1> r, - const sycl::kernel& k, Args&&... args); (1) - -template -void launch_reduce(sycl::handler& h, sycl::range<2> r, - const sycl::kernel& k, Args&&... args); (2) - -template -void launch_reduce(sycl::handler& h, sycl::range<3> r, - const sycl::kernel& k, Args&&... args); (3) - -template -void launch_reduce(sycl::queue q, sycl::range<1> r, - const sycl::kernel& k, Args&&... args); (4) - -template -void launch_reduce(sycl::queue q, sycl::range<2> r, - const sycl::kernel& k, Args&&... args); (5) - -template -void launch_reduce(sycl::queue q, sycl::range<3> r, - const sycl::kernel& k, Args&&... args); (6) - -} ----- -_Effects (1-3)_: Equivalent to: -[source,c++] ----- -h.set_args(args...); -h.parallel_for(r, k); ----- - -_Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { -launch_reduce(h, r, k, args...); })+#. - ''' .[apititle]#launch_grouped# (kernel function) @@ -431,55 +388,6 @@ launch_grouped_reduce(h, r, size, k, reductions...); })+#. ''' -.[apititle]#launch_grouped_reduce# (kernel object) -[source,role=synopsis,id=api:launch_grouped_reduce-kernel] ----- -namespace sycl::khr { - -template -void launch_grouped_reduce(sycl::handler& h, sycl::range<1> r, - sycl::range<1> size, const sycl::kernel& k, - Args&&... args); (1) - -template -void launch_grouped_reduce(sycl::handler& h, sycl::range<2> r, - sycl::range<2> size, const sycl::kernel& k, - Args&&... args); (2) - -template -void launch_grouped_reduce(sycl::handler& h, sycl::range<3> r, - sycl::range<3> size, const sycl::kernel& k, - Args&&... args); (3) - -template -void launch_grouped_reduce(sycl::queue q, sycl::range<1> r, - sycl::range<1> size, const sycl::kernel& k, - Args&&... args); (4) - -template -void launch_grouped_reduce(sycl::queue q, sycl::range<2> r, - sycl::range<2> size, const sycl::kernel& k, - Args&&... args); (5) - -template -void launch_grouped_reduce(sycl::queue q, sycl::range<3> r, - sycl::range<3> size, const sycl::kernel& k, - Args&&... args); (6) - -} ----- -_Effects (1-3)_: Equivalent to: -[source,c++] ----- -h.set_args(args...); -h.parallel_for(nd_range(r, size), k); ----- - -_Effects (4-6)_: Equivalent to [code]#+q.submit([&](handler& h) { -launch_grouped_reduce(h, r, size, k, args...); })+#. - -''' - .[apititle]#launch_task# (kernel function) [source,role=synopsis,id=api:launch_task] ----