Skip to content

Commit 7bdb758

Browse files
authored
[SYCL] Implementation change to the khr_free_function_commands extension (#17222)
The purpose of this PR is to change `khr_free_function_commands` implementation to improve performance of the extension. Additionally, this PR includes unittests to verify that events are not created.
1 parent 7122e57 commit 7bdb758

File tree

5 files changed

+577
-94
lines changed

5 files changed

+577
-94
lines changed

sycl/include/sycl/ext/khr/free_function_commands.hpp

+24-21
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
namespace sycl {
66
inline namespace _V1 {
7+
78
#ifdef __DPCPP_ENABLE_UNFINISHED_KHR_EXTENSIONS
89
namespace khr {
910

@@ -75,23 +76,17 @@ void launch(handler &h, range<3> r, const kernel &k, ArgsT &&...args) {
7576

7677
template <typename... ArgsT>
7778
void launch(queue q, range<1> r, const kernel &k, ArgsT &&...args) {
78-
submit(q, [&](handler &h) {
79-
parallel_for(h, r, k, std::forward<ArgsT>(args)...);
80-
});
79+
submit(q, [&](handler &h) { launch(h, r, k, std::forward<ArgsT>(args)...); });
8180
}
8281

8382
template <typename... ArgsT>
8483
void launch(queue q, range<2> r, const kernel &k, ArgsT &&...args) {
85-
submit(q, [&](handler &h) {
86-
parallel_for(h, r, k, std::forward<ArgsT>(args)...);
87-
});
84+
submit(q, [&](handler &h) { launch(h, r, k, std::forward<ArgsT>(args)...); });
8885
}
8986

9087
template <typename... ArgsT>
9188
void launch(queue q, range<3> r, const kernel &k, ArgsT &&...args) {
92-
submit(q, [&](handler &h) {
93-
parallel_for(h, r, k, std::forward<ArgsT>(args)...);
94-
});
89+
submit(q, [&](handler &h) { launch(h, r, k, std::forward<ArgsT>(args)...); });
9590
}
9691

9792
template <typename KernelType, typename... Reductions>
@@ -303,7 +298,7 @@ inline void memcpy(handler &h, void *dest, const void *src, size_t numBytes) {
303298
inline void memcpy(queue q, void *dest, const void *src, size_t numBytes,
304299
const sycl::detail::code_location &codeLoc =
305300
sycl::detail::code_location::current()) {
306-
q.submit([&](handler &h) { memcpy(h, dest, src, numBytes); }, codeLoc);
301+
sycl::ext::oneapi::experimental::memcpy(q, dest, src, numBytes, codeLoc);
307302
}
308303

309304
template <typename T>
@@ -335,7 +330,8 @@ void copy(queue q, const SrcT *src,
335330
accessor<DestT, DestDims, DestMode, target::device> dest,
336331
const sycl::detail::code_location &codeLoc =
337332
sycl::detail::code_location::current()) {
338-
q.submit(
333+
submit(
334+
q,
339335
[&](handler &h) {
340336
h.require(dest);
341337
copy(h, src, dest);
@@ -348,7 +344,8 @@ void copy(queue q, std::shared_ptr<SrcT> src,
348344
accessor<DestT, DestDims, DestMode, target::device> dest,
349345
const sycl::detail::code_location &codeLoc =
350346
sycl::detail::code_location::current()) {
351-
q.submit(
347+
submit(
348+
q,
352349
[&](handler &h) {
353350
h.require(dest);
354351
copy(h, src, dest);
@@ -373,7 +370,8 @@ void copy(queue q, accessor<SrcT, SrcDims, SrcMode, target::device> src,
373370
DestT *dest,
374371
const sycl::detail::code_location &codeLoc =
375372
sycl::detail::code_location::current()) {
376-
q.submit(
373+
submit(
374+
q,
377375
[&](handler &h) {
378376
h.require(src);
379377
copy(h, src, dest);
@@ -386,7 +384,8 @@ void copy(queue q, accessor<SrcT, SrcDims, SrcMode, target::device> src,
386384
std::shared_ptr<DestT> dest,
387385
const sycl::detail::code_location &codeLoc =
388386
sycl::detail::code_location::current()) {
389-
q.submit(
387+
submit(
388+
q,
390389
[&](handler &h) {
391390
h.require(src);
392391
copy(h, src, dest);
@@ -407,7 +406,8 @@ void copy(queue q, accessor<SrcT, SrcDims, SrcMode, target::device> src,
407406
accessor<DestT, DestDims, DestMode, target::device> dest,
408407
const sycl::detail::code_location &codeLoc =
409408
sycl::detail::code_location::current()) {
410-
q.submit(
409+
submit(
410+
q,
411411
[&](handler &h) {
412412
h.require(src);
413413
h.require(dest);
@@ -422,7 +422,7 @@ inline void memset(handler &h, void *ptr, int value, size_t numBytes) {
422422
inline void memset(queue q, void *ptr, int value, size_t numBytes,
423423
const sycl::detail::code_location &codeLoc =
424424
sycl::detail::code_location::current()) {
425-
q.submit([&](handler &h) { memset(h, ptr, value, numBytes); }, codeLoc);
425+
sycl::ext::oneapi::experimental::memset(q, ptr, value, numBytes, codeLoc);
426426
}
427427

428428
template <typename T>
@@ -440,14 +440,15 @@ template <typename T>
440440
void fill(queue q, T *ptr, const T &pattern, size_t count,
441441
const sycl::detail::code_location &codeLoc =
442442
sycl::detail::code_location::current()) {
443-
q.submit([&](handler &h) { fill(h, ptr, pattern, count); }, codeLoc);
443+
submit(q, [&](handler &h) { fill(h, ptr, pattern, count); }, codeLoc);
444444
}
445445

446446
template <typename T, int Dims, access_mode Mode>
447447
void fill(queue q, accessor<T, Dims, Mode, target::device> dest, const T &src,
448448
const sycl::detail::code_location &codeLoc =
449449
sycl::detail::code_location::current()) {
450-
q.submit(
450+
submit(
451+
q,
451452
[&](handler &h) {
452453
h.require(dest);
453454
fill(h, dest, src);
@@ -464,7 +465,8 @@ template <typename T, int Dims, access_mode Mode>
464465
void update_host(queue q, accessor<T, Dims, Mode, target::device> acc,
465466
const sycl::detail::code_location &codeLoc =
466467
sycl::detail::code_location::current()) {
467-
q.submit(
468+
submit(
469+
q,
468470
[&](handler &h) {
469471
h.require(acc);
470472
update_host(h, acc);
@@ -478,7 +480,7 @@ inline void prefetch(handler &h, void *ptr, size_t numBytes) {
478480
inline void prefetch(queue q, void *ptr, size_t numBytes,
479481
const sycl::detail::code_location &codeLoc =
480482
sycl::detail::code_location::current()) {
481-
q.submit([&](handler &h) { prefetch(h, ptr, numBytes); }, codeLoc);
483+
submit(q, [&](handler &h) { prefetch(h, ptr, numBytes); }, codeLoc);
482484
}
483485

484486
inline void mem_advise(handler &h, void *ptr, size_t numBytes, int advice) {
@@ -488,7 +490,8 @@ inline void mem_advise(handler &h, void *ptr, size_t numBytes, int advice) {
488490
inline void mem_advise(queue q, void *ptr, size_t numBytes, int advice,
489491
const sycl::detail::code_location &codeLoc =
490492
sycl::detail::code_location::current()) {
491-
q.submit([&](handler &h) { mem_advise(h, ptr, numBytes, advice); }, codeLoc);
493+
sycl::ext::oneapi::experimental::mem_advise(q, ptr, numBytes, advice,
494+
codeLoc);
492495
}
493496

494497
inline void command_barrier(handler &h) { h.ext_oneapi_barrier(); }

sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp

+2-73
Original file line numberDiff line numberDiff line change
@@ -7,88 +7,17 @@
77
//===----------------------------------------------------------------------===//
88
// Tests the behavior of enqueue free functions when events can be discarded.
99

10-
#include "detail/event_impl.hpp"
11-
#include "detail/queue_impl.hpp"
12-
#include "sycl/platform.hpp"
13-
#include <helpers/TestKernel.hpp>
14-
#include <helpers/UrMock.hpp>
15-
16-
#include <gtest/gtest.h>
10+
#include "FreeFunctionCommands/FreeFunctionEventsHelpers.hpp"
1711

18-
#include <sycl/detail/core.hpp>
12+
#include <helpers/TestKernel.hpp>
1913
#include <sycl/ext/oneapi/experimental/enqueue_functions.hpp>
20-
#include <sycl/properties/all_properties.hpp>
21-
#include <sycl/usm.hpp>
2214

2315
using namespace sycl;
2416

2517
namespace oneapiext = ext::oneapi::experimental;
2618

2719
namespace {
2820

29-
inline ur_result_t after_urKernelGetInfo(void *pParams) {
30-
auto params = *static_cast<ur_kernel_get_info_params_t *>(pParams);
31-
constexpr char MockKernel[] = "TestKernel";
32-
if (*params.ppropName == UR_KERNEL_INFO_FUNCTION_NAME) {
33-
if (*params.ppPropValue) {
34-
assert(*params.ppropSize == sizeof(MockKernel));
35-
std::memcpy(*params.ppPropValue, MockKernel, sizeof(MockKernel));
36-
}
37-
if (*params.ppPropSizeRet)
38-
**params.ppPropSizeRet = sizeof(MockKernel);
39-
}
40-
return UR_RESULT_SUCCESS;
41-
}
42-
43-
thread_local size_t counter_urEnqueueKernelLaunch = 0;
44-
inline ur_result_t redefined_urEnqueueKernelLaunch(void *pParams) {
45-
++counter_urEnqueueKernelLaunch;
46-
auto params = *static_cast<ur_enqueue_kernel_launch_params_t *>(pParams);
47-
EXPECT_EQ(*params.pphEvent, nullptr);
48-
return UR_RESULT_SUCCESS;
49-
}
50-
51-
thread_local size_t counter_urUSMEnqueueMemcpy = 0;
52-
inline ur_result_t redefined_urUSMEnqueueMemcpy(void *pParams) {
53-
++counter_urUSMEnqueueMemcpy;
54-
auto params = *static_cast<ur_enqueue_usm_memcpy_params_t *>(pParams);
55-
EXPECT_EQ(*params.pphEvent, nullptr);
56-
return UR_RESULT_SUCCESS;
57-
}
58-
59-
thread_local size_t counter_urUSMEnqueueFill = 0;
60-
inline ur_result_t redefined_urUSMEnqueueFill(void *pParams) {
61-
++counter_urUSMEnqueueFill;
62-
auto params = *static_cast<ur_enqueue_usm_fill_params_t *>(pParams);
63-
EXPECT_EQ(*params.pphEvent, nullptr);
64-
return UR_RESULT_SUCCESS;
65-
}
66-
67-
thread_local size_t counter_urUSMEnqueuePrefetch = 0;
68-
inline ur_result_t redefined_urUSMEnqueuePrefetch(void *pParams) {
69-
++counter_urUSMEnqueuePrefetch;
70-
auto params = *static_cast<ur_enqueue_usm_prefetch_params_t *>(pParams);
71-
EXPECT_EQ(*params.pphEvent, nullptr);
72-
return UR_RESULT_SUCCESS;
73-
}
74-
75-
thread_local size_t counter_urUSMEnqueueMemAdvise = 0;
76-
inline ur_result_t redefined_urUSMEnqueueMemAdvise(void *pParams) {
77-
++counter_urUSMEnqueueMemAdvise;
78-
auto params = *static_cast<ur_enqueue_usm_advise_params_t *>(pParams);
79-
EXPECT_EQ(*params.pphEvent, nullptr);
80-
return UR_RESULT_SUCCESS;
81-
}
82-
83-
thread_local size_t counter_urEnqueueEventsWaitWithBarrier = 0;
84-
thread_local std::chrono::time_point<std::chrono::steady_clock>
85-
timestamp_urEnqueueEventsWaitWithBarrier;
86-
inline ur_result_t after_urEnqueueEventsWaitWithBarrier(void *pParams) {
87-
++counter_urEnqueueEventsWaitWithBarrier;
88-
timestamp_urEnqueueEventsWaitWithBarrier = std::chrono::steady_clock::now();
89-
return UR_RESULT_SUCCESS;
90-
}
91-
9221
class EnqueueFunctionsEventsTests : public ::testing::Test {
9322
public:
9423
EnqueueFunctionsEventsTests()
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
add_sycl_unittest(FreeFunctionCommandsTests OBJECT
22
Barrier.cpp
3+
FreeFunctionCommandsEvents.cpp
34
)

0 commit comments

Comments
 (0)