From bfd4ad6adaa9ad2259ced629ace98d8986ff47a1 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Thu, 9 Jan 2025 05:59:29 +0000 Subject: [PATCH 1/8] [SYCL][Bindless][E2E] Test for host USM backed images This patch adds a test for an image backed by a host USM allocation. --- sycl/cmake/modules/FetchUnifiedRuntime.cmake | 3 +- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 23 ++- .../bindless_images/sampling_2D_USM_host.cpp | 147 ++++++++++++++++++ 3 files changed, 165 insertions(+), 8 deletions(-) create mode 100644 sycl/test-e2e/bindless_images/sampling_2D_USM_host.cpp diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index 72841724fa01d..92e9287d47bb5 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -116,7 +116,8 @@ if(SYCL_UR_USE_FETCH_CONTENT) CACHE PATH "Path to external '${name}' adapter source dir" FORCE) endfunction() - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") + # set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") + set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git") include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index de4a5aa14251b..e566012fa5693 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,8 +1,17 @@ -# commit 7eae5c80a9e969bc12fda57c9cc0a0970f0cd17f -# Merge: 9c652ffb b78cfa71 -# Author: Ross Brunton -# Date: Thu Jan 9 17:28:00 2025 +0000 -# Merge pull request #2048 from RossBrunton/ross/refc +# commit 70053658b3f741f41cf9b62e063e8c4fe51957e0 +# Author: Przemek Malon +# Date: Wed Jan 8 19:53:17 2025 +0000 +# Enable creation of images backed by host USM # -# Use reference counting on factories -set(UNIFIED_RUNTIME_TAG 7eae5c80a9e969bc12fda57c9cc0a0970f0cd17f) +# Small patch to enable bindless images backed by host USM in the CUDA +# adapter. +# +# Host and Device USM pointers are usable across the host and device +# for all versions of CUDA that we support. There is no need to provide +# the `CU_MEMHOSTALLOC_DEVICEMAP` flag during allocation, or calling +# `cuMemHostGetDevicePointer` to retrieve a device usable address. +# +# Passing a `CU_MEMHOSTALLOC_WRITECOMBINED` flag to the host USM +# allocation will enhance performance in certain scenarios, however, an +# extension allowing this is not yet available. +set(UNIFIED_RUNTIME_TAG 70053658b3f741f41cf9b62e063e8c4fe51957e0) diff --git a/sycl/test-e2e/bindless_images/sampling_2D_USM_host.cpp b/sycl/test-e2e/bindless_images/sampling_2D_USM_host.cpp new file mode 100644 index 0000000000000..bca3d2c1c0ddd --- /dev/null +++ b/sycl/test-e2e/bindless_images/sampling_2D_USM_host.cpp @@ -0,0 +1,147 @@ +// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images_2d_usm + +// RUN: %{build} -o %t.out +// RUN: %{run-unfiltered-devices} %t.out + +#include +#include +#include + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +class image_addition; + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // declare image data + size_t width = 5; + size_t height = 6; + size_t N = width * height; + size_t widthInBytes = width * sizeof(float); + std::vector out(N); + std::vector expected(N); + std::vector dataIn(N); + + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + expected[i + (width * j)] = i + (width * j); + dataIn[i + (width * j)] = i + (width * j); + } + } + + try { + sycl::ext::oneapi::experimental::bindless_image_sampler samp( + sycl::addressing_mode::clamp, + sycl::coordinate_normalization_mode::normalized, + sycl::filtering_mode::linear); + + // Extension: image descriptor + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, 1, sycl::image_channel_type::fp32); + + auto devicePitchAlign = dev.get_info< + sycl::ext::oneapi::experimental::info::device::image_row_pitch_align>(); + auto deviceMaxPitch = + dev.get_info(); + + // Pitch requirements: + // - pitch % devicePitchAlign == 0 + // - pitch >= widthInBytes + // - pitch <= deviceMaxPitch + size_t pitch = devicePitchAlign * + std::ceil(float(widthInBytes) / float(devicePitchAlign)); + assert(pitch <= deviceMaxPitch); + + // Host USM allocation + auto imgMem = + sycl::aligned_alloc_host(devicePitchAlign, (pitch * height), ctxt); + + if (imgMem == nullptr) { + std::cerr << "Error allocating images!" << std::endl; + return 1; + } + + // Copy to host USM and incorporate pitch + for (size_t i = 0; i < height; i++) { + memcpy(static_cast(imgMem) + (i * pitch / sizeof(float)), + dataIn.data() + (i * width), widthInBytes); + } + + // Extension: create the image and return the handle + sycl::ext::oneapi::experimental::sampled_image_handle imgHandle = + sycl::ext::oneapi::experimental::create_image(imgMem, pitch, samp, desc, + dev, ctxt); + + sycl::buffer buf((float *)out.data(), + sycl::range<2>{height, width}); + q.submit([&](sycl::handler &cgh) { + auto outAcc = buf.get_access( + cgh, sycl::range<2>{height, width}); + + cgh.parallel_for( + sycl::nd_range<2>{{width, height}, {width, height}}, + [=](sycl::nd_item<2> it) { + size_t dim0 = it.get_local_id(0); + size_t dim1 = it.get_local_id(1); + + // Normalize coordinates -- +0.5 to look towards centre of pixel + float fdim0 = float(dim0 + 0.5f) / (float)width; + float fdim1 = float(dim1 + 0.5f) / (float)height; + + // Extension: sample image data from handle + float px = sycl::ext::oneapi::experimental::sample_image( + imgHandle, sycl::float2(fdim0, fdim1)); + + outAcc[sycl::id<2>{dim1, dim0}] = px; + }); + }); + + q.wait_and_throw(); + + // Extension: cleanup + sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle, dev, ctxt); + sycl::free(imgMem, ctxt); + } catch (sycl::exception e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (int i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} From af2bab84891666fb0b9a9da3392478bab543aa5c Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Mon, 20 Jan 2025 10:38:39 +0000 Subject: [PATCH 2/8] Update UR Tag --- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 24 +++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 9c18fb006a799..4721df6456d76 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,7 +1,17 @@ -# commit 222e4b1d51536bb38e03e2000a79679af0a44a6d -# Merge: 30d183a0 28108a7e -# Author: Kenneth Benzie (Benie) -# Date: Fri Jan 17 10:28:34 2025 +0000 -# Merge pull request #2561 from Bensuo/ben/cmd-buffer-l0-fence -# [L0][CMDBUF] Optimize fence/event waits during update -set(UNIFIED_RUNTIME_TAG 222e4b1d51536bb38e03e2000a79679af0a44a6d) +# commit d49c8cd9e4b911444c9abc124a9e6fb0b097c074 +# Author: Przemek Malon +# Date: Wed Jan 8 19:53:17 2025 +0000 +# Enable creation of images backed by host USM +# +# Small patch to enable bindless images backed by host USM in the CUDA +# adapter. +# +# Host and Device USM pointers are usable across the host and device +# for all versions of CUDA that we support. There is no need to provide +# the `CU_MEMHOSTALLOC_DEVICEMAP` flag during allocation, or calling +# `cuMemHostGetDevicePointer` to retrieve a device usable address. +# +# Passing a `CU_MEMHOSTALLOC_WRITECOMBINED` flag to the host USM +# allocation will enhance performance in certain scenarios, however, an +# extension allowing this is not yet available. +set(UNIFIED_RUNTIME_TAG d49c8cd9e4b911444c9abc124a9e6fb0b097c074) \ No newline at end of file From 82664407573e2c332654799d885b031021e6d2ed Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Wed, 22 Jan 2025 10:28:06 +0000 Subject: [PATCH 3/8] Update UR Tag --- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 24 +++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 17899bdefc3dc..876762ea0b69b 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,7 +1,17 @@ -# commit 871061f1aa3b8ade57e0a2ed63d8000e257548cc -# Merge: 262ec93e 7cca93f9 -# Author: Kenneth Benzie (Benie) -# Date: Tue Jan 21 13:26:45 2025 +0000 -# Merge pull request #2588 from kbenzie/benie/ci-delete-prerelease -# Remove the prerelease.yml job -set(UNIFIED_RUNTIME_TAG 871061f1aa3b8ade57e0a2ed63d8000e257548cc) +# commit b8198b0e031fe80b29a6909b508a47e1f8bd1fcc +# Author: Przemek Malon +# Date: Wed Jan 8 19:53:17 2025 +0000 +# Enable creation of images backed by host USM +# +# Small patch to enable bindless images backed by host USM in the CUDA +# adapter. +# +# Host and Device USM pointers are usable across the host and device +# for all versions of CUDA that we support. There is no need to provide +# the `CU_MEMHOSTALLOC_DEVICEMAP` flag during allocation, or calling +# `cuMemHostGetDevicePointer` to retrieve a device usable address. +# +# Passing a `CU_MEMHOSTALLOC_WRITECOMBINED` flag to the host USM +# allocation will enhance performance in certain scenarios, however, an +# extension allowing this is not yet available. +set(UNIFIED_RUNTIME_TAG b8198b0e031fe80b29a6909b508a47e1f8bd1fcc) \ No newline at end of file From 96efdb8dacbd1f6fb8d91d6a314955e91ce0be5d Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Wed, 22 Jan 2025 15:18:18 +0000 Subject: [PATCH 4/8] Update UR Tag --- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 24 +++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 839ff5564af90..3d0aa55776232 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,7 +1,17 @@ -# commit f058cb230c65fe8094f74043d0c9afd5ba0e8325 -# Merge: 871061f1 3bf76246 -# Author: Ross Brunton -# Date: Tue Jan 21 15:49:41 2025 +0000 -# Merge pull request #2593 from RossBrunton/ross/formatall -# Ensure all files are clang formatted -set(UNIFIED_RUNTIME_TAG f058cb230c65fe8094f74043d0c9afd5ba0e8325) +# commit 01c0c0eb1a66681c216af7a7c6b0470c769fe11d +# Author: Przemek Malon +# Date: Wed Jan 8 19:53:17 2025 +0000 +# Enable creation of images backed by host USM +# +# Small patch to enable bindless images backed by host USM in the CUDA +# adapter. +# +# Host and Device USM pointers are usable across the host and device +# for all versions of CUDA that we support. There is no need to provide +# the `CU_MEMHOSTALLOC_DEVICEMAP` flag during allocation, or calling +# `cuMemHostGetDevicePointer` to retrieve a device usable address. +# +# Passing a `CU_MEMHOSTALLOC_WRITECOMBINED` flag to the host USM +# allocation will enhance performance in certain scenarios, however, an +# extension allowing this is not yet available. +set(UNIFIED_RUNTIME_TAG 01c0c0eb1a66681c216af7a7c6b0470c769fe11d) From 97724546c2a09275b4427e25d9438c498440e866 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Wed, 22 Jan 2025 15:24:50 +0000 Subject: [PATCH 5/8] Update UR Tag --- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 3d0aa55776232..3d9c401b0dac0 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,4 +1,4 @@ -# commit 01c0c0eb1a66681c216af7a7c6b0470c769fe11d +# commit 38da1bd8c0262eeabbea24e74536af4ab13ec0e3 # Author: Przemek Malon # Date: Wed Jan 8 19:53:17 2025 +0000 # Enable creation of images backed by host USM @@ -14,4 +14,4 @@ # Passing a `CU_MEMHOSTALLOC_WRITECOMBINED` flag to the host USM # allocation will enhance performance in certain scenarios, however, an # extension allowing this is not yet available. -set(UNIFIED_RUNTIME_TAG 01c0c0eb1a66681c216af7a7c6b0470c769fe11d) +set(UNIFIED_RUNTIME_TAG 38da1bd8c0262eeabbea24e74536af4ab13ec0e3) From 6bd52c2170a9fecc10703977642a9e3987c36fa1 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Thu, 23 Jan 2025 11:02:26 +0000 Subject: [PATCH 6/8] Update UR Tag --- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 24 +++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 82f7986f312b1..475a4698d574e 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,7 +1,17 @@ -# commit 3f6dbf3e4bec2ba7bfe7e4cebe998b51852baadc -# Merge: bf7a6548 dc294e4a -# Author: Kenneth Benzie (Benie) -# Date: Wed Jan 22 15:54:22 2025 +0000 -# Merge pull request #2193 from aarongreig/aaron/clarifyIsNativeHandleOwned -# Clarify spec around isNativeHandleOwned. -set(UNIFIED_RUNTIME_TAG 3f6dbf3e4bec2ba7bfe7e4cebe998b51852baadc) +# commit eeff9f4a6e0ed51ba459fd923724fb4c3dd545d7 +# Author: Przemek Malon +# Date: Wed Jan 8 19:53:17 2025 +0000 +# Enable creation of images backed by host USM +# +# Small patch to enable bindless images backed by host USM in the CUDA +# adapter. +# +# Host and Device USM pointers are usable across the host and device +# for all versions of CUDA that we support. There is no need to provide +# the `CU_MEMHOSTALLOC_DEVICEMAP` flag during allocation, or calling +# `cuMemHostGetDevicePointer` to retrieve a device usable address. +# +# Passing a `CU_MEMHOSTALLOC_WRITECOMBINED` flag to the host USM +# allocation will enhance performance in certain scenarios, however, an +# extension allowing this is not yet available. +set(UNIFIED_RUNTIME_TAG eeff9f4a6e0ed51ba459fd923724fb4c3dd545d7) From cf79deac1eaaa2c8586d785cdc5304317dd51be6 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Mon, 27 Jan 2025 11:24:28 +0000 Subject: [PATCH 7/8] Update UR Tag --- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 5ba8a6aa938f1..de93531c96fc5 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,7 +1,7 @@ -# commit b841691699393dd2375e987c3d38d5f59c3e35cf -# Merge: c6859445 9de10cd9 +# commit 0bb6789f0113ea937d861fd67fd677b91ecdeb8b +# Merge: e370a2b9 eeff9f4a # Author: Kenneth Benzie (Benie) -# Date: Thu Jan 23 16:07:06 2025 +0000 -# Merge pull request #2559 from Bensuo/fix_kernel_arg_indices -# [CUDA][HIP] Fix kernel arguments being overwritten when added out of order -set(UNIFIED_RUNTIME_TAG b841691699393dd2375e987c3d38d5f59c3e35cf) +# Date: Mon Jan 27 10:40:02 2025 +0000 +# Merge pull request #2551 from przemektmalon/przemek/bindless-images-host-usm +# Enable creation of bindless images backed by host USM +set(UNIFIED_RUNTIME_TAG 0bb6789f0113ea937d861fd67fd677b91ecdeb8b) \ No newline at end of file From b4c917b7da26ed4b9d1056163db29a9bfe7b46b9 Mon Sep 17 00:00:00 2001 From: Przemek Malon Date: Mon, 27 Jan 2025 11:27:01 +0000 Subject: [PATCH 8/8] Add EOF Newline --- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index de93531c96fc5..7b46bd5b034c9 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -4,4 +4,4 @@ # Date: Mon Jan 27 10:40:02 2025 +0000 # Merge pull request #2551 from przemektmalon/przemek/bindless-images-host-usm # Enable creation of bindless images backed by host USM -set(UNIFIED_RUNTIME_TAG 0bb6789f0113ea937d861fd67fd677b91ecdeb8b) \ No newline at end of file +set(UNIFIED_RUNTIME_TAG 0bb6789f0113ea937d861fd67fd677b91ecdeb8b)