Skip to content

Commit

Permalink
[SYCL][Bindless][E2E] Test for host USM backed images
Browse files Browse the repository at this point in the history
This patch adds a test for an image backed by a host USM allocation.
  • Loading branch information
przemektmalon committed Jan 13, 2025
1 parent e0089dc commit bfd4ad6
Show file tree
Hide file tree
Showing 3 changed files with 165 additions and 8 deletions.
3 changes: 2 additions & 1 deletion sycl/cmake/modules/FetchUnifiedRuntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ if(SYCL_UR_USE_FETCH_CONTENT)
CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
endfunction()

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
# set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
set(UNIFIED_RUNTIME_REPO "https://github.com/przemektmalon/unified-runtime.git")
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake)

set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
Expand Down
23 changes: 16 additions & 7 deletions sycl/cmake/modules/UnifiedRuntimeTag.cmake
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
# commit 7eae5c80a9e969bc12fda57c9cc0a0970f0cd17f
# Merge: 9c652ffb b78cfa71
# Author: Ross Brunton <[email protected]>
# Date: Thu Jan 9 17:28:00 2025 +0000
# Merge pull request #2048 from RossBrunton/ross/refc
# commit 70053658b3f741f41cf9b62e063e8c4fe51957e0
# Author: Przemek Malon <[email protected]>
# Date: Wed Jan 8 19:53:17 2025 +0000
# Enable creation of images backed by host USM
#
# Use reference counting on factories
set(UNIFIED_RUNTIME_TAG 7eae5c80a9e969bc12fda57c9cc0a0970f0cd17f)
# Small patch to enable bindless images backed by host USM in the CUDA
# adapter.
#
# Host and Device USM pointers are usable across the host and device
# for all versions of CUDA that we support. There is no need to provide
# the `CU_MEMHOSTALLOC_DEVICEMAP` flag during allocation, or calling
# `cuMemHostGetDevicePointer` to retrieve a device usable address.
#
# Passing a `CU_MEMHOSTALLOC_WRITECOMBINED` flag to the host USM
# allocation will enhance performance in certain scenarios, however, an
# extension allowing this is not yet available.
set(UNIFIED_RUNTIME_TAG 70053658b3f741f41cf9b62e063e8c4fe51957e0)
147 changes: 147 additions & 0 deletions sycl/test-e2e/bindless_images/sampling_2D_USM_host.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
// REQUIRES: cuda
// REQUIRES: aspect-ext_oneapi_bindless_images_2d_usm

// RUN: %{build} -o %t.out
// RUN: %{run-unfiltered-devices} %t.out

#include <cmath>
#include <iostream>
#include <sycl/detail/core.hpp>

#include <sycl/ext/oneapi/bindless_images.hpp>
#include <sycl/usm.hpp>

// Uncomment to print additional test information
// #define VERBOSE_PRINT

class image_addition;

int main() {

sycl::device dev;
sycl::queue q(dev);
auto ctxt = q.get_context();

// declare image data
size_t width = 5;
size_t height = 6;
size_t N = width * height;
size_t widthInBytes = width * sizeof(float);
std::vector<float> out(N);
std::vector<float> expected(N);
std::vector<float> dataIn(N);

for (int i = 0; i < width; i++) {
for (int j = 0; j < height; j++) {
expected[i + (width * j)] = i + (width * j);
dataIn[i + (width * j)] = i + (width * j);
}
}

try {
sycl::ext::oneapi::experimental::bindless_image_sampler samp(
sycl::addressing_mode::clamp,
sycl::coordinate_normalization_mode::normalized,
sycl::filtering_mode::linear);

// Extension: image descriptor
sycl::ext::oneapi::experimental::image_descriptor desc(
{width, height}, 1, sycl::image_channel_type::fp32);

auto devicePitchAlign = dev.get_info<
sycl::ext::oneapi::experimental::info::device::image_row_pitch_align>();
auto deviceMaxPitch =
dev.get_info<sycl::ext::oneapi::experimental::info::device::
max_image_linear_row_pitch>();

// Pitch requirements:
// - pitch % devicePitchAlign == 0
// - pitch >= widthInBytes
// - pitch <= deviceMaxPitch
size_t pitch = devicePitchAlign *
std::ceil(float(widthInBytes) / float(devicePitchAlign));
assert(pitch <= deviceMaxPitch);

// Host USM allocation
auto imgMem =
sycl::aligned_alloc_host(devicePitchAlign, (pitch * height), ctxt);

if (imgMem == nullptr) {
std::cerr << "Error allocating images!" << std::endl;
return 1;
}

// Copy to host USM and incorporate pitch
for (size_t i = 0; i < height; i++) {
memcpy(static_cast<float *>(imgMem) + (i * pitch / sizeof(float)),
dataIn.data() + (i * width), widthInBytes);
}

// Extension: create the image and return the handle
sycl::ext::oneapi::experimental::sampled_image_handle imgHandle =
sycl::ext::oneapi::experimental::create_image(imgMem, pitch, samp, desc,
dev, ctxt);

sycl::buffer<float, 2> buf((float *)out.data(),
sycl::range<2>{height, width});
q.submit([&](sycl::handler &cgh) {
auto outAcc = buf.get_access<sycl::access_mode::write>(
cgh, sycl::range<2>{height, width});

cgh.parallel_for<image_addition>(
sycl::nd_range<2>{{width, height}, {width, height}},
[=](sycl::nd_item<2> it) {
size_t dim0 = it.get_local_id(0);
size_t dim1 = it.get_local_id(1);

// Normalize coordinates -- +0.5 to look towards centre of pixel
float fdim0 = float(dim0 + 0.5f) / (float)width;
float fdim1 = float(dim1 + 0.5f) / (float)height;

// Extension: sample image data from handle
float px = sycl::ext::oneapi::experimental::sample_image<float>(
imgHandle, sycl::float2(fdim0, fdim1));

outAcc[sycl::id<2>{dim1, dim0}] = px;
});
});

q.wait_and_throw();

// Extension: cleanup
sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle, dev, ctxt);
sycl::free(imgMem, ctxt);
} catch (sycl::exception e) {
std::cerr << "SYCL exception caught! : " << e.what() << "\n";
return 1;
} catch (...) {
std::cerr << "Unknown exception caught!\n";
return 2;
}

// collect and validate output
bool validated = true;
for (int i = 0; i < N; i++) {
bool mismatch = false;
if (out[i] != expected[i]) {
mismatch = true;
validated = false;
}

if (mismatch) {
#ifdef VERBOSE_PRINT
std::cout << "Result mismatch! Expected: " << expected[i]
<< ", Actual: " << out[i] << std::endl;
#else
break;
#endif
}
}
if (validated) {
std::cout << "Test passed!" << std::endl;
return 0;
}

std::cout << "Test failed!" << std::endl;
return 3;
}

0 comments on commit bfd4ad6

Please sign in to comment.