Skip to content

Commit 874ee86

Browse files
[SYCL][Bindless][E2E] Vulkan depth format interop test (#18281)
This patch introduces an E2E test which verifies that a Vulkan depth texture can be correctly imported into SYCL and its data retrieved from and written to within a SYCL kernel.
1 parent ed094f2 commit 874ee86

File tree

2 files changed

+403
-2
lines changed

2 files changed

+403
-2
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,390 @@
1+
// REQUIRES: aspect-ext_oneapi_external_memory_import || (windows && level_zero && aspect-ext_oneapi_bindless_images)
2+
// REQUIRES: vulkan
3+
4+
// RUN: %{build} %link-vulkan -o %t.out %if target-spir %{ -Wno-ignored-attributes %}
5+
// RUN: %{run} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out
6+
7+
// Uncomment to print additional test information
8+
// #define VERBOSE_PRINT
9+
10+
#include "../helpers/common.hpp"
11+
#include "vulkan_common.hpp"
12+
13+
#include <sycl/ext/oneapi/bindless_images.hpp>
14+
15+
namespace syclexp = sycl::ext::oneapi::experimental;
16+
17+
template <typename InteropMemHandleT>
18+
void runSycl(const sycl::device &syclDevice, sycl::range<2> globalSize,
19+
sycl::range<2> localSize, InteropMemHandleT extMemInHandle,
20+
InteropMemHandleT extMemOutHandle) {
21+
22+
sycl::queue syclQueue{syclDevice};
23+
24+
const size_t imgSizeBytes = globalSize.size() * sizeof(float);
25+
26+
#ifdef _WIN32
27+
syclexp::external_mem_descriptor<syclexp::resource_win32_handle> extMemInDesc{
28+
extMemInHandle, syclexp::external_mem_handle_type::win32_nt_handle,
29+
imgSizeBytes};
30+
syclexp::external_mem_descriptor<syclexp::resource_win32_handle>
31+
extMemOutDesc{extMemOutHandle,
32+
syclexp::external_mem_handle_type::win32_nt_handle,
33+
imgSizeBytes};
34+
#else
35+
syclexp::external_mem_descriptor<syclexp::resource_fd> extMemInDesc{
36+
extMemInHandle, syclexp::external_mem_handle_type::opaque_fd,
37+
imgSizeBytes};
38+
syclexp::external_mem_descriptor<syclexp::resource_fd> extMemOutDesc{
39+
extMemOutHandle, syclexp::external_mem_handle_type::opaque_fd,
40+
imgSizeBytes};
41+
#endif
42+
43+
// Extension: create interop memory handles.
44+
syclexp::external_mem externalMemIn =
45+
syclexp::import_external_memory(extMemInDesc, syclQueue);
46+
syclexp::external_mem externalMemOut =
47+
syclexp::import_external_memory(extMemOutDesc, syclQueue);
48+
49+
// Image descriptor - Vulkan depth texture mapped to single channel fp32
50+
// image.
51+
syclexp::image_descriptor imgDesc(globalSize, 1,
52+
sycl::image_channel_type::fp32);
53+
54+
// Extension: map image memory handles.
55+
syclexp::image_mem_handle imgMemIn =
56+
syclexp::map_external_image_memory(externalMemIn, imgDesc, syclQueue);
57+
syclexp::image_mem_handle imgMemOut =
58+
syclexp::map_external_image_memory(externalMemOut, imgDesc, syclQueue);
59+
60+
// Extension: create the image and return the handle.
61+
syclexp::unsampled_image_handle imgIn =
62+
syclexp::create_image(imgMemIn, imgDesc, syclQueue);
63+
syclexp::unsampled_image_handle imgOut =
64+
syclexp::create_image(imgMemOut, imgDesc, syclQueue);
65+
66+
try {
67+
syclQueue.submit([&](sycl::handler &cgh) {
68+
cgh.parallel_for<class TestDepthTextureFetch>(
69+
sycl::nd_range<2>{globalSize, localSize}, [=](sycl::nd_item<2> it) {
70+
size_t dim0 = it.get_global_id(0);
71+
size_t dim1 = it.get_global_id(1);
72+
73+
float depth =
74+
syclexp::fetch_image<float>(imgIn, sycl::int2(dim0, dim1));
75+
76+
syclexp::write_image<float>(imgOut, sycl::int2(dim0, dim1), depth);
77+
});
78+
});
79+
80+
// Wait for kernel completion before destroying external objects.
81+
syclQueue.wait_and_throw();
82+
83+
// Cleanup.
84+
syclexp::destroy_image_handle(imgIn, syclQueue);
85+
syclexp::destroy_image_handle(imgOut, syclQueue);
86+
syclexp::free_image_mem(imgMemIn, syclexp::image_type::standard, syclQueue);
87+
syclexp::free_image_mem(imgMemOut, syclexp::image_type::standard,
88+
syclQueue);
89+
syclexp::release_external_memory(externalMemIn, syclQueue);
90+
syclexp::release_external_memory(externalMemOut, syclQueue);
91+
} catch (sycl::exception e) {
92+
std::cerr << "\tKernel submission failed! " << e.what() << std::endl;
93+
exit(-1);
94+
} catch (...) {
95+
std::cerr << "\tKernel submission failed!" << std::endl;
96+
exit(-1);
97+
}
98+
}
99+
100+
bool runTest(const sycl::device &syclDevice, sycl::range<2> dims,
101+
sycl::range<2> localSize) {
102+
const uint32_t imgWidth = static_cast<uint32_t>(dims[0]);
103+
const uint32_t imgHeight = static_cast<uint32_t>(dims[1]);
104+
105+
const VkImageType imgType = VK_IMAGE_TYPE_2D;
106+
const VkFormat imgInFormat = VK_FORMAT_D32_SFLOAT;
107+
const VkFormat imgOutFormat = VK_FORMAT_D32_SFLOAT;
108+
109+
const size_t imgSizeElems = imgWidth * imgHeight;
110+
const size_t imgSizeBytes = imgSizeElems * sizeof(float);
111+
112+
const VkExtent3D imgExtent = {imgWidth, imgHeight, 1};
113+
114+
VkImage vkInputImage;
115+
VkDeviceMemory vkInputImageMemory;
116+
VkImage vkOutputImage;
117+
VkDeviceMemory vkOutputImageMemory;
118+
119+
// Initialize image input data.
120+
std::vector<float> inputVec(imgSizeElems, 0.f);
121+
for (int i = 0; i < imgSizeElems; ++i) {
122+
// Default Vulkan depth textures clmap values to between 0 and 1.
123+
inputVec[i] = float(i) / float(imgSizeElems);
124+
}
125+
126+
// Create/allocate device images.
127+
{
128+
vkInputImage = vkutil::createImage(imgType, imgInFormat, imgExtent,
129+
VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
130+
VK_IMAGE_USAGE_TRANSFER_DST_BIT |
131+
VK_IMAGE_USAGE_STORAGE_BIT,
132+
1);
133+
VkMemoryRequirements memRequirements;
134+
auto inputImageMemoryTypeIndex = vkutil::getImageMemoryTypeIndex(
135+
vkInputImage, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, memRequirements);
136+
vkInputImageMemory = vkutil::allocateDeviceMemory(
137+
imgSizeBytes, inputImageMemoryTypeIndex, vkInputImage);
138+
VK_CHECK_CALL(vkBindImageMemory(vk_device, vkInputImage, vkInputImageMemory,
139+
0 /*memoryOffset*/));
140+
141+
vkOutputImage = vkutil::createImage(imgType, imgOutFormat, imgExtent,
142+
VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
143+
VK_IMAGE_USAGE_TRANSFER_DST_BIT |
144+
VK_IMAGE_USAGE_STORAGE_BIT,
145+
1);
146+
VkMemoryRequirements outputMemRequirements;
147+
auto outputImageMemoryTypeIndex = vkutil::getImageMemoryTypeIndex(
148+
vkOutputImage, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
149+
outputMemRequirements);
150+
vkOutputImageMemory = vkutil::allocateDeviceMemory(
151+
imgSizeBytes, outputImageMemoryTypeIndex, vkOutputImage);
152+
VK_CHECK_CALL(vkBindImageMemory(vk_device, vkOutputImage,
153+
vkOutputImageMemory, 0 /*memoryOffset*/));
154+
}
155+
156+
// Transition image layouts.
157+
printString("Submitting image layout transition\n");
158+
{
159+
VkImageMemoryBarrier imgInBarrier =
160+
vkutil::createImageMemoryBarrier(vkInputImage, 1 /*mipLevels*/);
161+
VkImageMemoryBarrier imgOutBarrier =
162+
vkutil::createImageMemoryBarrier(vkOutputImage, 1 /*mipLevels*/);
163+
164+
// Update aspect mask for the images to VK_IMAGE_ASPECT_DEPTH_BIT.
165+
imgInBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
166+
imgOutBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
167+
168+
VkCommandBufferBeginInfo cbbi = {};
169+
cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
170+
cbbi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
171+
172+
VK_CHECK_CALL(vkBeginCommandBuffer(vk_computeCmdBuffer, &cbbi));
173+
vkCmdPipelineBarrier(vk_computeCmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
174+
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0,
175+
nullptr, 1, &imgInBarrier);
176+
177+
vkCmdPipelineBarrier(vk_computeCmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
178+
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0,
179+
nullptr, 1, &imgOutBarrier);
180+
VK_CHECK_CALL(vkEndCommandBuffer(vk_computeCmdBuffer));
181+
182+
VkSubmitInfo submission = {};
183+
submission.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
184+
submission.commandBufferCount = 1;
185+
submission.pCommandBuffers = &vk_computeCmdBuffer;
186+
187+
VK_CHECK_CALL(vkQueueSubmit(vk_compute_queue, 1 /*submitCount*/,
188+
&submission, VK_NULL_HANDLE /*fence*/));
189+
VK_CHECK_CALL(vkQueueWaitIdle(vk_compute_queue));
190+
}
191+
192+
// Allocate temporary staging buffer and copy input data to device.
193+
printString("Allocating staging memory and copying to device image\n");
194+
{
195+
VkBuffer stagingBuffer;
196+
VkDeviceMemory stagingMemory;
197+
198+
stagingBuffer = vkutil::createBuffer(imgSizeBytes,
199+
VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
200+
VK_BUFFER_USAGE_TRANSFER_DST_BIT);
201+
auto inputStagingMemoryTypeIndex = vkutil::getBufferMemoryTypeIndex(
202+
stagingBuffer, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
203+
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
204+
stagingMemory =
205+
vkutil::allocateDeviceMemory(imgSizeBytes, inputStagingMemoryTypeIndex,
206+
nullptr /*image*/, false /*exportable*/);
207+
VK_CHECK_CALL(vkBindBufferMemory(vk_device, stagingBuffer, stagingMemory,
208+
0 /*memoryOffset*/));
209+
210+
// Copy host data to temporary staging buffer.
211+
float *inputStagingData = nullptr;
212+
VK_CHECK_CALL(vkMapMemory(vk_device, stagingMemory, 0 /*offset*/,
213+
imgSizeBytes, 0 /*flags*/,
214+
(void **)&inputStagingData));
215+
for (int i = 0; i < (imgSizeElems); ++i) {
216+
inputStagingData[i] = inputVec[i];
217+
}
218+
vkUnmapMemory(vk_device, stagingMemory);
219+
220+
// Copy temporary staging buffer to device image memory.
221+
VkCommandBufferBeginInfo cbbi = {};
222+
cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
223+
cbbi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
224+
225+
VkBufferImageCopy copyRegion = {};
226+
copyRegion.imageExtent = {imgWidth, imgHeight, 1};
227+
copyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
228+
copyRegion.imageSubresource.layerCount = 1;
229+
230+
VK_CHECK_CALL(vkBeginCommandBuffer(vk_transferCmdBuffers[0], &cbbi));
231+
vkCmdCopyBufferToImage(vk_transferCmdBuffers[0], stagingBuffer,
232+
vkInputImage, VK_IMAGE_LAYOUT_GENERAL,
233+
1 /*regionCount*/, &copyRegion);
234+
VK_CHECK_CALL(vkEndCommandBuffer(vk_transferCmdBuffers[0]));
235+
236+
std::vector<VkPipelineStageFlags> stages{VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT};
237+
238+
VkSubmitInfo submission = {};
239+
submission.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
240+
submission.commandBufferCount = 1;
241+
submission.pCommandBuffers = &vk_transferCmdBuffers[0];
242+
submission.pWaitDstStageMask = stages.data();
243+
244+
VK_CHECK_CALL(vkQueueSubmit(vk_transfer_queue, 1 /*submitCount*/,
245+
&submission, VK_NULL_HANDLE /*fence*/));
246+
VK_CHECK_CALL(vkQueueWaitIdle(vk_transfer_queue));
247+
248+
// Destroy temporary staging buffer and free memory.
249+
vkDestroyBuffer(vk_device, stagingBuffer, nullptr);
250+
vkFreeMemory(vk_device, stagingMemory, nullptr);
251+
}
252+
253+
printString("Getting memory interop handles\n");
254+
// Get memory interop handles.
255+
#ifdef _WIN32
256+
auto imgMemIn = vkutil::getMemoryWin32Handle(vkInputImageMemory);
257+
auto imgMemOut = vkutil::getMemoryWin32Handle(vkOutputImageMemory);
258+
#else
259+
auto imgMemIn = vkutil::getMemoryOpaqueFD(vkInputImageMemory);
260+
auto imgMemOut = vkutil::getMemoryOpaqueFD(vkOutputImageMemory);
261+
#endif
262+
263+
// Call into SYCL to fetch from input image, and populate the output image.
264+
printString("Calling into SYCL with interop memory handles\n");
265+
runSycl(syclDevice, dims, localSize, imgMemIn, imgMemOut);
266+
267+
// Copy image memory to temporary staging buffer, and back to host.
268+
printString("Copying image memory to host\n");
269+
std::vector<float> outputVec(imgSizeElems, 0.f);
270+
{
271+
VkBuffer stagingBuffer;
272+
VkDeviceMemory stagingMemory;
273+
274+
stagingBuffer = vkutil::createBuffer(imgSizeBytes,
275+
VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
276+
VK_BUFFER_USAGE_TRANSFER_DST_BIT);
277+
auto outputStagingMemoryTypeIndex = vkutil::getBufferMemoryTypeIndex(
278+
stagingBuffer, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
279+
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
280+
stagingMemory =
281+
vkutil::allocateDeviceMemory(imgSizeBytes, outputStagingMemoryTypeIndex,
282+
nullptr /*image*/, false /*exportable*/);
283+
VK_CHECK_CALL(vkBindBufferMemory(vk_device, stagingBuffer, stagingMemory,
284+
0 /*memoryOffset*/));
285+
286+
VkCommandBufferBeginInfo cbbi = {};
287+
cbbi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
288+
cbbi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
289+
290+
VkBufferImageCopy copyRegion = {};
291+
copyRegion.imageExtent = {imgWidth, imgHeight, 1};
292+
copyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
293+
copyRegion.imageSubresource.layerCount = 1;
294+
295+
VK_CHECK_CALL(vkBeginCommandBuffer(vk_transferCmdBuffers[1], &cbbi));
296+
vkCmdCopyImageToBuffer(vk_transferCmdBuffers[1], vkOutputImage,
297+
VK_IMAGE_LAYOUT_GENERAL, stagingBuffer,
298+
1 /*regionCount*/, &copyRegion);
299+
VK_CHECK_CALL(vkEndCommandBuffer(vk_transferCmdBuffers[1]));
300+
301+
std::vector<VkPipelineStageFlags> stages{VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT};
302+
303+
VkSubmitInfo submission = {};
304+
submission.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
305+
submission.commandBufferCount = 1;
306+
submission.pCommandBuffers = &vk_transferCmdBuffers[1];
307+
submission.pWaitDstStageMask = stages.data();
308+
309+
VK_CHECK_CALL(vkQueueSubmit(vk_transfer_queue, 1 /*submitCount*/,
310+
&submission, VK_NULL_HANDLE /*fence*/));
311+
VK_CHECK_CALL(vkQueueWaitIdle(vk_transfer_queue));
312+
313+
// Copy temporary staging buffer output data to host output vector.
314+
float *outputStagingData = (float *)outputVec.data();
315+
VK_CHECK_CALL(vkMapMemory(vk_device, stagingMemory, 0 /*offset*/,
316+
imgSizeBytes, 0 /*flags*/,
317+
(void **)&outputStagingData));
318+
for (int i = 0; i < (imgSizeElems); ++i) {
319+
outputVec[i] = outputStagingData[i];
320+
}
321+
vkUnmapMemory(vk_device, stagingMemory);
322+
323+
// Destroy temporary staging buffer and free memory.
324+
vkDestroyBuffer(vk_device, stagingBuffer, nullptr);
325+
vkFreeMemory(vk_device, stagingMemory, nullptr);
326+
}
327+
328+
// Destroy images and free their memory.
329+
vkDestroyImage(vk_device, vkInputImage, nullptr);
330+
vkDestroyImage(vk_device, vkOutputImage, nullptr);
331+
vkFreeMemory(vk_device, vkInputImageMemory, nullptr);
332+
vkFreeMemory(vk_device, vkOutputImageMemory, nullptr);
333+
334+
// Validate that SYCL made changes to the memory.
335+
bool validated = true;
336+
for (int i = 0; i < (imgSizeElems); ++i) {
337+
float expected = inputVec[i];
338+
// Use helper function to determine if data is accepted.
339+
// For floats, use default accepted error variance.
340+
if (!util::is_equal(outputVec[i], expected)) {
341+
std::cerr << "Result mismatch! actual[" << i << "] == " << outputVec[i]
342+
<< " : expected == " << expected << "\n";
343+
validated = false;
344+
}
345+
if (!validated)
346+
break;
347+
}
348+
349+
if (validated) {
350+
printString("Results are correct!\n");
351+
}
352+
353+
return validated;
354+
}
355+
356+
int main() {
357+
358+
if (vkutil::setupInstance() != VK_SUCCESS) {
359+
std::cerr << "Instance setup failed!\n";
360+
return EXIT_FAILURE;
361+
}
362+
363+
sycl::device syclDevice;
364+
365+
if (vkutil::setupDevice(syclDevice.get_info<sycl::info::device::name>()) !=
366+
VK_SUCCESS) {
367+
std::cerr << "Device setup failed!\n";
368+
return EXIT_FAILURE;
369+
}
370+
371+
if (vkutil::setupCommandBuffers() != VK_SUCCESS) {
372+
std::cerr << "Command buffers setup failed!\n";
373+
return EXIT_FAILURE;
374+
}
375+
376+
auto testPassed = runTest(syclDevice, {16, 16}, {16, 16});
377+
378+
if (vkutil::cleanup() != VK_SUCCESS) {
379+
std::cerr << "Cleanup failed!\n";
380+
return EXIT_FAILURE;
381+
}
382+
383+
if (testPassed) {
384+
std::cout << "Test passed!\n";
385+
return EXIT_SUCCESS;
386+
}
387+
388+
std::cerr << "Test failed\n";
389+
return EXIT_FAILURE;
390+
}

0 commit comments

Comments
 (0)