Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[vulkan] Added support for RPI 5 #8548

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions src/Target.cpp
Original file line number Diff line number Diff line change
@@ -1304,17 +1304,28 @@ int Target::get_arm_v8_lower_bound() const {
}

bool Target::supports_type(const Type &t) const {
if (has_feature(Vulkan)) {
if (t.is_float() && t.bits() == 64) {
return has_feature(Target::VulkanFloat64);
} else if (t.is_float() && t.bits() == 16) {
return has_feature(Target::VulkanFloat16);
} else if (t.is_int_or_uint() && t.bits() == 64) {
return has_feature(Target::VulkanInt64);
} else if (t.is_int_or_uint() && t.bits() == 16) {
return has_feature(Target::VulkanInt16);
} else if (t.is_int_or_uint() && t.bits() == 8) {
return has_feature(Target::VulkanInt8);
}
}
if (t.bits() == 64) {
if (t.is_float()) {
return (!has_feature(Metal) &&
!has_feature(D3D12Compute) &&
(!has_feature(Target::OpenCL) || has_feature(Target::CLDoubles)) &&
(!has_feature(Vulkan) || has_feature(Target::VulkanFloat64)) &&
!has_feature(WebGPU));
} else {
return (!has_feature(Metal) &&
!has_feature(D3D12Compute) &&
(!has_feature(Vulkan) || has_feature(Target::VulkanInt64)) &&
!has_feature(WebGPU));
}
}
2 changes: 1 addition & 1 deletion src/runtime/internal/memory_resources.h
Original file line number Diff line number Diff line change
@@ -151,7 +151,7 @@ ALWAYS_INLINE size_t aligned_size(size_t offset, size_t size, size_t alignment)
ALWAYS_INLINE size_t conform_size(size_t offset, size_t size, size_t alignment, size_t nearest_multiple) {
size_t adjusted_size = aligned_size(offset, size, alignment);
adjusted_size = (alignment > adjusted_size) ? alignment : adjusted_size;
if (nearest_multiple > 0) {
if ((nearest_multiple > 0) && ((adjusted_size % nearest_multiple) != 0)) {
size_t rounded_size = (((adjusted_size + nearest_multiple - 1) / nearest_multiple) * nearest_multiple);
return rounded_size;
} else {
54 changes: 48 additions & 6 deletions src/runtime/internal/region_allocator.h
Original file line number Diff line number Diff line change
@@ -74,7 +74,7 @@ class RegionAllocator {
BlockRegion *coalesce_block_regions(void *user_context, BlockRegion *region);

// Returns true if the given region can be split to accommodate the given size
bool can_split(const BlockRegion *region, const MemoryRequest &request) const;
bool can_split(void *use_context, const BlockRegion *region, const MemoryRequest &request) const;

// Splits the given block region into a smaller region to accommodate the given size, followed by empty space for the remaining
BlockRegion *split_block_region(void *user_context, BlockRegion *region, const MemoryRequest &request);
@@ -195,7 +195,7 @@ MemoryRegion *RegionAllocator::reserve(void *user_context, const MemoryRequest &
return nullptr;
}

if (can_split(block_region, region_request)) {
if (can_split(user_context, block_region, region_request)) {
#ifdef DEBUG_RUNTIME_INTERNAL
debug(user_context) << "RegionAllocator: Splitting region of size ( " << (int32_t)(block_region->memory.size) << ") "
<< "to accomodate requested size (" << (int32_t)(region_request.size) << " bytes)";
@@ -443,8 +443,29 @@ BlockRegion *RegionAllocator::coalesce_block_regions(void *user_context, BlockRe
return block_region;
}

bool RegionAllocator::can_split(const BlockRegion *block_region, const MemoryRequest &split_request) const {
return (block_region && (block_region->memory.size > split_request.size) && (block_region->usage_count == 0));
bool RegionAllocator::can_split(void *user_context, const BlockRegion *block_region, const MemoryRequest &split_request) const {

// See if we can actually split the block region and create empty space big enough
if (block_region && (block_region->memory.size > split_request.size) && (block_region->usage_count == 0)) {

// We can only split if there's still room left after conforming the allocation request, since the
// conform method may actually grow the requested size to accommodate alignment constraints
MemoryRequest test_request = split_request;
test_request.size = block_region->memory.size - test_request.size;
test_request.offset = block_region->memory.offset + test_request.size;
int error_code = conform(user_context, &test_request);
if (error_code) {
#ifdef DEBUG_RUNTIME_INTERNAL
debug(nullptr) << "RegionAllocator: Failed to conform test request for splitting block region!\n";
#endif
return false;
}

if ((block_region->memory.size - test_request.size) > 0) {
return true;
}
}
return false;
}

BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion *block_region, const MemoryRequest &request) {
@@ -470,8 +491,9 @@ BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion

#ifdef DEBUG_RUNTIME_INTERNAL
debug(user_context) << "RegionAllocator: Splitting "
<< "current region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes) "
<< "to create empty region (offset=" << (int32_t)split_request.offset << " size=" << (int32_t)(split_request.size) << " bytes)";
<< "current region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes) into ...\n\t"
<< "existing region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size - split_request.size) << " bytes)\n\t"
<< "empty region (offset=" << (int32_t)split_request.offset << " size=" << (int32_t)(split_request.size) << " bytes)\n";
#endif
BlockRegion *next_region = block_region->next_ptr;
BlockRegion *empty_region = create_block_region(user_context, split_request);
@@ -484,6 +506,12 @@ BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion
empty_region->prev_ptr = block_region;
block_region->next_ptr = empty_region;
block_region->memory.size -= empty_region->memory.size;

#ifdef DEBUG_RUNTIME_INTERNAL
debug(user_context) << "RegionAllocator: Split block region into ...\n\t"
<< "existing region (ptr=" << (void *)block_region << " prev_ptr=" << block_region->prev_ptr << " next_ptr=" << block_region->next_ptr << " offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes)\n\t"
<< "empty region (ptr=" << (void *)empty_region << " prev_ptr=" << empty_region->prev_ptr << " next_ptr=" << empty_region->next_ptr << " offset=" << (int32_t)empty_region->memory.offset << " size=" << (int32_t)(empty_region->memory.size) << " bytes)\n";
#endif
return empty_region;
}

@@ -605,8 +633,22 @@ int RegionAllocator::alloc_block_region(void *user_context, BlockRegion *block_r
#endif
halide_abort_if_false(user_context, allocators.region.allocate != nullptr);
halide_abort_if_false(user_context, block_region->status == AllocationStatus::Available);

int error_code = 0;
MemoryRegion *memory_region = &(block_region->memory);
if (memory_region->size <= 0) {
#ifdef DEBUG_RUNTIME_INTERNAL
debug(user_context) << " skipping zero size region ("
<< "block_ptr=" << (void *)block_region->block_ptr << " "
<< "block_region=" << (void *)block_region << " "
<< "memory_offset=" << (uint32_t)(block_region->memory.offset) << " "
<< "memory_size=" << (uint32_t)(block_region->memory.size) << " "
<< "block_reserved=" << (uint32_t)block->reserved << " "
<< ")\n";
#endif
return error_code;
}

if (memory_region->handle == nullptr) {
error_code = allocators.region.allocate(user_context, memory_region);
memory_region->is_owner = true;
92 changes: 63 additions & 29 deletions src/runtime/vulkan.cpp
Original file line number Diff line number Diff line change
@@ -1193,13 +1193,6 @@ WEAK int halide_vulkan_run(void *user_context,
}
}
}

// 2b. Create the pipeline layout
error_code = vk_create_pipeline_layout(user_context, ctx.allocator, shader_module->shader_count, shader_module->descriptor_set_layouts, &(shader_module->pipeline_layout));
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to create pipeline layout!\n";
return error_code;
}
}

VulkanDispatchData dispatch_data = {};
@@ -1213,16 +1206,8 @@ WEAK int halide_vulkan_run(void *user_context,

VulkanShaderBinding *entry_point_binding = (shader_module->shader_bindings + entry_point_index);

// 2c. Setup the compute pipeline (eg override any specializations for shared mem or workgroup size)
error_code = vk_setup_compute_pipeline(user_context, ctx.allocator, entry_point_binding, &dispatch_data, shader_module->shader_module, shader_module->pipeline_layout, &(entry_point_binding->compute_pipeline));
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to setup compute pipeline!\n";
return error_code;
}

// 2d. Create a descriptor set
if (entry_point_binding->descriptor_set == VK_NULL_HANDLE) {

// 2c. If Push Descriptor Set isn't supported, then allocate a descriptor set
if ((vkCmdPushDescriptorSetKHR == nullptr) && (entry_point_binding->descriptor_set == VK_NULL_HANDLE)) {
// Construct a descriptor pool
//
// NOTE: while this could be re-used across multiple pipelines, we only know the storage requirements of this kernel's
@@ -1244,7 +1229,7 @@ WEAK int halide_vulkan_run(void *user_context,
}
}

// 3a. Create a buffer for the scalar parameters
// 2d. Create a buffer for the scalar parameters
if ((entry_point_binding->args_region == nullptr) && entry_point_binding->uniform_buffer_count) {
size_t scalar_buffer_size = vk_estimate_scalar_uniform_buffer_size(user_context, arg_sizes, args, arg_is_buffer);
if (scalar_buffer_size > 0) {
@@ -1256,7 +1241,7 @@ WEAK int halide_vulkan_run(void *user_context,
}
}

// 3b. Update uniform buffer with scalar parameters
// 2e. Update uniform buffer with scalar parameters
VkBuffer *args_buffer = nullptr;
if ((entry_point_binding->args_region != nullptr) && entry_point_binding->uniform_buffer_count) {
error_code = vk_update_scalar_uniform_buffer(user_context, ctx.allocator, entry_point_binding->args_region, arg_sizes, args, arg_is_buffer);
@@ -1272,10 +1257,28 @@ WEAK int halide_vulkan_run(void *user_context,
}
}

// 3c. Update buffer bindings for descriptor set
error_code = vk_update_descriptor_set(user_context, ctx.allocator, args_buffer, entry_point_binding->uniform_buffer_count, entry_point_binding->storage_buffer_count, arg_sizes, args, arg_is_buffer, entry_point_binding->descriptor_set);
// 2f. If Push Descriptor Set isn't supported, then update the buffer bindings for the allocated descriptor set
if (vkCmdPushDescriptorSetKHR == nullptr) {
error_code = vk_update_descriptor_set(user_context, ctx.allocator, args_buffer, entry_point_binding->uniform_buffer_count, entry_point_binding->storage_buffer_count, arg_sizes, args, arg_is_buffer, entry_point_binding->descriptor_set);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to update descriptor set!\n";
return error_code;
}
}

// 2b. Create the pipeline layout
if (shader_module->pipeline_layout == VK_NULL_HANDLE) {
error_code = vk_create_pipeline_layout(user_context, ctx.allocator, shader_module->shader_count, shader_module->descriptor_set_layouts, &(shader_module->pipeline_layout));
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to create pipeline layout!\n";
return error_code;
}
}

// 3. Setup the compute pipeline (eg override any specializations for shared mem or workgroup size)
error_code = vk_setup_compute_pipeline(user_context, ctx.allocator, entry_point_binding, &dispatch_data, shader_module->shader_module, shader_module->pipeline_layout, &(entry_point_binding->compute_pipeline));
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to update descriptor set!\n";
error(user_context) << "Vulkan: Failed to setup compute pipeline!\n";
return error_code;
}

@@ -1287,18 +1290,49 @@ WEAK int halide_vulkan_run(void *user_context,
}

// 5. Fill the command buffer
error_code = vk_fill_command_buffer_with_dispatch_call(user_context,
ctx.device, cmds.command_buffer,
entry_point_binding->compute_pipeline,
shader_module->pipeline_layout,
entry_point_binding->descriptor_set,
entry_point_index,
blocksX, blocksY, blocksZ);
error_code = vk_begin_command_buffer(user_context, cmds.command_buffer);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to start command buffer for dispatch call!\n";
return error_code;
}
error_code = vk_bind_pipeline(user_context, cmds.command_buffer, entry_point_binding->compute_pipeline);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to bind compute pipeline to command buffer for dispatch call!\n";
return error_code;
}

if (vkCmdPushDescriptorSetKHR != nullptr) {
error_code = vk_push_descriptor_set(user_context, ctx.allocator, cmds.command_buffer, entry_point_binding->compute_pipeline, shader_module->pipeline_layout, entry_point_binding->descriptor_set, args_buffer, entry_point_binding->uniform_buffer_count, entry_point_binding->storage_buffer_count, arg_sizes, args, arg_is_buffer);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to update descriptor set!\n";
return error_code;
}
} else {
error_code = vk_bind_descriptor_sets(user_context, cmds.command_buffer, shader_module->pipeline_layout, entry_point_binding->descriptor_set, entry_point_index);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to bind descriptor set to command buffer for dispatch call!\n";
return error_code;
}
}

error_code = vk_dispatch_kernel(user_context,
ctx.device, cmds.command_buffer,
entry_point_binding->compute_pipeline,
shader_module->pipeline_layout,
entry_point_binding->descriptor_set,
entry_point_index,
blocksX, blocksY, blocksZ);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to fill command buffer with dispatch call!\n";
return error_code;
}

error_code = vk_end_command_buffer(user_context, cmds.command_buffer);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to end command buffer for dispatch call!\n";
return error_code;
}

// 6. Submit the command buffer to our command queue
error_code = vk_submit_command_buffer(user_context, ctx.queue, cmds.command_buffer);
if (error_code != halide_error_code_success) {
10 changes: 9 additions & 1 deletion src/runtime/vulkan_extensions.h
Original file line number Diff line number Diff line change
@@ -203,10 +203,18 @@ uint32_t vk_get_required_device_extensions(void *user_context, StringTable &ext_
uint32_t vk_get_optional_device_extensions(void *user_context, StringTable &ext_table) {
const char *optional_ext_table[] = {
"VK_KHR_portability_subset", //< necessary for running under Molten (aka Vulkan on Mac)
VK_KHR_MAINTENANCE_1_EXTENSION_NAME,
VK_KHR_MAINTENANCE_2_EXTENSION_NAME,
VK_KHR_MAINTENANCE_3_EXTENSION_NAME,
VK_KHR_MAINTENANCE_4_EXTENSION_NAME,
VK_KHR_MAINTENANCE_5_EXTENSION_NAME,
VK_KHR_MAINTENANCE_6_EXTENSION_NAME,
VK_KHR_MAINTENANCE_7_EXTENSION_NAME,
VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME};
VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME,
VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME};

const uint32_t optional_ext_count = sizeof(optional_ext_table) / sizeof(optional_ext_table[0]);
ext_table.fill(user_context, (const char **)optional_ext_table, optional_ext_count);
return optional_ext_count;
1 change: 1 addition & 0 deletions src/runtime/vulkan_functions.h
Original file line number Diff line number Diff line change
@@ -204,6 +204,7 @@ VULKAN_FN(vkCmdCopyBuffer2)
// VULKAN_FN(vkCmdCopyImageToBuffer2)
// VULKAN_FN(vkCmdEndRendering)
VULKAN_FN(vkCmdPipelineBarrier2)
VULKAN_FN(vkCmdPushDescriptorSetKHR)
VULKAN_FN(vkCmdResetEvent2)
// VULKAN_FN(vkCmdResolveImage2)
// VULKAN_FN(vkCmdSetCullMode)
58 changes: 50 additions & 8 deletions src/runtime/vulkan_internal.h
Original file line number Diff line number Diff line change
@@ -106,14 +106,19 @@ int vk_destroy_command_buffer(void *user_context, VulkanMemoryAllocator *allocat

struct ScopedVulkanCommandBufferAndPool;

int vk_fill_command_buffer_with_dispatch_call(void *user_context,
VkDevice device,
VkCommandBuffer command_buffer,
VkPipeline compute_pipeline,
VkPipelineLayout pipeline_layout,
VkDescriptorSet descriptor_set,
uint32_t descriptor_set_index,
int blocksX, int blocksY, int blocksZ);
int vk_begin_command_buffer(void *user_context, VkCommandBuffer command_buffer);
int vk_end_command_buffer(void *user_context, VkCommandBuffer command_buffer);
int vk_bind_pipeline(void *user_context, VkCommandBuffer command_buffer, VkPipeline compute_pipeline);
int vk_bind_descriptor_sets_to_command_buffer(void *user_context, VkCommandBuffer command_buffer, VkPipeline compute_pipeline);

int vk_dispatch_kernel(void *user_context,
VkDevice device,
VkCommandBuffer command_buffer,
VkPipeline compute_pipeline,
VkPipelineLayout pipeline_layout,
VkDescriptorSet descriptor_set,
uint32_t descriptor_set_index,
int blocksX, int blocksY, int blocksZ);

int vk_submit_command_buffer(void *user_context, VkQueue queue, VkCommandBuffer command_buffer);

@@ -175,6 +180,24 @@ int vk_create_descriptor_set(void *user_context,
VkDescriptorPool descriptor_pool,
VkDescriptorSet *descriptor_set);

int vk_get_descriptor_buffer_info(void *user_context,
VulkanMemoryAllocator *allocator,
VkDescriptorSet descriptor_set,
VkBuffer *scalar_args_buffer,
size_t uniform_buffer_count,
size_t storage_buffer_count,
size_t arg_sizes[],
void *args[],
int8_t arg_is_buffer[],
BlockStorage *descriptor_buffer_info_result);

int vk_get_write_descriptor_set_info(void *user_context,
VulkanMemoryAllocator *allocator,
BlockStorage *descriptor_buffer_info,
VkDescriptorSet descriptor_set,
VkBuffer *scalar_args_buffer,
BlockStorage *write_descriptor_set_result);

int vk_update_descriptor_set(void *user_context,
VulkanMemoryAllocator *allocator,
VkBuffer *scalar_args_buffer,
@@ -185,6 +208,25 @@ int vk_update_descriptor_set(void *user_context,
int8_t arg_is_buffer[],
VkDescriptorSet descriptor_set);

int vk_bind_descriptor_sets(void *user_context,
VkCommandBuffer command_buffer,
VkPipelineLayout pipeline_layout,
VkDescriptorSet descriptor_set,
uint32_t descriptor_set_index);

int vk_push_descriptor_set(void *user_context,
VulkanMemoryAllocator *allocator,
VkCommandBuffer command_buffer,
VkPipeline compute_pipeline,
VkPipelineLayout pipeline_layout,
VkDescriptorSet descriptor_set,
VkBuffer *scalar_args_buffer,
size_t uniform_buffer_count,
size_t storage_buffer_count,
size_t arg_sizes[],
void *args[],
int8_t arg_is_buffer[]);

// -- Pipeline Layout
int vk_create_pipeline_layout(void *user_context,
VulkanMemoryAllocator *allocator,
22 changes: 15 additions & 7 deletions src/runtime/vulkan_memory.h
Original file line number Diff line number Diff line change
@@ -556,7 +556,7 @@ int VulkanMemoryAllocator::lookup_requirements(void *user_context, size_t size,
#if defined(HL_VK_DEBUG_MEM)
debug(nullptr) << "VulkanMemoryAllocator: Looking up requirements ("
<< "user_context=" << user_context << " "
<< "size=" << (uint32_t)block->size << ", "
<< "size=" << (uint32_t)size << ", "
<< "usage_flags=" << usage_flags << ") ... \n";
#endif
VkBufferCreateInfo create_info = {
@@ -998,7 +998,7 @@ int VulkanMemoryAllocator::conform(void *user_context, MemoryRequest *request) {

#if defined(HL_VK_DEBUG_MEM)
debug(nullptr) << "VulkanMemoryAllocator: Buffer requirements ("
<< "requested_size=" << (uint32_t)region->size << ", "
<< "requested_size=" << (uint32_t)request->size << ", "
<< "required_alignment=" << (uint32_t)memory_requirements.alignment << ", "
<< "required_size=" << (uint32_t)memory_requirements.size << ")\n";
#endif
@@ -1051,7 +1051,7 @@ int VulkanMemoryAllocator::conform_region_request(void *instance_ptr, MemoryRequ
#if defined(HL_VK_DEBUG_MEM)
debug(nullptr) << "VulkanMemoryAllocator: Conforming region request ("
<< "user_context=" << user_context << " "
<< "request=" << (void *)(region) << ") ... \n";
<< "request=" << (void *)(request) << ") ... \n";
#endif

if ((instance->device == nullptr) || (instance->physical_device == nullptr)) {
@@ -1125,6 +1125,9 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg

VkResult result = vkCreateBuffer(instance->device, &create_info, instance->alloc_callbacks, buffer);
if (result != VK_SUCCESS) {
debug(user_context) << "VulkanRegionAllocator: Failed to create buffer!\n\t"
<< "vkCreateBuffer returned: " << vk_get_error_name(result) << "\n";

// Allocation failed ... collect unused regions and try again ...
instance->collect(user_context);
result = vkCreateBuffer(instance->device, &create_info, instance->alloc_callbacks, buffer);
@@ -1165,12 +1168,9 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg
<< "vkCreateBuffer returned: " << vk_get_error_name(result) << "\n";
return halide_error_code_device_malloc_failed;
}
region->size = create_info.size;
}

#ifdef DEBUG_RUNTIME
debug(nullptr) << "vkCreateBuffer: Created buffer for device region (" << (uint64_t)region->size << " bytes) ...\n";
#endif

RegionAllocator *region_allocator = RegionAllocator::find_allocator(user_context, region);
if (region_allocator == nullptr) {
error(user_context) << "VulkanBlockAllocator: Unable to allocate region! Invalid region allocator!\n";
@@ -1189,6 +1189,10 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg
return halide_error_code_internal_error;
}

#ifdef DEBUG_RUNTIME
debug(nullptr) << "vkCreateBuffer: Created buffer for device region (" << (uint64_t)region->size << " bytes) ...\n";
#endif

// Finally, bind buffer to the device memory
result = vkBindBufferMemory(instance->device, *buffer, *device_memory, region->offset);
if (result != VK_SUCCESS) {
@@ -1197,6 +1201,10 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg
return halide_error_code_generic_error;
}

#ifdef DEBUG_RUNTIME
debug(nullptr) << "vkBindBufferMemory: Bound memory to device buffer for device region (" << (uint64_t)region->size << " bytes) ...\n";
#endif

region->handle = (void *)buffer;
region->is_owner = true;
instance->region_byte_count += region->size;
453 changes: 331 additions & 122 deletions src/runtime/vulkan_resources.h

Large diffs are not rendered by default.

8 changes: 7 additions & 1 deletion test/autoschedulers/mullapudi2016/histogram.cpp
Original file line number Diff line number Diff line change
@@ -120,11 +120,17 @@ double run_test(bool auto_schedule) {
}

int main(int argc, char **argv) {
if (get_jit_target_from_environment().arch == Target::WebAssembly) {
Halide::Target target = get_jit_target_from_environment();
if (target.arch == Target::WebAssembly) {
printf("[SKIP] Autoschedulers do not support WebAssembly.\n");
return 0;
}

if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt8))) {
printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n");
return 0;
}

if (argc != 2) {
fprintf(stderr, "Usage: %s <autoscheduler-lib>\n", argv[0]);
return 1;
5 changes: 5 additions & 0 deletions test/correctness/bool_predicate_cast.cpp
Original file line number Diff line number Diff line change
@@ -8,6 +8,11 @@ int main(int argc, char **argv) {
// Test explicit casting of a predicate to an integer as part of a reduction
// NOTE: triggers a convert_to_bool in Vulkan for a SelectOp
Target target = get_jit_target_from_environment();
if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt8))) {
printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n");
return 0;
}

Var x("x"), y("y");

Func input("input");
4 changes: 4 additions & 0 deletions test/correctness/boundary_conditions.cpp
Original file line number Diff line number Diff line change
@@ -392,6 +392,10 @@ int main(int argc, char **argv) {
// The wasm jit is very slow, so shorten this test here.
vector_width_max = 8;
}
if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt8))) {
printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n");
return 0;
}

std::vector<Task> tasks;
for (int vector_width = 1; vector_width <= vector_width_max; vector_width *= 2) {
9 changes: 7 additions & 2 deletions test/correctness/convolution.cpp
Original file line number Diff line number Diff line change
@@ -4,6 +4,13 @@
using namespace Halide;

int main(int argc, char **argv) {
Target target = get_jit_target_from_environment();
if (target.has_feature(Target::Vulkan)) {
if (!target.has_feature(Target::VulkanInt16)) {
printf("[SKIP] Skipping test for Vulkan ... missing Int16 support!\n");
return 0;
}
}

// int W = 64*3, H = 64*3;
const int W = 128, H = 48;
@@ -64,8 +71,6 @@ int main(int argc, char **argv) {
Func blur2("blur2");
blur2(x, y) = sum(tent(r.x, r.y) * input(x + r.x - 1, y + r.y - 1));

Target target = get_jit_target_from_environment();

if (target.has_gpu_feature()) {
Var xi("xi"), yi("yi");

5 changes: 5 additions & 0 deletions test/correctness/convolution_multiple_kernels.cpp
Original file line number Diff line number Diff line change
@@ -39,6 +39,11 @@ int main(int argc, char **argv) {
sum(cast<uint16_t>(box2(r.x, r.y)) * input(x + r.x, y + r.y));

Target target = get_jit_target_from_environment();
if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt16))) {
printf("[SKIP] Skipping test for Vulkan ... missing Int16 support!\n");
return 0;
}

if (target.has_gpu_feature()) {
Var xi("xi"), yi("yi");
blur.gpu_tile(x, y, xi, yi, 16, 16);
4 changes: 4 additions & 0 deletions test/correctness/dilate3x3.cpp
Original file line number Diff line number Diff line change
@@ -27,6 +27,10 @@ int main(int argc, char **argv) {

// Schedule.
Target target = get_jit_target_from_environment();
if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt8))) {
printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n");
return 0;
}
if (target.has_gpu_feature()) {
Var xi("xi"), yi("yi");
dilate3x3.gpu_tile(x, y, xi, yi, 16, 16);
7 changes: 6 additions & 1 deletion test/correctness/gpu_arg_types.cpp
Original file line number Diff line number Diff line change
@@ -3,10 +3,15 @@
using namespace Halide;
int main(int argc, char *argv[]) {

if (!get_jit_target_from_environment().has_gpu_feature()) {
Halide::Target target = get_jit_target_from_environment();
if (!target.has_gpu_feature()) {
printf("[SKIP] No GPU target enabled.\n");
return 0;
}
if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt16))) {
printf("[SKIP] Skipping test for Vulkan ... missing Int16 support!\n");
return 0;
}

Func f, g;
Var x, y, tx, ty;
8 changes: 7 additions & 1 deletion test/correctness/gpu_dynamic_shared.cpp
Original file line number Diff line number Diff line change
@@ -11,14 +11,20 @@ int main(int argc, char **argv) {
}

if (t.has_feature(Target::Vulkan)) {
if (!t.has_feature(Target::VulkanV13)) {
printf("[SKIP] Skipping test for Vulkan ... missing 1.3 feature in target!\n");
return 0;
}

const auto *interface = get_device_interface_for_device_api(DeviceAPI::Vulkan);
assert(interface->compute_capability != nullptr);
int major, minor;
int err = interface->compute_capability(nullptr, &major, &minor);
if (err != 0 || (major == 1 && minor < 3)) {
printf("[SKIP] Vulkan %d.%d is less than required 1.3.\n", major, minor);
printf("[SKIP] Vulkan runtime support %d.%d is less than required 1.3.\n", major, minor);
return 0;
}

if ((t.os == Target::IOS) || (t.os == Target::OSX)) {
printf("[SKIP] Skipping test for Vulkan on iOS/OSX (MoltenVK doesn't support dynamic LocalSizeId yet)!\n");
return 0;
11 changes: 11 additions & 0 deletions test/correctness/gpu_reuse_shared_memory.cpp
Original file line number Diff line number Diff line change
@@ -172,6 +172,17 @@ int main(int argc, char **argv) {
return 0;
}

if (t.has_feature(Target::Vulkan)) {
if (!t.has_feature(Target::VulkanInt8)) {
printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n");
return 0;
}
if (!t.has_feature(Target::VulkanInt16)) {
printf("[SKIP] Skipping test for Vulkan ... missing Int16 support!\n");
return 0;
}
}

for (auto memory_type : {MemoryType::GPUShared, MemoryType::Heap}) {
printf("Running multi thread type test\n");
if (multi_thread_type_test(memory_type) != 0) {
8 changes: 7 additions & 1 deletion test/correctness/gpu_transpose.cpp
Original file line number Diff line number Diff line change
@@ -4,11 +4,17 @@
using namespace Halide;

int main(int argc, char **argv) {
if (!get_jit_target_from_environment().has_gpu_feature()) {
Target t = get_jit_target_from_environment();
if (!t.has_gpu_feature()) {
printf("[SKIP] No GPU target enabled.\n");
return 0;
}

if (t.has_feature(Target::Vulkan) && (!t.has_feature(Target::VulkanInt8))) {
printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n");
return 0;
}

ImageParam in(UInt(8), 2);

Var x, y;
13 changes: 7 additions & 6 deletions test/correctness/interleave_rgb.cpp
Original file line number Diff line number Diff line change
@@ -103,14 +103,15 @@ bool test_deinterleave(int x_stride) {
}

int main(int argc, char **argv) {
Target target = get_jit_target_from_environment();
for (int x_stride : {3, 4}) {
if (!test_interleave<uint8_t>(x_stride)) return 1;
if (!test_interleave<uint16_t>(x_stride)) return 1;
if (!test_interleave<uint32_t>(x_stride)) return 1;
if (target.supports_type(halide_type_of<uint8_t>()) && !test_interleave<uint8_t>(x_stride)) return 1;
if (target.supports_type(halide_type_of<uint16_t>()) && !test_interleave<uint16_t>(x_stride)) return 1;
if (target.supports_type(halide_type_of<uint32_t>()) && !test_interleave<uint32_t>(x_stride)) return 1;

if (!test_deinterleave<uint8_t>(x_stride)) return 1;
if (!test_deinterleave<uint16_t>(x_stride)) return 1;
if (!test_deinterleave<uint32_t>(x_stride)) return 1;
if (target.supports_type(halide_type_of<uint8_t>()) && !test_deinterleave<uint8_t>(x_stride)) return 1;
if (target.supports_type(halide_type_of<uint16_t>()) && !test_deinterleave<uint16_t>(x_stride)) return 1;
if (target.supports_type(halide_type_of<uint32_t>()) && !test_deinterleave<uint32_t>(x_stride)) return 1;
}
printf("Success!\n");
return 0;
5 changes: 5 additions & 0 deletions test/correctness/interleave_x.cpp
Original file line number Diff line number Diff line change
@@ -11,6 +11,11 @@ int main(int argc, char **argv) {
interleaved(x, y) = select(x % 2 == 0, cast<uint16_t>(3), cast<uint16_t>(7));

Target target = get_jit_target_from_environment();
if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt16))) {
printf("[SKIP] Skipping test for Vulkan ... missing support for Int16!\n");
return 0;
}

if (target.has_gpu_feature()) {
Var tx("tx"), ty("ty");
interleaved.gpu_tile(x, y, tx, ty, 16, 16);
12 changes: 6 additions & 6 deletions test/correctness/logical.cpp
Original file line number Diff line number Diff line change
@@ -13,6 +13,12 @@ Expr u16(Expr a) {

int main(int argc, char **argv) {

Target target = get_jit_target_from_environment();
if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt8))) {
printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n");
return 0;
}

Buffer<uint8_t> input(128, 64);

for (int y = 0; y < input.height(); y++) {
@@ -28,7 +34,6 @@ int main(int argc, char **argv) {
((input(x, y) > 40) && (!(input(x, y) > 50))),
u8(255), u8(0));

Target target = get_jit_target_from_environment();
if (target.has_gpu_feature()) {
f.gpu_tile(x, y, xi, yi, 16, 16);
f.vectorize(xi, 4);
@@ -62,7 +67,6 @@ int main(int argc, char **argv) {
((input(x, y) > 40) && (!common_cond)),
u8(255), u8(0));

Target target = get_jit_target_from_environment();
if (target.has_gpu_feature()) {
f.gpu_tile(x, y, xi, yi, 16, 16);
f.vectorize(xi, 4);
@@ -93,8 +97,6 @@ int main(int argc, char **argv) {
Func f("f");
f(x, y) = select(x < 10 || x > 20 || y < 10 || y > 20, 0, input(x, y));

Target target = get_jit_target_from_environment();

if (target.has_gpu_feature()) {
f.gpu_tile(x, y, xi, yi, 16, 16);
f.vectorize(xi, 4);
@@ -124,7 +126,6 @@ int main(int argc, char **argv) {
Expr ten = 10;
f(x, y) = select(input(x, y) > ten, u8(255), u8(0));

Target target = get_jit_target_from_environment();
if (target.has_gpu_feature()) {
f.gpu_tile(x, y, xi, yi, 16, 16);
f.vectorize(xi, 4);
@@ -177,7 +178,6 @@ int main(int argc, char **argv) {
cpu.compute_root();
gpu.compute_root();

Target target = get_jit_target_from_environment();
if (target.has_feature(Target::OpenCL) && n == 16 && w == 32) {
// Workaround for https://github.com/halide/Halide/issues/2477
printf("Skipping uint%d -> uint%d for OpenCL\n", n, w);
8 changes: 7 additions & 1 deletion test/correctness/median3x3.cpp
Original file line number Diff line number Diff line change
@@ -13,6 +13,13 @@ Expr mid3(Expr a, Expr b, Expr c) {
}

int main(int arch, char **argv) {

Target target = get_jit_target_from_environment();
if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt8))) {
printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n");
return 0;
}

const int W = 256, H = 256;
Buffer<uint8_t> in(W, H);
// Set up the input.
@@ -43,7 +50,6 @@ int main(int arch, char **argv) {
median3x3(x, y) = mid3(min_max(x, y), max_min(x, y), mid_mid(x, y));

// Schedule.
Target target = get_jit_target_from_environment();
if (target.has_gpu_feature()) {
Var xi("xi"), yi("yi");
median3x3.gpu_tile(x, y, xi, yi, 16, 16);
15 changes: 15 additions & 0 deletions test/correctness/mul_div_mod.cpp
Original file line number Diff line number Diff line change
@@ -540,6 +540,21 @@ void add_test_div_mod(int vector_width, ScheduleVariant scheduling, Target targe
int main(int argc, char **argv) {
Target target = get_jit_target_from_environment();

if (target.has_feature(Target::Vulkan)) {
if (!target.has_feature(Target::VulkanInt8)) {
printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n");
return 0;
}
if (!target.has_feature(Target::VulkanInt16)) {
printf("[SKIP] Skipping test for Vulkan ... missing Int16 support!\n");
return 0;
}
if (!target.has_feature(Target::VulkanInt64)) {
printf("[SKIP] Skipping test for Vulkan ... missing Int64 support!\n");
return 0;
}
}

ScheduleVariant scheduling = CPU;
if (target.has_gpu_feature()) {
scheduling = TiledGPU;
5 changes: 3 additions & 2 deletions test/correctness/multiple_outputs.cpp
Original file line number Diff line number Diff line change
@@ -4,7 +4,8 @@
using namespace Halide;

int main(int argc, char **argv) {
const bool use_gpu = get_jit_target_from_environment().has_gpu_feature();
Target target = get_jit_target_from_environment();
const bool use_gpu = target.has_gpu_feature();

// An internal Func that produces multiple values.
{
@@ -93,7 +94,7 @@ int main(int argc, char **argv) {
}

// Now multiple output Funcs via inferred Realization
{
if (target.supports_type(halide_type_of<uint8_t>()) && target.supports_type(halide_type_of<int16_t>())) {
Func f, g;
Var x, xi;
f(x) = cast<float>(100 * x);
17 changes: 13 additions & 4 deletions test/correctness/widening_reduction.cpp
Original file line number Diff line number Diff line change
@@ -9,11 +9,20 @@ using namespace Halide::Internal;
int main(int arch, char **argv) {

Halide::Target target = get_jit_target_from_environment();
if (target.has_feature(Target::Vulkan) && ((target.os == Target::IOS) || target.os == Target::OSX)) {
printf("[SKIP] Skipping test for Vulkan on iOS/OSX (MoltenVK fails to convert max/min intrinsics correctly)!\n");
return 0;
if (target.has_feature(Target::Vulkan)) {
if (!target.has_feature(Target::VulkanInt8)) {
printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n");
return 0;
}
if (!target.has_feature(Target::VulkanInt16)) {
printf("[SKIP] Skipping test for Vulkan ... missing Int16 support!\n");
return 0;
}
if ((target.os == Target::IOS) || (target.os == Target::OSX)) {
printf("[SKIP] Skipping test for Vulkan on iOS/OSX (MoltenVK fails to convert max/min intrinsics correctly)!\n");
return 0;
}
}

const int W = 256, H = 256;

Buffer<uint8_t> in(W, H);