Skip to content

Commit

Permalink
Merge branch 'main' into 578-4
Browse files Browse the repository at this point in the history
  • Loading branch information
Rekt3421 authored Nov 19, 2024
2 parents 9a8f8ee + 4aa6f00 commit fec97ff
Show file tree
Hide file tree
Showing 27 changed files with 1,114 additions and 507 deletions.
8 changes: 6 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,9 @@ if("Ninja" STREQUAL ${CMAKE_GENERATOR})
endif()

# OpenCL Headers
include_directories(BEFORE ${PROJECT_SOURCE_DIR}/external/OpenCL-Headers)
set(OPENCL_HEADERS_SOURCE_DIR ${PROJECT_SOURCE_DIR}/external/OpenCL-Headers
CACHE STRING "OpenCL-Headers source directory")
include_directories(BEFORE ${OPENCL_HEADERS_SOURCE_DIR})

# SPIR-V Headers
set(SPIRV_HEADERS_SOURCE_DIR ${PROJECT_SOURCE_DIR}/external/SPIRV-Headers CACHE STRING
Expand All @@ -106,7 +108,8 @@ set(SPIRV-Headers_SOURCE_DIR ${SPIRV_HEADERS_SOURCE_DIR})

set(CLVK_BUILD_SPIRV_TOOLS ON CACHE BOOL "Set to OFF to disable SPIRV-Tools build")
if (CLVK_BUILD_SPIRV_TOOLS)
add_subdirectory(${SPIRV_TOOLS_SOURCE_DIR} EXCLUDE_FROM_ALL)
add_subdirectory(${SPIRV_TOOLS_SOURCE_DIR}
${PROJECT_BINARY_DIR}/external/SPIRV-Tools EXCLUDE_FROM_ALL)
endif()

# clspv
Expand All @@ -121,6 +124,7 @@ if (CLVK_COMPILER_AVAILABLE)
endif()
set(CLSPV_SOURCE_DIR ${PROJECT_SOURCE_DIR}/external/clspv
CACHE STRING "Clspv source directory")
set(CLSPV_BUILD_SPIRV_DIS OFF)
add_subdirectory(${CLSPV_SOURCE_DIR} ${PROJECT_BINARY_DIR}/external/clspv
EXCLUDE_FROM_ALL)
set_target_properties(clspv PROPERTIES RUNTIME_OUTPUT_DIRECTORY
Expand Down
19 changes: 18 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,14 @@ using the name of the corresponding environment variable.

* `CLVK_CONFIG_FILE` specifies the path to an additional configuration file.

* `CLVK_IGNORE_OUT_OF_ORDER_EXECUTION` controls whether out-of-order queues can be
created. When allowed, out-of-order queues always behave as in-order queues. This can be
useful for running applications that request out-of-order queues but do not depend on
out-of-order execution.

* 0: creating an out-of-order queue results in a failure (default)
* 1: creating an out-of-order queue is supported but it will function as an in-order queue

* `CLVK_LOG` controls the level of logging

* 0: only print fatal messages (default)
Expand Down Expand Up @@ -525,13 +533,22 @@ using the name of the corresponding environment variable.
with caution.

* `CLVK_DEVICE_EXTENSIONS` specifies extensions to be added to the list of
exposed extensions. It expects a whitespace separated list of extensions.
exposed extensions. It expects a comma separated list of extensions.

* `CLVK_DEVICE_EXTENSIONS_MASKED` specifies extensions to be removed from the
list of exposed extensions. It expects a comma separated list of extensions.

* `CLVK_BUILD_IN_SEPARATE_THREAD` forces kernels to be built in a separate thread
(default: false). Creating the thread adds a slight overhead, but this can be a
work-around for issues with clang compiling in the application thread.

* `CLVK_INIT_IMAGE_AT_CREATION` forces OpenCL images created with
`CL_MEM_COPY_HOST_PTR` or `CL_MEM_USE_HOST_PTR` to be initialized at creation
time instead of during the first use of the image (default: false). This reduces
the memory footprint, as clvk otherwise needs to keep a buffer holding the
initialization data until the image is first used.

# Limitations

* Only one device per CL context
Expand Down
2 changes: 1 addition & 1 deletion external/SPIRV-LLVM-Translator
2 changes: 1 addition & 1 deletion external/SPIRV-Tools
Submodule SPIRV-Tools updated 185 files
2 changes: 1 addition & 1 deletion external/clspv
Submodule clspv updated 107 files
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ endif()
add_library(OpenCL-objects OBJECT
api.cpp
config.cpp
context.cpp
device.cpp
device_properties.cpp
event.cpp
Expand Down
26 changes: 26 additions & 0 deletions src/api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,9 @@ cl_int CLVK_API_CALL clGetDeviceInfo(cl_device_id dev,
cl_device_device_enqueue_capabilities val_dev_enqueue_caps;
cl_device_pci_bus_info_khr val_pci_bus_info;
cl_device_atomic_capabilities val_atomic_capabilities;
cl_device_integer_dot_product_capabilities_khr val_int_dot_product;
cl_device_integer_dot_product_acceleration_properties_khr
val_int_dot_product_props;
std::vector<size_t> val_subgroup_sizes;

auto device = icd_downcast(dev);
Expand Down Expand Up @@ -905,6 +908,22 @@ cl_int CLVK_API_CALL clGetDeviceInfo(cl_device_id dev,
copy_ptr = &val_pci_bus_info;
size_ret = sizeof(val_pci_bus_info);
break;
case CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR:
val_int_dot_product = device->dot_product_capabilities();
copy_ptr = &val_int_dot_product;
size_ret = sizeof(val_int_dot_product);
break;
case CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR:
val_int_dot_product_props = device->dot_product_8bit_properties();
copy_ptr = &val_int_dot_product_props;
size_ret = sizeof(val_int_dot_product_props);
break;
case CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR:
val_int_dot_product_props =
device->dot_product_4x8bit_packed_properties();
copy_ptr = &val_int_dot_product_props;
size_ret = sizeof(val_int_dot_product_props);
break;
case CL_DEVICE_SUB_GROUP_SIZES_INTEL:
if (device->supports_subgroup_size_selection()) {
uint32_t size = device->min_sub_group_size();
Expand Down Expand Up @@ -1467,6 +1486,13 @@ cvk_create_command_queue(cl_context context, cl_device_id device,
return nullptr;
}

if (!config.ignore_out_of_order_execution()) {
// We do not support out of order command queues so this must fail
if (properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) {
*errcode_ret = CL_INVALID_QUEUE_PROPERTIES;
return nullptr;
}
}
auto queue = std::make_unique<cvk_command_queue>(
icd_downcast(context), icd_downcast(device), properties,
std::move(properties_array));
Expand Down
4 changes: 4 additions & 0 deletions src/config.def
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ OPTION(uint32_t, opencl_version, (uint32_t)CL_MAKE_VERSION(3, 0, 0))

OPTION(bool, build_in_separate_thread, false)

OPTION(bool, init_image_at_creation, false)

#if COMPILER_AVAILABLE
OPTION(std::string, clspv_options, "")
#if !CLSPV_ONLINE_COMPILER
Expand All @@ -52,6 +54,7 @@ OPTION(uint32_t, max_cmd_batch_size, 10000u)
OPTION(uint32_t, max_first_cmd_batch_size, 10000u)
OPTION(uint32_t, max_cmd_group_size, UINT32_MAX)
OPTION(uint32_t, max_first_cmd_group_size, UINT32_MAX)
OPTION(bool, ignore_out_of_order_execution, false) // false: creating an out-of-order queue fails

// experimental
OPTION(bool, dynamic_batches, false)
Expand All @@ -62,6 +65,7 @@ OPTION(uint32_t, enqueue_command_retry_sleep_us, UINT32_MAX) // UINT32_MAX meani
OPTION(bool, supports_filter_linear, true)

OPTION(std::string, device_extensions, "")
OPTION(std::string, device_extensions_masked, "")

//
// Logging
Expand Down
34 changes: 34 additions & 0 deletions src/context.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright 2024 The clvk authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "context.hpp"
#include "queue.hpp"

// Returns the command queue this context keeps for image initialisation,
// creating and initialising it on first use.
//
// Returns nullptr when the queue fails to initialise. In that case nothing is
// cached, so a later call starts over instead of handing out a queue whose
// init() did not succeed.
cvk_command_queue* cvk_context::get_or_create_image_init_command_queue() {
    // Creation is serialised so concurrent callers end up sharing one queue.
    std::lock_guard<std::mutex> lock(m_queue_image_init_lock);
    if (m_queue_image_init != nullptr) {
        return m_queue_image_init;
    }

    std::vector<cl_queue_properties> properties_array;
    cvk_command_queue* queue =
        new cvk_command_queue(this, m_device, 0, std::move(properties_array));
    if (queue->init() != CL_SUCCESS) {
        // Only a fully initialised queue may be published through the member;
        // otherwise the early return above would serve the broken queue to
        // every subsequent caller.
        delete queue;
        return nullptr;
    }

    // NOTE(review): presumably drops the queue's reference on this context so
    // that the context-owned queue does not keep its owner alive - confirm
    // against queue.hpp.
    queue->detach_from_context();
    m_queue_image_init = queue;
    return m_queue_image_init;
}

void cvk_context::free_image_init_command_queue() { delete m_queue_image_init; }
9 changes: 9 additions & 0 deletions src/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ struct cvk_context_callback {
void* data;
};

struct cvk_command_queue;

struct cvk_context : public _cl_context,
refcounted,
object_magic_header<object_magic::context> {
Expand Down Expand Up @@ -73,6 +75,7 @@ struct cvk_context : public _cl_context,
auto cb = *cbi;
cb.pointer(this, cb.data);
}
free_image_init_command_queue();
}

const std::vector<cl_context_properties>& properties() const {
Expand Down Expand Up @@ -116,6 +119,9 @@ struct cvk_context : public _cl_context,
cvk_printf_callback_t get_printf_callback() { return m_printf_callback; }
void* get_printf_userdata() { return m_user_data; }

cvk_command_queue* get_or_create_image_init_command_queue();
void free_image_init_command_queue();

private:
cvk_device* m_device;
std::mutex m_callbacks_lock;
Expand All @@ -124,6 +130,9 @@ struct cvk_context : public _cl_context,
size_t m_printf_buffersize;
cvk_printf_callback_t m_printf_callback;
void* m_user_data;

std::mutex m_queue_image_init_lock;
cvk_command_queue* m_queue_image_init = nullptr;
};

static inline cvk_context* icd_downcast(cl_context context) {
Expand Down
54 changes: 54 additions & 0 deletions src/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ void cvk_device::init_vulkan_properties(VkInstance instance) {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES;
m_float_controls_properties.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES;
m_integer_dot_product_properties.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_PROPERTIES;

//--- Get maxMemoryAllocationSize for figuring out the max single buffer
// allocation size and default init when the extension is not supported
Expand Down Expand Up @@ -83,6 +85,8 @@ void cvk_device::init_vulkan_properties(VkInstance instance) {
m_maintenance3_properties),
VER_EXT_PROP(VK_MAKE_VERSION(1, 2, 0), nullptr,
m_float_controls_properties),
VER_EXT_PROP(VK_MAKE_VERSION(1, 3, 0), nullptr,
m_integer_dot_product_properties),
};
#undef VER_EXT_PROP

Expand Down Expand Up @@ -335,6 +339,8 @@ void cvk_device::init_features(VkInstance instance) {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_KHR;
m_features_subgroup_size_control.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES;
m_features_shader_integer_dot_product.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_FEATURES;
m_features_queue_global_priority.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GLOBAL_PRIORITY_QUERY_FEATURES_KHR;

Expand Down Expand Up @@ -369,6 +375,8 @@ void cvk_device::init_features(VkInstance instance) {
VER_EXT_FEAT(VK_MAKE_VERSION(1, 2, 0),
VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME,
m_features_buffer_device_address),
VER_EXT_FEAT(VK_MAKE_VERSION(1, 3, 0), nullptr,
m_features_shader_integer_dot_product),
VER_EXT_FEAT(0, VK_KHR_GLOBAL_PRIORITY_EXTENSION_NAME,
m_features_queue_global_priority),

Expand Down Expand Up @@ -535,6 +543,10 @@ void cvk_device::init_compiler_options() {
m_device_compiler_options += " -physical-storage-buffers ";
}

if (supports_dot_product()) {
m_device_compiler_options += " -cl-arm-integer-dot-product ";
}

// Builtin options
auto parse_builtins = [](std::string s) {
std::set<std::string> builtins;
Expand Down Expand Up @@ -675,6 +687,22 @@ void cvk_device::build_extension_ils_list() {
m_has_subgroup_size_selection = true;
}

if (supports_dot_product()) {
if (supports_int8()) {
m_extensions.push_back(MAKE_NAME_VERSION(
2, 0, 0, CL_KHR_INTEGER_DOT_PRODUCT_EXTENSION_NAME));
m_extensions.push_back(
MAKE_NAME_VERSION(1, 0, 0, "cl_arm_integer_dot_product_int8"));
m_extensions.push_back(MAKE_NAME_VERSION(
1, 0, 0, "cl_arm_integer_dot_product_accumulate_int8"));
m_extensions.push_back(MAKE_NAME_VERSION(
1, 0, 0,
"cl_arm_integer_dot_product_accumulate_saturate_int8"));
}
m_extensions.push_back(MAKE_NAME_VERSION(
1, 0, 0, "cl_arm_integer_dot_product_accumulate_int16"));
}

auto split_string = [](std::string input, char delimiter) {
std::vector<std::string> outputs;
size_t pos = 0;
Expand All @@ -697,11 +725,23 @@ void cvk_device::build_extension_ils_list() {
m_extensions.push_back(extension);
}

auto config_extensions_masked =
split_string(config.device_extensions_masked(), ',');
for (auto& config_extension_masked : config_extensions_masked) {
for (auto it = m_extensions.begin(); it != m_extensions.end(); it++) {
if (strcmp(config_extension_masked.c_str(), it->name) == 0) {
m_extensions.erase(it);
break;
}
}
}

// Build extension string
for (auto& ext : m_extensions) {
m_extension_string += ext.name;
m_extension_string += " ";
}
cvk_info_fn("extensions: '%s'", m_extension_string.c_str());

// Build list of ILs
m_ils = {
Expand Down Expand Up @@ -759,6 +799,14 @@ void cvk_device::build_extension_ils_list() {
m_opencl_c_features.push_back(
MAKE_NAME_VERSION(3, 0, 0, "__opencl_c_fp64"));
}
if (supports_dot_product()) {
if (supports_int8()) {
m_opencl_c_features.push_back(MAKE_NAME_VERSION(
3, 0, 0, "__opencl_c_integer_dot_product_input_4x8bit"));
}
m_opencl_c_features.push_back(MAKE_NAME_VERSION(
3, 0, 0, "__opencl_c_integer_dot_product_input_4x8bit_packed"));
}
}

bool cvk_device::create_vulkan_queues_and_device(uint32_t num_queues,
Expand Down Expand Up @@ -1186,6 +1234,12 @@ bool cvk_device::supports_capability(spv::Capability capability) const {
return m_float_controls_properties.shaderRoundingModeRTEFloat32 ||
m_float_controls_properties.shaderRoundingModeRTEFloat16 ||
m_float_controls_properties.shaderRoundingModeRTEFloat64;
case spv::CapabilityDotProduct:
case spv::CapabilityDotProductInput4x8BitPacked:
return supports_dot_product();
case spv::CapabilityDotProductInput4x8Bit:
case spv::CapabilityDotProductInputAll:
return supports_dot_product() && supports_int8();
// Capabilities that have not yet been mapped to Vulkan features:
default:
cvk_warn_fn("Capability %d not yet mapped to a feature.", capability);
Expand Down
Loading

0 comments on commit fec97ff

Please sign in to comment.