diff --git a/examples/by_api_module/error_handling.cu b/examples/by_api_module/error_handling.cu
index ebfd045d..f52c2d8e 100644
--- a/examples/by_api_module/error_handling.cu
+++ b/examples/by_api_module/error_handling.cu
@@ -32,6 +32,23 @@ int main(int, char **)
 		die_("An error was outstanding, despite our not having committed any 'sticky' errors)");
 	}
 
+	cuda::device::current::set(cuda::device::get(0));
+	auto device = cuda::device::current::get();
+
+	bool got_expected_exception = false;
+	try {
+		cuda::launch_configuration_t lc = cuda::launch_config_builder()
+			.overall_size(2048)
+			.block_dimensions(15000) // Note: higher than the possible maximum for known CUDA devices
+			.build();
+		(void) lc;
+	} catch (::std::invalid_argument&) {
+		got_expected_exception = true;
+	}
+	if (not got_expected_exception) {
+		die_("Should have gotten an ::std::invalid_argument exception about a launch configuration, but - didn't");
+	}
+
 	std::cout << "SUCCESS\n";
 	return EXIT_SUCCESS;
 }
diff --git a/src/cuda/api/error.hpp b/src/cuda/api/error.hpp
index e3b028d1..c72d4aa3 100644
--- a/src/cuda/api/error.hpp
+++ b/src/cuda/api/error.hpp
@@ -202,12 +202,18 @@ constexpr inline bool operator!=(const named_t& lhs, const status_t& rhs) noexcept
 /**
  * @brief Determine whether the API call returning the specified status had succeeded
  */
+///@{
 constexpr bool is_success(status_t status) { return status == static_cast<status_t>(status::success); }
+constexpr bool is_success(cudaError_t status) { return static_cast<status_t>(status) == static_cast<status_t>(status::success); }
+///@}
 
 /**
  * @brief Determine whether the API call returning the specified status had failed
  */
+///@{
 constexpr bool is_failure(status_t status) { return not is_success(status); }
+constexpr bool is_failure(cudaError_t status) { return is_failure(static_cast<status_t>(status)); }
+///@}
 
 /**
  * Obtain a brief textual explanation for a specified kind of CUDA Runtime API status
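A quick illustration of what the new `is_success`/`is_failure` overloads enable: checking a Runtime-API return code directly, with no manual cast to the driver-style `status_t`. This is a minimal sketch, not part of the patch; `cudaDeviceSynchronize` is just a convenient Runtime API call returning a `cudaError_t`:

    #include <cuda/api.hpp>
    #include <cuda_runtime_api.h>
    #include <iostream>

    void report_sync_result()
    {
        cudaError_t ret = cudaDeviceSynchronize();
        // Before this patch, the comparison required static_cast<cuda::status_t>(ret)
        if (cuda::is_failure(ret)) {
            std::cerr << "synchronization failed\n";
        }
    }
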
diff --git a/src/cuda/api/kernel_launch.hpp b/src/cuda/api/kernel_launch.hpp
index 8a379bd5..ca21de13 100644
--- a/src/cuda/api/kernel_launch.hpp
+++ b/src/cuda/api/kernel_launch.hpp
@@ -151,7 +151,7 @@ void enqueue_raw_kernel_launch_in_current_context(
 	static_assert(::std::is_function<KernelFunction>::value or is_function_ptr<KernelFunction>::value,
 		"Only a bona fide function can be launched as a CUDA kernel");
 #ifndef NDEBUG
-	detail_::validate(launch_configuration);
+	validate(launch_configuration);
 #endif
 	if (launch_configuration.block_cooperation == thread_blocks_may_not_cooperate) {
 		// regular plain vanilla launch
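The `#ifndef NDEBUG` gate above is the pattern used throughout this patch: configuration validation runs only in debug builds and compiles away entirely when `NDEBUG` is defined. A sketch of what a caller can now rely on; the kernel and buffer are placeholders, and the builder methods are the ones the patch itself uses:

    #include <cuda/api.hpp>

    __global__ void scale(float* data, float factor)
    {
        data[threadIdx.x + blockIdx.x * blockDim.x] *= factor;
    }

    void launch_with_debug_checks(const cuda::stream_t& stream, float* device_buffer)
    {
        auto config = cuda::launch_config_builder()
            .overall_size(4096)
            .block_dimensions(256)
            .build();
        // Debug build: an invalid configuration throws ::std::invalid_argument on the
        // host, before anything is enqueued. Release build: no host-side checks, so an
        // invalid configuration would surface as a launch error from the driver.
        cuda::enqueue_launch(scale, stream, config, device_buffer, 2.0f);
    }
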
diff --git a/src/cuda/api/launch_config_builder.hpp b/src/cuda/api/launch_config_builder.hpp
index c488341c..9f75bb78 100644
--- a/src/cuda/api/launch_config_builder.hpp
+++ b/src/cuda/api/launch_config_builder.hpp
@@ -44,6 +44,20 @@ inline dimensions_t div_rounding_up(overall_dimensions_t overall_dims, block_dimensions_t block_dims)
 
 } // namespace grid
 
+namespace detail_ {
+
+static void validate_all_dimension_compatibility(
+	grid::block_dimensions_t   block,
+	grid::dimensions_t         grid,
+	grid::overall_dimensions_t overall)
+{
+	if (grid * block != overall) {
+		throw ::std::invalid_argument("specified block, grid and overall dimensions do not agree");
+	}
+}
+
+} // namespace detail_
+
 class launch_config_builder_t {
 public:
 	void resolve_dimensions()  {
@@ -194,132 +208,76 @@ class launch_config_builder_t {
 
 	launch_config_builder_t& operator=(launch_configuration_t config)
 	{
-		thread_block_cooperation = config.block_cooperation;
-		dynamic_shared_memory_size_ = config.dynamic_shared_memory_size;
 #ifndef NDEBUG
-		block_dims_acceptable_to_kernel_or_device(config.dimensions.block);
+		detail_::validate(config);
+		if (kernel_) { detail_::validate_compatibility(*kernel_, config); }
+		if (device_) { detail_::validate_compatibility(device(), config); }
 #endif
+		thread_block_cooperation = config.block_cooperation;
+		dynamic_shared_memory_size_ = config.dynamic_shared_memory_size;
 		dimensions(config.dimensions);
 		return *this;
 	}
 
 #ifndef NDEBUG
-	static void compatible(
+	static void validate_compatibility(
 		const kernel_t*        kernel_ptr,
 		memory::shared::size_t shared_mem_size)
 	{
 		if (kernel_ptr == nullptr) { return; }
-		if (shared_mem_size == 0) { return; }
-		auto max_shared = kernel_ptr->get_maximum_dynamic_shared_memory_per_block();
-		if (shared_mem_size > max_shared) {
-			throw ::std::invalid_argument("Requested dynamic shared memory size "
-				+ ::std::to_string(shared_mem_size) + " exceeds kernel's maximum allowed value of "
-				+ ::std::to_string(max_shared));
-		}
+		detail_::validate_compatibility(*kernel_ptr, shared_mem_size);
 	}
 
-	static void compatible(
+	static void validate_compatibility(
 		optional<device::id_t> maybe_device_id,
 		memory::shared::size_t shared_mem_size)
 	{
 		if (not maybe_device_id) { return; }
-		if (shared_mem_size == 0) { return; }
-		auto max_shared = device(maybe_device_id).properties().max_shared_memory_per_block();
-		if (shared_mem_size > max_shared) {
-			throw ::std::invalid_argument(
-				"Requested dynamic shared memory size " + ::std::to_string(shared_mem_size)
-				+ " exceeds the device maximum of " + ::std::to_string(max_shared));
-		}
+		detail_::validate_compatibility(device(maybe_device_id), shared_mem_size);
 	}
 
 	void validate_dynamic_shared_memory_size(memory::shared::size_t size)
 	{
-		compatible(kernel_, size);
-		compatible(device_, size);
+		validate_compatibility(kernel_, size);
+		validate_compatibility(device_, size);
 	}
 
-	// Note: This ignores the value of dimensions.grid and dimensions.overall
-	static void compatible(
+	static void validate_block_dimension_compatibility(
 		const kernel_t*          kernel_ptr,
 		grid::block_dimensions_t block_dims)
 	{
 		if (kernel_ptr == nullptr) { return; }
-		auto max_block_size = kernel_ptr->maximum_threads_per_block();
-		auto volume = block_dims.volume();
-		if (volume > max_block_size) {
-			throw ::std::invalid_argument(
-				"specified block dimensions result in blocks of size " + ::std::to_string(volume)
-				+ ", exceeding the maximum possible block size of " + ::std::to_string(max_block_size)
-				+ " for " + kernel::detail_::identify(*kernel_ptr));
-		}
+		return detail_::validate_block_dimension_compatibility(*kernel_ptr, block_dims);
 	}
 
-	static void compatible(
+	static void validate_block_dimension_compatibility(
 		optional<device::id_t>   maybe_device_id,
 		grid::block_dimensions_t block_dims)
 	{
 		if (not maybe_device_id) { return; }
-		auto dev = device(maybe_device_id);
-		auto max_block_size = dev.maximum_threads_per_block();
-		auto volume = block_dims.volume();
-		if (volume > max_block_size) {
-			throw ::std::invalid_argument(
-				"specified block dimensions result in blocks of size " + ::std::to_string(volume)
-				+ ", exceeding the maximum possible block size of " + ::std::to_string(max_block_size)
-				+ " for " + device::detail_::identify(dev.id()));
-		}
-		auto dim_maxima = grid::block_dimensions_t{
-			static_cast<grid::block_dimension_t>(dev.get_attribute(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X)),
-			static_cast<grid::block_dimension_t>(dev.get_attribute(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y)),
-			static_cast<grid::block_dimension_t>(dev.get_attribute(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z))
-		};
-		auto check =
-			[dev](grid::block_dimension_t dim, grid::block_dimension_t max, const char* axis) {
-				if (max > dim) {
-					throw ::std::invalid_argument(
-						::std::string("specified block ") + axis + "-axis dimension " + ::std::to_string(dim)
-						+ " exceeds the maximum supported " + axis + " dimension of " + ::std::to_string(max)
-						+ " for " + device::detail_::identify(dev.id()));
-				}
-			};
-		check(block_dims.x, dim_maxima.x, "X");
-		check(block_dims.y, dim_maxima.y, "Y");
-		check(block_dims.z, dim_maxima.z, "Z");
-	}
-
-	void block_dims_acceptable_to_kernel_or_device(grid::block_dimensions_t block_dims) const
-	{
-		compatible(kernel_, block_dims);
-		compatible(device_, block_dims);
-	}
-
-	static void dimensions_compatible(
-		grid::block_dimensions_t   block,
-		grid::dimensions_t         grid,
-		grid::overall_dimensions_t overall)
-	{
-		if (grid * block != overall) {
-			throw ::std::invalid_argument("specified block, grid and overall dimensions do not agree");
-		}
+		detail_::validate_block_dimension_compatibility(device(maybe_device_id), block_dims);
 	}
 
 	void validate_block_dimensions(grid::block_dimensions_t block_dims) const
 	{
+		detail_::validate_block_dimensions(block_dims);
 		if (dimensions_.grid and dimensions_.overall) {
-			dimensions_compatible(block_dims, dimensions_.grid.value(), dimensions_.overall.value());
+			detail_::validate_all_dimension_compatibility(
+				block_dims, dimensions_.grid.value(), dimensions_.overall.value());
 		}
-		block_dims_acceptable_to_kernel_or_device(block_dims);
+		// TODO: Check divisibility
+		validate_block_dimension_compatibility(kernel_, block_dims);
+		validate_block_dimension_compatibility(device_, block_dims);
 	}
 
 	void validate_grid_dimensions(grid::dimensions_t grid_dims) const
 	{
+		detail_::validate_grid_dimensions(grid_dims);
 		if (dimensions_.block and dimensions_.overall) {
-			if (grid_dims * dimensions_.block.value() != dimensions_.overall.value()) {
-				throw ::std::invalid_argument(
-					"specified grid dimensions conflict with the already-specified "
-					"block and overall dimensions");
-			}
+			detail_::validate_all_dimension_compatibility(
+				dimensions_.block.value(), grid_dims, dimensions_.overall.value());
 		}
+		// TODO: Check divisibility
 	}
 
 	void validate_overall_dimensions(grid::overall_dimensions_t overall_dims) const
@@ -339,9 +297,9 @@ class launch_config_builder_t {
 		{
 			auto block_dims = dimensions_.block ?
 				dimensions_.block.value() :
 				get_composite_dimensions().block;
-			compatible(kernel_ptr, block_dims);
+			validate_block_dimension_compatibility(kernel_ptr, block_dims);
 		}
-		compatible(kernel_ptr, dynamic_shared_memory_size_);
+		validate_compatibility(kernel_ptr, dynamic_shared_memory_size_);
 	}
 
 	void validate_device(device::id_t device_id) const
@@ -350,17 +308,18 @@ class launch_config_builder_t {
 		{
 			auto block_dims = dimensions_.block ?
 				dimensions_.block.value() :
 				get_composite_dimensions().block;
-			compatible(device_id, block_dims);
+			validate_block_dimension_compatibility(device_id, block_dims);
 		}
-		compatible(device_id, dynamic_shared_memory_size_);
+		validate_compatibility(device_id, dynamic_shared_memory_size_);
 	}
 
 	void validate_composite_dimensions(grid::composite_dimensions_t composite_dims) const
 	{
-		compatible(kernel_, composite_dims.block);
-		compatible(device_, composite_dims.block);
+		validate_block_dimension_compatibility(kernel_, composite_dims.block);
+		validate_block_dimension_compatibility(device_, composite_dims.block);
 
 		// Is there anything to validate regarding the grid dims?
+		// validate_grid_dimension_compatibility(device_, composite_dims.grid);
 	}
 #endif // ifndef NDEBUG
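To make the role of `validate_all_dimension_compatibility()` concrete: once block, grid and overall dimensions are all pinned down, their product must agree exactly, and in a debug build the builder now rejects a mismatch. A sketch of such a rejected configuration, assuming `grid_dimensions()` is the grid-setter counterpart of `block_dimensions()` (only `block_dimensions()` and `overall_size()` appear in the patch itself); 10 blocks of 100 threads cover 1000, not 999:

    try {
        auto config = cuda::launch_config_builder()
            .block_dimensions(100)
            .grid_dimensions(10)
            .overall_size(999) // disagrees with 10 blocks x 100 threads
            .build();
        (void) config;
    } catch (::std::invalid_argument&) {
        // "specified block, grid and overall dimensions do not agree"
    }
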
diff --git a/src/cuda/api/launch_configuration.hpp b/src/cuda/api/launch_configuration.hpp
index 4594ea58..f374e080 100644
--- a/src/cuda/api/launch_configuration.hpp
+++ b/src/cuda/api/launch_configuration.hpp
@@ -19,6 +19,33 @@
 
 namespace cuda {
 
+class kernel_t;
+
+namespace detail_ {
+
+inline void validate_block_dimensions(grid::block_dimensions_t block_dims)
+{
+	if (block_dims.volume() == 0) {
+		throw ::std::invalid_argument("Zero-volume block dimensions provided");
+	}
+}
+
+inline void validate_grid_dimensions(grid::dimensions_t grid_dims)
+{
+	if (grid_dims.volume() == 0) {
+		throw ::std::invalid_argument("Zero-volume grid-of-blocks dimensions provided");
+	}
+}
+
+// Note: The reason for the verbose name is the identity of the block and grid dimension types
+void validate_block_dimension_compatibility(const device_t &device, grid::block_dimensions_t block_dims);
+void validate_block_dimension_compatibility(const kernel_t &kernel, grid::block_dimensions_t block_dims);
+
+void validate_compatibility(const kernel_t &kernel, memory::shared::size_t shared_mem_size);
+void validate_compatibility(const device_t &device, memory::shared::size_t shared_mem_size);
+
+} // namespace detail_
+
 struct launch_configuration_t {
 	grid::composite_dimensions_t dimensions { 0, 0 };
@@ -122,17 +149,32 @@ constexpr bool operator!=(const launch_configuration_t lhs, const launch_configuration_t rhs)
 
 namespace detail_ {
 
+// Note: This will not check anything related to the device or the kernel
+// with which the launch configuration is to be used
 inline void validate(launch_configuration_t launch_config) noexcept(false)
 {
-	if (launch_config.dimensions.grid.volume() == 0) {
-		throw ::std::invalid_argument("Launch config specifies a zero-volume grid-of-blocks");
-	}
-	if (launch_config.dimensions.block.volume() == 0) {
-		throw ::std::invalid_argument("Launch config specifies a zero-volume block dimensions");
-	}
-	// TODO: Consider adding device-specific validations here, like checking for
-	// block size limits, shared mem size limits etc. - by taking an optional device
-	// as a parameter
+	validate_block_dimensions(launch_config.dimensions.block);
+	validate_grid_dimensions(launch_config.dimensions.grid);
+}
+
+inline void validate_compatibility(
+	const device_t& device,
+	launch_configuration_t launch_config) noexcept(false)
+{
+	validate(launch_config);
+	validate_block_dimension_compatibility(device, launch_config.dimensions.block);
+	// Uncomment if we actually get such checks
+	// validate_grid_dimension_compatibility(device, launch_config.dimensions.grid);
+}
+
+inline void validate_compatibility(
+	const kernel_t& kernel,
+	launch_configuration_t launch_config) noexcept(false)
+{
+	validate(launch_config);
+	validate_block_dimension_compatibility(kernel, launch_config.dimensions.block);
+	// Uncomment if we actually get such checks
+	// validate_grid_dimension_compatibility(kernel, launch_config.dimensions.grid);
 }
 
 } // namespace detail_
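The declarations above establish a two-layer scheme: `detail_::validate()` checks a configuration's intrinsic invariants (non-zero volumes), while the `detail_::validate_compatibility()` overloads, defined out-of-line in multi_wrapper_impls/kernel_launch.hpp below, additionally check it against a concrete device or kernel. A sketch of the layering in a debug build; these are internal `detail_`-namespace functions, called here purely for illustration:

    void validate_in_layers(const cuda::device_t& device, const cuda::kernel_t& kernel)
    {
        auto config = cuda::launch_config_builder()
            .overall_size(8192)
            .block_dimensions(256)
            .build();
        cuda::detail_::validate(config);                       // intrinsic checks only
        cuda::detail_::validate_compatibility(device, config); // adds device-limit checks
        cuda::detail_::validate_compatibility(kernel, config); // adds kernel-limit checks
    }
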
diff --git a/src/cuda/api/multi_wrapper_impls/kernel_launch.hpp b/src/cuda/api/multi_wrapper_impls/kernel_launch.hpp
index af1777a9..c12c5f30 100644
--- a/src/cuda/api/multi_wrapper_impls/kernel_launch.hpp
+++ b/src/cuda/api/multi_wrapper_impls/kernel_launch.hpp
@@ -20,6 +20,83 @@ namespace cuda {
 
 namespace detail_ {
 
+inline void validate_compatibility(
+	const device_t&        device,
+	memory::shared::size_t shared_mem_size)
+{
+	if (shared_mem_size == 0) { return; }
+	memory::shared::size_t max_shared = device.get_attribute(CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN);
+
+	// Note: A single kernel may not be able to access this shared memory capacity without
+	// opting in to it, using kernel_t::set_maximum_dynamic_shared_memory_per_block. See @ref kernel_t
+
+	if (shared_mem_size > max_shared) {
+		throw ::std::invalid_argument(
+			"A dynamic shared memory size of " + ::std::to_string(shared_mem_size)
+			+ " bytes exceeds the device maximum of " + ::std::to_string(max_shared));
+	}
+}
+
+inline void validate_block_dimension_compatibility(
+	const device_t&          device,
+	grid::block_dimensions_t block_dims)
+{
+	auto max_block_size = device.maximum_threads_per_block();
+	auto volume = block_dims.volume();
+	if (volume > max_block_size) {
+		throw ::std::invalid_argument(
+			"Specified block dimensions result in blocks of size " + ::std::to_string(volume)
+			+ ", exceeding the maximum possible block size of " + ::std::to_string(max_block_size)
+			+ " for " + device::detail_::identify(device.id()));
+	}
+	auto dim_maxima = grid::block_dimensions_t{
+		static_cast<grid::block_dimension_t>(device.get_attribute(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X)),
+		static_cast<grid::block_dimension_t>(device.get_attribute(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y)),
+		static_cast<grid::block_dimension_t>(device.get_attribute(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z))
+	};
+	auto device_id = device.id();
+	auto check =
+		[device_id](grid::block_dimension_t dim, grid::block_dimension_t max, const char *axis) {
+			if (max < dim) {
+				throw ::std::invalid_argument(
+					::std::string("specified block ") + axis + "-axis dimension " + ::std::to_string(dim)
+					+ " exceeds the maximum supported " + axis + " dimension of " + ::std::to_string(max)
+					+ " for " + device::detail_::identify(device_id));
+			}
+		};
+	check(block_dims.x, dim_maxima.x, "X");
+	check(block_dims.y, dim_maxima.y, "Y");
+	check(block_dims.z, dim_maxima.z, "Z");
+}
+
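+// The two overloads below mirror the device-level checks above, but test against
+// the kernel's own limits, which may be tighter: e.g. a maximum block size
+// reduced by the kernel's register usage, or a dynamic shared memory maximum
+// requiring opt-in. The per-axis dimension maxima are device-wide, so only the
+// total block volume is re-checked at the kernel level.
+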
+inline void validate_compatibility(
+	const kernel_t&        kernel,
+	memory::shared::size_t shared_mem_size)
+{
+	if (shared_mem_size == 0) { return; }
+	auto max_shared = kernel.get_maximum_dynamic_shared_memory_per_block();
+	if (shared_mem_size > max_shared) {
+		throw ::std::invalid_argument(
+			"Requested dynamic shared memory size "
+			+ ::std::to_string(shared_mem_size) + " exceeds kernel's maximum allowed value of "
+			+ ::std::to_string(max_shared));
+	}
+}
+
+inline void validate_block_dimension_compatibility(
+	const kernel_t&          kernel,
+	grid::block_dimensions_t block_dims)
+{
+	auto max_block_size = kernel.maximum_threads_per_block();
+	auto volume = block_dims.volume();
+	if (volume > max_block_size) {
+		throw ::std::invalid_argument(
+			"specified block dimensions result in blocks of size " + ::std::to_string(volume)
+			+ ", exceeding the maximum possible block size of " + ::std::to_string(max_block_size)
+			+ " for " + kernel::detail_::identify(kernel));
+	}
+}
+
 template<typename... KernelParameters>
 void enqueue_launch_helper<apriori_compiled_kernel_t, KernelParameters...>::operator()(
 	const apriori_compiled_kernel_t&  wrapped_kernel,
@@ -38,6 +115,12 @@ void enqueue_launch_helper<apriori_compiled_kernel_t, KernelParameters...>::operator()(
 	// `KernelParameter` pack may contain some references, arrays and so on - which CUDA
 	// kernels cannot accept; so we massage those a bit.
 
+#ifndef NDEBUG
+	validate_compatibility(stream.device(), launch_configuration);
+	validate_compatibility(wrapped_kernel, launch_configuration);
+	// validating the configuration on its own should happen within the following function:
+#endif
+
 	detail_::enqueue_raw_kernel_launch_in_current_context(
 		unwrapped_kernel_function,
 		stream.handle(),
@@ -62,6 +145,9 @@ inline void launch_type_erased_in_current_context(
 	launch_configuration_t  launch_config,
 	const void**            marshalled_arguments)
 {
+#ifndef NDEBUG
+	validate(launch_config);
+#endif
 	status_t status;
 	const auto& lc = launch_config; // alias for brevity
 	if (launch_config.block_cooperation)
@@ -104,6 +190,11 @@ struct enqueue_launch_helper<kernel_t, KernelParameters...> {
 		auto marshalled_arguments { marshal_dynamic_kernel_arguments(::std::forward<KernelParameters>(arguments)...) };
 		auto function_handle = wrapped_kernel.handle();
 		CAW_SET_SCOPE_CONTEXT(stream.context_handle());
+#ifndef NDEBUG
+		validate_compatibility(stream.device(), launch_config);
+		validate_compatibility(wrapped_kernel, launch_config);
+#endif
+
 		launch_type_erased_in_current_context(
 			function_handle, stream.device_id(), stream.context_handle(),
 			stream.handle(), launch_config, marshalled_arguments.data());
@@ -123,6 +214,10 @@ void enqueue_launch(
 	// and not have trouble enqueueing into a stream in another context - it balks at doing so under
 	// certain conditions, so we must place ourselves in the stream's context.
 	CAW_SET_SCOPE_CONTEXT(stream.context_handle());
+#ifndef NDEBUG
+	validate_compatibility(stream.device(), launch_configuration);
+	// validating the configuration without the device should happen within the next function...
+#endif
 	detail_::enqueue_raw_kernel_launch_in_current_context(
 		kernel_function, stream.handle(), launch_configuration,
 		::std::forward<KernelParameters>(parameters)...);
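Taken together, in a debug build a bad configuration is now caught on the host at enqueue time, rather than surfacing as an asynchronous launch failure. An end-to-end sketch; the kernel and sizes are placeholders, and stream creation follows the library's examples:

    #include <cuda/api.hpp>
    #include <iostream>

    __global__ void noop() { }

    int main()
    {
        auto device = cuda::device::current::get();
        auto stream = device.create_stream(cuda::stream::async);
        auto config = cuda::launch_config_builder()
            .overall_size(2048)
            .block_dimensions(2048) // more threads per block than current devices support
            .build(); // no device is attached to the builder, so this still succeeds
        try {
            cuda::enqueue_launch(noop, stream, config);
        } catch (::std::invalid_argument& e) {
            // Thrown by validate_compatibility(stream.device(), ...) in debug builds;
            // with NDEBUG defined, the invalid launch would reach the driver instead.
            std::cout << "caught at enqueue time: " << e.what() << '\n';
        }
    }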