diff --git a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu index b313f1f05..8ada45262 100644 --- a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu +++ b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu @@ -138,9 +138,7 @@ MRFactoryFunc get_mr_factory(std::string const& resource_name) if (resource_name == "arena") { return &make_arena; } if (resource_name == "binning") { return &make_binning; } - std::cout << "Error: invalid memory_resource name: " << resource_name << std::endl; - - RMM_FAIL(); + RMM_FAIL("Invalid memory_resource name: " + resource_name); } void declare_benchmark(std::string const& name) @@ -175,7 +173,7 @@ void declare_benchmark(std::string const& name) return; } - std::cout << "Error: invalid memory_resource name: " << name << std::endl; + RMM_FAIL("Invalid memory_resource name: " + name); } // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) diff --git a/benchmarks/utilities/simulated_memory_resource.hpp b/benchmarks/utilities/simulated_memory_resource.hpp index 73e2a4d37..4ca243e1c 100644 --- a/benchmarks/utilities/simulated_memory_resource.hpp +++ b/benchmarks/utilities/simulated_memory_resource.hpp @@ -65,7 +65,10 @@ class simulated_memory_resource final : public device_memory_resource { void* do_allocate(std::size_t bytes, cuda_stream_view) override { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - RMM_EXPECTS(begin_ + bytes <= end_, "Simulated memory size exceeded", rmm::bad_alloc); + RMM_EXPECTS(begin_ + bytes <= end_, + "Simulated memory size exceeded (failed to allocate " + + rmm::detail::format_bytes(bytes) + ")", + rmm::bad_alloc); auto* ptr = static_cast<void*>(begin_); begin_ += bytes; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) return ptr; diff --git a/include/rmm/detail/error.hpp b/include/rmm/detail/error.hpp index 3a05d69eb..ae5b37acf 100644 ---
a/include/rmm/detail/error.hpp +++ b/include/rmm/detail/error.hpp @@ -55,11 +55,11 @@ GET_RMM_EXPECTS_MACRO(__VA_ARGS__, RMM_EXPECTS_3, RMM_EXPECTS_2) \ (__VA_ARGS__) #define GET_RMM_EXPECTS_MACRO(_1, _2, _3, NAME, ...) NAME -#define RMM_EXPECTS_3(_condition, _reason, _exception_type) \ - (!!(_condition)) ? static_cast<void>(0) \ - : throw _exception_type /*NOLINT(bugprone-macro-parentheses)*/ \ - { \ - "RMM failure at: " __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _reason \ +#define RMM_EXPECTS_3(_condition, _reason, _exception_type) \ + (!!(_condition)) ? static_cast<void>(0) \ + : throw _exception_type /*NOLINT(bugprone-macro-parentheses)*/ \ + { \ + std::string("RMM failure at: " __FILE__ ":" RMM_STRINGIFY(__LINE__) ": ") + _reason \ } #define RMM_EXPECTS_2(_condition, _reason) RMM_EXPECTS_3(_condition, _reason, rmm::logic_error) @@ -79,9 +79,10 @@ GET_RMM_FAIL_MACRO(__VA_ARGS__, RMM_FAIL_2, RMM_FAIL_1) \ (__VA_ARGS__) #define GET_RMM_FAIL_MACRO(_1, _2, NAME, ...) NAME -#define RMM_FAIL_2(_what, _exception_type) \ - /*NOLINTNEXTLINE(bugprone-macro-parentheses)*/ \ - throw _exception_type{"RMM failure at:" __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _what}; +#define RMM_FAIL_2(_what, _exception_type) \ - /*NOLINTNEXTLINE(bugprone-macro-parentheses)*/ \ + throw _exception_type{std::string{"RMM failure at: " __FILE__ ":" RMM_STRINGIFY(__LINE__) ": "} + \ + _what}; #define RMM_FAIL_1(_what) RMM_FAIL_2(_what, rmm::logic_error) /** @@ -131,8 +132,31 @@ * * Defaults to throwing rmm::bad_alloc, but when `cudaErrorMemoryAllocation` is returned, * rmm::out_of_memory is thrown instead. + * + * Can be called with either 1 or 2 arguments: + * - RMM_CUDA_TRY_ALLOC(cuda_call): Performs error checking without specifying bytes + * - RMM_CUDA_TRY_ALLOC(cuda_call, num_bytes): Includes the number of bytes in the error message */ -#define RMM_CUDA_TRY_ALLOC(_call) \ +#define RMM_CUDA_TRY_ALLOC(...) 
\ + GET_RMM_CUDA_TRY_ALLOC_MACRO(__VA_ARGS__, RMM_CUDA_TRY_ALLOC_2, RMM_CUDA_TRY_ALLOC_1) \ + (__VA_ARGS__) +#define GET_RMM_CUDA_TRY_ALLOC_MACRO(_1, _2, NAME, ...) NAME + +#define RMM_CUDA_TRY_ALLOC_2(_call, num_bytes) \ + do { \ + cudaError_t const error = (_call); \ + if (cudaSuccess != error) { \ + cudaGetLastError(); \ + auto const msg = std::string{"CUDA error (failed to allocate "} + \ + std::to_string(num_bytes) + " bytes) at: " + __FILE__ + ":" + \ + RMM_STRINGIFY(__LINE__) + ": " + cudaGetErrorName(error) + " " + \ + cudaGetErrorString(error); \ + if (cudaErrorMemoryAllocation == error) { throw rmm::out_of_memory{msg}; } \ + throw rmm::bad_alloc{msg}; \ + } \ + } while (0) + +#define RMM_CUDA_TRY_ALLOC_1(_call) \ do { \ cudaError_t const error = (_call); \ if (cudaSuccess != error) { \ diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index f8e4e16cb..6aaa659ec 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -166,7 +166,9 @@ class arena_memory_resource final : public device_memory_resource { void* pointer = arena.allocate(bytes); if (pointer == nullptr) { if (dump_log_on_failure_) { dump_memory_log(bytes); } - RMM_FAIL("Maximum pool size exceeded", rmm::out_of_memory); + auto const msg = std::string("Maximum pool size exceeded (failed to allocate ") + + rmm::detail::format_bytes(bytes) + "): No room in arena."; + RMM_FAIL(msg.c_str(), rmm::out_of_memory); } return pointer; } diff --git a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp index a4ae74394..92aea2072 100644 --- a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp @@ -98,7 +98,8 @@ class cuda_async_view_memory_resource final : public device_memory_resource { { void* ptr{nullptr}; if (bytes > 0) { - 
RMM_CUDA_TRY_ALLOC(cudaMallocFromPoolAsync(&ptr, bytes, pool_handle(), stream.value()), + bytes); } return ptr; } diff --git a/include/rmm/mr/device/cuda_memory_resource.hpp b/include/rmm/mr/device/cuda_memory_resource.hpp index 522145d93..e4afbf711 100644 --- a/include/rmm/mr/device/cuda_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_memory_resource.hpp @@ -59,7 +59,7 @@ class cuda_memory_resource final : public device_memory_resource { void* do_allocate(std::size_t bytes, [[maybe_unused]] cuda_stream_view stream) override { void* ptr{nullptr}; - RMM_CUDA_TRY_ALLOC(cudaMalloc(&ptr, bytes)); + RMM_CUDA_TRY_ALLOC(cudaMalloc(&ptr, bytes), bytes); return ptr; } diff --git a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp index ad676bfd2..5b1476d37 100644 --- a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp +++ b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp @@ -214,7 +214,8 @@ class stream_ordered_memory_resource : public crtp, public device_ size = rmm::align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT); RMM_EXPECTS(size <= this->underlying().get_maximum_allocation_size(), - "Maximum allocation size exceeded", + std::string("Maximum allocation size exceeded (failed to allocate ") + + rmm::detail::format_bytes(size) + ")", rmm::out_of_memory); auto const block = this->underlying().get_block(size, stream_event); diff --git a/include/rmm/mr/device/limiting_resource_adaptor.hpp b/include/rmm/mr/device/limiting_resource_adaptor.hpp index 96d730191..d551fbdb3 100644 --- a/include/rmm/mr/device/limiting_resource_adaptor.hpp +++ b/include/rmm/mr/device/limiting_resource_adaptor.hpp @@ -18,6 +18,7 @@ #include #include #include +#include <rmm/detail/format.hpp> #include #include #include #include @@ -150,7 +151,9 @@ class limiting_resource_adaptor final : public device_memory_resource { }
allocated_bytes_ -= proposed_size; - RMM_FAIL("Exceeded memory limit", rmm::out_of_memory); + auto const msg = std::string("Exceeded memory limit (failed to allocate ") + + rmm::detail::format_bytes(bytes) + ")"; + RMM_FAIL(msg.c_str(), rmm::out_of_memory); } /** diff --git a/include/rmm/mr/device/managed_memory_resource.hpp b/include/rmm/mr/device/managed_memory_resource.hpp index 2bb807a31..3ade7fb53 100644 --- a/include/rmm/mr/device/managed_memory_resource.hpp +++ b/include/rmm/mr/device/managed_memory_resource.hpp @@ -63,7 +63,7 @@ class managed_memory_resource final : public device_memory_resource { if (bytes == 0) { return nullptr; } void* ptr{nullptr}; - RMM_CUDA_TRY_ALLOC(cudaMallocManaged(&ptr, bytes)); + RMM_CUDA_TRY_ALLOC(cudaMallocManaged(&ptr, bytes), bytes); return ptr; } diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index dee0471b9..6abf10e2c 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -242,8 +242,8 @@ class pool_memory_resource final * Attempts to allocate `try_size` bytes from upstream. If it fails, it iteratively reduces the * attempted size by half until `min_size`, returning the allocated block once it succeeds. * - * @throws rmm::bad_alloc if `min_size` bytes cannot be allocated from upstream or maximum pool - * size is exceeded. + * @throws rmm::out_of_memory if `min_size` bytes cannot be allocated from upstream or maximum + * pool size is exceeded. * * @param try_size The initial requested size to try allocating. * @param min_size The minimum requested size to try allocating. 
@@ -252,21 +252,34 @@ class pool_memory_resource final */ block_type try_to_expand(std::size_t try_size, std::size_t min_size, cuda_stream_view stream) { + auto report_error = [&](const char* reason) { + RMM_LOG_ERROR("[A][Stream %s][Upstream %zuB][FAILURE maximum pool size exceeded: %s]", + rmm::detail::format_stream(stream), + min_size, + reason); + auto const msg = std::string("Maximum pool size exceeded (failed to allocate ") + + rmm::detail::format_bytes(min_size) + std::string("): ") + reason; + RMM_FAIL(msg.c_str(), rmm::out_of_memory); + }; + while (try_size >= min_size) { - auto block = block_from_upstream(try_size, stream); - if (block.has_value()) { - current_pool_size_ += block.value().size(); - return block.value(); - } - if (try_size == min_size) { - break; // only try `size` once + try { + auto block = block_from_upstream(try_size, stream); + current_pool_size_ += block.size(); + return block; + } catch (std::exception const& e) { + if (try_size == min_size) { report_error(e.what()); } } try_size = std::max(min_size, try_size / 2); } - RMM_LOG_ERROR("[A][Stream %s][Upstream %zuB][FAILURE maximum pool size exceeded]", - rmm::detail::format_stream(stream), - min_size); - RMM_FAIL("Maximum pool size exceeded", rmm::out_of_memory); + + auto const max_size = maximum_pool_size_.value_or(std::numeric_limits<std::size_t>::max()); + auto const msg = std::string("Not enough room to grow, current/max/try size = ") + + rmm::detail::format_bytes(pool_size()) + ", " + + rmm::detail::format_bytes(max_size) + ", " + + rmm::detail::format_bytes(min_size); + report_error(msg.c_str()); + return {}; } /** @@ -307,6 +320,7 @@ class pool_memory_resource final // limit each time. If it is not set, grow exponentially, e.g. by doubling the pool size each // time. Upon failure, attempt to back off exponentially, e.g. by half the attempted size, // until either success or the attempt is less than the requested size. 
+ return try_to_expand(size_to_grow(size), size, stream); } @@ -339,21 +353,17 @@ class pool_memory_resource final * * @param size The size in bytes to allocate from the upstream resource * @param stream The stream on which the memory is to be used. + * @throws if call to allocate_async() throws * @return block_type The allocated block */ - std::optional<block_type> block_from_upstream(std::size_t size, cuda_stream_view stream) + block_type block_from_upstream(std::size_t size, cuda_stream_view stream) { RMM_LOG_DEBUG("[A][Stream %s][Upstream %zuB]", rmm::detail::format_stream(stream), size); if (size == 0) { return {}; } - try { - void* ptr = get_upstream_resource().allocate_async(size, stream); - return std::optional<block_type>{ - *upstream_blocks_.emplace(static_cast<char*>(ptr), size, true).first}; - } catch (std::exception const& e) { - return std::nullopt; - } + void* ptr = get_upstream_resource().allocate_async(size, stream); + return *upstream_blocks_.emplace(static_cast<char*>(ptr), size, true).first; } /** diff --git a/include/rmm/mr/device/system_memory_resource.hpp b/include/rmm/mr/device/system_memory_resource.hpp index 666a7a9c4..34c16f719 100644 --- a/include/rmm/mr/device/system_memory_resource.hpp +++ b/include/rmm/mr/device/system_memory_resource.hpp @@ -19,6 +19,7 @@ #include #include #include +#include <rmm/detail/format.hpp> #include #include @@ -103,7 +104,9 @@ class system_memory_resource final : public device_memory_resource { return rmm::detail::aligned_host_allocate( bytes, CUDA_ALLOCATION_ALIGNMENT, [](std::size_t size) { return ::operator new(size); }); } catch (std::bad_alloc const& e) { - RMM_FAIL("Failed to allocate memory: " + std::string{e.what()}, rmm::out_of_memory); + auto const msg = std::string("Failed to allocate ") + rmm::detail::format_bytes(bytes) + + std::string(" of memory: ") + e.what(); + RMM_FAIL(msg.c_str(), rmm::out_of_memory); } } diff --git a/include/rmm/mr/host/pinned_memory_resource.hpp b/include/rmm/mr/host/pinned_memory_resource.hpp index cf746f5ac..f75a8f7be 100644 ---
a/include/rmm/mr/host/pinned_memory_resource.hpp +++ b/include/rmm/mr/host/pinned_memory_resource.hpp @@ -122,8 +122,7 @@ class pinned_memory_resource final : public host_memory_resource { return rmm::detail::aligned_host_allocate(bytes, alignment, [](std::size_t size) { void* ptr{nullptr}; - auto status = cudaMallocHost(&ptr, size); - if (cudaSuccess != status) { throw std::bad_alloc{}; } + RMM_CUDA_TRY_ALLOC(cudaMallocHost(&ptr, size), size); return ptr; }); } diff --git a/include/rmm/mr/pinned_host_memory_resource.hpp b/include/rmm/mr/pinned_host_memory_resource.hpp index b5689b3ed..3f5878171 100644 --- a/include/rmm/mr/pinned_host_memory_resource.hpp +++ b/include/rmm/mr/pinned_host_memory_resource.hpp @@ -72,7 +72,7 @@ class pinned_host_memory_resource { return rmm::detail::aligned_host_allocate(bytes, alignment, [](std::size_t size) { void* ptr{nullptr}; - RMM_CUDA_TRY_ALLOC(cudaHostAlloc(&ptr, size, cudaHostAllocDefault)); + RMM_CUDA_TRY_ALLOC(cudaHostAlloc(&ptr, size, cudaHostAllocDefault), size); return ptr; }); } diff --git a/tests/mr/host/pinned_pool_mr_tests.cpp b/tests/mr/host/pinned_pool_mr_tests.cpp index 436fdfd1d..0a28ead7c 100644 --- a/tests/mr/host/pinned_pool_mr_tests.cpp +++ b/tests/mr/host/pinned_pool_mr_tests.cpp @@ -88,5 +88,16 @@ TEST(PinnedPoolTest, NonAlignedPoolSize) rmm::logic_error); } +TEST(PinnedPoolTest, ThrowOutOfMemory) +{ + rmm::mr::pinned_memory_resource pinned_mr{}; + const auto initial{0}; + const auto maximum{1024}; + pool_mr mr{pinned_mr, initial, maximum}; + mr.allocate(1024); + + EXPECT_THROW(mr.allocate(1024), rmm::out_of_memory); +} + } // namespace } // namespace rmm::test