Review notes (text placed before the first "diff --git" header is commentary, not patch payload):
- Template argument lists and #include targets were missing from the reviewed copy of this patch.
  The template heads of the three copy_to overloads are restored from the names used in their
  signatures (confirm the parameter order). Everything else, including copy.cu and all context
  lines, is left as found.
- The CMakePresets.json hunks (cudax_ENABLE_CUDASTF switched to false, final newline removed)
  look unrelated to the copy_to feature; confirm they are intended before merging.

diff --git a/CMakePresets.json b/CMakePresets.json
index dcaf9b75977..debb8596592 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -244,7 +244,7 @@
         "cudax_ENABLE_HEADER_TESTING": true,
         "cudax_ENABLE_TESTING": true,
         "cudax_ENABLE_EXAMPLES": true,
-        "cudax_ENABLE_CUDASTF": true,
+        "cudax_ENABLE_CUDASTF": false,
         "cudax_ENABLE_CUDASTF_BOUNDSCHECK": false,
         "cudax_ENABLE_CUDASTF_CODE_GENERATION": true,
         "cudax_ENABLE_CUDASTF_MATHLIBS": false,
@@ -710,4 +710,4 @@
       "inherits": "base"
     }
   ]
-}
+}
\ No newline at end of file
diff --git a/cudax/include/cuda/experimental/__container/async_buffer.cuh b/cudax/include/cuda/experimental/__container/async_buffer.cuh
index 3d4cad522ac..7d748e59d7c 100644
--- a/cudax/include/cuda/experimental/__container/async_buffer.cuh
+++ b/cudax/include/cuda/experimental/__container/async_buffer.cuh
@@ -847,6 +847,73 @@ using async_device_buffer = async_buffer<_Tp, _CUDA_VMR::device_accessible>;
 template
 using async_host_buffer = async_buffer<_Tp, _CUDA_VMR::host_accessible>;
 
+//! @brief Creates a new buffer holding a copy of the elements of `__source`.
+//!
+//! The result is built from an environment made of `__mr` and `__stream`, sized to
+//! `__source.size()` and created with `uninit`; a non-empty source is then copied into
+//! it with one `cudaMemcpyAsync` (`cudaMemcpyDefault`) issued on `__stream`.
+//!
+//! @param __source Buffer to copy from.
+//! @param __mr Memory resource placed in the environment of the returned buffer.
+//! @param __stream Stream placed in that environment and used for the copy.
+//! @return The newly created buffer.
+template <class _Tp, class... _TargetProperties, class... _SourceProperties>
+async_buffer<_Tp, _TargetProperties...>
+copy_to(const async_buffer<_Tp, _SourceProperties...>& __source,
+        any_async_resource<_TargetProperties...> __mr,
+        cuda::stream_ref __stream)
+{
+  const auto __count = __source.size();
+  env_t<_TargetProperties...> __env{__mr, __stream};
+  async_buffer<_Tp, _TargetProperties...> __res{__env, __count, uninit};
+
+  // An empty source has nothing to transfer, so neither the wait nor the copy is needed.
+  if (__count != 0)
+  {
+    // NOTE(review): `wait()` presumably blocks the host on the source's stream - confirm. If so,
+    // work on that stream is already ordered before a copy issued on the same stream, and the
+    // blocking wait is only required when the copy runs on a different stream.
+    const cuda::stream_ref __source_stream = __source.get_stream();
+    if (__source_stream != __stream)
+    {
+      __source.wait();
+    }
+
+    _CCCL_TRY_CUDA_API(
+      ::cudaMemcpyAsync,
+      "cudax::copy_to: failed to copy data",
+      __res.__unwrapped_begin(),
+      __source.__unwrapped_begin(),
+      sizeof(_Tp) * __count,
+      cudaMemcpyKind::cudaMemcpyDefault,
+      __stream.get());
+  }
+
+  return __res;
+}
+
+//! @brief Copies `__source` into a new buffer that uses `__mr` and the stream of `__source`.
+//!
+//! @param __source Buffer to copy from; its `get_stream()` is forwarded as the target stream.
+//! @param __mr Memory resource placed in the environment of the returned buffer.
+//! @return The newly created buffer.
+template <class _Tp, class... _TargetProperties, class... _SourceProperties>
+async_buffer<_Tp, _TargetProperties...>
+copy_to(const async_buffer<_Tp, _SourceProperties...>& __source, any_async_resource<_TargetProperties...> __mr)
+{
+  return ::cuda::experimental::copy_to(__source, __mr, __source.get_stream());
+}
+
+//! @brief Copies `__source` into a new buffer that uses the memory resource and stream of `__source`.
+//!
+//! @param __source Buffer to copy from; its `get_memory_resource()` and `get_stream()` are forwarded.
+//! @return The newly created buffer.
+template <class _Tp, class... _SourceProperties>
+async_buffer<_Tp, _SourceProperties...> copy_to(const async_buffer<_Tp, _SourceProperties...>& __source)
+{
+  return ::cuda::experimental::copy_to(__source, __source.get_memory_resource(), __source.get_stream());
+}
+
 } // namespace cuda::experimental
 
 _CCCL_POP_MACROS
diff --git a/cudax/test/CMakeLists.txt b/cudax/test/CMakeLists.txt
index 33df2faa347..f63336f7b55 100644
--- a/cudax/test/CMakeLists.txt
+++ b/cudax/test/CMakeLists.txt
@@ -102,6 +102,7 @@ foreach(cn_target IN LISTS cudax_TARGETS)
     containers/async_buffer/comparison.cu
     containers/async_buffer/constructor.cu
     containers/async_buffer/conversion.cu
+    containers/async_buffer/copy.cu
     containers/async_buffer/iterators.cu
     containers/async_buffer/properties.cu
     containers/async_buffer/swap.cu
diff --git a/cudax/test/containers/async_buffer/copy.cu b/cudax/test/containers/async_buffer/copy.cu
new file mode 100644
index 00000000000..83d330c4100
--- /dev/null
+++ b/cudax/test/containers/async_buffer/copy.cu
@@ -0,0 +1,147 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of CUDA Experimental in CUDA C++ Core Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include "helper.h"
+#include "types.h"
+
+// TODO: only device accessible resource
+TEMPLATE_TEST_CASE("cudax::async_buffer copy_to",
+                   "[container][async_buffer]",
+                   cuda::std::tuple,
+                   cuda::std::tuple,
+                   (cuda::std::tuple) )
+{
+  using Env = typename extract_properties::env;
+  using Resource = typename extract_properties::resource;
+  using Buffer = typename extract_properties::async_buffer;
+  using T = typename Buffer::value_type;
+
+  cudax::stream stream{};
+  Resource resource{};
+  Env env{resource, stream};
+
+  using MatchingResource = typename extract_properties::matching_resource;
+  Env matching_env{MatchingResource{resource}, stream};
+
+  SECTION("Same resource and stream")
+  {
+    { // empty input
+      const Buffer input{env};
+      const Buffer buf = cudax::copy_to(input, input.get_memory_resource(), input.get_stream());
+      CHECK(buf.empty());
+      CHECK(buf.data() == nullptr);
+    }
+
+    { // non-empty input
+      const Buffer input{env, {T(1), T(42), T(1337), T(0), T(12), T(-1)}};
+      const Buffer buf = cudax::copy_to(input, input.get_memory_resource(), input.get_stream());
+      CHECK(!buf.empty());
+      CHECK(equal_range(buf));
+    }
+
+    { // empty input
+      const Buffer input{env};
+      const Buffer buf = cudax::copy_to(input);
+      CHECK(buf.empty());
+      CHECK(buf.data() == nullptr);
+    }
+
+    { // non-empty input
+      const Buffer input{env, {T(1), T(42), T(1337), T(0), T(12), T(-1)}};
+      const Buffer buf = cudax::copy_to(input);
+      CHECK(!buf.empty());
+      CHECK(equal_range(buf));
+    }
+  }
+
+  SECTION("Different stream")
+  {
+    cudax::stream other_stream{};
+    { // empty input
+      const Buffer input{env};
+      const Buffer buf = cudax::copy_to(input, input.get_memory_resource(), other_stream);
+      CHECK(buf.empty());
+      CHECK(buf.data() == nullptr);
+    }
+
+    { // non-empty input
+      const Buffer input{env, {T(1), T(42), T(1337), T(0), T(12), T(-1)}};
+      const Buffer buf = cudax::copy_to(input, input.get_memory_resource(), other_stream);
+      CHECK(!buf.empty());
+      CHECK(equal_range(buf));
+    }
+  }
+
+  SECTION("Different resource and stream")
+  {
+    cudax::stream other_stream{};
+    { // empty input
+      const Buffer input{env};
+      const auto buf = cudax::copy_to(input, env.query(cudax::get_memory_resource), other_stream);
+      static_assert(!cuda::std::is_same_v>);
+      CHECK(buf.empty());
+      CHECK(buf.data() == nullptr);
+    }
+
+    { // non-empty input
+      const Buffer input{env, {T(1), T(42), T(1337), T(0), T(12), T(-1)}};
+      const auto buf = cudax::copy_to(input, env.query(cudax::get_memory_resource), other_stream);
+      static_assert(!cuda::std::is_same_v>);
+      CHECK(!buf.empty());
+      CHECK(equal_range(buf));
+    }
+  }
+
+  SECTION("Different resource, same stream")
+  {
+    { // empty input
+      const Buffer input{env};
+      const auto buf = cudax::copy_to(input, env.query(cudax::get_memory_resource), stream);
+      static_assert(!cuda::std::is_same_v>);
+      CHECK(buf.empty());
+      CHECK(buf.data() == nullptr);
+    }
+
+    { // non-empty input
+      const Buffer input{env, {T(1), T(42), T(1337), T(0), T(12), T(-1)}};
+      const auto buf = cudax::copy_to(input, env.query(cudax::get_memory_resource), stream);
+      static_assert(!cuda::std::is_same_v>);
+      CHECK(!buf.empty());
+      CHECK(equal_range(buf));
+    }
+
+    { // empty input
+      const Buffer input{env};
+      const auto buf = cudax::copy_to(input, env.query(cudax::get_memory_resource));
+      static_assert(!cuda::std::is_same_v>);
+      CHECK(buf.empty());
+      CHECK(buf.data() == nullptr);
+    }
+
+    { // non-empty input
+      const Buffer input{env, {T(1), T(42), T(1337), T(0), T(12), T(-1)}};
+      const auto buf = cudax::copy_to(input, env.query(cudax::get_memory_resource));
+      static_assert(!cuda::std::is_same_v>);
+      CHECK(!buf.empty());
+      CHECK(equal_range(buf));
+    }
+  }
+}