diff --git a/CMakeLists.txt b/CMakeLists.txt index cd31fc6..3dc6b57 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.11 FATAL_ERROR) # 3.11 to avoid issues with OpenMP + CUDA -project(SpFFT LANGUAGES CXX VERSION 1.0.0) +project(SpFFT LANGUAGES CXX VERSION 1.0.1) set(SPFFT_SO_VERSION 1) set(SPFFT_VERSION ${PROJECT_VERSION}) diff --git a/src/execution/execution_gpu.cpp b/src/execution/execution_gpu.cpp index 0336916..05c7298 100644 --- a/src/execution/execution_gpu.cpp +++ b/src/execution/execution_gpu.cpp @@ -272,7 +272,7 @@ auto ExecutionGPU::forward_xy(const T* input) -> void { gpu::check_status(gpu::memcpy_async(static_cast(spaceDomainDataExternalGPU_.data()), static_cast(inputPtrHost), spaceDomainDataExternalGPU_.size() * sizeof(T), - gpu::flag::MemcpyDeviceToHost, stream_.get())); + gpu::flag::MemcpyHostToDevice, stream_.get())); } transformXY_->forward(inputPtrGPU, freqDomainXYGPU_.data()); } diff --git a/tests/mpi_tests/test_transpose_gpu.cpp b/tests/mpi_tests/test_transpose_gpu.cpp index 1fb98d8..c67ba1d 100644 --- a/tests/mpi_tests/test_transpose_gpu.cpp +++ b/tests/mpi_tests/test_transpose_gpu.cpp @@ -109,8 +109,8 @@ static void check_space_domain(const HostArrayView3D>& real for (SizeType z = 0; z < numLocalXYPlanes; ++z) { for (SizeType x = 0; x < fullView.dim_outer(); ++x) { for (SizeType y = 0; y < fullView.dim_mid(); ++y) { - EXPECT_EQ(realView(z, x, y).real(), fullView(x, y, z + planeOffset).real()); - EXPECT_EQ(realView(z, x, y).imag(), fullView(x, y, z + planeOffset).imag()); + EXPECT_EQ(realView(z, y, x).real(), fullView(x, y, z + planeOffset).real()); + EXPECT_EQ(realView(z, y, x).imag(), fullView(x, y, z + planeOffset).imag()); } } }