diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7bb817b..02eac72 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,7 +28,7 @@ FetchContent_MakeAvailable(cudawrappers)
 FetchContent_Declare(
   ccglib
   GIT_REPOSITORY https://git.astron.nl/RD/recruit/ccglib
-  GIT_TAG main)
+  GIT_TAG packing_complex_last) # temporary to support complex-last data
 FetchContent_MakeAvailable(ccglib)
 
 add_library(tcbf SHARED src/tcbf.cu)
diff --git a/src/tcbf.cu b/src/tcbf.cu
index 1538332..59bf522 100644
--- a/src/tcbf.cu
+++ b/src/tcbf.cu
@@ -78,18 +78,16 @@ void Beamformer::write_BF(cu::HostMemory &BF, const std::string path) {
 
 void Beamformer::RF_to_device(cu::HostMemory &RF) {
   // transfer in chunks to handle padding
-  // RF shape is complex * frames(padded) * samples(padded)
-  for (size_t c = 0; c < COMPLEX; c++) {
-    for (size_t f = 0; f < frames_; f++) {
-      // get objects pointing to start of chunk to transfer
-      const size_t d_offset =
-          c * frames_padded_ * samples_padded_ + f * samples_padded_;
-      const size_t offset = c * frames_ * samples_ + f * samples_;
-      const size_t bytes_to_transfer = samples_;
-      cu::DeviceMemory d_RF_chunk(*d_RF, d_offset, bytes_to_transfer);
-      stream_.memcpyHtoDAsync(d_RF_chunk, static_cast(RF) + offset,
-                              bytes_to_transfer);
-    }
+  // RF shape is frames(padded) * samples(padded) * complex
+  for (size_t f = 0; f < frames_; f++) {
+    // get objects pointing to start of chunk to transfer
+    // factors of 2 are for complex
+    const size_t d_offset = f * samples_padded_ * 2;
+    const size_t offset = f * samples_ * 2;
+    const size_t bytes_to_transfer = samples_ * 2;
+    cu::DeviceMemory d_RF_chunk(*d_RF, d_offset, bytes_to_transfer);
+    stream_.memcpyHtoDAsync(d_RF_chunk, static_cast(RF) + offset,
+                            bytes_to_transfer);
   }
 }
 
@@ -116,7 +114,8 @@ void Beamformer::process(cu::HostMemory &RF, cu::HostMemory &BF) {
   // transfer RF to GPU
   RF_to_device(RF);
   // pack bits
-  pack_rf_->Run(*d_RF, *d_RF_packed, ccglib::packing::pack);
+  pack_rf_->Run(*d_RF, *d_RF_packed, ccglib::packing::pack,
+                ccglib::packing::complex_last);
   // transpose to format required by GEMM
   transpose_rf_->Run(*d_RF_packed, *d_RF_transposed);
   // do GEMM