
Commit

Update
TeachRaccooon committed Aug 15, 2024
1 parent 6f01c04 commit 8ad444a
Showing 1 changed file with 4 additions and 2 deletions.
RandLAPACK/gpu_functions/rl_cuda_kernels.cuh: 6 changes (4 additions & 2 deletions)
@@ -425,6 +425,7 @@ void col_swap_gpu(
int64_t lda,
int64_t const* idx
) {
+/*
#if 1
std::vector<int64_t> idx_copy(k);
auto a = std::make_unique<T[]>(m * k);
@@ -437,7 +438,8 @@ void col_swap_gpu(
cudaMemcpyAsync(A, a.get(), sizeof(T) * m * k, cudaMemcpyHostToDevice, stream);
// must add this to avoid dangling reference during async copy
cudaStreamSynchronize(stream);
-#else
+#else
+*/
#ifdef USE_CUDA
//constexpr int threadsPerBlock{128};
//int64_t num_blocks{(m + threadsPerBlock - 1) / threadsPerBlock};
@@ -465,7 +467,7 @@ void col_swap_gpu(
dim3 dimGrid(std::min(upper_bound, lower_bound), 1, 1);
void* kernelArgs[] = {(void*)&m, (void*)&n, (void*)&k, (void*)&A, (void*)&lda, (void*)&idx, (void*)&idx_copy};
cudaLaunchCooperativeKernel((void*)col_swap_gpu_kernel<T>, dimGrid, dimBlock, kernelArgs, 0, stream);
-#endif
+//#endif
ierr = cudaGetLastError();
if (ierr != cudaSuccess)
{
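
The block this commit comments out is the host round-trip fallback: the data is staged in a local std::unique_ptr<T[]> buffer, pushed back to the device with cudaMemcpyAsync, and then cudaStreamSynchronize is called so the local buffer is not destroyed while the asynchronous copy is still reading it (the "dangling reference" noted in the in-line comment). A minimal sketch of that hazard, using a hypothetical helper rather than RandLAPACK code:

#include <cuda_runtime.h>
#include <cstdint>
#include <memory>

// Hypothetical helper, for illustration only: stage data in a local host
// buffer and push it to the device on a stream.
template <typename T>
void upload_from_stack_buffer(T* device_dst, int64_t n, cudaStream_t stream) {
    auto host_src = std::make_unique<T[]>(n);   // local staging buffer
    // ... fill host_src ...
    // cudaMemcpyAsync from pageable host memory may still be reading
    // host_src after this call returns.
    cudaMemcpyAsync(device_dst, host_src.get(), sizeof(T) * n,
                    cudaMemcpyHostToDevice, stream);
    // Without this barrier, host_src would be destroyed at function exit
    // while the copy could still be in flight, i.e. a dangling reference.
    cudaStreamSynchronize(stream);
}   // host_src is released here, safely after the copy has finished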

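The path that remains active launches col_swap_gpu_kernel cooperatively: the kernel parameters are packed into a void* array and the grid is capped with std::min(upper_bound, lower_bound) before cudaLaunchCooperativeKernel is called. A self-contained sketch of that launch pattern, with a placeholder kernel and an occupancy-based bound standing in for the actual RandLAPACK kernel and sizing logic:

#include <cuda_runtime.h>
#include <cooperative_groups.h>
#include <algorithm>
#include <cstdint>

namespace cg = cooperative_groups;

// Placeholder kernel: scales a vector, then synchronizes the whole grid.
// grid.sync() is only legal when the kernel is launched cooperatively.
__global__ void scale_kernel(int64_t n, double* x, double alpha) {
    cg::grid_group grid = cg::this_grid();
    for (int64_t i = grid.thread_rank(); i < n; i += grid.size())
        x[i] *= alpha;
    grid.sync();
}

void launch_scale(int64_t n, double* d_x, double alpha, cudaStream_t stream) {
    constexpr int threadsPerBlock = 128;
    int device = 0, num_sms = 0, blocks_per_sm = 0;
    cudaGetDevice(&device);
    cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, device);
    cudaOccupancyMaxActiveBlocksPerMultiprocessor(&blocks_per_sm, scale_kernel,
                                                  threadsPerBlock, 0);
    // A cooperative grid must fit on the device all at once, so the grid is
    // the smaller of "blocks needed" and "blocks that can be co-resident".
    int64_t needed   = (n + threadsPerBlock - 1) / threadsPerBlock;
    int64_t resident = static_cast<int64_t>(num_sms) * blocks_per_sm;
    dim3 dimBlock(threadsPerBlock, 1, 1);
    dim3 dimGrid(static_cast<unsigned>(std::max<int64_t>(1, std::min(needed, resident))), 1, 1);
    // Cooperative launches take the kernel arguments as an array of pointers.
    void* kernelArgs[] = {(void*)&n, (void*)&d_x, (void*)&alpha};
    cudaLaunchCooperativeKernel((void*)scale_kernel, dimGrid, dimBlock,
                                kernelArgs, 0, stream);
}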