From 2e06ab95babc0867fda210c8acfd54adbb05443c Mon Sep 17 00:00:00 2001 From: viclafargue Date: Tue, 14 Jan 2025 15:04:36 +0000 Subject: [PATCH 01/25] Fix sparse utilities --- .../raft/cluster/detail/connectivities.cuh | 2 +- cpp/include/raft/cluster/detail/mst.cuh | 4 +- cpp/include/raft/sparse/convert/csr.cuh | 10 ++--- .../raft/sparse/convert/detail/csr.cuh | 8 ++-- cpp/include/raft/sparse/detail/coo.cuh | 14 +++---- cpp/include/raft/sparse/detail/utils.h | 6 +-- cpp/include/raft/sparse/linalg/degree.cuh | 6 +-- .../raft/sparse/linalg/detail/degree.cuh | 20 +++++----- .../raft/sparse/linalg/detail/norm.cuh | 22 +++++------ .../raft/sparse/linalg/detail/symmetrize.cuh | 18 ++++----- cpp/include/raft/sparse/linalg/norm.cuh | 8 ++-- .../neighbors/detail/cross_component_nn.cuh | 2 +- cpp/include/raft/sparse/op/detail/filter.cuh | 37 +++++++++---------- cpp/include/raft/sparse/op/detail/sort.h | 2 +- cpp/include/raft/sparse/op/sort.cuh | 2 +- .../raft/spatial/knn/detail/ball_cover.cuh | 2 +- cpp/test/sparse/solver/lanczos.cu | 4 +- cpp/test/sparse/symmetrize.cu | 4 +- 18 files changed, 85 insertions(+), 86 deletions(-) diff --git a/cpp/include/raft/cluster/detail/connectivities.cuh b/cpp/include/raft/cluster/detail/connectivities.cuh index c527b754c3..fdb8af9171 100644 --- a/cpp/include/raft/cluster/detail/connectivities.cuh +++ b/cpp/include/raft/cluster/detail/connectivities.cuh @@ -95,7 +95,7 @@ struct distance_graph_impl indptr2(m + 1, stream); raft::sparse::convert::sorted_coo_to_csr( - connected_edges.rows(), connected_edges.nnz, indptr2.data(), m + 1, stream); + connected_edges.rows(), (value_idx)connected_edges.nnz, indptr2.data(), m + 1, stream); // On the second call, we hand the MST the original colors // and the new set of edges and let it restart the optimization process @@ -204,4 +204,4 @@ void build_sorted_mst( raft::copy_async(mst_weight, mst_coo.weights.data(), mst_coo.n_edges, stream); } -}; // namespace raft::cluster::detail \ No newline at end of file +}; // namespace raft::cluster::detail diff --git a/cpp/include/raft/sparse/convert/csr.cuh b/cpp/include/raft/sparse/convert/csr.cuh index 081192ed44..cbe20d8d3a 100644 --- a/cpp/include/raft/sparse/convert/csr.cuh +++ b/cpp/include/raft/sparse/convert/csr.cuh @@ -52,8 +52,8 @@ void coo_to_csr(raft::resources const& handle, * @param m: number of rows in dense matrix * @param stream: cuda stream to use */ -template -void sorted_coo_to_csr(const T* rows, int nnz, T* row_ind, int m, cudaStream_t stream) +template +void sorted_coo_to_csr(const T* rows, outT nnz, outT* row_ind, int m, cudaStream_t stream) { detail::sorted_coo_to_csr(rows, nnz, row_ind, m, stream); } @@ -65,10 +65,10 @@ void sorted_coo_to_csr(const T* rows, int nnz, T* row_ind, int m, cudaStream_t s * @param row_ind: output row indices array * @param stream: cuda stream to use */ -template -void sorted_coo_to_csr(COO* coo, int* row_ind, cudaStream_t stream) +template +void sorted_coo_to_csr(COO* coo, outT* row_ind, cudaStream_t stream) { - detail::sorted_coo_to_csr(coo->rows(), coo->nnz, row_ind, coo->n_rows, stream); + detail::sorted_coo_to_csr(coo->rows(), (outT)coo->nnz, row_ind, coo->n_rows, stream); } /** diff --git a/cpp/include/raft/sparse/convert/detail/csr.cuh b/cpp/include/raft/sparse/convert/detail/csr.cuh index a5d7de9a07..79ed0d36ff 100644 --- a/cpp/include/raft/sparse/convert/detail/csr.cuh +++ b/cpp/include/raft/sparse/convert/detail/csr.cuh @@ -84,18 +84,18 @@ void coo_to_csr(raft::resources const& handle, * @param m: number of rows in dense matrix * @param stream: cuda stream to use */ -template -void sorted_coo_to_csr(const T* rows, int nnz, T* row_ind, int m, cudaStream_t stream) +template +void sorted_coo_to_csr(const T* rows, outT nnz, outT* row_ind, int m, cudaStream_t stream) { rmm::device_uvector row_counts(m, stream); - RAFT_CUDA_TRY(cudaMemsetAsync(row_counts.data(), 0, m * sizeof(T), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(row_counts.data(), 0, (uint64_t)m * sizeof(T), stream)); linalg::coo_degree(rows, nnz, row_counts.data(), stream); // create csr compressed row index from row counts thrust::device_ptr row_counts_d = thrust::device_pointer_cast(row_counts.data()); - thrust::device_ptr c_ind_d = thrust::device_pointer_cast(row_ind); + thrust::device_ptr c_ind_d = thrust::device_pointer_cast(row_ind); exclusive_scan(rmm::exec_policy(stream), row_counts_d, row_counts_d + m, c_ind_d); } diff --git a/cpp/include/raft/sparse/detail/coo.cuh b/cpp/include/raft/sparse/detail/coo.cuh index 9a38c11a07..0d2a06bf03 100644 --- a/cpp/include/raft/sparse/detail/coo.cuh +++ b/cpp/include/raft/sparse/detail/coo.cuh @@ -52,7 +52,7 @@ class COO { rmm::device_uvector vals_arr; public: - Index_Type nnz; + uint64_t nnz; Index_Type n_rows; Index_Type n_cols; @@ -75,7 +75,7 @@ class COO { COO(rmm::device_uvector& rows, rmm::device_uvector& cols, rmm::device_uvector& vals, - Index_Type nnz, + uint64_t nnz, Index_Type n_rows = 0, Index_Type n_cols = 0) : rows_arr(rows), cols_arr(cols), vals_arr(vals), nnz(nnz), n_rows(n_rows), n_cols(n_cols) @@ -90,7 +90,7 @@ class COO { * @param init: initialize arrays with zeros */ COO(cudaStream_t stream, - Index_Type nnz, + uint64_t nnz, Index_Type n_rows = 0, Index_Type n_cols = 0, bool init = true) @@ -121,7 +121,7 @@ class COO { */ bool validate_size() const { - if (this->nnz < 0 || n_rows < 0 || n_cols < 0) return false; + if (this->nnz <= 0 || n_rows <= 0 || n_cols <= 0) return false; return true; } @@ -204,7 +204,7 @@ class COO { * @param init: should values be initialized to 0? * @param stream: CUDA stream to use */ - void allocate(Index_Type nnz, bool init, cudaStream_t stream) + void allocate(uint64_t nnz, bool init, cudaStream_t stream) { this->allocate(nnz, 0, init, stream); } @@ -216,7 +216,7 @@ class COO { * @param init: should values be initialized to 0? * @param stream: CUDA stream to use */ - void allocate(Index_Type nnz, Index_Type size, bool init, cudaStream_t stream) + void allocate(uint64_t nnz, Index_Type size, bool init, cudaStream_t stream) { this->allocate(nnz, size, size, init, stream); } @@ -230,7 +230,7 @@ class COO { * @param stream: stream to use for init */ void allocate( - Index_Type nnz, Index_Type n_rows, Index_Type n_cols, bool init, cudaStream_t stream) + uint64_t nnz, Index_Type n_rows, Index_Type n_cols, bool init, cudaStream_t stream) { this->n_rows = n_rows; this->n_cols = n_cols; diff --git a/cpp/include/raft/sparse/detail/utils.h b/cpp/include/raft/sparse/detail/utils.h index 3eed74f3b4..16db863a2d 100644 --- a/cpp/include/raft/sparse/detail/utils.h +++ b/cpp/include/raft/sparse/detail/utils.h @@ -103,10 +103,10 @@ void iota_fill(value_idx* indices, value_idx nrows, value_idx ncols, cudaStream_ iota_fill_block_kernel<<>>(indices, ncols); } -template -__device__ int get_stop_idx(T row, T m, T nnz, const T* ind) +template +__device__ indT get_stop_idx(T row, T m, indT nnz, const indT* ind) { - int stop_idx = 0; + indT stop_idx = 0; if (row < (m - 1)) stop_idx = ind[row + 1]; else diff --git a/cpp/include/raft/sparse/linalg/degree.cuh b/cpp/include/raft/sparse/linalg/degree.cuh index 57c9b986b4..5da4c9a30d 100644 --- a/cpp/include/raft/sparse/linalg/degree.cuh +++ b/cpp/include/raft/sparse/linalg/degree.cuh @@ -34,7 +34,7 @@ namespace linalg { * @param stream: cuda stream to use */ template -void coo_degree(const T* rows, int nnz, T* results, cudaStream_t stream) +void coo_degree(const T* rows, uint64_t nnz, T* results, cudaStream_t stream) { detail::coo_degree<64, T>(rows, nnz, results, stream); } @@ -66,7 +66,7 @@ void coo_degree(COO* in, int* results, cudaStream_t stream) */ template void coo_degree_scalar( - const int* rows, const T* vals, int nnz, T scalar, int* results, cudaStream_t stream = 0) + const int* rows, const T* vals, uint64_t nnz, T scalar, int* results, cudaStream_t stream = 0) { detail::coo_degree_scalar<64>(rows, vals, nnz, scalar, results, stream); } @@ -120,4 +120,4 @@ void coo_degree_nz(COO* in, int* results, cudaStream_t stream) }; // end NAMESPACE sparse }; // end NAMESPACE raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/linalg/detail/degree.cuh b/cpp/include/raft/sparse/linalg/detail/degree.cuh index df31192cf7..6338b8eb00 100644 --- a/cpp/include/raft/sparse/linalg/detail/degree.cuh +++ b/cpp/include/raft/sparse/linalg/detail/degree.cuh @@ -39,10 +39,10 @@ namespace detail { * @param nnz the size of the rows array * @param results array to place results */ -template -RAFT_KERNEL coo_degree_kernel(const T* rows, int nnz, T* results) +template +RAFT_KERNEL coo_degree_kernel(const T* rows, uint64_t nnz, T* results) { - int row = (blockIdx.x * TPB_X) + threadIdx.x; + uint64_t row = (blockIdx.x * TPB_X) + threadIdx.x; if (row < nnz) { atomicAdd(results + rows[row], (T)1); } } @@ -54,8 +54,8 @@ RAFT_KERNEL coo_degree_kernel(const T* rows, int nnz, T* results) * @param results: output result array * @param stream: cuda stream to use */ -template -void coo_degree(const T* rows, int nnz, T* results, cudaStream_t stream) +template +void coo_degree(const T* rows, uint64_t nnz, T* results, cudaStream_t stream) { dim3 grid_rc(raft::ceildiv(nnz, TPB_X), 1, 1); dim3 blk_rc(TPB_X, 1, 1); @@ -71,11 +71,11 @@ RAFT_KERNEL coo_degree_nz_kernel(const int* rows, const T* vals, int nnz, int* r if (row < nnz && vals[row] != 0.0) { raft::myAtomicAdd(results + rows[row], 1); } } -template +template RAFT_KERNEL coo_degree_scalar_kernel( - const int* rows, const T* vals, int nnz, T scalar, int* results) + const int* rows, const T* vals, uint64_t nnz, T scalar, int* results) { - int row = (blockIdx.x * TPB_X) + threadIdx.x; + uint64_t row = (blockIdx.x * TPB_X) + threadIdx.x; if (row < nnz && vals[row] != scalar) { raft::myAtomicAdd(results + rows[row], 1); } } @@ -90,9 +90,9 @@ RAFT_KERNEL coo_degree_scalar_kernel( * @param results: output row counts * @param stream: cuda stream to use */ -template +template void coo_degree_scalar( - const int* rows, const T* vals, int nnz, T scalar, int* results, cudaStream_t stream = 0) + const int* rows, const T* vals, uint64_t nnz, T scalar, int* results, cudaStream_t stream = 0) { dim3 grid_rc(raft::ceildiv(nnz, TPB_X), 1, 1); dim3 blk_rc(TPB_X, 1, 1); diff --git a/cpp/include/raft/sparse/linalg/detail/norm.cuh b/cpp/include/raft/sparse/linalg/detail/norm.cuh index 3702111f83..5dc5b2db41 100644 --- a/cpp/include/raft/sparse/linalg/detail/norm.cuh +++ b/cpp/include/raft/sparse/linalg/detail/norm.cuh @@ -40,14 +40,14 @@ namespace sparse { namespace linalg { namespace detail { -template +template RAFT_KERNEL csr_row_normalize_l1_kernel( // @TODO: This can be done much more parallel by // having threads in a warp compute the sum in parallel // over each row and then divide the values in parallel. - const int* ia, // csr row ex_scan (sorted by row) + const indT* ia, // csr row ex_scan (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros + indT nnz, // array of values and number of non-zeros int m, // num rows in csr T* result) { // output array @@ -57,19 +57,19 @@ RAFT_KERNEL csr_row_normalize_l1_kernel( // sum all vals_arr for row and divide each val by sum if (row < m) { - int start_idx = ia[row]; - int stop_idx = 0; + indT start_idx = ia[row]; + indT stop_idx = 0; if (row < m - 1) { stop_idx = ia[row + 1]; } else stop_idx = nnz; T sum = T(0.0); - for (int j = start_idx; j < stop_idx; j++) { + for (indT j = start_idx; j < stop_idx; j++) { sum = sum + fabs(vals[j]); } - for (int j = start_idx; j < stop_idx; j++) { + for (indT j = start_idx; j < stop_idx; j++) { if (sum != 0.0) { T val = vals[j]; result[j] = val / sum; @@ -90,10 +90,10 @@ RAFT_KERNEL csr_row_normalize_l1_kernel( * @param result: l1 normalized data array * @param stream: cuda stream to use */ -template -void csr_row_normalize_l1(const int* ia, // csr row ex_scan (sorted by row) +template +void csr_row_normalize_l1(const indT* ia, // csr row ex_scan (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros + indT nnz, // array of values and number of non-zeros int m, // num rows in csr T* result, cudaStream_t stream) @@ -232,4 +232,4 @@ void rowNormCsrCaller(const IdxType* ia, }; // end NAMESPACE detail }; // end NAMESPACE linalg }; // end NAMESPACE sparse -}; // end NAMESPACE raft \ No newline at end of file +}; // end NAMESPACE raft diff --git a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh index d343bcbf66..af0b6feb37 100644 --- a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh +++ b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh @@ -48,7 +48,7 @@ namespace detail { // TODO: value_idx param needs to be used for this once FAISS is updated to use float32 // for indices so that the index types can be uniform template -RAFT_KERNEL coo_symmetrize_kernel(int* row_ind, +RAFT_KERNEL coo_symmetrize_kernel(uint64_t* row_ind, int* rows, int* cols, T* vals, @@ -56,17 +56,17 @@ RAFT_KERNEL coo_symmetrize_kernel(int* row_ind, int* ocols, T* ovals, int n, - int cnnz, + uint64_t cnnz, Lambda reduction_op) { int row = (blockIdx.x * TPB_X) + threadIdx.x; if (row < n) { - int start_idx = row_ind[row]; // each thread processes one row - int stop_idx = get_stop_idx(row, n, cnnz, row_ind); + uint64_t start_idx = row_ind[row]; // each thread processes one row + uint64_t stop_idx = get_stop_idx(row, n, cnnz, row_ind); int row_nnz = 0; - int out_start_idx = start_idx * 2; + uint64_t out_start_idx = start_idx * 2; for (int idx = 0; idx < stop_idx - start_idx; idx++) { int cur_row = rows[idx + start_idx]; @@ -74,13 +74,13 @@ RAFT_KERNEL coo_symmetrize_kernel(int* row_ind, T cur_val = vals[idx + start_idx]; int lookup_row = cur_col; - int t_start = row_ind[lookup_row]; // Start at - int t_stop = get_stop_idx(lookup_row, n, cnnz, row_ind); + uint64_t t_start = row_ind[lookup_row]; // Start at + uint64_t t_stop = get_stop_idx(lookup_row, n, cnnz, row_ind); T transpose = 0.0; bool found_match = false; - for (int t_idx = t_start; t_idx < t_stop; t_idx++) { + for (uint64_t t_idx = t_start; t_idx < t_stop; t_idx++) { // If we find a match, let's get out of the loop. We won't // need to modify the transposed value, since that will be // done in a different thread. @@ -142,7 +142,7 @@ void coo_symmetrize(COO* in, ASSERT(!out->validate_mem(), "Expecting unallocated COO for output"); - rmm::device_uvector in_row_ind(in->n_rows, stream); + rmm::device_uvector in_row_ind(in->n_rows, stream); convert::sorted_coo_to_csr(in, in_row_ind.data(), stream); diff --git a/cpp/include/raft/sparse/linalg/norm.cuh b/cpp/include/raft/sparse/linalg/norm.cuh index 43dd182fe5..ea325c3472 100644 --- a/cpp/include/raft/sparse/linalg/norm.cuh +++ b/cpp/include/raft/sparse/linalg/norm.cuh @@ -36,10 +36,10 @@ namespace linalg { * @param result: l1 normalized data array * @param stream: cuda stream to use */ -template -void csr_row_normalize_l1(const int* ia, // csr row ex_scan (sorted by row) +template +void csr_row_normalize_l1(const indT* ia, // csr row ex_scan (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros + indT nnz, // array of values and number of non-zeros int m, // num rows in csr T* result, cudaStream_t stream) @@ -104,4 +104,4 @@ void rowNormCsr(raft::resources const& handle, }; // end NAMESPACE sparse }; // end NAMESPACE raft -#endif \ No newline at end of file +#endif diff --git a/cpp/include/raft/sparse/neighbors/detail/cross_component_nn.cuh b/cpp/include/raft/sparse/neighbors/detail/cross_component_nn.cuh index a47d5a6f34..1247b91d2e 100644 --- a/cpp/include/raft/sparse/neighbors/detail/cross_component_nn.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/cross_component_nn.cuh @@ -242,7 +242,7 @@ void perform_1nn(raft::resources const& handle, // the color components. auto colors_group_idxs = raft::make_device_vector(handle, n_components + 1); raft::sparse::convert::sorted_coo_to_csr( - colors, n_rows, colors_group_idxs.data_handle(), n_components + 1, stream); + colors, (value_idx)n_rows, colors_group_idxs.data_handle(), n_components + 1, stream); auto group_idxs_view = raft::make_device_vector_view( colors_group_idxs.data_handle() + 1, n_components); diff --git a/cpp/include/raft/sparse/op/detail/filter.cuh b/cpp/include/raft/sparse/op/detail/filter.cuh index 3df85e6871..700885240c 100644 --- a/cpp/include/raft/sparse/op/detail/filter.cuh +++ b/cpp/include/raft/sparse/op/detail/filter.cuh @@ -42,27 +42,27 @@ namespace sparse { namespace op { namespace detail { -template +template RAFT_KERNEL coo_remove_scalar_kernel(const int* rows, const int* cols, const T* vals, - int nnz, + uint64_t nnz, int* crows, int* ccols, T* cvals, - int* ex_scan, - int* cur_ex_scan, + uint64_t* ex_scan, + uint64_t* cur_ex_scan, int m, T scalar) { int row = (blockIdx.x * TPB_X) + threadIdx.x; if (row < m) { - int start = cur_ex_scan[row]; - int stop = get_stop_idx(row, m, nnz, cur_ex_scan); - int cur_out_idx = ex_scan[row]; + uint64_t start = cur_ex_scan[row]; + uint64_t stop = get_stop_idx(row, m, nnz, cur_ex_scan); + uint64_t cur_out_idx = ex_scan[row]; - for (int idx = start; idx < stop; idx++) { + for (uint64_t idx = start; idx < stop; idx++) { if (vals[idx] != scalar) { crows[cur_out_idx] = rows[idx]; ccols[cur_out_idx] = cols[idx]; @@ -94,7 +94,7 @@ template void coo_remove_scalar(const int* rows, const int* cols, const T* vals, - int nnz, + uint64_t nnz, int* crows, int* ccols, T* cvals, @@ -104,19 +104,19 @@ void coo_remove_scalar(const int* rows, int n, cudaStream_t stream) { - rmm::device_uvector ex_scan(n, stream); - rmm::device_uvector cur_ex_scan(n, stream); + rmm::device_uvector ex_scan(n, stream); + rmm::device_uvector cur_ex_scan(n, stream); - RAFT_CUDA_TRY(cudaMemsetAsync(ex_scan.data(), 0, n * sizeof(int), stream)); - RAFT_CUDA_TRY(cudaMemsetAsync(cur_ex_scan.data(), 0, n * sizeof(int), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(ex_scan.data(), 0, (uint64_t)n * sizeof(int), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(cur_ex_scan.data(), 0, (uint64_t)n * sizeof(int), stream)); thrust::device_ptr dev_cnnz = thrust::device_pointer_cast(cnnz); - thrust::device_ptr dev_ex_scan = thrust::device_pointer_cast(ex_scan.data()); + thrust::device_ptr dev_ex_scan = thrust::device_pointer_cast(ex_scan.data()); thrust::exclusive_scan(rmm::exec_policy(stream), dev_cnnz, dev_cnnz + n, dev_ex_scan); RAFT_CUDA_TRY(cudaPeekAtLastError()); thrust::device_ptr dev_cur_cnnz = thrust::device_pointer_cast(cur_cnnz); - thrust::device_ptr dev_cur_ex_scan = thrust::device_pointer_cast(cur_ex_scan.data()); + thrust::device_ptr dev_cur_ex_scan = thrust::device_pointer_cast(cur_ex_scan.data()); thrust::exclusive_scan(rmm::exec_policy(stream), dev_cur_cnnz, dev_cur_cnnz + n, dev_cur_ex_scan); RAFT_CUDA_TRY(cudaPeekAtLastError()); @@ -151,8 +151,8 @@ void coo_remove_scalar(COO* in, COO* out, T scalar, cudaStream_t stream) rmm::device_uvector row_count_nz(in->n_rows, stream); rmm::device_uvector row_count(in->n_rows, stream); - RAFT_CUDA_TRY(cudaMemsetAsync(row_count_nz.data(), 0, in->n_rows * sizeof(int), stream)); - RAFT_CUDA_TRY(cudaMemsetAsync(row_count.data(), 0, in->n_rows * sizeof(int), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(row_count_nz.data(), 0, (uint64_t)in->n_rows * sizeof(int), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(row_count.data(), 0, (uint64_t)in->n_rows * sizeof(int), stream)); linalg::coo_degree(in->rows(), in->nnz, row_count.data(), stream); RAFT_CUDA_TRY(cudaPeekAtLastError()); @@ -161,8 +161,7 @@ void coo_remove_scalar(COO* in, COO* out, T scalar, cudaStream_t stream) RAFT_CUDA_TRY(cudaPeekAtLastError()); thrust::device_ptr d_row_count_nz = thrust::device_pointer_cast(row_count_nz.data()); - int out_nnz = - thrust::reduce(rmm::exec_policy(stream), d_row_count_nz, d_row_count_nz + in->n_rows); + uint64_t out_nnz = thrust::reduce(rmm::exec_policy(stream), d_row_count_nz, d_row_count_nz + in->n_rows, (uint64_t)0); out->allocate(out_nnz, in->n_rows, in->n_cols, false, stream); diff --git a/cpp/include/raft/sparse/op/detail/sort.h b/cpp/include/raft/sparse/op/detail/sort.h index 02287c2367..7d09ebeddc 100644 --- a/cpp/include/raft/sparse/op/detail/sort.h +++ b/cpp/include/raft/sparse/op/detail/sort.h @@ -69,7 +69,7 @@ struct TupleComp { * @param stream: cuda stream to use */ template -void coo_sort(IdxT m, IdxT n, IdxT nnz, IdxT* rows, IdxT* cols, T* vals, cudaStream_t stream) +void coo_sort(IdxT m, IdxT n, uint64_t nnz, IdxT* rows, IdxT* cols, T* vals, cudaStream_t stream) { auto coo_indices = thrust::make_zip_iterator(thrust::make_tuple(rows, cols)); diff --git a/cpp/include/raft/sparse/op/sort.cuh b/cpp/include/raft/sparse/op/sort.cuh index 5b8a792429..35b5fd9f31 100644 --- a/cpp/include/raft/sparse/op/sort.cuh +++ b/cpp/include/raft/sparse/op/sort.cuh @@ -38,7 +38,7 @@ namespace op { * @param stream: cuda stream to use */ template -void coo_sort(IdxT m, IdxT n, IdxT nnz, IdxT* rows, IdxT* cols, T* vals, cudaStream_t stream) +void coo_sort(IdxT m, IdxT n, uint64_t nnz, IdxT* rows, IdxT* cols, T* vals, cudaStream_t stream) { detail::coo_sort(m, n, nnz, rows, cols, vals, stream); } diff --git a/cpp/include/raft/spatial/knn/detail/ball_cover.cuh b/cpp/include/raft/spatial/knn/detail/ball_cover.cuh index c4ca2ffa61..f436d5c740 100644 --- a/cpp/include/raft/spatial/knn/detail/ball_cover.cuh +++ b/cpp/include/raft/spatial/knn/detail/ball_cover.cuh @@ -161,7 +161,7 @@ void construct_landmark_1nn(raft::resources const& handle, // convert to CSR for fast lookup raft::sparse::convert::sorted_coo_to_csr(R_1nn_inds.data(), - index.m, + (value_idx)index.m, index.get_R_indptr().data_handle(), index.n_landmarks + 1, resource::get_cuda_stream(handle)); diff --git a/cpp/test/sparse/solver/lanczos.cu b/cpp/test/sparse/solver/lanczos.cu index 128ab73747..23b3a7ff99 100644 --- a/cpp/test/sparse/solver/lanczos.cu +++ b/cpp/test/sparse/solver/lanczos.cu @@ -173,7 +173,7 @@ class rmat_lanczos_tests raft::make_device_vector(handle, symmetric_coo.n_rows + 1); raft::sparse::convert::sorted_coo_to_csr(symmetric_coo.rows(), - symmetric_coo.nnz, + (IndexType)symmetric_coo.nnz, row_indices.data_handle(), symmetric_coo.n_rows + 1, stream); @@ -198,7 +198,7 @@ class rmat_lanczos_tests symmetric_coo.cols(), symmetric_coo.vals(), symmetric_coo.n_rows, - symmetric_coo.nnz}; + (IndexType)symmetric_coo.nnz}; raft::sparse::solver::lanczos_solver_config config{ n_components, params.maxiter, params.restartiter, params.tol, rng.seed}; diff --git a/cpp/test/sparse/symmetrize.cu b/cpp/test/sparse/symmetrize.cu index e1a74dc40b..c3a03a942c 100644 --- a/cpp/test/sparse/symmetrize.cu +++ b/cpp/test/sparse/symmetrize.cu @@ -109,8 +109,8 @@ class SparseSymmetrizeTest rmm::device_scalar sum(stream); sum.set_value_to_zero_async(stream); - assert_symmetry<<>>( - out.rows(), out.cols(), out.vals(), out.nnz, sum.data()); + assert_symmetry<<>>( + out.rows(), out.cols(), out.vals(), (value_idx)out.nnz, sum.data()); sum_h = sum.value(stream); resource::sync_stream(handle, stream); From 3cd388069cbbec7dee5bd19435cea03a9265a79d Mon Sep 17 00:00:00 2001 From: viclafargue Date: Tue, 21 Jan 2025 17:04:30 +0000 Subject: [PATCH 02/25] Additionnal fixes --- .../raft/sparse/convert/detail/csr.cuh | 7 +- cpp/include/raft/sparse/linalg/degree.cuh | 16 ++-- .../raft/sparse/linalg/detail/degree.cuh | 20 ++-- .../raft/sparse/linalg/detail/spectral.cuh | 4 +- .../raft/sparse/linalg/detail/symmetrize.cuh | 14 +-- cpp/include/raft/sparse/linalg/spectral.cuh | 2 +- cpp/include/raft/sparse/op/detail/filter.cuh | 95 ++++++++----------- .../raft/sparse/solver/detail/lanczos.cuh | 6 +- 8 files changed, 76 insertions(+), 88 deletions(-) diff --git a/cpp/include/raft/sparse/convert/detail/csr.cuh b/cpp/include/raft/sparse/convert/detail/csr.cuh index 79ed0d36ff..aac5b26786 100644 --- a/cpp/include/raft/sparse/convert/detail/csr.cuh +++ b/cpp/include/raft/sparse/convert/detail/csr.cuh @@ -87,14 +87,13 @@ void coo_to_csr(raft::resources const& handle, template void sorted_coo_to_csr(const T* rows, outT nnz, outT* row_ind, int m, cudaStream_t stream) { - rmm::device_uvector row_counts(m, stream); - - RAFT_CUDA_TRY(cudaMemsetAsync(row_counts.data(), 0, (uint64_t)m * sizeof(T), stream)); + rmm::device_uvector row_counts(m, stream); + RAFT_CUDA_TRY(cudaMemsetAsync(row_counts.data(), 0, (uint64_t)m * sizeof(outT), stream)); linalg::coo_degree(rows, nnz, row_counts.data(), stream); // create csr compressed row index from row counts - thrust::device_ptr row_counts_d = thrust::device_pointer_cast(row_counts.data()); + thrust::device_ptr row_counts_d = thrust::device_pointer_cast(row_counts.data()); thrust::device_ptr c_ind_d = thrust::device_pointer_cast(row_ind); exclusive_scan(rmm::exec_policy(stream), row_counts_d, row_counts_d + m, c_ind_d); } diff --git a/cpp/include/raft/sparse/linalg/degree.cuh b/cpp/include/raft/sparse/linalg/degree.cuh index 5da4c9a30d..a154c2357c 100644 --- a/cpp/include/raft/sparse/linalg/degree.cuh +++ b/cpp/include/raft/sparse/linalg/degree.cuh @@ -33,8 +33,8 @@ namespace linalg { * @param results: output result array * @param stream: cuda stream to use */ -template -void coo_degree(const T* rows, uint64_t nnz, T* results, cudaStream_t stream) +template +void coo_degree(const T* rows, uint64_t nnz, outT* results, cudaStream_t stream) { detail::coo_degree<64, T>(rows, nnz, results, stream); } @@ -47,8 +47,8 @@ void coo_degree(const T* rows, uint64_t nnz, T* results, cudaStream_t stream) * @param results: output array with row counts (size=in->n_rows) * @param stream: cuda stream to use */ -template -void coo_degree(COO* in, int* results, cudaStream_t stream) +template +void coo_degree(COO* in, outT* results, cudaStream_t stream) { coo_degree(in->rows(), in->nnz, results, stream); } @@ -64,9 +64,9 @@ void coo_degree(COO* in, int* results, cudaStream_t stream) * @param results: output row counts * @param stream: cuda stream to use */ -template +template void coo_degree_scalar( - const int* rows, const T* vals, uint64_t nnz, T scalar, int* results, cudaStream_t stream = 0) + const int* rows, const T* vals, uint64_t nnz, T scalar, outT* results, cudaStream_t stream = 0) { detail::coo_degree_scalar<64>(rows, vals, nnz, scalar, results, stream); } @@ -80,8 +80,8 @@ void coo_degree_scalar( * @param results: output row counts * @param stream: cuda stream to use */ -template -void coo_degree_scalar(COO* in, T scalar, int* results, cudaStream_t stream) +template +void coo_degree_scalar(COO* in, T scalar, outT* results, cudaStream_t stream) { coo_degree_scalar(in->rows(), in->vals(), in->nnz, scalar, results, stream); } diff --git a/cpp/include/raft/sparse/linalg/detail/degree.cuh b/cpp/include/raft/sparse/linalg/detail/degree.cuh index 6338b8eb00..d7f72e9b6c 100644 --- a/cpp/include/raft/sparse/linalg/detail/degree.cuh +++ b/cpp/include/raft/sparse/linalg/detail/degree.cuh @@ -39,11 +39,11 @@ namespace detail { * @param nnz the size of the rows array * @param results array to place results */ -template -RAFT_KERNEL coo_degree_kernel(const T* rows, uint64_t nnz, T* results) +template +RAFT_KERNEL coo_degree_kernel(const T* rows, uint64_t nnz, outT* results) { uint64_t row = (blockIdx.x * TPB_X) + threadIdx.x; - if (row < nnz) { atomicAdd(results + rows[row], (T)1); } + if (row < nnz) { atomicAdd(results + rows[row], (outT)1); } } /** @@ -54,8 +54,8 @@ RAFT_KERNEL coo_degree_kernel(const T* rows, uint64_t nnz, T* results) * @param results: output result array * @param stream: cuda stream to use */ -template -void coo_degree(const T* rows, uint64_t nnz, T* results, cudaStream_t stream) +template +void coo_degree(const T* rows, uint64_t nnz, outT* results, cudaStream_t stream) { dim3 grid_rc(raft::ceildiv(nnz, TPB_X), 1, 1); dim3 blk_rc(TPB_X, 1, 1); @@ -71,12 +71,12 @@ RAFT_KERNEL coo_degree_nz_kernel(const int* rows, const T* vals, int nnz, int* r if (row < nnz && vals[row] != 0.0) { raft::myAtomicAdd(results + rows[row], 1); } } -template +template RAFT_KERNEL coo_degree_scalar_kernel( - const int* rows, const T* vals, uint64_t nnz, T scalar, int* results) + const int* rows, const T* vals, uint64_t nnz, T scalar, outT* results) { uint64_t row = (blockIdx.x * TPB_X) + threadIdx.x; - if (row < nnz && vals[row] != scalar) { raft::myAtomicAdd(results + rows[row], 1); } + if (row < nnz && vals[row] != scalar) { raft::myAtomicAdd((outT*)results + rows[row], (outT)1); } } /** @@ -90,9 +90,9 @@ RAFT_KERNEL coo_degree_scalar_kernel( * @param results: output row counts * @param stream: cuda stream to use */ -template +template void coo_degree_scalar( - const int* rows, const T* vals, uint64_t nnz, T scalar, int* results, cudaStream_t stream = 0) + const int* rows, const T* vals, uint64_t nnz, T scalar, outT* results, cudaStream_t stream = 0) { dim3 grid_rc(raft::ceildiv(nnz, TPB_X), 1, 1); dim3 blk_rc(TPB_X, 1, 1); diff --git a/cpp/include/raft/sparse/linalg/detail/spectral.cuh b/cpp/include/raft/sparse/linalg/detail/spectral.cuh index a1642d1455..ad2a567294 100644 --- a/cpp/include/raft/sparse/linalg/detail/spectral.cuh +++ b/cpp/include/raft/sparse/linalg/detail/spectral.cuh @@ -35,8 +35,8 @@ void fit_embedding(raft::resources const& handle, int* rows, int* cols, T* vals, - int nnz, - int n, + uint64_t nnz, + uint64_t n, int n_components, T* out, unsigned long long seed = 1234567) diff --git a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh index af0b6feb37..550a3e9505 100644 --- a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh +++ b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh @@ -65,13 +65,13 @@ RAFT_KERNEL coo_symmetrize_kernel(uint64_t* row_ind, uint64_t start_idx = row_ind[row]; // each thread processes one row uint64_t stop_idx = get_stop_idx(row, n, cnnz, row_ind); - int row_nnz = 0; + uint64_t row_nnz = 0; uint64_t out_start_idx = start_idx * 2; - for (int idx = 0; idx < stop_idx - start_idx; idx++) { - int cur_row = rows[idx + start_idx]; - int cur_col = cols[idx + start_idx]; - T cur_val = vals[idx + start_idx]; + for (uint64_t idx = 0; idx < stop_idx - start_idx; idx++) { + int cur_row = rows[start_idx + idx]; + int cur_col = cols[start_idx + idx]; + T cur_val = vals[start_idx + idx]; int lookup_row = cur_col; uint64_t t_start = row_ind[lookup_row]; // Start at @@ -104,7 +104,7 @@ RAFT_KERNEL coo_symmetrize_kernel(uint64_t* row_ind, // Note that if we did find a match, we don't need to // compute `res` on it here because it will be computed // in a different thread. - if (!found_match && vals[idx] != 0.0) { + if (!found_match && cur_val != 0.0) { orows[out_start_idx + row_nnz] = cur_col; ocols[out_start_idx + row_nnz] = cur_row; ovals[out_start_idx + row_nnz] = res; @@ -158,7 +158,7 @@ void coo_symmetrize(COO* in, in->n_rows, in->nnz, reduction_op); - RAFT_CUDA_TRY(cudaPeekAtLastError()); + RAFT_CUDA_TRY(cudaPeekAtLastError()); } /** diff --git a/cpp/include/raft/sparse/linalg/spectral.cuh b/cpp/include/raft/sparse/linalg/spectral.cuh index 4c0595bf91..eb45ab013e 100644 --- a/cpp/include/raft/sparse/linalg/spectral.cuh +++ b/cpp/include/raft/sparse/linalg/spectral.cuh @@ -28,7 +28,7 @@ void fit_embedding(raft::resources const& handle, int* rows, int* cols, T* vals, - int nnz, + uint64_t nnz, int n, int n_components, T* out, diff --git a/cpp/include/raft/sparse/op/detail/filter.cuh b/cpp/include/raft/sparse/op/detail/filter.cuh index 700885240c..94c314bb67 100644 --- a/cpp/include/raft/sparse/op/detail/filter.cuh +++ b/cpp/include/raft/sparse/op/detail/filter.cuh @@ -43,34 +43,36 @@ namespace op { namespace detail { template -RAFT_KERNEL coo_remove_scalar_kernel(const int* rows, - const int* cols, - const T* vals, +RAFT_KERNEL coo_remove_scalar_kernel(const int* in_rows, + const int* in_cols, + const T* in_vals, uint64_t nnz, - int* crows, - int* ccols, - T* cvals, - uint64_t* ex_scan, - uint64_t* cur_ex_scan, - int m, - T scalar) + int* out_rows, + int* out_cols, + T* out_vals, + uint64_t* row_indices, + int* rows_lenght_acc, + T scalar, + int n_rows) { - int row = (blockIdx.x * TPB_X) + threadIdx.x; - - if (row < m) { - uint64_t start = cur_ex_scan[row]; - uint64_t stop = get_stop_idx(row, m, nnz, cur_ex_scan); - uint64_t cur_out_idx = ex_scan[row]; - - for (uint64_t idx = start; idx < stop; idx++) { - if (vals[idx] != scalar) { - crows[cur_out_idx] = rows[idx]; - ccols[cur_out_idx] = cols[idx]; - cvals[cur_out_idx] = vals[idx]; - ++cur_out_idx; - } - } - } + uint64_t in_idx = (blockIdx.x * TPB_X) + threadIdx.x; + + if (in_idx >= nnz) + return; + + int val = in_vals[in_idx]; + + if (val == scalar) + return; + + int row = in_rows[in_idx]; + + uint64_t row_start_index = row_indices[row]; + uint64_t out_idx = row_start_index + atomicAdd(rows_lenght_acc + row, 1); + + out_rows[out_idx] = row; + out_cols[out_idx] = in_cols[in_idx]; + out_vals[out_idx] = val; } /** @@ -90,7 +92,7 @@ RAFT_KERNEL coo_remove_scalar_kernel(const int* rows, * @param d_alloc device allocator for temporary buffers * @param stream: cuda stream to use */ -template +template void coo_remove_scalar(const int* rows, const int* cols, const T* vals, @@ -98,29 +100,23 @@ void coo_remove_scalar(const int* rows, int* crows, int* ccols, T* cvals, - int* cnnz, - int* cur_cnnz, + uint64_t* cnnz, T scalar, int n, cudaStream_t stream) { rmm::device_uvector ex_scan(n, stream); - rmm::device_uvector cur_ex_scan(n, stream); - - RAFT_CUDA_TRY(cudaMemsetAsync(ex_scan.data(), 0, (uint64_t)n * sizeof(int), stream)); - RAFT_CUDA_TRY(cudaMemsetAsync(cur_ex_scan.data(), 0, (uint64_t)n * sizeof(int), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(ex_scan.data(), 0, (uint64_t)n * sizeof(uint64_t), stream)); - thrust::device_ptr dev_cnnz = thrust::device_pointer_cast(cnnz); + thrust::device_ptr dev_cnnz = thrust::device_pointer_cast(cnnz); thrust::device_ptr dev_ex_scan = thrust::device_pointer_cast(ex_scan.data()); thrust::exclusive_scan(rmm::exec_policy(stream), dev_cnnz, dev_cnnz + n, dev_ex_scan); RAFT_CUDA_TRY(cudaPeekAtLastError()); - thrust::device_ptr dev_cur_cnnz = thrust::device_pointer_cast(cur_cnnz); - thrust::device_ptr dev_cur_ex_scan = thrust::device_pointer_cast(cur_ex_scan.data()); - thrust::exclusive_scan(rmm::exec_policy(stream), dev_cur_cnnz, dev_cur_cnnz + n, dev_cur_ex_scan); - RAFT_CUDA_TRY(cudaPeekAtLastError()); + rmm::device_uvector rows_length_acc(n, stream); + RAFT_CUDA_TRY(cudaMemsetAsync(rows_length_acc.data(), 0, (uint64_t)n * sizeof(int), stream)); - dim3 grid(raft::ceildiv(n, TPB_X), 1, 1); + dim3 grid(raft::ceildiv(nnz, TPB_X), 1, 1); dim3 blk(TPB_X, 1, 1); coo_remove_scalar_kernel<<>>(rows, @@ -131,9 +127,9 @@ void coo_remove_scalar(const int* rows, ccols, cvals, dev_ex_scan.get(), - dev_cur_ex_scan.get(), - n, - scalar); + rows_length_acc.data(), + scalar, + n); RAFT_CUDA_TRY(cudaPeekAtLastError()); } @@ -148,19 +144,13 @@ void coo_remove_scalar(const int* rows, template void coo_remove_scalar(COO* in, COO* out, T scalar, cudaStream_t stream) { - rmm::device_uvector row_count_nz(in->n_rows, stream); - rmm::device_uvector row_count(in->n_rows, stream); - - RAFT_CUDA_TRY(cudaMemsetAsync(row_count_nz.data(), 0, (uint64_t)in->n_rows * sizeof(int), stream)); - RAFT_CUDA_TRY(cudaMemsetAsync(row_count.data(), 0, (uint64_t)in->n_rows * sizeof(int), stream)); - - linalg::coo_degree(in->rows(), in->nnz, row_count.data(), stream); - RAFT_CUDA_TRY(cudaPeekAtLastError()); + rmm::device_uvector row_count_nz(in->n_rows, stream); + RAFT_CUDA_TRY(cudaMemsetAsync(row_count_nz.data(), 0, (uint64_t)in->n_rows * sizeof(uint64_t), stream)); - linalg::coo_degree_scalar(in->rows(), in->vals(), in->nnz, scalar, row_count_nz.data(), stream); + linalg::coo_degree_scalar(in->rows(), in->vals(), in->nnz, scalar, (unsigned long long int*)row_count_nz.data(), stream); RAFT_CUDA_TRY(cudaPeekAtLastError()); - thrust::device_ptr d_row_count_nz = thrust::device_pointer_cast(row_count_nz.data()); + thrust::device_ptr d_row_count_nz = thrust::device_pointer_cast(row_count_nz.data()); uint64_t out_nnz = thrust::reduce(rmm::exec_policy(stream), d_row_count_nz, d_row_count_nz + in->n_rows, (uint64_t)0); out->allocate(out_nnz, in->n_rows, in->n_cols, false, stream); @@ -173,7 +163,6 @@ void coo_remove_scalar(COO* in, COO* out, T scalar, cudaStream_t stream) out->cols(), out->vals(), row_count_nz.data(), - row_count.data(), scalar, in->n_rows, stream); diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index ddfa01731a..e46ea77eaa 100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh @@ -151,7 +151,7 @@ int performLanczosIteration(raft::resources const& handle, RAFT_EXPECTS(A != nullptr, "Null matrix pointer."); - index_type_t n = A->nrows_; + uint64_t n = A->nrows_; // ------------------------------------------------------- // Compute second Lanczos vector @@ -814,7 +814,7 @@ int computeSmallestEigenvectors( constexpr value_type_t zero = 0; // Matrix dimension - index_type_t n = A->nrows_; + uint64_t n = A->nrows_; // Shift for implicit restart value_type_t shiftUpper; @@ -1160,7 +1160,7 @@ int computeLargestEigenvectors( constexpr value_type_t zero = 0; // Matrix dimension - index_type_t n = A->nrows_; + uint64_t n = A->nrows_; // Lanczos iteration counters index_type_t maxIter_curr = restartIter; // Maximum size of Lanczos system From 347ea4e45582b5854e9b07d4450f8a23c6c4648d Mon Sep 17 00:00:00 2001 From: viclafargue Date: Wed, 22 Jan 2025 15:27:08 +0000 Subject: [PATCH 03/25] Revert coo_remove_scalar_kernel code --- cpp/include/raft/sparse/op/detail/filter.cuh | 78 +++++++++++--------- 1 file changed, 44 insertions(+), 34 deletions(-) diff --git a/cpp/include/raft/sparse/op/detail/filter.cuh b/cpp/include/raft/sparse/op/detail/filter.cuh index 94c314bb67..e4a18d48bb 100644 --- a/cpp/include/raft/sparse/op/detail/filter.cuh +++ b/cpp/include/raft/sparse/op/detail/filter.cuh @@ -42,37 +42,35 @@ namespace sparse { namespace op { namespace detail { -template -RAFT_KERNEL coo_remove_scalar_kernel(const int* in_rows, - const int* in_cols, - const T* in_vals, +template +RAFT_KERNEL coo_remove_scalar_kernel(const int* rows, + const int* cols, + const T* vals, uint64_t nnz, int* out_rows, int* out_cols, T* out_vals, - uint64_t* row_indices, - int* rows_lenght_acc, - T scalar, - int n_rows) + uint64_t* ex_scan, + uint64_t* cur_ex_scan, + int m, + T scalar) { - uint64_t in_idx = (blockIdx.x * TPB_X) + threadIdx.x; - - if (in_idx >= nnz) - return; - - int val = in_vals[in_idx]; - - if (val == scalar) - return; - - int row = in_rows[in_idx]; - - uint64_t row_start_index = row_indices[row]; - uint64_t out_idx = row_start_index + atomicAdd(rows_lenght_acc + row, 1); - - out_rows[out_idx] = row; - out_cols[out_idx] = in_cols[in_idx]; - out_vals[out_idx] = val; + int row = (blockIdx.x * TPB_X) + threadIdx.x; + + if (row < m) { + uint64_t start = cur_ex_scan[row]; + uint64_t stop = get_stop_idx(row, m, nnz, cur_ex_scan); + uint64_t cur_out_idx = ex_scan[row]; + + for (uint64_t idx = start; idx < stop; idx++) { + if (vals[idx] != scalar) { + out_rows[cur_out_idx] = rows[idx]; + out_cols[cur_out_idx] = cols[idx]; + out_vals[cur_out_idx] = vals[idx]; + ++cur_out_idx; + } + } + } } /** @@ -92,7 +90,7 @@ RAFT_KERNEL coo_remove_scalar_kernel(const int* in_rows, * @param d_alloc device allocator for temporary buffers * @param stream: cuda stream to use */ -template +template void coo_remove_scalar(const int* rows, const int* cols, const T* vals, @@ -101,22 +99,27 @@ void coo_remove_scalar(const int* rows, int* ccols, T* cvals, uint64_t* cnnz, + uint64_t* cur_cnnz, T scalar, int n, cudaStream_t stream) { rmm::device_uvector ex_scan(n, stream); + rmm::device_uvector cur_ex_scan(n, stream); RAFT_CUDA_TRY(cudaMemsetAsync(ex_scan.data(), 0, (uint64_t)n * sizeof(uint64_t), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(cur_ex_scan.data(), 0, (uint64_t)n * sizeof(uint64_t), stream)); thrust::device_ptr dev_cnnz = thrust::device_pointer_cast(cnnz); thrust::device_ptr dev_ex_scan = thrust::device_pointer_cast(ex_scan.data()); thrust::exclusive_scan(rmm::exec_policy(stream), dev_cnnz, dev_cnnz + n, dev_ex_scan); RAFT_CUDA_TRY(cudaPeekAtLastError()); - rmm::device_uvector rows_length_acc(n, stream); - RAFT_CUDA_TRY(cudaMemsetAsync(rows_length_acc.data(), 0, (uint64_t)n * sizeof(int), stream)); + thrust::device_ptr dev_cur_cnnz = thrust::device_pointer_cast(cur_cnnz); + thrust::device_ptr dev_cur_ex_scan = thrust::device_pointer_cast(cur_ex_scan.data()); + thrust::exclusive_scan(rmm::exec_policy(stream), dev_cur_cnnz, dev_cur_cnnz + n, dev_cur_ex_scan); + RAFT_CUDA_TRY(cudaPeekAtLastError()); - dim3 grid(raft::ceildiv(nnz, TPB_X), 1, 1); + dim3 grid(raft::ceildiv(n, TPB_X), 1, 1); dim3 blk(TPB_X, 1, 1); coo_remove_scalar_kernel<<>>(rows, @@ -127,9 +130,9 @@ void coo_remove_scalar(const int* rows, ccols, cvals, dev_ex_scan.get(), - rows_length_acc.data(), - scalar, - n); + dev_cur_ex_scan.get(), + n, + scalar); RAFT_CUDA_TRY(cudaPeekAtLastError()); } @@ -145,13 +148,19 @@ template void coo_remove_scalar(COO* in, COO* out, T scalar, cudaStream_t stream) { rmm::device_uvector row_count_nz(in->n_rows, stream); + rmm::device_uvector row_count(in->n_rows, stream); + RAFT_CUDA_TRY(cudaMemsetAsync(row_count_nz.data(), 0, (uint64_t)in->n_rows * sizeof(uint64_t), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(row_count.data(), 0, (uint64_t)in->n_rows * sizeof(uint64_t), stream)); + + linalg::coo_degree(in->rows(), in->nnz, row_count.data(), stream); + RAFT_CUDA_TRY(cudaPeekAtLastError()); linalg::coo_degree_scalar(in->rows(), in->vals(), in->nnz, scalar, (unsigned long long int*)row_count_nz.data(), stream); RAFT_CUDA_TRY(cudaPeekAtLastError()); thrust::device_ptr d_row_count_nz = thrust::device_pointer_cast(row_count_nz.data()); - uint64_t out_nnz = thrust::reduce(rmm::exec_policy(stream), d_row_count_nz, d_row_count_nz + in->n_rows, (uint64_t)0); + uint64_t out_nnz = thrust::reduce(rmm::exec_policy(stream), d_row_count_nz, d_row_count_nz + in->n_rows); out->allocate(out_nnz, in->n_rows, in->n_cols, false, stream); @@ -163,6 +172,7 @@ void coo_remove_scalar(COO* in, COO* out, T scalar, cudaStream_t stream) out->cols(), out->vals(), row_count_nz.data(), + row_count.data(), scalar, in->n_rows, stream); From 92e4af162f4e3bf80500a45a83d17dbbaec1e6ac Mon Sep 17 00:00:00 2001 From: viclafargue Date: Sat, 25 Jan 2025 10:04:02 +0100 Subject: [PATCH 04/25] check style --- cpp/include/raft/sparse/detail/coo.cuh | 3 +-- cpp/include/raft/sparse/linalg/detail/norm.cuh | 4 ++-- cpp/include/raft/sparse/linalg/detail/symmetrize.cuh | 8 ++++---- cpp/include/raft/sparse/linalg/norm.cuh | 2 +- cpp/include/raft/sparse/op/detail/filter.cuh | 12 ++++++++---- cpp/include/raft/sparse/solver/detail/lanczos.cuh | 6 ++++-- 6 files changed, 20 insertions(+), 15 deletions(-) diff --git a/cpp/include/raft/sparse/detail/coo.cuh b/cpp/include/raft/sparse/detail/coo.cuh index 0d2a06bf03..c41af76243 100644 --- a/cpp/include/raft/sparse/detail/coo.cuh +++ b/cpp/include/raft/sparse/detail/coo.cuh @@ -229,8 +229,7 @@ class COO { * @param init: should values be initialized to 0? * @param stream: stream to use for init */ - void allocate( - uint64_t nnz, Index_Type n_rows, Index_Type n_cols, bool init, cudaStream_t stream) + void allocate(uint64_t nnz, Index_Type n_rows, Index_Type n_cols, bool init, cudaStream_t stream) { this->n_rows = n_rows; this->n_cols = n_cols; diff --git a/cpp/include/raft/sparse/linalg/detail/norm.cuh b/cpp/include/raft/sparse/linalg/detail/norm.cuh index 5dc5b2db41..0390fb5f69 100644 --- a/cpp/include/raft/sparse/linalg/detail/norm.cuh +++ b/cpp/include/raft/sparse/linalg/detail/norm.cuh @@ -48,7 +48,7 @@ RAFT_KERNEL csr_row_normalize_l1_kernel( const indT* ia, // csr row ex_scan (sorted by row) const T* vals, indT nnz, // array of values and number of non-zeros - int m, // num rows in csr + int m, // num rows in csr T* result) { // output array @@ -94,7 +94,7 @@ template void csr_row_normalize_l1(const indT* ia, // csr row ex_scan (sorted by row) const T* vals, indT nnz, // array of values and number of non-zeros - int m, // num rows in csr + int m, // num rows in csr T* result, cudaStream_t stream) { // output array diff --git a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh index 550a3e9505..a59fba8f8e 100644 --- a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh +++ b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh @@ -73,9 +73,9 @@ RAFT_KERNEL coo_symmetrize_kernel(uint64_t* row_ind, int cur_col = cols[start_idx + idx]; T cur_val = vals[start_idx + idx]; - int lookup_row = cur_col; - uint64_t t_start = row_ind[lookup_row]; // Start at - uint64_t t_stop = get_stop_idx(lookup_row, n, cnnz, row_ind); + int lookup_row = cur_col; + uint64_t t_start = row_ind[lookup_row]; // Start at + uint64_t t_stop = get_stop_idx(lookup_row, n, cnnz, row_ind); T transpose = 0.0; @@ -158,7 +158,7 @@ void coo_symmetrize(COO* in, in->n_rows, in->nnz, reduction_op); - RAFT_CUDA_TRY(cudaPeekAtLastError()); + RAFT_CUDA_TRY(cudaPeekAtLastError()); } /** diff --git a/cpp/include/raft/sparse/linalg/norm.cuh b/cpp/include/raft/sparse/linalg/norm.cuh index ea325c3472..f90d088ee6 100644 --- a/cpp/include/raft/sparse/linalg/norm.cuh +++ b/cpp/include/raft/sparse/linalg/norm.cuh @@ -40,7 +40,7 @@ template void csr_row_normalize_l1(const indT* ia, // csr row ex_scan (sorted by row) const T* vals, indT nnz, // array of values and number of non-zeros - int m, // num rows in csr + int m, // num rows in csr T* result, cudaStream_t stream) { // output array diff --git a/cpp/include/raft/sparse/op/detail/filter.cuh b/cpp/include/raft/sparse/op/detail/filter.cuh index e4a18d48bb..41af0315d8 100644 --- a/cpp/include/raft/sparse/op/detail/filter.cuh +++ b/cpp/include/raft/sparse/op/detail/filter.cuh @@ -150,17 +150,21 @@ void coo_remove_scalar(COO* in, COO* out, T scalar, cudaStream_t stream) rmm::device_uvector row_count_nz(in->n_rows, stream); rmm::device_uvector row_count(in->n_rows, stream); - RAFT_CUDA_TRY(cudaMemsetAsync(row_count_nz.data(), 0, (uint64_t)in->n_rows * sizeof(uint64_t), stream)); - RAFT_CUDA_TRY(cudaMemsetAsync(row_count.data(), 0, (uint64_t)in->n_rows * sizeof(uint64_t), stream)); + RAFT_CUDA_TRY( + cudaMemsetAsync(row_count_nz.data(), 0, (uint64_t)in->n_rows * sizeof(uint64_t), stream)); + RAFT_CUDA_TRY( + cudaMemsetAsync(row_count.data(), 0, (uint64_t)in->n_rows * sizeof(uint64_t), stream)); linalg::coo_degree(in->rows(), in->nnz, row_count.data(), stream); RAFT_CUDA_TRY(cudaPeekAtLastError()); - linalg::coo_degree_scalar(in->rows(), in->vals(), in->nnz, scalar, (unsigned long long int*)row_count_nz.data(), stream); + linalg::coo_degree_scalar( + in->rows(), in->vals(), in->nnz, scalar, (unsigned long long int*)row_count_nz.data(), stream); RAFT_CUDA_TRY(cudaPeekAtLastError()); thrust::device_ptr d_row_count_nz = thrust::device_pointer_cast(row_count_nz.data()); - uint64_t out_nnz = thrust::reduce(rmm::exec_policy(stream), d_row_count_nz, d_row_count_nz + in->n_rows); + uint64_t out_nnz = + thrust::reduce(rmm::exec_policy(stream), d_row_count_nz, d_row_count_nz + in->n_rows); out->allocate(out_nnz, in->n_rows, in->n_cols, false, stream); diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index e46ea77eaa..c9749624e1 100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh @@ -836,7 +836,8 @@ int computeSmallestEigenvectors( // ------------------------------------------------------- // Check that parameters are valid // ------------------------------------------------------- - RAFT_EXPECTS(nEigVecs > 0 && nEigVecs <= n, "Invalid number of eigenvectors."); + RAFT_EXPECTS(nEigVecs > 0 && static_cast(nEigVecs) <= n, + "Invalid number of eigenvectors."); RAFT_EXPECTS(restartIter > 0, "Invalid restartIter."); RAFT_EXPECTS(tol > 0, "Invalid tolerance."); RAFT_EXPECTS(maxIter >= nEigVecs, "Invalid maxIter."); @@ -1183,7 +1184,8 @@ int computeLargestEigenvectors( // ------------------------------------------------------- // Check that parameters are valid // ------------------------------------------------------- - RAFT_EXPECTS(nEigVecs > 0 && nEigVecs <= n, "Invalid number of eigenvectors."); + RAFT_EXPECTS(nEigVecs > 0 && static_cast(nEigVecs) <= n, + "Invalid number of eigenvectors."); RAFT_EXPECTS(restartIter > 0, "Invalid restartIter."); RAFT_EXPECTS(tol > 0, "Invalid tolerance."); RAFT_EXPECTS(maxIter >= nEigVecs, "Invalid maxIter."); From dc72acce3334214f7a3aec361e0fdf6f1bf6a2fe Mon Sep 17 00:00:00 2001 From: viclafargue Date: Mon, 27 Jan 2025 16:39:05 +0000 Subject: [PATCH 05/25] compilation fix --- cpp/include/raft/linalg/detail/lanczos.cuh | 2 +- cpp/include/raft/sparse/linalg/detail/spectral.cuh | 5 ++--- cpp/include/raft/spectral/detail/matrix_wrappers.hpp | 6 +++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/cpp/include/raft/linalg/detail/lanczos.cuh b/cpp/include/raft/linalg/detail/lanczos.cuh index 134ef3ef36..6498e9c6b7 100644 --- a/cpp/include/raft/linalg/detail/lanczos.cuh +++ b/cpp/include/raft/linalg/detail/lanczos.cuh @@ -1004,7 +1004,7 @@ int computeSmallestEigenvectors( index_type_t n = A.nrows_; // Check that parameters are valid - RAFT_EXPECTS(nEigVecs > 0 && nEigVecs <= n, "Invalid number of eigenvectors."); + RAFT_EXPECTS(nEigVecs > 0 && static_cast(nEigVecs) <= n, "Invalid number of eigenvectors."); RAFT_EXPECTS(restartIter > 0, "Invalid restartIter."); RAFT_EXPECTS(tol > 0, "Invalid tolerance."); RAFT_EXPECTS(maxIter >= nEigVecs, "Invalid maxIter."); diff --git a/cpp/include/raft/sparse/linalg/detail/spectral.cuh b/cpp/include/raft/sparse/linalg/detail/spectral.cuh index ad2a567294..334906c114 100644 --- a/cpp/include/raft/sparse/linalg/detail/spectral.cuh +++ b/cpp/include/raft/sparse/linalg/detail/spectral.cuh @@ -45,8 +45,7 @@ void fit_embedding(raft::resources const& handle, rmm::device_uvector src_offsets(n + 1, stream); rmm::device_uvector dst_cols(nnz, stream); rmm::device_uvector dst_vals(nnz, stream); - convert::coo_to_csr( - handle, rows, cols, vals, nnz, n, src_offsets.data(), dst_cols.data(), dst_vals.data()); + convert::coo_to_csr(handle, rows, cols, vals, static_cast(nnz), static_cast(n), src_offsets.data(), dst_cols.data(), dst_vals.data()); rmm::device_uvector eigVals(n_components + 1, stream); rmm::device_uvector eigVecs(n * (n_components + 1), stream); @@ -65,7 +64,7 @@ void fit_embedding(raft::resources const& handle, value_type* vs = dst_vals.data(); raft::spectral::matrix::sparse_matrix_t const r_csr_m{ - handle, ro, ci, vs, n, nnz}; + handle, ro, ci, vs, static_cast(n), nnz}; index_type neigvs = n_components + 1; index_type maxiter = 4000; // default reset value (when set to 0); diff --git a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp index db8a5dc9ef..91094b4be8 100644 --- a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp +++ b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp @@ -142,7 +142,7 @@ struct sparse_matrix_t { value_type const* values, index_type const nrows, index_type const ncols, - index_type const nnz) + uint64_t const nnz) : handle_(raft_handle), row_offsets_(row_offsets), col_indices_(col_indices), @@ -158,7 +158,7 @@ struct sparse_matrix_t { index_type const* col_indices, value_type const* values, index_type const nrows, - index_type const nnz) + uint64_t const nnz) : handle_(raft_handle), row_offsets_(row_offsets), col_indices_(col_indices), @@ -311,7 +311,7 @@ struct sparse_matrix_t { value_type const* values_; index_type const nrows_; index_type const ncols_; - index_type const nnz_; + uint64_t const nnz_; }; template From 8e58d293a4a6ff14efbad9cb4dcf496336c996b9 Mon Sep 17 00:00:00 2001 From: viclafargue Date: Mon, 27 Jan 2025 18:54:45 +0000 Subject: [PATCH 06/25] fix tests --- cpp/tests/linalg/eigen_solvers.cu | 2 +- cpp/tests/sparse/solver/lanczos.cu | 4 ++-- cpp/tests/sparse/spectral_matrix.cu | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/tests/linalg/eigen_solvers.cu b/cpp/tests/linalg/eigen_solvers.cu index cf75ff89bf..e18b3a9f29 100644 --- a/cpp/tests/linalg/eigen_solvers.cu +++ b/cpp/tests/linalg/eigen_solvers.cu @@ -46,7 +46,7 @@ TEST(Raft, EigenSolvers) index_type nnz = 0; index_type nrows = 0; - sparse_matrix_t sm1{h, ro, ci, vs, nrows, nnz}; + sparse_matrix_t sm1{h, ro, ci, vs, nrows, static_cast(nnz)}; ASSERT_EQ(nullptr, sm1.row_offsets_); index_type neigvs{10}; diff --git a/cpp/tests/sparse/solver/lanczos.cu b/cpp/tests/sparse/solver/lanczos.cu index 23b3a7ff99..2f5de6870d 100644 --- a/cpp/tests/sparse/solver/lanczos.cu +++ b/cpp/tests/sparse/solver/lanczos.cu @@ -173,7 +173,7 @@ class rmat_lanczos_tests raft::make_device_vector(handle, symmetric_coo.n_rows + 1); raft::sparse::convert::sorted_coo_to_csr(symmetric_coo.rows(), - (IndexType)symmetric_coo.nnz, + static_cast(symmetric_coo.nnz), row_indices.data_handle(), symmetric_coo.n_rows + 1, stream); @@ -198,7 +198,7 @@ class rmat_lanczos_tests symmetric_coo.cols(), symmetric_coo.vals(), symmetric_coo.n_rows, - (IndexType)symmetric_coo.nnz}; + symmetric_coo.nnz}; raft::sparse::solver::lanczos_solver_config config{ n_components, params.maxiter, params.restartiter, params.tol, rng.seed}; diff --git a/cpp/tests/sparse/spectral_matrix.cu b/cpp/tests/sparse/spectral_matrix.cu index 52f7eff10e..f37af32fb9 100644 --- a/cpp/tests/sparse/spectral_matrix.cu +++ b/cpp/tests/sparse/spectral_matrix.cu @@ -55,7 +55,7 @@ TEST(Raft, SpectralMatrices) value_type* vs{nullptr}; index_type nnz = 0; index_type nrows = 0; - sparse_matrix_t sm1{h, ro, ci, vs, nrows, nnz}; + sparse_matrix_t sm1{h, ro, ci, vs, nrows, static_cast(nnz)}; sparse_matrix_t sm2{h, csr_v}; ASSERT_EQ(nullptr, sm1.row_offsets_); ASSERT_EQ(nullptr, sm2.row_offsets_); From c75cd87992f45ec36bd5d596646159c2e2b5cc17 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 28 Jan 2025 13:06:36 -0600 Subject: [PATCH 07/25] FIX style fixes --- cpp/include/raft/linalg/detail/lanczos.cuh | 3 ++- cpp/include/raft/sparse/linalg/detail/spectral.cuh | 10 +++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/linalg/detail/lanczos.cuh b/cpp/include/raft/linalg/detail/lanczos.cuh index 6498e9c6b7..d4d3325f77 100644 --- a/cpp/include/raft/linalg/detail/lanczos.cuh +++ b/cpp/include/raft/linalg/detail/lanczos.cuh @@ -1004,7 +1004,8 @@ int computeSmallestEigenvectors( index_type_t n = A.nrows_; // Check that parameters are valid - RAFT_EXPECTS(nEigVecs > 0 && static_cast(nEigVecs) <= n, "Invalid number of eigenvectors."); + RAFT_EXPECTS(nEigVecs > 0 && static_cast(nEigVecs) <= n, + "Invalid number of eigenvectors."); RAFT_EXPECTS(restartIter > 0, "Invalid restartIter."); RAFT_EXPECTS(tol > 0, "Invalid tolerance."); RAFT_EXPECTS(maxIter >= nEigVecs, "Invalid maxIter."); diff --git a/cpp/include/raft/sparse/linalg/detail/spectral.cuh b/cpp/include/raft/sparse/linalg/detail/spectral.cuh index 334906c114..90e698f0b9 100644 --- a/cpp/include/raft/sparse/linalg/detail/spectral.cuh +++ b/cpp/include/raft/sparse/linalg/detail/spectral.cuh @@ -45,7 +45,15 @@ void fit_embedding(raft::resources const& handle, rmm::device_uvector src_offsets(n + 1, stream); rmm::device_uvector dst_cols(nnz, stream); rmm::device_uvector dst_vals(nnz, stream); - convert::coo_to_csr(handle, rows, cols, vals, static_cast(nnz), static_cast(n), src_offsets.data(), dst_cols.data(), dst_vals.data()); + convert::coo_to_csr(handle, + rows, + cols, + vals, + static_cast(nnz), + static_cast(n), + src_offsets.data(), + dst_cols.data(), + dst_vals.data()); rmm::device_uvector eigVals(n_components + 1, stream); rmm::device_uvector eigVecs(n * (n_components + 1), stream); From 694d371661c4ac97955ad2e6324a3ae9734f2aa9 Mon Sep 17 00:00:00 2001 From: viclafargue Date: Wed, 29 Jan 2025 19:53:22 +0000 Subject: [PATCH 08/25] changes so far --- .../raft/cluster/detail/connectivities.cuh | 2 +- cpp/include/raft/cluster/detail/mst.cuh | 2 +- cpp/include/raft/sparse/convert/csr.cuh | 8 +-- .../raft/sparse/convert/detail/csr.cuh | 2 +- cpp/include/raft/sparse/detail/coo.cuh | 67 ++++++++++++------- cpp/include/raft/sparse/linalg/degree.cuh | 16 ++--- .../neighbors/detail/cross_component_nn.cuh | 2 +- .../raft/spatial/knn/detail/ball_cover.cuh | 2 +- .../raft/spectral/detail/matrix_wrappers.hpp | 8 +-- cpp/tests/linalg/eigen_solvers.cu | 2 +- cpp/tests/sparse/solver/lanczos.cu | 2 +- cpp/tests/sparse/spectral_matrix.cu | 2 +- cpp/tests/sparse/symmetrize.cu | 2 +- 13 files changed, 69 insertions(+), 48 deletions(-) diff --git a/cpp/include/raft/cluster/detail/connectivities.cuh b/cpp/include/raft/cluster/detail/connectivities.cuh index fdb8af9171..c527b754c3 100644 --- a/cpp/include/raft/cluster/detail/connectivities.cuh +++ b/cpp/include/raft/cluster/detail/connectivities.cuh @@ -95,7 +95,7 @@ struct distance_graph_impl indptr2(m + 1, stream); raft::sparse::convert::sorted_coo_to_csr( - connected_edges.rows(), (value_idx)connected_edges.nnz, indptr2.data(), m + 1, stream); + connected_edges.rows(), connected_edges.nnz, indptr2.data(), m + 1, stream); // On the second call, we hand the MST the original colors // and the new set of edges and let it restart the optimization process diff --git a/cpp/include/raft/sparse/convert/csr.cuh b/cpp/include/raft/sparse/convert/csr.cuh index 188928bbaa..1478f1c42e 100644 --- a/cpp/include/raft/sparse/convert/csr.cuh +++ b/cpp/include/raft/sparse/convert/csr.cuh @@ -54,10 +54,10 @@ void coo_to_csr(raft::resources const& handle, * @param m: number of rows in dense matrix * @param stream: cuda stream to use */ -template -void sorted_coo_to_csr(const T* rows, outT nnz, outT* row_ind, int m, cudaStream_t stream) +template +void sorted_coo_to_csr(const T* rows, nnz_type nnz, outT* row_ind, int m, cudaStream_t stream) { - detail::sorted_coo_to_csr(rows, nnz, row_ind, m, stream); + detail::sorted_coo_to_csr(rows, (uint64_t)nnz, row_ind, m, stream); } /** @@ -70,7 +70,7 @@ void sorted_coo_to_csr(const T* rows, outT nnz, outT* row_ind, int m, cudaStream template void sorted_coo_to_csr(COO* coo, outT* row_ind, cudaStream_t stream) { - detail::sorted_coo_to_csr(coo->rows(), (outT)coo->nnz, row_ind, coo->n_rows, stream); + detail::sorted_coo_to_csr(coo->rows(), coo->safe_nnz, row_ind, coo->n_rows, stream); } /** diff --git a/cpp/include/raft/sparse/convert/detail/csr.cuh b/cpp/include/raft/sparse/convert/detail/csr.cuh index aac5b26786..44d6f21f9d 100644 --- a/cpp/include/raft/sparse/convert/detail/csr.cuh +++ b/cpp/include/raft/sparse/convert/detail/csr.cuh @@ -85,7 +85,7 @@ void coo_to_csr(raft::resources const& handle, * @param stream: cuda stream to use */ template -void sorted_coo_to_csr(const T* rows, outT nnz, outT* row_ind, int m, cudaStream_t stream) +void sorted_coo_to_csr(const T* rows, uint64_t nnz, outT* row_ind, int m, cudaStream_t stream) { rmm::device_uvector row_counts(m, stream); RAFT_CUDA_TRY(cudaMemsetAsync(row_counts.data(), 0, (uint64_t)m * sizeof(outT), stream)); diff --git a/cpp/include/raft/sparse/detail/coo.cuh b/cpp/include/raft/sparse/detail/coo.cuh index c41af76243..38cd33cb3e 100644 --- a/cpp/include/raft/sparse/detail/coo.cuh +++ b/cpp/include/raft/sparse/detail/coo.cuh @@ -52,7 +52,8 @@ class COO { rmm::device_uvector vals_arr; public: - uint64_t nnz; + Index_Type nnz; + uint64_t safe_nnz; Index_Type n_rows; Index_Type n_cols; @@ -60,7 +61,13 @@ class COO { * @param stream: CUDA stream to use */ COO(cudaStream_t stream) - : rows_arr(0, stream), cols_arr(0, stream), vals_arr(0, stream), nnz(0), n_rows(0), n_cols(0) + : rows_arr(0, stream), + cols_arr(0, stream), + vals_arr(0, stream), + nnz(0), + n_rows(0), + n_cols(0), + safe_nnz(0) { } @@ -72,13 +79,20 @@ class COO { * @param n_rows: number of rows in the dense matrix * @param n_cols: number of cols in the dense matrix */ + template COO(rmm::device_uvector& rows, rmm::device_uvector& cols, rmm::device_uvector& vals, - uint64_t nnz, + SafeNNZ_Type nnz, Index_Type n_rows = 0, Index_Type n_cols = 0) - : rows_arr(rows), cols_arr(cols), vals_arr(vals), nnz(nnz), n_rows(n_rows), n_cols(n_cols) + : rows_arr(rows), + cols_arr(cols), + vals_arr(vals), + nnz((Index_Type)nnz), + n_rows(n_rows), + n_cols(n_cols), + safe_nnz((uint64_t)nnz) { } @@ -89,8 +103,9 @@ class COO { * @param n_cols: number of cols in the dense matrix * @param init: initialize arrays with zeros */ + template COO(cudaStream_t stream, - uint64_t nnz, + SafeNNZ_Type nnz, Index_Type n_rows = 0, Index_Type n_cols = 0, bool init = true) @@ -99,7 +114,8 @@ class COO { vals_arr(nnz, stream), nnz(nnz), n_rows(n_rows), - n_cols(n_cols) + n_cols(n_cols), + safe_nnz(nnz) { if (init) init_arrays(stream); } @@ -107,10 +123,10 @@ class COO { void init_arrays(cudaStream_t stream) { RAFT_CUDA_TRY( - cudaMemsetAsync(this->rows_arr.data(), 0, this->nnz * sizeof(Index_Type), stream)); + cudaMemsetAsync(this->rows_arr.data(), 0, this->safe_nnz * sizeof(Index_Type), stream)); RAFT_CUDA_TRY( - cudaMemsetAsync(this->cols_arr.data(), 0, this->nnz * sizeof(Index_Type), stream)); - RAFT_CUDA_TRY(cudaMemsetAsync(this->vals_arr.data(), 0, this->nnz * sizeof(T), stream)); + cudaMemsetAsync(this->cols_arr.data(), 0, this->safe_nnz * sizeof(Index_Type), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(this->vals_arr.data(), 0, this->safe_nnz * sizeof(T), stream)); } ~COO() {} @@ -121,7 +137,7 @@ class COO { */ bool validate_size() const { - if (this->nnz <= 0 || n_rows <= 0 || n_cols <= 0) return false; + if (this->safe_nnz <= 0 || n_rows <= 0 || n_cols <= 0) return false; return true; } @@ -162,10 +178,10 @@ class COO { cudaStream_t stream; RAFT_CUDA_TRY(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); - out << raft::arr2Str(c.rows_arr.data(), c.nnz, "rows", stream) << std::endl; - out << raft::arr2Str(c.cols_arr.data(), c.nnz, "cols", stream) << std::endl; - out << raft::arr2Str(c.vals_arr.data(), c.nnz, "vals", stream) << std::endl; - out << "nnz=" << c.nnz << std::endl; + out << raft::arr2Str(c.rows_arr.data(), c.safe_nnz, "rows", stream) << std::endl; + out << raft::arr2Str(c.cols_arr.data(), c.safe_nnz, "cols", stream) << std::endl; + out << raft::arr2Str(c.vals_arr.data(), c.safe_nnz, "vals", stream) << std::endl; + out << "nnz=" << c.safe_nnz << std::endl; out << "n_rows=" << c.n_rows << std::endl; out << "n_cols=" << c.n_cols << std::endl; @@ -204,7 +220,8 @@ class COO { * @param init: should values be initialized to 0? * @param stream: CUDA stream to use */ - void allocate(uint64_t nnz, bool init, cudaStream_t stream) + template + void allocate(SafeNNZ_Type nnz, bool init, cudaStream_t stream) { this->allocate(nnz, 0, init, stream); } @@ -216,7 +233,8 @@ class COO { * @param init: should values be initialized to 0? * @param stream: CUDA stream to use */ - void allocate(uint64_t nnz, Index_Type size, bool init, cudaStream_t stream) + template + void allocate(SafeNNZ_Type nnz, Index_Type size, bool init, cudaStream_t stream) { this->allocate(nnz, size, size, init, stream); } @@ -229,15 +247,18 @@ class COO { * @param init: should values be initialized to 0? * @param stream: stream to use for init */ - void allocate(uint64_t nnz, Index_Type n_rows, Index_Type n_cols, bool init, cudaStream_t stream) + template + void allocate( + SafeNNZ_Type nnz, Index_Type n_rows, Index_Type n_cols, bool init, cudaStream_t stream) { - this->n_rows = n_rows; - this->n_cols = n_cols; - this->nnz = nnz; + this->n_rows = n_rows; + this->n_cols = n_cols; + this->nnz = (Index_Type)nnz; + this->safe_nnz = nnz; - this->rows_arr.resize(this->nnz, stream); - this->cols_arr.resize(this->nnz, stream); - this->vals_arr.resize(this->nnz, stream); + this->rows_arr.resize(nnz, stream); + this->cols_arr.resize(nnz, stream); + this->vals_arr.resize(nnz, stream); if (init) init_arrays(stream); } diff --git a/cpp/include/raft/sparse/linalg/degree.cuh b/cpp/include/raft/sparse/linalg/degree.cuh index a154c2357c..9e17b01c75 100644 --- a/cpp/include/raft/sparse/linalg/degree.cuh +++ b/cpp/include/raft/sparse/linalg/degree.cuh @@ -33,10 +33,10 @@ namespace linalg { * @param results: output result array * @param stream: cuda stream to use */ -template -void coo_degree(const T* rows, uint64_t nnz, outT* results, cudaStream_t stream) +template +void coo_degree(const T* rows, nnz_type nnz, outT* results, cudaStream_t stream) { - detail::coo_degree<64, T>(rows, nnz, results, stream); + detail::coo_degree<64, T>(rows, (uint64_t)nnz, results, stream); } /** @@ -50,7 +50,7 @@ void coo_degree(const T* rows, uint64_t nnz, outT* results, cudaStream_t stream) template void coo_degree(COO* in, outT* results, cudaStream_t stream) { - coo_degree(in->rows(), in->nnz, results, stream); + coo_degree(in->rows(), in->safe_nnz, results, stream); } /** @@ -64,11 +64,11 @@ void coo_degree(COO* in, outT* results, cudaStream_t stream) * @param results: output row counts * @param stream: cuda stream to use */ -template +template void coo_degree_scalar( - const int* rows, const T* vals, uint64_t nnz, T scalar, outT* results, cudaStream_t stream = 0) + const int* rows, const T* vals, nnz_type nnz, T scalar, outT* results, cudaStream_t stream = 0) { - detail::coo_degree_scalar<64>(rows, vals, nnz, scalar, results, stream); + detail::coo_degree_scalar<64>(rows, vals, (uint64_t)nnz, scalar, results, stream); } /** @@ -83,7 +83,7 @@ void coo_degree_scalar( template void coo_degree_scalar(COO* in, T scalar, outT* results, cudaStream_t stream) { - coo_degree_scalar(in->rows(), in->vals(), in->nnz, scalar, results, stream); + coo_degree_scalar(in->rows(), in->vals(), in->safe_nnz, scalar, results, stream); } /** diff --git a/cpp/include/raft/sparse/neighbors/detail/cross_component_nn.cuh b/cpp/include/raft/sparse/neighbors/detail/cross_component_nn.cuh index 1247b91d2e..a47d5a6f34 100644 --- a/cpp/include/raft/sparse/neighbors/detail/cross_component_nn.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/cross_component_nn.cuh @@ -242,7 +242,7 @@ void perform_1nn(raft::resources const& handle, // the color components. auto colors_group_idxs = raft::make_device_vector(handle, n_components + 1); raft::sparse::convert::sorted_coo_to_csr( - colors, (value_idx)n_rows, colors_group_idxs.data_handle(), n_components + 1, stream); + colors, n_rows, colors_group_idxs.data_handle(), n_components + 1, stream); auto group_idxs_view = raft::make_device_vector_view( colors_group_idxs.data_handle() + 1, n_components); diff --git a/cpp/include/raft/spatial/knn/detail/ball_cover.cuh b/cpp/include/raft/spatial/knn/detail/ball_cover.cuh index f436d5c740..c4ca2ffa61 100644 --- a/cpp/include/raft/spatial/knn/detail/ball_cover.cuh +++ b/cpp/include/raft/spatial/knn/detail/ball_cover.cuh @@ -161,7 +161,7 @@ void construct_landmark_1nn(raft::resources const& handle, // convert to CSR for fast lookup raft::sparse::convert::sorted_coo_to_csr(R_1nn_inds.data(), - (value_idx)index.m, + index.m, index.get_R_indptr().data_handle(), index.n_landmarks + 1, resource::get_cuda_stream(handle)); diff --git a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp index 91094b4be8..bab5b4515a 100644 --- a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp +++ b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp @@ -134,7 +134,7 @@ class vector_t { const thrust_exec_policy_t thrust_policy; }; -template +template struct sparse_matrix_t { sparse_matrix_t(resources const& raft_handle, index_type const* row_offsets, @@ -142,7 +142,7 @@ struct sparse_matrix_t { value_type const* values, index_type const nrows, index_type const ncols, - uint64_t const nnz) + nnz_type const nnz) : handle_(raft_handle), row_offsets_(row_offsets), col_indices_(col_indices), @@ -158,7 +158,7 @@ struct sparse_matrix_t { index_type const* col_indices, value_type const* values, index_type const nrows, - uint64_t const nnz) + nnz_type const nnz) : handle_(raft_handle), row_offsets_(row_offsets), col_indices_(col_indices), @@ -311,7 +311,7 @@ struct sparse_matrix_t { value_type const* values_; index_type const nrows_; index_type const ncols_; - uint64_t const nnz_; + nnz_type const nnz_; }; template diff --git a/cpp/tests/linalg/eigen_solvers.cu b/cpp/tests/linalg/eigen_solvers.cu index e18b3a9f29..cf75ff89bf 100644 --- a/cpp/tests/linalg/eigen_solvers.cu +++ b/cpp/tests/linalg/eigen_solvers.cu @@ -46,7 +46,7 @@ TEST(Raft, EigenSolvers) index_type nnz = 0; index_type nrows = 0; - sparse_matrix_t sm1{h, ro, ci, vs, nrows, static_cast(nnz)}; + sparse_matrix_t sm1{h, ro, ci, vs, nrows, nnz}; ASSERT_EQ(nullptr, sm1.row_offsets_); index_type neigvs{10}; diff --git a/cpp/tests/sparse/solver/lanczos.cu b/cpp/tests/sparse/solver/lanczos.cu index 2f5de6870d..128ab73747 100644 --- a/cpp/tests/sparse/solver/lanczos.cu +++ b/cpp/tests/sparse/solver/lanczos.cu @@ -173,7 +173,7 @@ class rmat_lanczos_tests raft::make_device_vector(handle, symmetric_coo.n_rows + 1); raft::sparse::convert::sorted_coo_to_csr(symmetric_coo.rows(), - static_cast(symmetric_coo.nnz), + symmetric_coo.nnz, row_indices.data_handle(), symmetric_coo.n_rows + 1, stream); diff --git a/cpp/tests/sparse/spectral_matrix.cu b/cpp/tests/sparse/spectral_matrix.cu index f37af32fb9..52f7eff10e 100644 --- a/cpp/tests/sparse/spectral_matrix.cu +++ b/cpp/tests/sparse/spectral_matrix.cu @@ -55,7 +55,7 @@ TEST(Raft, SpectralMatrices) value_type* vs{nullptr}; index_type nnz = 0; index_type nrows = 0; - sparse_matrix_t sm1{h, ro, ci, vs, nrows, static_cast(nnz)}; + sparse_matrix_t sm1{h, ro, ci, vs, nrows, nnz}; sparse_matrix_t sm2{h, csr_v}; ASSERT_EQ(nullptr, sm1.row_offsets_); ASSERT_EQ(nullptr, sm2.row_offsets_); diff --git a/cpp/tests/sparse/symmetrize.cu b/cpp/tests/sparse/symmetrize.cu index c3a03a942c..5f7463ac89 100644 --- a/cpp/tests/sparse/symmetrize.cu +++ b/cpp/tests/sparse/symmetrize.cu @@ -109,7 +109,7 @@ class SparseSymmetrizeTest rmm::device_scalar sum(stream); sum.set_value_to_zero_async(stream); - assert_symmetry<<>>( + assert_symmetry<<>>( out.rows(), out.cols(), out.vals(), (value_idx)out.nnz, sum.data()); sum_h = sum.value(stream); From 48813134d38051a9ec1a11ecbbba02b4461a44e4 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 29 Jan 2025 15:31:55 -0500 Subject: [PATCH 09/25] Exposing more templates for sparse types --- cpp/include/raft/sparse/convert/csr.cuh | 2 +- cpp/include/raft/sparse/linalg/degree.cuh | 2 +- .../raft/sparse/linalg/detail/degree.cuh | 24 ++++----- .../raft/sparse/linalg/detail/spectral.cuh | 6 +-- .../raft/sparse/linalg/detail/symmetrize.cuh | 22 ++++---- cpp/include/raft/sparse/linalg/spectral.cuh | 6 +-- cpp/include/raft/sparse/op/detail/filter.cuh | 52 ++++++++++--------- cpp/include/raft/sparse/op/detail/sort.h | 11 ++-- cpp/include/raft/sparse/op/sort.cuh | 11 ++-- 9 files changed, 71 insertions(+), 65 deletions(-) diff --git a/cpp/include/raft/sparse/convert/csr.cuh b/cpp/include/raft/sparse/convert/csr.cuh index 1478f1c42e..53a025ec71 100644 --- a/cpp/include/raft/sparse/convert/csr.cuh +++ b/cpp/include/raft/sparse/convert/csr.cuh @@ -57,7 +57,7 @@ void coo_to_csr(raft::resources const& handle, template void sorted_coo_to_csr(const T* rows, nnz_type nnz, outT* row_ind, int m, cudaStream_t stream) { - detail::sorted_coo_to_csr(rows, (uint64_t)nnz, row_ind, m, stream); + detail::sorted_coo_to_csr(rows, nnz, row_ind, m, stream); } /** diff --git a/cpp/include/raft/sparse/linalg/degree.cuh b/cpp/include/raft/sparse/linalg/degree.cuh index 9e17b01c75..965c923993 100644 --- a/cpp/include/raft/sparse/linalg/degree.cuh +++ b/cpp/include/raft/sparse/linalg/degree.cuh @@ -36,7 +36,7 @@ namespace linalg { template void coo_degree(const T* rows, nnz_type nnz, outT* results, cudaStream_t stream) { - detail::coo_degree<64, T>(rows, (uint64_t)nnz, results, stream); + detail::coo_degree<64, T>(rows, (nnz_type)nnz, results, stream); } /** diff --git a/cpp/include/raft/sparse/linalg/detail/degree.cuh b/cpp/include/raft/sparse/linalg/detail/degree.cuh index d7f72e9b6c..0163e15878 100644 --- a/cpp/include/raft/sparse/linalg/detail/degree.cuh +++ b/cpp/include/raft/sparse/linalg/detail/degree.cuh @@ -39,8 +39,8 @@ namespace detail { * @param nnz the size of the rows array * @param results array to place results */ -template -RAFT_KERNEL coo_degree_kernel(const T* rows, uint64_t nnz, outT* results) +template +RAFT_KERNEL coo_degree_kernel(const T* rows, nnz_t nnz, outT* results) { uint64_t row = (blockIdx.x * TPB_X) + threadIdx.x; if (row < nnz) { atomicAdd(results + rows[row], (outT)1); } @@ -54,8 +54,8 @@ RAFT_KERNEL coo_degree_kernel(const T* rows, uint64_t nnz, outT* results) * @param results: output result array * @param stream: cuda stream to use */ -template -void coo_degree(const T* rows, uint64_t nnz, outT* results, cudaStream_t stream) +template +void coo_degree(const T* rows, nnz_t nnz, outT* results, cudaStream_t stream) { dim3 grid_rc(raft::ceildiv(nnz, TPB_X), 1, 1); dim3 blk_rc(TPB_X, 1, 1); @@ -64,16 +64,16 @@ void coo_degree(const T* rows, uint64_t nnz, outT* results, cudaStream_t stream) RAFT_CUDA_TRY(cudaGetLastError()); } -template -RAFT_KERNEL coo_degree_nz_kernel(const int* rows, const T* vals, int nnz, int* results) +template +RAFT_KERNEL coo_degree_nz_kernel(const int* rows, const T* vals, nnz_t nnz, int* results) { int row = (blockIdx.x * TPB_X) + threadIdx.x; if (row < nnz && vals[row] != 0.0) { raft::myAtomicAdd(results + rows[row], 1); } } -template +template RAFT_KERNEL coo_degree_scalar_kernel( - const int* rows, const T* vals, uint64_t nnz, T scalar, outT* results) + const int* rows, const T* vals, nnz_t nnz, T scalar, outT* results) { uint64_t row = (blockIdx.x * TPB_X) + threadIdx.x; if (row < nnz && vals[row] != scalar) { raft::myAtomicAdd((outT*)results + rows[row], (outT)1); } @@ -90,9 +90,9 @@ RAFT_KERNEL coo_degree_scalar_kernel( * @param results: output row counts * @param stream: cuda stream to use */ -template +template void coo_degree_scalar( - const int* rows, const T* vals, uint64_t nnz, T scalar, outT* results, cudaStream_t stream = 0) + const int* rows, const T* vals, nnz_t nnz, T scalar, outT* results, cudaStream_t stream = 0) { dim3 grid_rc(raft::ceildiv(nnz, TPB_X), 1, 1); dim3 blk_rc(TPB_X, 1, 1); @@ -110,8 +110,8 @@ void coo_degree_scalar( * @param results: output row counts * @param stream: cuda stream to use */ -template -void coo_degree_nz(const int* rows, const T* vals, int nnz, int* results, cudaStream_t stream) +template +void coo_degree_nz(const int* rows, const T* vals, nnz_t nnz, int* results, cudaStream_t stream) { dim3 grid_rc(raft::ceildiv(nnz, TPB_X), 1, 1); dim3 blk_rc(TPB_X, 1, 1); diff --git a/cpp/include/raft/sparse/linalg/detail/spectral.cuh b/cpp/include/raft/sparse/linalg/detail/spectral.cuh index 90e698f0b9..103030cbad 100644 --- a/cpp/include/raft/sparse/linalg/detail/spectral.cuh +++ b/cpp/include/raft/sparse/linalg/detail/spectral.cuh @@ -30,13 +30,13 @@ namespace sparse { namespace spectral { namespace detail { -template +template void fit_embedding(raft::resources const& handle, int* rows, int* cols, T* vals, - uint64_t nnz, - uint64_t n, + nnz_t nnz, + IndT n, int n_components, T* out, unsigned long long seed = 1234567) diff --git a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh index a59fba8f8e..513b71d0b2 100644 --- a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh +++ b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh @@ -47,8 +47,8 @@ namespace detail { // TODO: value_idx param needs to be used for this once FAISS is updated to use float32 // for indices so that the index types can be uniform -template -RAFT_KERNEL coo_symmetrize_kernel(uint64_t* row_ind, +template +RAFT_KERNEL coo_symmetrize_kernel(nnz_t* row_ind, int* rows, int* cols, T* vals, @@ -56,31 +56,31 @@ RAFT_KERNEL coo_symmetrize_kernel(uint64_t* row_ind, int* ocols, T* ovals, int n, - uint64_t cnnz, + nnz_t cnnz, Lambda reduction_op) { int row = (blockIdx.x * TPB_X) + threadIdx.x; if (row < n) { - uint64_t start_idx = row_ind[row]; // each thread processes one row - uint64_t stop_idx = get_stop_idx(row, n, cnnz, row_ind); + nnz_t start_idx = row_ind[row]; // each thread processes one row + nnz_t stop_idx = get_stop_idx(row, n, cnnz, row_ind); - uint64_t row_nnz = 0; - uint64_t out_start_idx = start_idx * 2; + nnz_t row_nnz = 0; + nnz_t out_start_idx = start_idx * 2; for (uint64_t idx = 0; idx < stop_idx - start_idx; idx++) { int cur_row = rows[start_idx + idx]; int cur_col = cols[start_idx + idx]; T cur_val = vals[start_idx + idx]; - int lookup_row = cur_col; - uint64_t t_start = row_ind[lookup_row]; // Start at - uint64_t t_stop = get_stop_idx(lookup_row, n, cnnz, row_ind); + int lookup_row = cur_col; + nnz_t t_start = row_ind[lookup_row]; // Start at + nnz_t t_stop = get_stop_idx(lookup_row, n, cnnz, row_ind); T transpose = 0.0; bool found_match = false; - for (uint64_t t_idx = t_start; t_idx < t_stop; t_idx++) { + for (nnz_t t_idx = t_start; t_idx < t_stop; t_idx++) { // If we find a match, let's get out of the loop. We won't // need to modify the transposed value, since that will be // done in a different thread. diff --git a/cpp/include/raft/sparse/linalg/spectral.cuh b/cpp/include/raft/sparse/linalg/spectral.cuh index 33ba9cd340..c63d551bf2 100644 --- a/cpp/include/raft/sparse/linalg/spectral.cuh +++ b/cpp/include/raft/sparse/linalg/spectral.cuh @@ -23,13 +23,13 @@ namespace raft { namespace sparse { namespace spectral { -template +template void fit_embedding(raft::resources const& handle, int* rows, int* cols, T* vals, - uint64_t nnz, - int n, + nnz_t nnz, + IndT n, int n_components, T* out, unsigned long long seed = 1234567) diff --git a/cpp/include/raft/sparse/op/detail/filter.cuh b/cpp/include/raft/sparse/op/detail/filter.cuh index 41af0315d8..881db377f9 100644 --- a/cpp/include/raft/sparse/op/detail/filter.cuh +++ b/cpp/include/raft/sparse/op/detail/filter.cuh @@ -42,27 +42,27 @@ namespace sparse { namespace op { namespace detail { -template +template RAFT_KERNEL coo_remove_scalar_kernel(const int* rows, const int* cols, const T* vals, - uint64_t nnz, + nnz_t nnz, int* out_rows, int* out_cols, T* out_vals, - uint64_t* ex_scan, - uint64_t* cur_ex_scan, + nnz_t* ex_scan, + nnz_t* cur_ex_scan, int m, T scalar) { int row = (blockIdx.x * TPB_X) + threadIdx.x; if (row < m) { - uint64_t start = cur_ex_scan[row]; - uint64_t stop = get_stop_idx(row, m, nnz, cur_ex_scan); - uint64_t cur_out_idx = ex_scan[row]; + nnz_t start = cur_ex_scan[row]; + nnz_t stop = get_stop_idx(row, m, nnz, cur_ex_scan); + nnz_t cur_out_idx = ex_scan[row]; - for (uint64_t idx = start; idx < stop; idx++) { + for (nnz_t idx = start; idx < stop; idx++) { if (vals[idx] != scalar) { out_rows[cur_out_idx] = rows[idx]; out_cols[cur_out_idx] = cols[idx]; @@ -90,32 +90,32 @@ RAFT_KERNEL coo_remove_scalar_kernel(const int* rows, * @param d_alloc device allocator for temporary buffers * @param stream: cuda stream to use */ -template +template void coo_remove_scalar(const int* rows, const int* cols, const T* vals, - uint64_t nnz, + nnz_t nnz, int* crows, int* ccols, T* cvals, - uint64_t* cnnz, - uint64_t* cur_cnnz, + nnz_t* cnnz, + nnz_t* cur_cnnz, T scalar, int n, cudaStream_t stream) { rmm::device_uvector ex_scan(n, stream); rmm::device_uvector cur_ex_scan(n, stream); - RAFT_CUDA_TRY(cudaMemsetAsync(ex_scan.data(), 0, (uint64_t)n * sizeof(uint64_t), stream)); - RAFT_CUDA_TRY(cudaMemsetAsync(cur_ex_scan.data(), 0, (uint64_t)n * sizeof(uint64_t), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(ex_scan.data(), 0, (nnz_t)n * sizeof(uint64_t), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(cur_ex_scan.data(), 0, (nnz_t)n * sizeof(uint64_t), stream)); - thrust::device_ptr dev_cnnz = thrust::device_pointer_cast(cnnz); - thrust::device_ptr dev_ex_scan = thrust::device_pointer_cast(ex_scan.data()); + thrust::device_ptr dev_cnnz = thrust::device_pointer_cast(cnnz); + thrust::device_ptr dev_ex_scan = thrust::device_pointer_cast(ex_scan.data()); thrust::exclusive_scan(rmm::exec_policy(stream), dev_cnnz, dev_cnnz + n, dev_ex_scan); RAFT_CUDA_TRY(cudaPeekAtLastError()); - thrust::device_ptr dev_cur_cnnz = thrust::device_pointer_cast(cur_cnnz); - thrust::device_ptr dev_cur_ex_scan = thrust::device_pointer_cast(cur_ex_scan.data()); + thrust::device_ptr dev_cur_cnnz = thrust::device_pointer_cast(cur_cnnz); + thrust::device_ptr dev_cur_ex_scan = thrust::device_pointer_cast(cur_ex_scan.data()); thrust::exclusive_scan(rmm::exec_policy(stream), dev_cur_cnnz, dev_cur_cnnz + n, dev_cur_ex_scan); RAFT_CUDA_TRY(cudaPeekAtLastError()); @@ -147,22 +147,26 @@ void coo_remove_scalar(const int* rows, template void coo_remove_scalar(COO* in, COO* out, T scalar, cudaStream_t stream) { - rmm::device_uvector row_count_nz(in->n_rows, stream); - rmm::device_uvector row_count(in->n_rows, stream); + rmm::device_uvector row_count_nz(in->n_rows, stream); + rmm::device_uvector row_count(in->n_rows, stream); RAFT_CUDA_TRY( cudaMemsetAsync(row_count_nz.data(), 0, (uint64_t)in->n_rows * sizeof(uint64_t), stream)); RAFT_CUDA_TRY( cudaMemsetAsync(row_count.data(), 0, (uint64_t)in->n_rows * sizeof(uint64_t), stream)); - linalg::coo_degree(in->rows(), in->nnz, row_count.data(), stream); + linalg::coo_degree(in->rows(), in->safe_nnz, row_count.data(), stream); RAFT_CUDA_TRY(cudaPeekAtLastError()); - linalg::coo_degree_scalar( - in->rows(), in->vals(), in->nnz, scalar, (unsigned long long int*)row_count_nz.data(), stream); + linalg::coo_degree_scalar(in->rows(), + in->vals(), + in->safe_nnz, + scalar, + (unsigned long long int*)row_count_nz.data(), + stream); RAFT_CUDA_TRY(cudaPeekAtLastError()); - thrust::device_ptr d_row_count_nz = thrust::device_pointer_cast(row_count_nz.data()); + thrust::device_ptr d_row_count_nz = thrust::device_pointer_cast(row_count_nz.data()); uint64_t out_nnz = thrust::reduce(rmm::exec_policy(stream), d_row_count_nz, d_row_count_nz + in->n_rows); diff --git a/cpp/include/raft/sparse/op/detail/sort.h b/cpp/include/raft/sparse/op/detail/sort.h index 7d09ebeddc..6464ad9374 100644 --- a/cpp/include/raft/sparse/op/detail/sort.h +++ b/cpp/include/raft/sparse/op/detail/sort.h @@ -68,8 +68,8 @@ struct TupleComp { * @param vals vals array from coo matrix * @param stream: cuda stream to use */ -template -void coo_sort(IdxT m, IdxT n, uint64_t nnz, IdxT* rows, IdxT* cols, T* vals, cudaStream_t stream) +template +void coo_sort(IdxT m, IdxT n, nnz_t nnz, IdxT* rows, IdxT* cols, T* vals, cudaStream_t stream) { auto coo_indices = thrust::make_zip_iterator(thrust::make_tuple(rows, cols)); @@ -86,7 +86,8 @@ void coo_sort(IdxT m, IdxT n, uint64_t nnz, IdxT* rows, IdxT* cols, T* vals, cud template void coo_sort(COO* const in, cudaStream_t stream) { - coo_sort(in->n_rows, in->n_cols, in->nnz, in->rows(), in->cols(), in->vals(), stream); + coo_sort( + in->n_rows, in->n_cols, in->safe_nnz, in->rows(), in->cols(), in->vals(), stream); } /** @@ -99,9 +100,9 @@ void coo_sort(COO* const in, cudaStream_t stream) * @param[in] nnz number of edges in edge list * @param[in] stream cuda stream for which to order cuda operations */ -template +template void coo_sort_by_weight( - value_idx* rows, value_idx* cols, value_t* data, value_idx nnz, cudaStream_t stream) + value_idx* rows, value_idx* cols, value_t* data, nnz_t nnz, cudaStream_t stream) { thrust::device_ptr t_data = thrust::device_pointer_cast(data); diff --git a/cpp/include/raft/sparse/op/sort.cuh b/cpp/include/raft/sparse/op/sort.cuh index 35b5fd9f31..2a6cb52c83 100644 --- a/cpp/include/raft/sparse/op/sort.cuh +++ b/cpp/include/raft/sparse/op/sort.cuh @@ -37,8 +37,8 @@ namespace op { * @param vals vals array from coo matrix * @param stream: cuda stream to use */ -template -void coo_sort(IdxT m, IdxT n, uint64_t nnz, IdxT* rows, IdxT* cols, T* vals, cudaStream_t stream) +template +void coo_sort(IdxT m, IdxT n, nnz_t nnz, IdxT* rows, IdxT* cols, T* vals, cudaStream_t stream) { detail::coo_sort(m, n, nnz, rows, cols, vals, stream); } @@ -52,7 +52,8 @@ void coo_sort(IdxT m, IdxT n, uint64_t nnz, IdxT* rows, IdxT* cols, T* vals, cud template void coo_sort(COO* const in, cudaStream_t stream) { - coo_sort(in->n_rows, in->n_cols, in->nnz, in->rows(), in->cols(), in->vals(), stream); + coo_sort( + in->n_rows, in->n_cols, in->safe_nnz, in->rows(), in->cols(), in->vals(), stream); } /** @@ -65,9 +66,9 @@ void coo_sort(COO* const in, cudaStream_t stream) * @param[in] nnz number of edges in edge list * @param[in] stream cuda stream for which to order cuda operations */ -template +template void coo_sort_by_weight( - value_idx* rows, value_idx* cols, value_t* data, value_idx nnz, cudaStream_t stream) + value_idx* rows, value_idx* cols, value_t* data, nnz_t nnz, cudaStream_t stream) { detail::coo_sort_by_weight(rows, cols, data, nnz, stream); } From ccb3b93c8629ac151f93a12c8034fec42884c6d0 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 29 Jan 2025 15:56:22 -0500 Subject: [PATCH 10/25] Adding explicit types ot COO object --- cpp/include/raft/sparse/coo.hpp | 4 +- cpp/include/raft/sparse/detail/coo.cuh | 69 +++++++------------ .../raft/sparse/linalg/detail/degree.cuh | 4 +- .../raft/sparse/linalg/detail/symmetrize.cuh | 4 +- cpp/include/raft/sparse/op/detail/filter.cuh | 8 +-- 5 files changed, 36 insertions(+), 53 deletions(-) diff --git a/cpp/include/raft/sparse/coo.hpp b/cpp/include/raft/sparse/coo.hpp index a176fefc3e..1c61117dab 100644 --- a/cpp/include/raft/sparse/coo.hpp +++ b/cpp/include/raft/sparse/coo.hpp @@ -39,8 +39,8 @@ namespace sparse { * @tparam value_idx: the type of index array * */ -template -using COO = detail::COO; +template +using COO = detail::COO; }; // namespace sparse }; // namespace raft diff --git a/cpp/include/raft/sparse/detail/coo.cuh b/cpp/include/raft/sparse/detail/coo.cuh index 38cd33cb3e..deedffa636 100644 --- a/cpp/include/raft/sparse/detail/coo.cuh +++ b/cpp/include/raft/sparse/detail/coo.cuh @@ -44,7 +44,7 @@ namespace detail { * @tparam Index_Type: the type of index array * */ -template +template class COO { protected: rmm::device_uvector rows_arr; @@ -52,8 +52,11 @@ class COO { rmm::device_uvector vals_arr; public: - Index_Type nnz; - uint64_t safe_nnz; + using value_t = T; + using index_t = Index_Type; + using nnz_t = nnz_type; + + nnz_type nnz; Index_Type n_rows; Index_Type n_cols; @@ -61,13 +64,7 @@ class COO { * @param stream: CUDA stream to use */ COO(cudaStream_t stream) - : rows_arr(0, stream), - cols_arr(0, stream), - vals_arr(0, stream), - nnz(0), - n_rows(0), - n_cols(0), - safe_nnz(0) + : rows_arr(0, stream), cols_arr(0, stream), vals_arr(0, stream), nnz(0), n_rows(0), n_cols(0) { } @@ -79,20 +76,13 @@ class COO { * @param n_rows: number of rows in the dense matrix * @param n_cols: number of cols in the dense matrix */ - template COO(rmm::device_uvector& rows, rmm::device_uvector& cols, rmm::device_uvector& vals, - SafeNNZ_Type nnz, + nnz_type nnz, Index_Type n_rows = 0, Index_Type n_cols = 0) - : rows_arr(rows), - cols_arr(cols), - vals_arr(vals), - nnz((Index_Type)nnz), - n_rows(n_rows), - n_cols(n_cols), - safe_nnz((uint64_t)nnz) + : rows_arr(rows), cols_arr(cols), vals_arr(vals), nnz(nnz), n_rows(n_rows), n_cols(n_cols) { } @@ -103,9 +93,8 @@ class COO { * @param n_cols: number of cols in the dense matrix * @param init: initialize arrays with zeros */ - template COO(cudaStream_t stream, - SafeNNZ_Type nnz, + nnz_type nnz, Index_Type n_rows = 0, Index_Type n_cols = 0, bool init = true) @@ -114,8 +103,7 @@ class COO { vals_arr(nnz, stream), nnz(nnz), n_rows(n_rows), - n_cols(n_cols), - safe_nnz(nnz) + n_cols(n_cols) { if (init) init_arrays(stream); } @@ -123,10 +111,10 @@ class COO { void init_arrays(cudaStream_t stream) { RAFT_CUDA_TRY( - cudaMemsetAsync(this->rows_arr.data(), 0, this->safe_nnz * sizeof(Index_Type), stream)); + cudaMemsetAsync(this->rows_arr.data(), 0, this - nnz * sizeof(Index_Type), stream)); RAFT_CUDA_TRY( - cudaMemsetAsync(this->cols_arr.data(), 0, this->safe_nnz * sizeof(Index_Type), stream)); - RAFT_CUDA_TRY(cudaMemsetAsync(this->vals_arr.data(), 0, this->safe_nnz * sizeof(T), stream)); + cudaMemsetAsync(this->cols_arr.data(), 0, this->nnz * sizeof(Index_Type), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(this->vals_arr.data(), 0, this->nnz * sizeof(T), stream)); } ~COO() {} @@ -137,7 +125,7 @@ class COO { */ bool validate_size() const { - if (this->safe_nnz <= 0 || n_rows <= 0 || n_cols <= 0) return false; + if (this->nnz <= 0 || n_rows <= 0 || n_cols <= 0) return false; return true; } @@ -172,16 +160,16 @@ class COO { /** * @brief Send human-readable state information to output stream */ - friend std::ostream& operator<<(std::ostream& out, const COO& c) + friend std::ostream& operator<<(std::ostream& out, const COO& c) { if (c.validate_size() && c.validate_mem()) { cudaStream_t stream; RAFT_CUDA_TRY(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); - out << raft::arr2Str(c.rows_arr.data(), c.safe_nnz, "rows", stream) << std::endl; - out << raft::arr2Str(c.cols_arr.data(), c.safe_nnz, "cols", stream) << std::endl; - out << raft::arr2Str(c.vals_arr.data(), c.safe_nnz, "vals", stream) << std::endl; - out << "nnz=" << c.safe_nnz << std::endl; + out << raft::arr2Str(c.rows_arr.data(), c.nnz, "rows", stream) << std::endl; + out << raft::arr2Str(c.cols_arr.data(), c.nnz, "cols", stream) << std::endl; + out << raft::arr2Str(c.vals_arr.data(), c.nnz, "vals", stream) << std::endl; + out << "nnz=" << c.nnz << std::endl; out << "n_rows=" << c.n_rows << std::endl; out << "n_cols=" << c.n_cols << std::endl; @@ -220,8 +208,7 @@ class COO { * @param init: should values be initialized to 0? * @param stream: CUDA stream to use */ - template - void allocate(SafeNNZ_Type nnz, bool init, cudaStream_t stream) + void allocate(nnz_type nnz, bool init, cudaStream_t stream) { this->allocate(nnz, 0, init, stream); } @@ -233,8 +220,7 @@ class COO { * @param init: should values be initialized to 0? * @param stream: CUDA stream to use */ - template - void allocate(SafeNNZ_Type nnz, Index_Type size, bool init, cudaStream_t stream) + void allocate(nnz_type nnz, Index_Type size, bool init, cudaStream_t stream) { this->allocate(nnz, size, size, init, stream); } @@ -247,14 +233,11 @@ class COO { * @param init: should values be initialized to 0? * @param stream: stream to use for init */ - template - void allocate( - SafeNNZ_Type nnz, Index_Type n_rows, Index_Type n_cols, bool init, cudaStream_t stream) + void allocate(nnz_type nnz, Index_Type n_rows, Index_Type n_cols, bool init, cudaStream_t stream) { - this->n_rows = n_rows; - this->n_cols = n_cols; - this->nnz = (Index_Type)nnz; - this->safe_nnz = nnz; + this->n_rows = n_rows; + this->n_cols = n_cols; + this->nnz = nnz; this->rows_arr.resize(nnz, stream); this->cols_arr.resize(nnz, stream); diff --git a/cpp/include/raft/sparse/linalg/detail/degree.cuh b/cpp/include/raft/sparse/linalg/detail/degree.cuh index 0163e15878..6a83ddae42 100644 --- a/cpp/include/raft/sparse/linalg/detail/degree.cuh +++ b/cpp/include/raft/sparse/linalg/detail/degree.cuh @@ -54,10 +54,10 @@ RAFT_KERNEL coo_degree_kernel(const T* rows, nnz_t nnz, outT* results) * @param results: output result array * @param stream: cuda stream to use */ -template +template void coo_degree(const T* rows, nnz_t nnz, outT* results, cudaStream_t stream) { - dim3 grid_rc(raft::ceildiv(nnz, TPB_X), 1, 1); + dim3 grid_rc(raft::ceildiv((nnz_t)nnz, (nnz_t)TPB_X), 1, 1); dim3 blk_rc(TPB_X, 1, 1); coo_degree_kernel<<>>(rows, nnz, results); diff --git a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh index 513b71d0b2..af9102fa2f 100644 --- a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh +++ b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh @@ -68,7 +68,7 @@ RAFT_KERNEL coo_symmetrize_kernel(nnz_t* row_ind, nnz_t row_nnz = 0; nnz_t out_start_idx = start_idx * 2; - for (uint64_t idx = 0; idx < stop_idx - start_idx; idx++) { + for (nnz_t idx = 0; idx < stop_idx - start_idx; idx++) { int cur_row = rows[start_idx + idx]; int cur_col = cols[start_idx + idx]; T cur_val = vals[start_idx + idx]; @@ -156,7 +156,7 @@ void coo_symmetrize(COO* in, out->cols(), out->vals(), in->n_rows, - in->nnz, + in->safe_nnz, reduction_op); RAFT_CUDA_TRY(cudaPeekAtLastError()); } diff --git a/cpp/include/raft/sparse/op/detail/filter.cuh b/cpp/include/raft/sparse/op/detail/filter.cuh index 881db377f9..db82c8ea7f 100644 --- a/cpp/include/raft/sparse/op/detail/filter.cuh +++ b/cpp/include/raft/sparse/op/detail/filter.cuh @@ -144,11 +144,11 @@ void coo_remove_scalar(const int* rows, * @param scalar: scalar to remove from arrays * @param stream: cuda stream to use */ -template +template void coo_remove_scalar(COO* in, COO* out, T scalar, cudaStream_t stream) { - rmm::device_uvector row_count_nz(in->n_rows, stream); - rmm::device_uvector row_count(in->n_rows, stream); + rmm::device_uvector row_count_nz(in->n_rows, stream); + rmm::device_uvector row_count(in->n_rows, stream); RAFT_CUDA_TRY( cudaMemsetAsync(row_count_nz.data(), 0, (uint64_t)in->n_rows * sizeof(uint64_t), stream)); @@ -166,7 +166,7 @@ void coo_remove_scalar(COO* in, COO* out, T scalar, cudaStream_t stream) stream); RAFT_CUDA_TRY(cudaPeekAtLastError()); - thrust::device_ptr d_row_count_nz = thrust::device_pointer_cast(row_count_nz.data()); + thrust::device_ptr d_row_count_nz = thrust::device_pointer_cast(row_count_nz.data()); uint64_t out_nnz = thrust::reduce(rmm::exec_policy(stream), d_row_count_nz, d_row_count_nz + in->n_rows); From b831e4057c2f92c0890f947c273cb04b805dba91 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 29 Jan 2025 16:28:42 -0500 Subject: [PATCH 11/25] Updating COO object and sparse primitives to include an NNZ type --- cpp/include/raft/sparse/convert/coo.cuh | 6 +- cpp/include/raft/sparse/convert/csr.cuh | 2 +- .../raft/sparse/convert/detail/coo.cuh | 4 +- cpp/include/raft/sparse/detail/coo.cuh | 2 +- .../raft/sparse/linalg/detail/symmetrize.cuh | 2 +- cpp/include/raft/sparse/op/detail/filter.cuh | 71 +++++++++---------- cpp/include/raft/sparse/op/detail/sort.h | 8 +-- cpp/include/raft/sparse/op/filter.cuh | 33 +++++---- cpp/include/raft/sparse/op/sort.cuh | 8 +-- cpp/tests/sparse/solver/lanczos.cu | 4 +- cpp/tests/sparse/symmetrize.cu | 20 +++--- 11 files changed, 80 insertions(+), 80 deletions(-) diff --git a/cpp/include/raft/sparse/convert/coo.cuh b/cpp/include/raft/sparse/convert/coo.cuh index ba3efc7ff0..69f7864602 100644 --- a/cpp/include/raft/sparse/convert/coo.cuh +++ b/cpp/include/raft/sparse/convert/coo.cuh @@ -32,11 +32,11 @@ namespace convert { * @param nnz: size of output COO row array * @param stream: cuda stream to use */ -template +template void csr_to_coo( - const value_idx* row_ind, value_idx m, value_idx* coo_rows, value_idx nnz, cudaStream_t stream) + const value_idx* row_ind, value_idx m, value_idx* coo_rows, nnz_t nnz, cudaStream_t stream) { - detail::csr_to_coo(row_ind, m, coo_rows, nnz, stream); + detail::csr_to_coo(row_ind, m, coo_rows, nnz, stream); } }; // end NAMESPACE convert diff --git a/cpp/include/raft/sparse/convert/csr.cuh b/cpp/include/raft/sparse/convert/csr.cuh index 53a025ec71..62137d04b7 100644 --- a/cpp/include/raft/sparse/convert/csr.cuh +++ b/cpp/include/raft/sparse/convert/csr.cuh @@ -70,7 +70,7 @@ void sorted_coo_to_csr(const T* rows, nnz_type nnz, outT* row_ind, int m, cudaSt template void sorted_coo_to_csr(COO* coo, outT* row_ind, cudaStream_t stream) { - detail::sorted_coo_to_csr(coo->rows(), coo->safe_nnz, row_ind, coo->n_rows, stream); + detail::sorted_coo_to_csr(coo->rows(), coo->nnz, row_ind, coo->n_rows, stream); } /** diff --git a/cpp/include/raft/sparse/convert/detail/coo.cuh b/cpp/include/raft/sparse/convert/detail/coo.cuh index 469dac3c86..009762b4df 100644 --- a/cpp/include/raft/sparse/convert/detail/coo.cuh +++ b/cpp/include/raft/sparse/convert/detail/coo.cuh @@ -60,9 +60,9 @@ RAFT_KERNEL csr_to_coo_kernel(const value_idx* row_ind, * @param nnz: size of output COO row array * @param stream: cuda stream to use */ -template +template void csr_to_coo( - const value_idx* row_ind, value_idx m, value_idx* coo_rows, value_idx nnz, cudaStream_t stream) + const value_idx* row_ind, value_idx m, value_idx* coo_rows, nnz_t nnz, cudaStream_t stream) { // @TODO: Use cusparse for this. dim3 grid(raft::ceildiv(m, (value_idx)TPB_X), 1, 1); diff --git a/cpp/include/raft/sparse/detail/coo.cuh b/cpp/include/raft/sparse/detail/coo.cuh index deedffa636..5e1514d228 100644 --- a/cpp/include/raft/sparse/detail/coo.cuh +++ b/cpp/include/raft/sparse/detail/coo.cuh @@ -111,7 +111,7 @@ class COO { void init_arrays(cudaStream_t stream) { RAFT_CUDA_TRY( - cudaMemsetAsync(this->rows_arr.data(), 0, this - nnz * sizeof(Index_Type), stream)); + cudaMemsetAsync(this->rows_arr.data(), 0, this->nnz * sizeof(Index_Type), stream)); RAFT_CUDA_TRY( cudaMemsetAsync(this->cols_arr.data(), 0, this->nnz * sizeof(Index_Type), stream)); RAFT_CUDA_TRY(cudaMemsetAsync(this->vals_arr.data(), 0, this->nnz * sizeof(T), stream)); diff --git a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh index af9102fa2f..e60db71bdb 100644 --- a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh +++ b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh @@ -156,7 +156,7 @@ void coo_symmetrize(COO* in, out->cols(), out->vals(), in->n_rows, - in->safe_nnz, + in->nnz, reduction_op); RAFT_CUDA_TRY(cudaPeekAtLastError()); } diff --git a/cpp/include/raft/sparse/op/detail/filter.cuh b/cpp/include/raft/sparse/op/detail/filter.cuh index db82c8ea7f..e0d072e8bb 100644 --- a/cpp/include/raft/sparse/op/detail/filter.cuh +++ b/cpp/include/raft/sparse/op/detail/filter.cuh @@ -90,18 +90,18 @@ RAFT_KERNEL coo_remove_scalar_kernel(const int* rows, * @param d_alloc device allocator for temporary buffers * @param stream: cuda stream to use */ -template -void coo_remove_scalar(const int* rows, - const int* cols, +template +void coo_remove_scalar(const idx_t* rows, + const idx_t* cols, const T* vals, nnz_t nnz, - int* crows, - int* ccols, + idx_t* crows, + idx_t* ccols, T* cvals, nnz_t* cnnz, nnz_t* cur_cnnz, T scalar, - int n, + idx_t n, cudaStream_t stream) { rmm::device_uvector ex_scan(n, stream); @@ -144,46 +144,43 @@ void coo_remove_scalar(const int* rows, * @param scalar: scalar to remove from arrays * @param stream: cuda stream to use */ -template -void coo_remove_scalar(COO* in, COO* out, T scalar, cudaStream_t stream) +template +void coo_remove_scalar(COO* in, + COO* out, + T scalar, + cudaStream_t stream) { - rmm::device_uvector row_count_nz(in->n_rows, stream); - rmm::device_uvector row_count(in->n_rows, stream); + rmm::device_uvector row_count_nz(in->n_rows, stream); + rmm::device_uvector row_count(in->n_rows, stream); - RAFT_CUDA_TRY( - cudaMemsetAsync(row_count_nz.data(), 0, (uint64_t)in->n_rows * sizeof(uint64_t), stream)); - RAFT_CUDA_TRY( - cudaMemsetAsync(row_count.data(), 0, (uint64_t)in->n_rows * sizeof(uint64_t), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(row_count_nz.data(), 0, (idx_t)in->n_rows * sizeof(idx_t), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(row_count.data(), 0, (idx_t)in->n_rows * sizeof(idx_t), stream)); - linalg::coo_degree(in->rows(), in->safe_nnz, row_count.data(), stream); + linalg::coo_degree(in->rows(), in->nnz, row_count.data(), stream); RAFT_CUDA_TRY(cudaPeekAtLastError()); - linalg::coo_degree_scalar(in->rows(), - in->vals(), - in->safe_nnz, - scalar, - (unsigned long long int*)row_count_nz.data(), - stream); + linalg::coo_degree_scalar( + in->rows(), in->vals(), in->nnz, scalar, (unsigned long long int*)row_count_nz.data(), stream); RAFT_CUDA_TRY(cudaPeekAtLastError()); - thrust::device_ptr d_row_count_nz = thrust::device_pointer_cast(row_count_nz.data()); + thrust::device_ptr d_row_count_nz = thrust::device_pointer_cast(row_count_nz.data()); uint64_t out_nnz = thrust::reduce(rmm::exec_policy(stream), d_row_count_nz, d_row_count_nz + in->n_rows); out->allocate(out_nnz, in->n_rows, in->n_cols, false, stream); - coo_remove_scalar(in->rows(), - in->cols(), - in->vals(), - in->nnz, - out->rows(), - out->cols(), - out->vals(), - row_count_nz.data(), - row_count.data(), - scalar, - in->n_rows, - stream); + coo_remove_scalar(in->rows(), + in->cols(), + in->vals(), + in->nnz, + out->rows(), + out->cols(), + out->vals(), + row_count_nz.data(), + row_count.data(), + scalar, + in->n_rows, + stream); RAFT_CUDA_TRY(cudaPeekAtLastError()); } @@ -194,10 +191,10 @@ void coo_remove_scalar(COO* in, COO* out, T scalar, cudaStream_t stream) * @param out: output COO matrix * @param stream: cuda stream to use */ -template -void coo_remove_zeros(COO* in, COO* out, cudaStream_t stream) +template +void coo_remove_zeros(COO* in, COO* out, cudaStream_t stream) { - coo_remove_scalar(in, out, T(0.0), stream); + coo_remove_scalar(in, out, T(0.0), stream); } }; // namespace detail diff --git a/cpp/include/raft/sparse/op/detail/sort.h b/cpp/include/raft/sparse/op/detail/sort.h index 6464ad9374..8bd848bedb 100644 --- a/cpp/include/raft/sparse/op/detail/sort.h +++ b/cpp/include/raft/sparse/op/detail/sort.h @@ -83,11 +83,11 @@ void coo_sort(IdxT m, IdxT n, nnz_t nnz, IdxT* rows, IdxT* cols, T* vals, cudaSt * @param in: COO to sort by row * @param stream: the cuda stream to use */ -template -void coo_sort(COO* const in, cudaStream_t stream) +template +void coo_sort(COO* const in, cudaStream_t stream) { - coo_sort( - in->n_rows, in->n_cols, in->safe_nnz, in->rows(), in->cols(), in->vals(), stream); + coo_sort( + in->n_rows, in->n_cols, in->nnz, in->rows(), in->cols(), in->vals(), stream); } /** diff --git a/cpp/include/raft/sparse/op/filter.cuh b/cpp/include/raft/sparse/op/filter.cuh index 4b329325ca..5541a375d2 100644 --- a/cpp/include/raft/sparse/op/filter.cuh +++ b/cpp/include/raft/sparse/op/filter.cuh @@ -42,21 +42,21 @@ namespace op { * @param n: number of rows in dense matrix * @param stream: cuda stream to use */ -template -void coo_remove_scalar(const int* rows, - const int* cols, +template +void coo_remove_scalar(const idx_t* rows, + const idx_t* cols, const T* vals, int nnz, - int* crows, - int* ccols, + idx_t* crows, + idx_t* ccols, T* cvals, - int* cnnz, - int* cur_cnnz, + nnz_t* cnnz, + nnz_t* cur_cnnz, T scalar, - int n, + idx_t n, cudaStream_t stream) { - detail::coo_remove_scalar<128, T>( + detail::coo_remove_scalar<128, T, idx_t, nnz_t>( rows, cols, vals, nnz, crows, ccols, cvals, cnnz, cur_cnnz, scalar, n, stream); } @@ -68,10 +68,13 @@ void coo_remove_scalar(const int* rows, * @param scalar: scalar to remove from arrays * @param stream: cuda stream to use */ -template -void coo_remove_scalar(COO* in, COO* out, T scalar, cudaStream_t stream) +template +void coo_remove_scalar(COO* in, + COO* out, + T scalar, + cudaStream_t stream) { - detail::coo_remove_scalar<128, T>(in, out, scalar, stream); + detail::coo_remove_scalar<128, T, idx_t, nnz_t>(in, out, scalar, stream); } /** @@ -81,10 +84,10 @@ void coo_remove_scalar(COO* in, COO* out, T scalar, cudaStream_t stream) * @param out: output COO matrix * @param stream: cuda stream to use */ -template -void coo_remove_zeros(COO* in, COO* out, cudaStream_t stream) +template +void coo_remove_zeros(COO* in, COO* out, cudaStream_t stream) { - coo_remove_scalar(in, out, T(0.0), stream); + coo_remove_scalar(in, out, T(0.0), stream); } }; // namespace op diff --git a/cpp/include/raft/sparse/op/sort.cuh b/cpp/include/raft/sparse/op/sort.cuh index 2a6cb52c83..62231e561e 100644 --- a/cpp/include/raft/sparse/op/sort.cuh +++ b/cpp/include/raft/sparse/op/sort.cuh @@ -49,11 +49,11 @@ void coo_sort(IdxT m, IdxT n, nnz_t nnz, IdxT* rows, IdxT* cols, T* vals, cudaSt * @param in: COO to sort by row * @param stream: the cuda stream to use */ -template -void coo_sort(COO* const in, cudaStream_t stream) +template +void coo_sort(COO* const in, cudaStream_t stream) { - coo_sort( - in->n_rows, in->n_cols, in->safe_nnz, in->rows(), in->cols(), in->vals(), stream); + coo_sort( + in->n_rows, in->n_cols, in->nnz, in->rows(), in->cols(), in->vals(), stream); } /** diff --git a/cpp/tests/sparse/solver/lanczos.cu b/cpp/tests/sparse/solver/lanczos.cu index 128ab73747..88bc208f0b 100644 --- a/cpp/tests/sparse/solver/lanczos.cu +++ b/cpp/tests/sparse/solver/lanczos.cu @@ -192,7 +192,7 @@ class rmat_lanczos_tests raft::make_device_matrix( handle, symmetric_coo.n_rows, n_components); - raft::spectral::matrix::sparse_matrix_t const csr_m{ + raft::spectral::matrix::sparse_matrix_t const csr_m{ handle, row_indices.data_handle(), symmetric_coo.cols(), @@ -203,7 +203,7 @@ class rmat_lanczos_tests n_components, params.maxiter, params.restartiter, params.tol, rng.seed}; auto csr_structure = - raft::make_device_compressed_structure_view( + raft::make_device_compressed_structure_view( const_cast(row_indices.data_handle()), const_cast(symmetric_coo.cols()), symmetric_coo.n_rows, diff --git a/cpp/tests/sparse/symmetrize.cu b/cpp/tests/sparse/symmetrize.cu index 5f7463ac89..358bbfaa83 100644 --- a/cpp/tests/sparse/symmetrize.cu +++ b/cpp/tests/sparse/symmetrize.cu @@ -32,9 +32,9 @@ namespace raft { namespace sparse { -template +template RAFT_KERNEL assert_symmetry( - value_idx* rows, value_idx* cols, value_t* vals, value_idx nnz, value_idx* sum) + value_idx* rows, value_idx* cols, value_t* vals, nnz_t nnz, value_idx* sum) { int tid = blockDim.x * blockIdx.x + threadIdx.x; @@ -60,7 +60,7 @@ template return os; } -template +template class SparseSymmetrizeTest : public ::testing::TestWithParam> { public: @@ -93,15 +93,15 @@ class SparseSymmetrizeTest { make_data(); - value_idx m = params.indptr_h.size() - 1; - value_idx n = params.n_cols; - value_idx nnz = params.indices_h.size(); + value_idx m = params.indptr_h.size() - 1; + value_idx n = params.n_cols; + nnz_t nnz = params.indices_h.size(); rmm::device_uvector coo_rows(nnz, stream); raft::sparse::convert::csr_to_coo(indptr.data(), m, coo_rows.data(), nnz, stream); - raft::sparse::COO out(stream); + raft::sparse::COO out(stream); raft::sparse::linalg::symmetrize( handle, coo_rows.data(), indices.data(), data.data(), m, n, coo_rows.size(), out); @@ -109,8 +109,8 @@ class SparseSymmetrizeTest rmm::device_scalar sum(stream); sum.set_value_to_zero_async(stream); - assert_symmetry<<>>( - out.rows(), out.cols(), out.vals(), (value_idx)out.nnz, sum.data()); + assert_symmetry<<>>( + out.rows(), out.cols(), out.vals(), (nnz_t)out.nnz, sum.data()); sum_h = sum.value(stream); resource::sync_stream(handle, stream); @@ -211,7 +211,7 @@ const std::vector> symm_inputs_fint = { }; -typedef SparseSymmetrizeTest SparseSymmetrizeTestF_int; +typedef SparseSymmetrizeTest SparseSymmetrizeTestF_int; TEST_P(SparseSymmetrizeTestF_int, Result) { ASSERT_TRUE(sum_h == 0); } INSTANTIATE_TEST_CASE_P(SparseSymmetrizeTest, From aed76860e5e92e3de12277745839fcf82480ab41 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 29 Jan 2025 16:54:59 -0500 Subject: [PATCH 12/25] More updates --- .../raft/sparse/linalg/detail/symmetrize.cuh | 12 +++++----- cpp/include/raft/sparse/linalg/symmetrize.cuh | 10 ++++---- cpp/include/raft/sparse/op/detail/reduce.cuh | 24 +++++++++---------- cpp/include/raft/sparse/op/reduce.cuh | 10 ++++---- cpp/tests/sparse/reduce.cu | 12 +++++----- cpp/tests/sparse/solver/lanczos.cu | 14 +++++------ 6 files changed, 41 insertions(+), 41 deletions(-) diff --git a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh index e60db71bdb..3b76f0cc4f 100644 --- a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh +++ b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh @@ -324,15 +324,15 @@ void from_knn_symmetrize_matrix(const value_idx* __restrict__ knn_indices, /** * Symmetrizes a COO matrix */ -template +template void symmetrize(raft::resources const& handle, const value_idx* rows, const value_idx* cols, const value_t* vals, - size_t m, - size_t n, - size_t nnz, - raft::sparse::COO& out) + value_idx m, + value_idx n, + nnz_t nnz, + raft::sparse::COO& out) { auto stream = resource::get_cuda_stream(handle); @@ -352,7 +352,7 @@ void symmetrize(raft::resources const& handle, // sort COO raft::sparse::op::coo_sort((value_idx)m, (value_idx)n, - (value_idx)nnz * 2, + (nnz_t)nnz * 2, symm_rows.data(), symm_cols.data(), symm_vals.data(), diff --git a/cpp/include/raft/sparse/linalg/symmetrize.cuh b/cpp/include/raft/sparse/linalg/symmetrize.cuh index 8ee53cd3ae..64bab11233 100644 --- a/cpp/include/raft/sparse/linalg/symmetrize.cuh +++ b/cpp/include/raft/sparse/linalg/symmetrize.cuh @@ -148,15 +148,15 @@ void from_knn_symmetrize_matrix(const value_idx* __restrict__ knn_indices, /** * Symmetrizes a COO matrix */ -template +template void symmetrize(raft::resources const& handle, const value_idx* rows, const value_idx* cols, const value_t* vals, - size_t m, - size_t n, - size_t nnz, - raft::sparse::COO& out) + value_idx m, + value_idx n, + nnz_t nnz, + raft::sparse::COO& out) { detail::symmetrize(handle, rows, cols, vals, m, n, nnz, out); } diff --git a/cpp/include/raft/sparse/op/detail/reduce.cuh b/cpp/include/raft/sparse/op/detail/reduce.cuh index 1e5dd87958..2359628b78 100644 --- a/cpp/include/raft/sparse/op/detail/reduce.cuh +++ b/cpp/include/raft/sparse/op/detail/reduce.cuh @@ -44,13 +44,13 @@ namespace sparse { namespace op { namespace detail { -template +template RAFT_KERNEL compute_duplicates_diffs_kernel(const value_idx* rows, const value_idx* cols, value_idx* diff, - size_t nnz) + nnz_t nnz) { - size_t tid = blockDim.x * blockIdx.x + threadIdx.x; + nnz_t tid = blockDim.x * blockIdx.x + threadIdx.x; if (tid >= nnz) return; value_idx d = 1; @@ -98,13 +98,13 @@ RAFT_KERNEL max_duplicates_kernel(const value_idx* src_rows, * @param[in] nnz number of nonzeros in input arrays * @param[in] stream cuda ops will be ordered wrt this stream */ -template +template void compute_duplicates_mask( - value_idx* mask, const value_idx* rows, const value_idx* cols, size_t nnz, cudaStream_t stream) + value_idx* mask, const value_idx* rows, const value_idx* cols, nnz_t nnz, cudaStream_t stream) { RAFT_CUDA_TRY(cudaMemsetAsync(mask, 0, nnz * sizeof(value_idx), stream)); - compute_duplicates_diffs_kernel<<>>( + compute_duplicates_diffs_kernel<<>>( rows, cols, mask, nnz); } @@ -124,15 +124,15 @@ void compute_duplicates_mask( * @param[in] n number of columns in COO input matrix * @param[in] stream cuda ops will be ordered wrt this stream */ -template +template void max_duplicates(raft::resources const& handle, - raft::sparse::COO& out, + raft::sparse::COO& out, const value_idx* rows, const value_idx* cols, const value_t* vals, - size_t nnz, - size_t m, - size_t n) + nnz_t nnz, + value_idx m, + value_idx n) { auto stream = resource::get_cuda_stream(handle); auto thrust_policy = resource::get_thrust_policy(handle); @@ -153,7 +153,7 @@ void max_duplicates(raft::resources const& handle, out.allocate(size, m, n, true, stream); // perform reduce - max_duplicates_kernel<<>>( + max_duplicates_kernel<<>>( rows, cols, vals, diff.data() + 1, out.rows(), out.cols(), out.vals(), nnz); } diff --git a/cpp/include/raft/sparse/op/reduce.cuh b/cpp/include/raft/sparse/op/reduce.cuh index b03192f111..102e864943 100644 --- a/cpp/include/raft/sparse/op/reduce.cuh +++ b/cpp/include/raft/sparse/op/reduce.cuh @@ -68,15 +68,15 @@ void compute_duplicates_mask( * @param[in] m number of rows in COO input matrix * @param[in] n number of columns in COO input matrix */ -template +template void max_duplicates(raft::resources const& handle, - raft::sparse::COO& out, + raft::sparse::COO& out, const value_idx* rows, const value_idx* cols, const value_t* vals, - size_t nnz, - size_t m, - size_t n) + nnz_t nnz, + value_idx m, + value_idx n) { detail::max_duplicates(handle, out, rows, cols, vals, nnz, m, n); } diff --git a/cpp/tests/sparse/reduce.cu b/cpp/tests/sparse/reduce.cu index f777f4781d..eef54f1ebe 100644 --- a/cpp/tests/sparse/reduce.cu +++ b/cpp/tests/sparse/reduce.cu @@ -41,8 +41,8 @@ struct SparseReduceInputs { std::vector out_cols; std::vector out_vals; - size_t m; - size_t n; + value_idx m; + value_idx n; }; template @@ -73,15 +73,15 @@ class SparseReduceTest : public ::testing::TestWithParam out(stream); + raft::sparse::COO out(stream); raft::sparse::op::max_duplicates(handle, out, in_rows.data(), in_cols.data(), in_vals.data(), - params.in_rows.size(), - params.m, - params.n); + (value_idx)params.in_rows.size(), + (value_idx)params.m, + (value_idx)params.n); RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); ASSERT_TRUE(raft::devArrMatch( out_rows.data(), out.rows(), out.nnz, raft::Compare())); diff --git a/cpp/tests/sparse/solver/lanczos.cu b/cpp/tests/sparse/solver/lanczos.cu index 88bc208f0b..1a009bf8dc 100644 --- a/cpp/tests/sparse/solver/lanczos.cu +++ b/cpp/tests/sparse/solver/lanczos.cu @@ -147,7 +147,7 @@ class rmat_lanczos_tests raft::device_vector out_data = raft::make_device_vector(handle, n_edges); raft::matrix::fill(handle, out_data.view(), 1.0); - raft::sparse::COO coo(stream); + raft::sparse::COO coo(stream); raft::sparse::op::coo_sort(n_nodes, n_nodes, @@ -161,11 +161,11 @@ class rmat_lanczos_tests out_src.data_handle(), out_dst.data_handle(), out_data.data_handle(), - n_edges, - n_nodes, - n_nodes); + (IndexType)n_edges, + (IndexType)n_nodes, + (IndexType)n_nodes); - raft::sparse::COO symmetric_coo(stream); + raft::sparse::COO symmetric_coo(stream); raft::sparse::linalg::symmetrize( handle, coo.rows(), coo.cols(), coo.vals(), coo.n_rows, coo.n_cols, coo.nnz, symmetric_coo); @@ -192,7 +192,7 @@ class rmat_lanczos_tests raft::make_device_matrix( handle, symmetric_coo.n_rows, n_components); - raft::spectral::matrix::sparse_matrix_t const csr_m{ + raft::spectral::matrix::sparse_matrix_t const csr_m{ handle, row_indices.data_handle(), symmetric_coo.cols(), @@ -203,7 +203,7 @@ class rmat_lanczos_tests n_components, params.maxiter, params.restartiter, params.tol, rng.seed}; auto csr_structure = - raft::make_device_compressed_structure_view( + raft::make_device_compressed_structure_view( const_cast(row_indices.data_handle()), const_cast(symmetric_coo.cols()), symmetric_coo.n_rows, From a58ffcbbdca639ce574aedb28a282a734a5a9c43 Mon Sep 17 00:00:00 2001 From: viclafargue Date: Thu, 30 Jan 2025 12:19:48 +0000 Subject: [PATCH 13/25] completing change --- cpp/include/raft/sparse/linalg/degree.cuh | 4 ++-- .../sparse/neighbors/detail/cross_component_nn.cuh | 14 ++++++++++---- .../raft/sparse/neighbors/detail/knn_graph.cuh | 6 +++--- .../raft/spectral/detail/matrix_wrappers.hpp | 2 +- cpp/tests/linalg/eigen_solvers.cu | 2 +- cpp/tests/sparse/solver/lanczos.cu | 2 +- cpp/tests/sparse/spectral_matrix.cu | 2 +- 7 files changed, 19 insertions(+), 13 deletions(-) diff --git a/cpp/include/raft/sparse/linalg/degree.cuh b/cpp/include/raft/sparse/linalg/degree.cuh index 965c923993..567eb187c1 100644 --- a/cpp/include/raft/sparse/linalg/degree.cuh +++ b/cpp/include/raft/sparse/linalg/degree.cuh @@ -50,7 +50,7 @@ void coo_degree(const T* rows, nnz_type nnz, outT* results, cudaStream_t stream) template void coo_degree(COO* in, outT* results, cudaStream_t stream) { - coo_degree(in->rows(), in->safe_nnz, results, stream); + coo_degree(in->rows(), in->nnz, results, stream); } /** @@ -83,7 +83,7 @@ void coo_degree_scalar( template void coo_degree_scalar(COO* in, T scalar, outT* results, cudaStream_t stream) { - coo_degree_scalar(in->rows(), in->vals(), in->safe_nnz, scalar, results, stream); + coo_degree_scalar(in->rows(), in->vals(), in->nnz, scalar, results, stream); } /** diff --git a/cpp/include/raft/sparse/neighbors/detail/cross_component_nn.cuh b/cpp/include/raft/sparse/neighbors/detail/cross_component_nn.cuh index a47d5a6f34..3fea5cb330 100644 --- a/cpp/include/raft/sparse/neighbors/detail/cross_component_nn.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/cross_component_nn.cuh @@ -448,10 +448,10 @@ void min_components_by_color(raft::sparse::COO& coo, * is done * @param[in] metric distance metric */ -template +template void cross_component_nn( raft::resources const& handle, - raft::sparse::COO& out, + raft::sparse::COO& out, const value_t* X, const value_idx* orig_colors, size_t n_rows, @@ -534,8 +534,14 @@ void cross_component_nn( /** * Symmetrize resulting edge list */ - raft::sparse::linalg::symmetrize( - handle, min_edges.rows(), min_edges.cols(), min_edges.vals(), n_rows, n_rows, size, out); + raft::sparse::linalg::symmetrize(handle, + min_edges.rows(), + min_edges.cols(), + min_edges.vals(), + (value_idx)n_rows, + (value_idx)n_rows, + (nnz_t)size, + out); } }; // end namespace raft::sparse::neighbors::detail diff --git a/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh b/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh index 4e46904c83..287722dfff 100644 --- a/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh @@ -92,13 +92,13 @@ void conv_indices(in_t* inds, out_t* out, size_t size, cudaStream_t stream) * @param[out] out output edge list * @param c */ -template +template void knn_graph(raft::resources const& handle, const value_t* X, size_t m, size_t n, raft::distance::DistanceType metric, - raft::sparse::COO& out, + raft::sparse::COO& out, int c = 15) { size_t k = build_k(m, c); @@ -142,7 +142,7 @@ void knn_graph(raft::resources const& handle, conv_indices(int64_indices.data(), indices.data(), nnz, stream); raft::sparse::linalg::symmetrize( - handle, rows.data(), indices.data(), data.data(), m, k, nnz, out); + handle, rows.data(), indices.data(), data.data(), (value_idx)m, (value_idx)k, (nnz_t)nnz, out); } }; // namespace raft::sparse::neighbors::detail diff --git a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp index bab5b4515a..81abac7312 100644 --- a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp +++ b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp @@ -134,7 +134,7 @@ class vector_t { const thrust_exec_policy_t thrust_policy; }; -template +template struct sparse_matrix_t { sparse_matrix_t(resources const& raft_handle, index_type const* row_offsets, diff --git a/cpp/tests/linalg/eigen_solvers.cu b/cpp/tests/linalg/eigen_solvers.cu index cf75ff89bf..a7eec5561c 100644 --- a/cpp/tests/linalg/eigen_solvers.cu +++ b/cpp/tests/linalg/eigen_solvers.cu @@ -46,7 +46,7 @@ TEST(Raft, EigenSolvers) index_type nnz = 0; index_type nrows = 0; - sparse_matrix_t sm1{h, ro, ci, vs, nrows, nnz}; + sparse_matrix_t sm1{h, ro, ci, vs, nrows, (uint64_t)nnz}; ASSERT_EQ(nullptr, sm1.row_offsets_); index_type neigvs{10}; diff --git a/cpp/tests/sparse/solver/lanczos.cu b/cpp/tests/sparse/solver/lanczos.cu index 1a009bf8dc..3652b811e9 100644 --- a/cpp/tests/sparse/solver/lanczos.cu +++ b/cpp/tests/sparse/solver/lanczos.cu @@ -198,7 +198,7 @@ class rmat_lanczos_tests symmetric_coo.cols(), symmetric_coo.vals(), symmetric_coo.n_rows, - symmetric_coo.nnz}; + (uint64_t)symmetric_coo.nnz}; raft::sparse::solver::lanczos_solver_config config{ n_components, params.maxiter, params.restartiter, params.tol, rng.seed}; diff --git a/cpp/tests/sparse/spectral_matrix.cu b/cpp/tests/sparse/spectral_matrix.cu index 52f7eff10e..c177ec236e 100644 --- a/cpp/tests/sparse/spectral_matrix.cu +++ b/cpp/tests/sparse/spectral_matrix.cu @@ -55,7 +55,7 @@ TEST(Raft, SpectralMatrices) value_type* vs{nullptr}; index_type nnz = 0; index_type nrows = 0; - sparse_matrix_t sm1{h, ro, ci, vs, nrows, nnz}; + sparse_matrix_t sm1{h, ro, ci, vs, nrows, (uint64_t)nnz}; sparse_matrix_t sm2{h, csr_v}; ASSERT_EQ(nullptr, sm1.row_offsets_); ASSERT_EQ(nullptr, sm2.row_offsets_); From 1d10f7228e014b4be87f61fd12a971f0c192608c Mon Sep 17 00:00:00 2001 From: viclafargue Date: Thu, 30 Jan 2025 14:49:43 +0000 Subject: [PATCH 14/25] fixing issues --- cpp/include/raft/sparse/linalg/degree.cuh | 2 +- cpp/include/raft/sparse/linalg/detail/spectral.cuh | 4 ++-- cpp/include/raft/sparse/op/detail/filter.cuh | 6 +++--- cpp/include/raft/sparse/op/filter.cuh | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/include/raft/sparse/linalg/degree.cuh b/cpp/include/raft/sparse/linalg/degree.cuh index 567eb187c1..9ce7321de0 100644 --- a/cpp/include/raft/sparse/linalg/degree.cuh +++ b/cpp/include/raft/sparse/linalg/degree.cuh @@ -36,7 +36,7 @@ namespace linalg { template void coo_degree(const T* rows, nnz_type nnz, outT* results, cudaStream_t stream) { - detail::coo_degree<64, T>(rows, (nnz_type)nnz, results, stream); + detail::coo_degree<64, T>(rows, nnz, results, stream); } /** diff --git a/cpp/include/raft/sparse/linalg/detail/spectral.cuh b/cpp/include/raft/sparse/linalg/detail/spectral.cuh index 103030cbad..87dfa045fe 100644 --- a/cpp/include/raft/sparse/linalg/detail/spectral.cuh +++ b/cpp/include/raft/sparse/linalg/detail/spectral.cuh @@ -49,7 +49,7 @@ void fit_embedding(raft::resources const& handle, rows, cols, vals, - static_cast(nnz), + static_cast(nnz), static_cast(n), src_offsets.data(), dst_cols.data(), @@ -72,7 +72,7 @@ void fit_embedding(raft::resources const& handle, value_type* vs = dst_vals.data(); raft::spectral::matrix::sparse_matrix_t const r_csr_m{ - handle, ro, ci, vs, static_cast(n), nnz}; + handle, ro, ci, vs, static_cast(n), static_cast(nnz)}; index_type neigvs = n_components + 1; index_type maxiter = 4000; // default reset value (when set to 0); diff --git a/cpp/include/raft/sparse/op/detail/filter.cuh b/cpp/include/raft/sparse/op/detail/filter.cuh index e0d072e8bb..c5c095462d 100644 --- a/cpp/include/raft/sparse/op/detail/filter.cuh +++ b/cpp/include/raft/sparse/op/detail/filter.cuh @@ -153,8 +153,8 @@ void coo_remove_scalar(COO* in, rmm::device_uvector row_count_nz(in->n_rows, stream); rmm::device_uvector row_count(in->n_rows, stream); - RAFT_CUDA_TRY(cudaMemsetAsync(row_count_nz.data(), 0, (idx_t)in->n_rows * sizeof(idx_t), stream)); - RAFT_CUDA_TRY(cudaMemsetAsync(row_count.data(), 0, (idx_t)in->n_rows * sizeof(idx_t), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(row_count_nz.data(), 0, (nnz_t)in->n_rows * sizeof(nnz_t), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(row_count.data(), 0, (nnz_t)in->n_rows * sizeof(nnz_t), stream)); linalg::coo_degree(in->rows(), in->nnz, row_count.data(), stream); RAFT_CUDA_TRY(cudaPeekAtLastError()); @@ -164,7 +164,7 @@ void coo_remove_scalar(COO* in, RAFT_CUDA_TRY(cudaPeekAtLastError()); thrust::device_ptr d_row_count_nz = thrust::device_pointer_cast(row_count_nz.data()); - uint64_t out_nnz = + nnz_t out_nnz = thrust::reduce(rmm::exec_policy(stream), d_row_count_nz, d_row_count_nz + in->n_rows); out->allocate(out_nnz, in->n_rows, in->n_cols, false, stream); diff --git a/cpp/include/raft/sparse/op/filter.cuh b/cpp/include/raft/sparse/op/filter.cuh index 5541a375d2..c257585c0e 100644 --- a/cpp/include/raft/sparse/op/filter.cuh +++ b/cpp/include/raft/sparse/op/filter.cuh @@ -46,7 +46,7 @@ template void coo_remove_scalar(const idx_t* rows, const idx_t* cols, const T* vals, - int nnz, + nnz_t nnz, idx_t* crows, idx_t* ccols, T* cvals, From 2d6f2dcab9bee7c66c1b9610a3b0fa0d72bc077a Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 30 Jan 2025 20:59:14 +0000 Subject: [PATCH 15/25] some updates --- cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh | 10 +++++----- cpp/include/raft/sparse/neighbors/knn_graph.cuh | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh b/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh index 4e46904c83..88391a8935 100644 --- a/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh @@ -92,20 +92,20 @@ void conv_indices(in_t* inds, out_t* out, size_t size, cudaStream_t stream) * @param[out] out output edge list * @param c */ -template +template void knn_graph(raft::resources const& handle, const value_t* X, - size_t m, - size_t n, + value_idx m, + value_idx n, raft::distance::DistanceType metric, - raft::sparse::COO& out, + raft::sparse::COO& out, int c = 15) { size_t k = build_k(m, c); auto stream = resource::get_cuda_stream(handle); - size_t nnz = m * k; + nnz_t nnz = m * k; rmm::device_uvector rows(nnz, stream); rmm::device_uvector indices(nnz, stream); diff --git a/cpp/include/raft/sparse/neighbors/knn_graph.cuh b/cpp/include/raft/sparse/neighbors/knn_graph.cuh index 8257afc16f..6f318e7991 100644 --- a/cpp/include/raft/sparse/neighbors/knn_graph.cuh +++ b/cpp/include/raft/sparse/neighbors/knn_graph.cuh @@ -40,13 +40,13 @@ namespace raft::sparse::neighbors { * @param[out] out output edge list * @param c */ -template +template void knn_graph(raft::resources const& handle, const value_t* X, - std::size_t m, - std::size_t n, + value_idx m, + value_idx n, raft::distance::DistanceType metric, - raft::sparse::COO& out, + raft::sparse::COO& out, int c = 15) { detail::knn_graph(handle, X, m, n, metric, out, c); From cbae3155c0ee9d10a26f4eb0724419602e0b198d Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 30 Jan 2025 22:59:53 +0000 Subject: [PATCH 16/25] working through updates for cuvs --- cpp/include/raft/linalg/detail/lanczos.cuh | 16 ++++---- .../sparse/neighbors/detail/knn_graph.cuh | 2 +- .../raft/spectral/detail/matrix_wrappers.hpp | 40 +++++++++---------- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/cpp/include/raft/linalg/detail/lanczos.cuh b/cpp/include/raft/linalg/detail/lanczos.cuh index d4d3325f77..33b9faa27d 100644 --- a/cpp/include/raft/linalg/detail/lanczos.cuh +++ b/cpp/include/raft/linalg/detail/lanczos.cuh @@ -745,10 +745,10 @@ static int lanczosRestart(raft::resources const& handle, * @param seed random seed. * @return error flag. */ -template +template int computeSmallestEigenvectors( raft::resources const& handle, - spectral::matrix::sparse_matrix_t const* A, + spectral::matrix::sparse_matrix_t const* A, index_type_t nEigVecs, index_type_t maxIter, index_type_t restartIter, @@ -986,10 +986,10 @@ int computeSmallestEigenvectors( return 0; } -template +template int computeSmallestEigenvectors( raft::resources const& handle, - spectral::matrix::sparse_matrix_t const& A, + spectral::matrix::sparse_matrix_t const& A, index_type_t nEigVecs, index_type_t maxIter, index_type_t restartIter, @@ -1090,10 +1090,10 @@ int computeSmallestEigenvectors( * @param seed random seed. * @return error flag. */ -template +template int computeLargestEigenvectors( raft::resources const& handle, - spectral::matrix::sparse_matrix_t const* A, + spectral::matrix::sparse_matrix_t const* A, index_type_t nEigVecs, index_type_t maxIter, index_type_t restartIter, @@ -1334,10 +1334,10 @@ int computeLargestEigenvectors( return 0; } -template +template int computeLargestEigenvectors( raft::resources const& handle, - spectral::matrix::sparse_matrix_t const& A, + spectral::matrix::sparse_matrix_t const& A, index_type_t nEigVecs, index_type_t maxIter, index_type_t restartIter, diff --git a/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh b/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh index 435875f813..88391a8935 100644 --- a/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh @@ -142,7 +142,7 @@ void knn_graph(raft::resources const& handle, conv_indices(int64_indices.data(), indices.data(), nnz, stream); raft::sparse::linalg::symmetrize( - handle, rows.data(), indices.data(), data.data(), (value_idx)m, (value_idx)k, (nnz_t)nnz, out); + handle, rows.data(), indices.data(), data.data(), m, k, nnz, out); } }; // namespace raft::sparse::neighbors::detail diff --git a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp index 81abac7312..f7aa507059 100644 --- a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp +++ b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp @@ -314,15 +314,15 @@ struct sparse_matrix_t { nnz_type const nnz_; }; -template -struct laplacian_matrix_t : sparse_matrix_t { +template +struct laplacian_matrix_t : sparse_matrix_t { laplacian_matrix_t(resources const& raft_handle, index_type const* row_offsets, index_type const* col_indices, value_type const* values, index_type const nrows, - index_type const nnz) - : sparse_matrix_t( + nnz_type const nnz) + : sparse_matrix_t( raft_handle, row_offsets, col_indices, values, nrows, nnz), diagonal_(raft_handle, nrows) { @@ -332,8 +332,8 @@ struct laplacian_matrix_t : sparse_matrix_t { } laplacian_matrix_t(resources const& raft_handle, - sparse_matrix_t const& csr_m) - : sparse_matrix_t(raft_handle, + sparse_matrix_t const& csr_m) + : sparse_matrix_t(raft_handle, csr_m.row_offsets_, csr_m.col_indices_, csr_m.values_, @@ -343,7 +343,7 @@ struct laplacian_matrix_t : sparse_matrix_t { { vector_t ones{raft_handle, (size_t)csr_m.nrows_}; ones.fill(1.0); - sparse_matrix_t::mv(1, ones.raw(), 0, diagonal_.raw()); + sparse_matrix_t::mv(1, ones.raw(), 0, diagonal_.raw()); } // y = alpha*A*x + beta*y @@ -357,9 +357,9 @@ struct laplacian_matrix_t : sparse_matrix_t { bool symmetric = false) const override { constexpr int BLOCK_SIZE = 1024; - auto n = sparse_matrix_t::nrows_; + auto n = sparse_matrix_t::nrows_; - auto handle = sparse_matrix_t::get_handle(); + auto handle = sparse_matrix_t::get_handle(); auto cublas_h = resource::get_cublas_handle(handle); auto stream = resource::get_cuda_stream(handle); @@ -382,31 +382,31 @@ struct laplacian_matrix_t : sparse_matrix_t { // Apply adjacency matrix // - sparse_matrix_t::mv(-alpha, x, 1, y, alg, transpose, symmetric); + sparse_matrix_t::mv(-alpha, x, 1, y, alg, transpose, symmetric); } vector_t diagonal_; }; -template -struct modularity_matrix_t : laplacian_matrix_t { +template +struct modularity_matrix_t : laplacian_matrix_t { modularity_matrix_t(resources const& raft_handle, index_type const* row_offsets, index_type const* col_indices, value_type const* values, index_type const nrows, - index_type const nnz) - : laplacian_matrix_t( + nnz_type const nnz) + : laplacian_matrix_t( raft_handle, row_offsets, col_indices, values, nrows, nnz) { - edge_sum_ = laplacian_matrix_t::diagonal_.nrm1(); + edge_sum_ = laplacian_matrix_t::diagonal_.nrm1(); } modularity_matrix_t(resources const& raft_handle, sparse_matrix_t const& csr_m) - : laplacian_matrix_t(raft_handle, csr_m) + : laplacian_matrix_t(raft_handle, csr_m) { - edge_sum_ = laplacian_matrix_t::diagonal_.nrm1(); + edge_sum_ = laplacian_matrix_t::diagonal_.nrm1(); } // y = alpha*A*x + beta*y @@ -427,7 +427,7 @@ struct modularity_matrix_t : laplacian_matrix_t { // y = A*x // - sparse_matrix_t::mv(alpha, x, 0, y, alg, transpose, symmetric); + sparse_matrix_t::mv(alpha, x, 0, y, alg, transpose, symmetric); value_type dot_res; // gamma = d'*x @@ -437,7 +437,7 @@ struct modularity_matrix_t : laplacian_matrix_t { RAFT_CUBLAS_TRY( raft::linalg::detail::cublasdot(cublas_h, n, - laplacian_matrix_t::diagonal_.raw(), + laplacian_matrix_t::diagonal_.raw(), 1, x, 1, @@ -452,7 +452,7 @@ struct modularity_matrix_t : laplacian_matrix_t { raft::linalg::detail::cublasaxpy(cublas_h, n, &gamma_, - laplacian_matrix_t::diagonal_.raw(), + laplacian_matrix_t::diagonal_.raw(), 1, y, 1, From 485d0429264b07908a9d5c1a04670a7933abef96 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 30 Jan 2025 23:02:39 +0000 Subject: [PATCH 17/25] working through updates for cuvs --- .../raft/spectral/detail/matrix_wrappers.hpp | 52 ++++++++++--------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp index f7aa507059..6ac0f9c837 100644 --- a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp +++ b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp @@ -334,11 +334,11 @@ struct laplacian_matrix_t : sparse_matrix_t { laplacian_matrix_t(resources const& raft_handle, sparse_matrix_t const& csr_m) : sparse_matrix_t(raft_handle, - csr_m.row_offsets_, - csr_m.col_indices_, - csr_m.values_, - csr_m.nrows_, - csr_m.nnz_), + csr_m.row_offsets_, + csr_m.col_indices_, + csr_m.values_, + csr_m.nrows_, + csr_m.nnz_), diagonal_(raft_handle, csr_m.nrows_) { vector_t ones{raft_handle, (size_t)csr_m.nrows_}; @@ -382,7 +382,8 @@ struct laplacian_matrix_t : sparse_matrix_t { // Apply adjacency matrix // - sparse_matrix_t::mv(-alpha, x, 1, y, alg, transpose, symmetric); + sparse_matrix_t::mv( + -alpha, x, 1, y, alg, transpose, symmetric); } vector_t diagonal_; @@ -427,36 +428,37 @@ struct modularity_matrix_t : laplacian_matrix_t::mv(alpha, x, 0, y, alg, transpose, symmetric); + sparse_matrix_t::mv( + alpha, x, 0, y, alg, transpose, symmetric); value_type dot_res; // gamma = d'*x // // Cublas::dot(this->n, D.raw(), 1, x, 1, &dot_res); // TODO: Call from public API when ready - RAFT_CUBLAS_TRY( - raft::linalg::detail::cublasdot(cublas_h, - n, - laplacian_matrix_t::diagonal_.raw(), - 1, - x, - 1, - &dot_res, - stream)); + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot( + cublas_h, + n, + laplacian_matrix_t::diagonal_.raw(), + 1, + x, + 1, + &dot_res, + stream)); // y = y -(gamma/edge_sum)*d // value_type gamma_ = -dot_res / edge_sum_; // TODO: Call from public API when ready - RAFT_CUBLAS_TRY( - raft::linalg::detail::cublasaxpy(cublas_h, - n, - &gamma_, - laplacian_matrix_t::diagonal_.raw(), - 1, - y, - 1, - stream)); + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasaxpy( + cublas_h, + n, + &gamma_, + laplacian_matrix_t::diagonal_.raw(), + 1, + y, + 1, + stream)); } value_type edge_sum_; From d377a2cff30ad80180d54535d49894ccc6d9eced Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 31 Jan 2025 00:12:49 +0000 Subject: [PATCH 18/25] lanczos tests updates --- .../raft/sparse/solver/detail/lanczos.cuh | 16 +++++++------- cpp/include/raft/sparse/solver/lanczos.cuh | 8 +++---- cpp/tests/sparse/spectral_matrix.cu | 21 ++++++++++++------- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index c9749624e1..3d1c52427c 100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh @@ -789,10 +789,10 @@ static int lanczosRestart(raft::resources const& handle, * @param seed random seed. * @return error flag. */ -template +template int computeSmallestEigenvectors( raft::resources const& handle, - spectral::matrix::sparse_matrix_t const* A, + spectral::matrix::sparse_matrix_t const* A, index_type_t nEigVecs, index_type_t maxIter, index_type_t restartIter, @@ -1034,10 +1034,10 @@ int computeSmallestEigenvectors( return 0; } -template +template int computeSmallestEigenvectors( raft::resources const& handle, - spectral::matrix::sparse_matrix_t const& A, + spectral::matrix::sparse_matrix_t const& A, index_type_t nEigVecs, index_type_t maxIter, index_type_t restartIter, @@ -1137,10 +1137,10 @@ int computeSmallestEigenvectors( * @param seed random seed. * @return error flag. */ -template +template int computeLargestEigenvectors( raft::resources const& handle, - spectral::matrix::sparse_matrix_t const* A, + spectral::matrix::sparse_matrix_t const* A, index_type_t nEigVecs, index_type_t maxIter, index_type_t restartIter, @@ -1385,10 +1385,10 @@ int computeLargestEigenvectors( return 0; } -template +template int computeLargestEigenvectors( raft::resources const& handle, - spectral::matrix::sparse_matrix_t const& A, + spectral::matrix::sparse_matrix_t const& A, index_type_t nEigVecs, index_type_t maxIter, index_type_t restartIter, diff --git a/cpp/include/raft/sparse/solver/lanczos.cuh b/cpp/include/raft/sparse/solver/lanczos.cuh index 4c45a28cc6..0617dc71da 100644 --- a/cpp/include/raft/sparse/solver/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/lanczos.cuh @@ -137,10 +137,10 @@ auto lanczos_compute_smallest_eigenvectors( * @param seed random seed. * @return error flag. */ -template +template int computeSmallestEigenvectors( raft::resources const& handle, - raft::spectral::matrix::sparse_matrix_t const& A, + raft::spectral::matrix::sparse_matrix_t const& A, index_type_t nEigVecs, index_type_t maxIter, index_type_t restartIter, @@ -201,10 +201,10 @@ int computeSmallestEigenvectors( * @param seed random seed. * @return error flag. */ -template +template int computeLargestEigenvectors( raft::resources const& handle, - raft::spectral::matrix::sparse_matrix_t const& A, + raft::spectral::matrix::sparse_matrix_t const& A, index_type_t nEigVecs, index_type_t maxIter, index_type_t restartIter, diff --git a/cpp/tests/sparse/spectral_matrix.cu b/cpp/tests/sparse/spectral_matrix.cu index c177ec236e..beb93bb425 100644 --- a/cpp/tests/sparse/spectral_matrix.cu +++ b/cpp/tests/sparse/spectral_matrix.cu @@ -41,8 +41,9 @@ TEST(Raft, SpectralMatrices) { using index_type = int; using value_type = double; + using nnz_type = uint64_t - raft::resources h; + raft::resources h; ASSERT_EQ(0, raft::resource::get_device_id(h)); csr_view_t csr_v{nullptr, nullptr, nullptr, 0, 0}; @@ -53,29 +54,33 @@ TEST(Raft, SpectralMatrices) index_type* ro{nullptr}; index_type* ci{nullptr}; value_type* vs{nullptr}; - index_type nnz = 0; + nnz_type nnz = 0; index_type nrows = 0; - sparse_matrix_t sm1{h, ro, ci, vs, nrows, (uint64_t)nnz}; - sparse_matrix_t sm2{h, csr_v}; + sparse_matrix_t sm1{h, ro, ci, vs, nrows, nnz}; + sparse_matrix_t sm2{h, csr_v}; ASSERT_EQ(nullptr, sm1.row_offsets_); ASSERT_EQ(nullptr, sm2.row_offsets_); auto stream = resource::get_cuda_stream(h); auto cnstr_lm1 = [&h, ro, ci, vs, nrows, nnz](void) { - laplacian_matrix_t lm1{h, ro, ci, vs, nrows, nnz}; + laplacian_matrix_t lm1{h, ro, ci, vs, nrows, nnz}; }; EXPECT_ANY_THROW(cnstr_lm1()); // because of nullptr ptr args - auto cnstr_lm2 = [&h, &sm2](void) { laplacian_matrix_t lm2{h, sm2}; }; + auto cnstr_lm2 = [&h, &sm2](void) { + laplacian_matrix_t lm2{h, sm2}; + }; EXPECT_ANY_THROW(cnstr_lm2()); // because of nullptr ptr args auto cnstr_mm1 = [&h, ro, ci, vs, nrows, nnz](void) { - modularity_matrix_t mm1{h, ro, ci, vs, nrows, nnz}; + modularity_matrix_t mm1{h, ro, ci, vs, nrows, nnz}; }; EXPECT_ANY_THROW(cnstr_mm1()); // because of nullptr ptr args - auto cnstr_mm2 = [&h, &sm2](void) { modularity_matrix_t mm2{h, sm2}; }; + auto cnstr_mm2 = [&h, &sm2](void) { + modularity_matrix_t mm2{h, sm2}; + }; EXPECT_ANY_THROW(cnstr_mm2()); // because of nullptr ptr args } From 46db162c227c5b1ef525d36263b98e826cdcf339 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 31 Jan 2025 00:16:50 +0000 Subject: [PATCH 19/25] missing ; --- cpp/tests/sparse/spectral_matrix.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/tests/sparse/spectral_matrix.cu b/cpp/tests/sparse/spectral_matrix.cu index beb93bb425..6d52dfb1bb 100644 --- a/cpp/tests/sparse/spectral_matrix.cu +++ b/cpp/tests/sparse/spectral_matrix.cu @@ -41,9 +41,9 @@ TEST(Raft, SpectralMatrices) { using index_type = int; using value_type = double; - using nnz_type = uint64_t + using nnz_type = uint64_t; - raft::resources h; + raft::resources h; ASSERT_EQ(0, raft::resource::get_device_id(h)); csr_view_t csr_v{nullptr, nullptr, nullptr, 0, 0}; From dd39780ca0a49617def0d29138ab325ee986103d Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 31 Jan 2025 16:19:44 -0800 Subject: [PATCH 20/25] tons of updates to lanczos/eigen --- .../sparse/neighbors/detail/knn_graph.cuh | 2 +- .../raft/sparse/solver/detail/lanczos.cuh | 135 +++++++++--------- .../raft/spectral/detail/matrix_wrappers.hpp | 6 +- .../raft/spectral/detail/partition.hpp | 16 ++- .../raft/spectral/detail/spectral_util.cuh | 21 +-- cpp/include/raft/spectral/eigen_solvers.cuh | 4 +- cpp/include/raft/spectral/partition.cuh | 16 ++- cpp/tests/linalg/eigen_solvers.cu | 10 +- 8 files changed, 111 insertions(+), 99 deletions(-) diff --git a/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh b/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh index 88391a8935..ba007c6bb1 100644 --- a/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh @@ -142,7 +142,7 @@ void knn_graph(raft::resources const& handle, conv_indices(int64_indices.data(), indices.data(), nnz, stream); raft::sparse::linalg::symmetrize( - handle, rows.data(), indices.data(), data.data(), m, k, nnz, out); + handle, rows.data(), indices.data(), data.data(), m, static_cast(k), nnz, out); } }; // namespace raft::sparse::neighbors::detail diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index 3d1c52427c..1088211276 100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh @@ -123,18 +123,19 @@ inline curandStatus_t curandGenerateNormalX( * Workspace. Not needed if full reorthogonalization is disabled. * @return Zero if successful. Otherwise non-zero. */ -template -int performLanczosIteration(raft::resources const& handle, - spectral::matrix::sparse_matrix_t const* A, - index_type_t* iter, - index_type_t maxIter, - value_type_t shift, - value_type_t tol, - bool reorthogonalize, - value_type_t* __restrict__ alpha_host, - value_type_t* __restrict__ beta_host, - value_type_t* __restrict__ lanczosVecs_dev, - value_type_t* __restrict__ work_dev) +template +int performLanczosIteration( + raft::resources const& handle, + spectral::matrix::sparse_matrix_t const* A, + index_type_t* iter, + index_type_t maxIter, + value_type_t shift, + value_type_t tol, + bool reorthogonalize, + value_type_t* __restrict__ alpha_host, + value_type_t* __restrict__ beta_host, + value_type_t* __restrict__ lanczosVecs_dev, + value_type_t* __restrict__ work_dev) { // ------------------------------------------------------- // Variable declaration @@ -888,17 +889,17 @@ int computeSmallestEigenvectors( // Obtain tridiagonal matrix with Lanczos *effIter = 0; *shift = 0; - status = performLanczosIteration(handle, - A, - effIter, - maxIter_curr, - *shift, - 0.0, - reorthogonalize, - alpha_host, - beta_host, - lanczosVecs_dev, - work_dev); + status = performLanczosIteration(handle, + A, + effIter, + maxIter_curr, + *shift, + 0.0, + reorthogonalize, + alpha_host, + beta_host, + lanczosVecs_dev, + work_dev); if (status) WARNING("error in Lanczos iteration"); // Determine largest eigenvalue @@ -913,17 +914,17 @@ int computeSmallestEigenvectors( // Obtain tridiagonal matrix with Lanczos *effIter = 0; - status = performLanczosIteration(handle, - A, - effIter, - maxIter_curr, - *shift, - 0, - reorthogonalize, - alpha_host, - beta_host, - lanczosVecs_dev, - work_dev); + status = performLanczosIteration(handle, + A, + effIter, + maxIter_curr, + *shift, + 0, + reorthogonalize, + alpha_host, + beta_host, + lanczosVecs_dev, + work_dev); if (status) WARNING("error in Lanczos iteration"); *totalIter += *effIter; @@ -961,17 +962,17 @@ int computeSmallestEigenvectors( // Proceed with Lanczos method - status = performLanczosIteration(handle, - A, - effIter, - maxIter_curr, - *shift, - tol * fabs(shiftLower), - reorthogonalize, - alpha_host, - beta_host, - lanczosVecs_dev, - work_dev); + status = performLanczosIteration(handle, + A, + effIter, + maxIter_curr, + *shift, + tol * fabs(shiftLower), + reorthogonalize, + alpha_host, + beta_host, + lanczosVecs_dev, + work_dev); if (status) WARNING("error in Lanczos iteration"); *totalIter += *effIter - iter_new; } @@ -1236,17 +1237,17 @@ int computeLargestEigenvectors( value_type_t shift_val = 0.0; value_type_t* shift = &shift_val; - status = performLanczosIteration(handle, - A, - effIter, - maxIter_curr, - *shift, - 0, - reorthogonalize, - alpha_host, - beta_host, - lanczosVecs_dev, - work_dev); + status = performLanczosIteration(handle, + A, + effIter, + maxIter_curr, + *shift, + 0, + reorthogonalize, + alpha_host, + beta_host, + lanczosVecs_dev, + work_dev); if (status) WARNING("error in Lanczos iteration"); *totalIter += *effIter; @@ -1284,17 +1285,17 @@ int computeLargestEigenvectors( // Proceed with Lanczos method - status = performLanczosIteration(handle, - A, - effIter, - maxIter_curr, - *shift, - tol * fabs(shiftLower), - reorthogonalize, - alpha_host, - beta_host, - lanczosVecs_dev, - work_dev); + status = performLanczosIteration(handle, + A, + effIter, + maxIter_curr, + *shift, + tol * fabs(shiftLower), + reorthogonalize, + alpha_host, + beta_host, + lanczosVecs_dev, + work_dev); if (status) WARNING("error in Lanczos iteration"); *totalIter += *effIter - iter_new; } diff --git a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp index 6ac0f9c837..af4a838bd5 100644 --- a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp +++ b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp @@ -404,7 +404,7 @@ struct modularity_matrix_t : laplacian_matrix_t const& csr_m) + sparse_matrix_t const& csr_m) : laplacian_matrix_t(raft_handle, csr_m) { edge_sum_ = laplacian_matrix_t::diagonal_.nrm1(); @@ -420,9 +420,9 @@ struct modularity_matrix_t : laplacian_matrix_t::nrows_; + auto n = sparse_matrix_t::nrows_; - auto handle = sparse_matrix_t::get_handle(); + auto handle = sparse_matrix_t::get_handle(); auto cublas_h = resource::get_cublas_handle(handle); auto stream = resource::get_cuda_stream(handle); diff --git a/cpp/include/raft/spectral/detail/partition.hpp b/cpp/include/raft/spectral/detail/partition.hpp index f5fc40aad6..26e4a73f9d 100644 --- a/cpp/include/raft/spectral/detail/partition.hpp +++ b/cpp/include/raft/spectral/detail/partition.hpp @@ -63,10 +63,14 @@ namespace detail { * performed. * @return statistics: number of eigensolver iterations, . */ -template +template std::tuple partition( raft::resources const& handle, - spectral::matrix::sparse_matrix_t const& csr_m, + spectral::matrix::sparse_matrix_t const& csr_m, EigenSolver const& eigen_solver, ClusterSolver const& cluster_solver, vertex_t* __restrict__ clusters, @@ -94,7 +98,7 @@ std::tuple partition( // Initialize Laplacian /// sparse_matrix_t A{handle, graph}; - spectral::matrix::laplacian_matrix_t L{handle, csr_m}; + spectral::matrix::laplacian_matrix_t L{handle, csr_m}; auto eigen_config = eigen_solver.get_config(); auto nEigVecs = eigen_config.n_eigVecs; @@ -132,9 +136,9 @@ std::tuple partition( * @param cost On exit, partition cost function. * @return error flag. */ -template +template void analyzePartition(raft::resources const& handle, - spectral::matrix::sparse_matrix_t const& csr_m, + spectral::matrix::sparse_matrix_t const& csr_m, vertex_t nClusters, const vertex_t* __restrict__ clusters, weight_t& edgeCut, @@ -160,7 +164,7 @@ void analyzePartition(raft::resources const& handle, // Initialize Laplacian /// sparse_matrix_t A{handle, graph}; - spectral::matrix::laplacian_matrix_t L{handle, csr_m}; + spectral::matrix::laplacian_matrix_t L{handle, csr_m}; // Initialize output cost = 0; diff --git a/cpp/include/raft/spectral/detail/spectral_util.cuh b/cpp/include/raft/spectral/detail/spectral_util.cuh index 002fad9680..9bbc8878fe 100644 --- a/cpp/include/raft/spectral/detail/spectral_util.cuh +++ b/cpp/include/raft/spectral/detail/spectral_util.cuh @@ -133,16 +133,17 @@ struct equal_to_i_op { // Construct indicator vector for ith partition // -template -bool construct_indicator(raft::resources const& handle, - edge_t index, - edge_t n, - weight_t& clustersize, - weight_t& partStats, - vertex_t const* __restrict__ clusters, - raft::spectral::matrix::vector_t& part_i, - raft::spectral::matrix::vector_t& Bx, - raft::spectral::matrix::laplacian_matrix_t const& B) +template +bool construct_indicator( + raft::resources const& handle, + edge_t index, + edge_t n, + weight_t& clustersize, + weight_t& partStats, + vertex_t const* __restrict__ clusters, + raft::spectral::matrix::vector_t& part_i, + raft::spectral::matrix::vector_t& Bx, + raft::spectral::matrix::laplacian_matrix_t const& B) { auto stream = resource::get_cuda_stream(handle); auto cublas_h = resource::get_cublas_handle(handle); diff --git a/cpp/include/raft/spectral/eigen_solvers.cuh b/cpp/include/raft/spectral/eigen_solvers.cuh index 324f16ac7b..03448e2b5e 100644 --- a/cpp/include/raft/spectral/eigen_solvers.cuh +++ b/cpp/include/raft/spectral/eigen_solvers.cuh @@ -51,7 +51,7 @@ struct lanczos_solver_t { index_type_t solve_smallest_eigenvectors( raft::resources const& handle, - matrix::sparse_matrix_t const& A, + matrix::sparse_matrix_t const& A, value_type_t* __restrict__ eigVals, value_type_t* __restrict__ eigVecs) const { @@ -75,7 +75,7 @@ struct lanczos_solver_t { index_type_t solve_largest_eigenvectors( raft::resources const& handle, - matrix::sparse_matrix_t const& A, + matrix::sparse_matrix_t const& A, value_type_t* __restrict__ eigVals, value_type_t* __restrict__ eigVecs) const { diff --git a/cpp/include/raft/spectral/partition.cuh b/cpp/include/raft/spectral/partition.cuh index a2ac328aa1..319ef0ccd1 100644 --- a/cpp/include/raft/spectral/partition.cuh +++ b/cpp/include/raft/spectral/partition.cuh @@ -45,17 +45,21 @@ namespace spectral { * @param eigVecs Output eigenvector array pointer on device * @return statistics: number of eigensolver iterations, . */ -template +template std::tuple partition( raft::resources const& handle, - matrix::sparse_matrix_t const& csr_m, + matrix::sparse_matrix_t const& csr_m, EigenSolver const& eigen_solver, ClusterSolver const& cluster_solver, vertex_t* __restrict__ clusters, weight_t* eigVals, weight_t* eigVecs) { - return raft::spectral::detail::partition( + return raft::spectral::detail::partition( handle, csr_m, eigen_solver, cluster_solver, clusters, eigVals, eigVecs); } @@ -77,15 +81,15 @@ std::tuple partition( * @param edgeCut On exit, weight of edges cut by partition. * @param cost On exit, partition cost function. */ -template +template void analyzePartition(raft::resources const& handle, - matrix::sparse_matrix_t const& csr_m, + matrix::sparse_matrix_t const& csr_m, vertex_t nClusters, const vertex_t* __restrict__ clusters, weight_t& edgeCut, weight_t& cost) { - raft::spectral::detail::analyzePartition( + raft::spectral::detail::analyzePartition( handle, csr_m, nClusters, clusters, edgeCut, cost); } diff --git a/cpp/tests/linalg/eigen_solvers.cu b/cpp/tests/linalg/eigen_solvers.cu index a7eec5561c..250deb6f33 100644 --- a/cpp/tests/linalg/eigen_solvers.cu +++ b/cpp/tests/linalg/eigen_solvers.cu @@ -36,6 +36,7 @@ TEST(Raft, EigenSolvers) using namespace matrix; using index_type = int; using value_type = double; + using nnz_type = int; raft::resources h; ASSERT_EQ(0, resource::get_device_id(h)); @@ -46,7 +47,7 @@ TEST(Raft, EigenSolvers) index_type nnz = 0; index_type nrows = 0; - sparse_matrix_t sm1{h, ro, ci, vs, nrows, (uint64_t)nnz}; + sparse_matrix_t sm1{h, ro, ci, vs, nrows, nnz}; ASSERT_EQ(nullptr, sm1.row_offsets_); index_type neigvs{10}; @@ -64,7 +65,7 @@ TEST(Raft, EigenSolvers) eigen_solver_config_t cfg{ neigvs, maxiter, restart_iter, tol, reorthog, seed}; - lanczos_solver_t eig_solver{cfg}; + lanczos_solver_t eig_solver{cfg}; EXPECT_ANY_THROW(eig_solver.solve_smallest_eigenvectors(h, sm1, eigvals, eigvecs)); @@ -77,6 +78,7 @@ TEST(Raft, SpectralSolvers) using namespace matrix; using index_type = int; using value_type = double; + using nnz_type = int; raft::resources h; ASSERT_EQ(0, resource::get_device_id(h) @@ -99,14 +101,14 @@ TEST(Raft, SpectralSolvers) eigen_solver_config_t eig_cfg{ neigvs, maxiter, restart_iter, tol, reorthog, seed}; - lanczos_solver_t eig_solver{eig_cfg}; + lanczos_solver_t eig_solver{eig_cfg}; index_type k{5}; cluster_solver_config_t clust_cfg{k, maxiter, tol, seed}; kmeans_solver_t cluster_solver{clust_cfg}; - sparse_matrix_t sm{h, nullptr, nullptr, nullptr, 0, 0}; + sparse_matrix_t sm{h, nullptr, nullptr, nullptr, 0, 0}; EXPECT_ANY_THROW( spectral::partition(h, sm, eig_solver, cluster_solver, clusters, eigvals, eigvecs)); From f17e921bf038328adb8602bf3e790d05cd115ce9 Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 3 Feb 2025 20:06:48 +0000 Subject: [PATCH 21/25] some fixes for cuml --- .../raft/cluster/detail/connectivities.cuh | 20 +++++++++---------- cpp/include/raft/cluster/single_linkage.cuh | 8 ++++---- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cpp/include/raft/cluster/detail/connectivities.cuh b/cpp/include/raft/cluster/detail/connectivities.cuh index c527b754c3..86bae07711 100644 --- a/cpp/include/raft/cluster/detail/connectivities.cuh +++ b/cpp/include/raft/cluster/detail/connectivities.cuh @@ -43,8 +43,8 @@ template & indptr, rmm::device_uvector& indices, @@ -61,8 +61,8 @@ template struct distance_graph_impl { void run(raft::resources const& handle, const value_t* X, - size_t m, - size_t n, + value_idx m, + value_idx n, raft::distance::DistanceType metric, rmm::device_uvector& indptr, rmm::device_uvector& indices, @@ -130,8 +130,8 @@ RAFT_KERNEL fill_indices2(value_idx* indices, size_t m, size_t nnz) template void pairwise_distances(const raft::resources& handle, const value_t* X, - size_t m, - size_t n, + value_idx m, + value_idx n, raft::distance::DistanceType metric, value_idx* indptr, value_idx* indices, @@ -178,8 +178,8 @@ template struct distance_graph_impl { void run(const raft::resources& handle, const value_t* X, - size_t m, - size_t n, + value_idx m, + value_idx n, raft::distance::DistanceType metric, rmm::device_uvector& indptr, rmm::device_uvector& indices, @@ -216,8 +216,8 @@ struct distance_graph_impl void get_distance_graph(raft::resources const& handle, const value_t* X, - size_t m, - size_t n, + value_idx m, + value_idx n, raft::distance::DistanceType metric, rmm::device_uvector& indptr, rmm::device_uvector& indices, diff --git a/cpp/include/raft/cluster/single_linkage.cuh b/cpp/include/raft/cluster/single_linkage.cuh index 067445c542..de56386b96 100644 --- a/cpp/include/raft/cluster/single_linkage.cuh +++ b/cpp/include/raft/cluster/single_linkage.cuh @@ -52,8 +52,8 @@ template [[deprecated("Use cuVS instead")]] void single_linkage(raft::resources const& handle, const value_t* X, - size_t m, - size_t n, + value_idx m, + value_idx n, raft::distance::DistanceType metric, linkage_output* out, int c, @@ -103,8 +103,8 @@ template ( handle, X.data_handle(), - static_cast(X.extent(0)), - static_cast(X.extent(1)), + X.extent(0), + X.extent(1), metric, &out_arrs, c.has_value() ? c.value() : DEFAULT_CONST_C, From 5b30389da28d01d995b7281315076cb98869b531 Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 3 Feb 2025 22:12:33 +0000 Subject: [PATCH 22/25] fixes for cuml --- cpp/include/raft/sparse/linalg/detail/spectral.cuh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/include/raft/sparse/linalg/detail/spectral.cuh b/cpp/include/raft/sparse/linalg/detail/spectral.cuh index 87dfa045fe..a2d50f9a77 100644 --- a/cpp/include/raft/sparse/linalg/detail/spectral.cuh +++ b/cpp/include/raft/sparse/linalg/detail/spectral.cuh @@ -71,7 +71,7 @@ void fit_embedding(raft::resources const& handle, index_type* ci = dst_cols.data(); value_type* vs = dst_vals.data(); - raft::spectral::matrix::sparse_matrix_t const r_csr_m{ + raft::spectral::matrix::sparse_matrix_t const r_csr_m{ handle, ro, ci, vs, static_cast(n), static_cast(nnz)}; index_type neigvs = n_components + 1; @@ -79,12 +79,12 @@ void fit_embedding(raft::resources const& handle, value_type tol = 0.01; index_type restart_iter = 15 + neigvs; // what cugraph is using - raft::spectral::eigen_solver_config_t cfg{ + raft::spectral::eigen_solver_config_t cfg{ neigvs, maxiter, restart_iter, tol}; cfg.seed = seed; - raft::spectral::lanczos_solver_t eig_solver{cfg}; + raft::spectral::lanczos_solver_t eig_solver{cfg}; // cluster computation here is irrelevant, // hence define a no-op such solver to From a444f900e06e24b5a2aa723be7b95a096723908b Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 3 Feb 2025 23:43:54 +0000 Subject: [PATCH 23/25] more cuml fixes --- cpp/include/raft/sparse/linalg/detail/spectral.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/raft/sparse/linalg/detail/spectral.cuh b/cpp/include/raft/sparse/linalg/detail/spectral.cuh index a2d50f9a77..7b8cb545cf 100644 --- a/cpp/include/raft/sparse/linalg/detail/spectral.cuh +++ b/cpp/include/raft/sparse/linalg/detail/spectral.cuh @@ -72,7 +72,7 @@ void fit_embedding(raft::resources const& handle, value_type* vs = dst_vals.data(); raft::spectral::matrix::sparse_matrix_t const r_csr_m{ - handle, ro, ci, vs, static_cast(n), static_cast(nnz)}; + handle, ro, ci, vs, static_cast(n), nnz}; index_type neigvs = n_components + 1; index_type maxiter = 4000; // default reset value (when set to 0); From 0ad7bacf4a8f4f57e06ee8061be08fce32f9673e Mon Sep 17 00:00:00 2001 From: viclafargue Date: Tue, 11 Feb 2025 12:17:37 +0000 Subject: [PATCH 24/25] Answering review --- cpp/include/raft/linalg/detail/lanczos.cuh | 2 +- cpp/include/raft/sparse/linalg/degree.cuh | 2 +- cpp/include/raft/sparse/linalg/detail/degree.cuh | 12 ++++++------ cpp/include/raft/sparse/linalg/detail/symmetrize.cuh | 10 +++++----- cpp/include/raft/sparse/op/detail/filter.cuh | 8 ++++---- cpp/include/raft/sparse/op/detail/sort.h | 2 +- cpp/include/raft/sparse/solver/detail/lanczos.cuh | 10 +++++----- 7 files changed, 23 insertions(+), 23 deletions(-) diff --git a/cpp/include/raft/linalg/detail/lanczos.cuh b/cpp/include/raft/linalg/detail/lanczos.cuh index 33b9faa27d..06c3cb1357 100644 --- a/cpp/include/raft/linalg/detail/lanczos.cuh +++ b/cpp/include/raft/linalg/detail/lanczos.cuh @@ -1004,7 +1004,7 @@ int computeSmallestEigenvectors( index_type_t n = A.nrows_; // Check that parameters are valid - RAFT_EXPECTS(nEigVecs > 0 && static_cast(nEigVecs) <= n, + RAFT_EXPECTS(nEigVecs > 0 && static_cast(nEigVecs) <= n, "Invalid number of eigenvectors."); RAFT_EXPECTS(restartIter > 0, "Invalid restartIter."); RAFT_EXPECTS(tol > 0, "Invalid tolerance."); diff --git a/cpp/include/raft/sparse/linalg/degree.cuh b/cpp/include/raft/sparse/linalg/degree.cuh index 9ce7321de0..dde811ee2d 100644 --- a/cpp/include/raft/sparse/linalg/degree.cuh +++ b/cpp/include/raft/sparse/linalg/degree.cuh @@ -68,7 +68,7 @@ template void coo_degree_scalar( const int* rows, const T* vals, nnz_type nnz, T scalar, outT* results, cudaStream_t stream = 0) { - detail::coo_degree_scalar<64>(rows, vals, (uint64_t)nnz, scalar, results, stream); + detail::coo_degree_scalar<64>(rows, vals, nnz, scalar, results, stream); } /** diff --git a/cpp/include/raft/sparse/linalg/detail/degree.cuh b/cpp/include/raft/sparse/linalg/detail/degree.cuh index 6a83ddae42..d51188c54c 100644 --- a/cpp/include/raft/sparse/linalg/detail/degree.cuh +++ b/cpp/include/raft/sparse/linalg/detail/degree.cuh @@ -39,10 +39,10 @@ namespace detail { * @param nnz the size of the rows array * @param results array to place results */ -template +template RAFT_KERNEL coo_degree_kernel(const T* rows, nnz_t nnz, outT* results) { - uint64_t row = (blockIdx.x * TPB_X) + threadIdx.x; + nnz_t row = (blockIdx.x * static_cast(TPB_X)) + threadIdx.x; if (row < nnz) { atomicAdd(results + rows[row], (outT)1); } } @@ -71,11 +71,11 @@ RAFT_KERNEL coo_degree_nz_kernel(const int* rows, const T* vals, nnz_t nnz, int* if (row < nnz && vals[row] != 0.0) { raft::myAtomicAdd(results + rows[row], 1); } } -template +template RAFT_KERNEL coo_degree_scalar_kernel( const int* rows, const T* vals, nnz_t nnz, T scalar, outT* results) { - uint64_t row = (blockIdx.x * TPB_X) + threadIdx.x; + nnz_t row = (blockIdx.x * static_cast(TPB_X)) + threadIdx.x; if (row < nnz && vals[row] != scalar) { raft::myAtomicAdd((outT*)results + rows[row], (outT)1); } } @@ -90,11 +90,11 @@ RAFT_KERNEL coo_degree_scalar_kernel( * @param results: output row counts * @param stream: cuda stream to use */ -template +template void coo_degree_scalar( const int* rows, const T* vals, nnz_t nnz, T scalar, outT* results, cudaStream_t stream = 0) { - dim3 grid_rc(raft::ceildiv(nnz, TPB_X), 1, 1); + dim3 grid_rc(raft::ceildiv(nnz, static_cast(TPB_X)), 1, 1); dim3 blk_rc(TPB_X, 1, 1); coo_degree_scalar_kernel <<>>(rows, vals, nnz, scalar, results); diff --git a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh index 3b76f0cc4f..686c601e98 100644 --- a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh +++ b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh @@ -104,7 +104,7 @@ RAFT_KERNEL coo_symmetrize_kernel(nnz_t* row_ind, // Note that if we did find a match, we don't need to // compute `res` on it here because it will be computed // in a different thread. - if (!found_match && cur_val != 0.0) { + if (!found_match && vals[idx] != 0.0) { orows[out_start_idx + row_nnz] = cur_col; ocols[out_start_idx + row_nnz] = cur_row; ovals[out_start_idx + row_nnz] = res; @@ -131,9 +131,9 @@ RAFT_KERNEL coo_symmetrize_kernel(nnz_t* row_ind, * @param reduction_op: a custom reduction function * @param stream: cuda stream to use */ -template -void coo_symmetrize(COO* in, - COO* out, +template +void coo_symmetrize(COO* in, + COO* out, Lambda reduction_op, // two-argument reducer cudaStream_t stream) { @@ -142,7 +142,7 @@ void coo_symmetrize(COO* in, ASSERT(!out->validate_mem(), "Expecting unallocated COO for output"); - rmm::device_uvector in_row_ind(in->n_rows, stream); + rmm::device_uvector in_row_ind(in->n_rows, stream); convert::sorted_coo_to_csr(in, in_row_ind.data(), stream); diff --git a/cpp/include/raft/sparse/op/detail/filter.cuh b/cpp/include/raft/sparse/op/detail/filter.cuh index c5c095462d..dede0747a7 100644 --- a/cpp/include/raft/sparse/op/detail/filter.cuh +++ b/cpp/include/raft/sparse/op/detail/filter.cuh @@ -104,10 +104,10 @@ void coo_remove_scalar(const idx_t* rows, idx_t n, cudaStream_t stream) { - rmm::device_uvector ex_scan(n, stream); - rmm::device_uvector cur_ex_scan(n, stream); - RAFT_CUDA_TRY(cudaMemsetAsync(ex_scan.data(), 0, (nnz_t)n * sizeof(uint64_t), stream)); - RAFT_CUDA_TRY(cudaMemsetAsync(cur_ex_scan.data(), 0, (nnz_t)n * sizeof(uint64_t), stream)); + rmm::device_uvector ex_scan(n, stream); + rmm::device_uvector cur_ex_scan(n, stream); + RAFT_CUDA_TRY(cudaMemsetAsync(ex_scan.data(), 0, (nnz_t)n * sizeof(nnz_t), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(cur_ex_scan.data(), 0, (nnz_t)n * sizeof(nnz_t), stream)); thrust::device_ptr dev_cnnz = thrust::device_pointer_cast(cnnz); thrust::device_ptr dev_ex_scan = thrust::device_pointer_cast(ex_scan.data()); diff --git a/cpp/include/raft/sparse/op/detail/sort.h b/cpp/include/raft/sparse/op/detail/sort.h index 8bd848bedb..2c5337bf0e 100644 --- a/cpp/include/raft/sparse/op/detail/sort.h +++ b/cpp/include/raft/sparse/op/detail/sort.h @@ -83,7 +83,7 @@ void coo_sort(IdxT m, IdxT n, nnz_t nnz, IdxT* rows, IdxT* cols, T* vals, cudaSt * @param in: COO to sort by row * @param stream: the cuda stream to use */ -template +template void coo_sort(COO* const in, cudaStream_t stream) { coo_sort( diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh index 1088211276..71274333e8 100644 --- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh +++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh @@ -152,7 +152,7 @@ int performLanczosIteration( RAFT_EXPECTS(A != nullptr, "Null matrix pointer."); - uint64_t n = A->nrows_; + nnz_type_t n = A->nrows_; // ------------------------------------------------------- // Compute second Lanczos vector @@ -815,7 +815,7 @@ int computeSmallestEigenvectors( constexpr value_type_t zero = 0; // Matrix dimension - uint64_t n = A->nrows_; + nnz_type_t n = A->nrows_; // Shift for implicit restart value_type_t shiftUpper; @@ -837,7 +837,7 @@ int computeSmallestEigenvectors( // ------------------------------------------------------- // Check that parameters are valid // ------------------------------------------------------- - RAFT_EXPECTS(nEigVecs > 0 && static_cast(nEigVecs) <= n, + RAFT_EXPECTS(nEigVecs > 0 && static_cast(nEigVecs) <= n, "Invalid number of eigenvectors."); RAFT_EXPECTS(restartIter > 0, "Invalid restartIter."); RAFT_EXPECTS(tol > 0, "Invalid tolerance."); @@ -1162,7 +1162,7 @@ int computeLargestEigenvectors( constexpr value_type_t zero = 0; // Matrix dimension - uint64_t n = A->nrows_; + nnz_type_t n = A->nrows_; // Lanczos iteration counters index_type_t maxIter_curr = restartIter; // Maximum size of Lanczos system @@ -1185,7 +1185,7 @@ int computeLargestEigenvectors( // ------------------------------------------------------- // Check that parameters are valid // ------------------------------------------------------- - RAFT_EXPECTS(nEigVecs > 0 && static_cast(nEigVecs) <= n, + RAFT_EXPECTS(nEigVecs > 0 && static_cast(nEigVecs) <= n, "Invalid number of eigenvectors."); RAFT_EXPECTS(restartIter > 0, "Invalid restartIter."); RAFT_EXPECTS(tol > 0, "Invalid tolerance."); From f6f7908084819cc1310f3f7cb4d53793778e9f18 Mon Sep 17 00:00:00 2001 From: viclafargue Date: Tue, 11 Feb 2025 12:39:45 +0000 Subject: [PATCH 25/25] Additional changes --- cpp/include/raft/sparse/convert/detail/csr.cuh | 7 ++++--- cpp/include/raft/sparse/linalg/detail/symmetrize.cuh | 2 +- cpp/include/raft/sparse/op/detail/filter.cuh | 11 +++++++---- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/cpp/include/raft/sparse/convert/detail/csr.cuh b/cpp/include/raft/sparse/convert/detail/csr.cuh index 44d6f21f9d..64ed1bbeea 100644 --- a/cpp/include/raft/sparse/convert/detail/csr.cuh +++ b/cpp/include/raft/sparse/convert/detail/csr.cuh @@ -84,11 +84,12 @@ void coo_to_csr(raft::resources const& handle, * @param m: number of rows in dense matrix * @param stream: cuda stream to use */ -template -void sorted_coo_to_csr(const T* rows, uint64_t nnz, outT* row_ind, int m, cudaStream_t stream) +template +void sorted_coo_to_csr(const T* rows, nnz_t nnz, outT* row_ind, int m, cudaStream_t stream) { rmm::device_uvector row_counts(m, stream); - RAFT_CUDA_TRY(cudaMemsetAsync(row_counts.data(), 0, (uint64_t)m * sizeof(outT), stream)); + RAFT_CUDA_TRY( + cudaMemsetAsync(row_counts.data(), 0, static_cast(m) * sizeof(outT), stream)); linalg::coo_degree(rows, nnz, row_counts.data(), stream); diff --git a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh index 686c601e98..b248de855d 100644 --- a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh +++ b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh @@ -352,7 +352,7 @@ void symmetrize(raft::resources const& handle, // sort COO raft::sparse::op::coo_sort((value_idx)m, (value_idx)n, - (nnz_t)nnz * 2, + static_cast(nnz) * 2, symm_rows.data(), symm_cols.data(), symm_vals.data(), diff --git a/cpp/include/raft/sparse/op/detail/filter.cuh b/cpp/include/raft/sparse/op/detail/filter.cuh index dede0747a7..db2e3b858b 100644 --- a/cpp/include/raft/sparse/op/detail/filter.cuh +++ b/cpp/include/raft/sparse/op/detail/filter.cuh @@ -106,8 +106,9 @@ void coo_remove_scalar(const idx_t* rows, { rmm::device_uvector ex_scan(n, stream); rmm::device_uvector cur_ex_scan(n, stream); - RAFT_CUDA_TRY(cudaMemsetAsync(ex_scan.data(), 0, (nnz_t)n * sizeof(nnz_t), stream)); - RAFT_CUDA_TRY(cudaMemsetAsync(cur_ex_scan.data(), 0, (nnz_t)n * sizeof(nnz_t), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(ex_scan.data(), 0, static_cast(n) * sizeof(nnz_t), stream)); + RAFT_CUDA_TRY( + cudaMemsetAsync(cur_ex_scan.data(), 0, static_cast(n) * sizeof(nnz_t), stream)); thrust::device_ptr dev_cnnz = thrust::device_pointer_cast(cnnz); thrust::device_ptr dev_ex_scan = thrust::device_pointer_cast(ex_scan.data()); @@ -153,8 +154,10 @@ void coo_remove_scalar(COO* in, rmm::device_uvector row_count_nz(in->n_rows, stream); rmm::device_uvector row_count(in->n_rows, stream); - RAFT_CUDA_TRY(cudaMemsetAsync(row_count_nz.data(), 0, (nnz_t)in->n_rows * sizeof(nnz_t), stream)); - RAFT_CUDA_TRY(cudaMemsetAsync(row_count.data(), 0, (nnz_t)in->n_rows * sizeof(nnz_t), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync( + row_count_nz.data(), 0, static_cast(in->n_rows) * sizeof(nnz_t), stream)); + RAFT_CUDA_TRY( + cudaMemsetAsync(row_count.data(), 0, static_cast(in->n_rows) * sizeof(nnz_t), stream)); linalg::coo_degree(in->rows(), in->nnz, row_count.data(), stream); RAFT_CUDA_TRY(cudaPeekAtLastError());