Allow some of the sparse utility functions to handle larger matrices #2541

Open

wants to merge 32 commits into branch-25.04 from viclafargue:fix-sparse-utilities

Commits (32), changes shown from all commits:
2e06ab9
Fix sparse utilities
viclafargue Jan 14, 2025
3cd3880
Additional fixes
viclafargue Jan 21, 2025
347ea4e
Revert coo_remove_scalar_kernel code
viclafargue Jan 22, 2025
f2d6e09
Merge branch 'branch-25.02' into fix-sparse-utilities
cjnolet Jan 22, 2025
c087b50
Merge branch 'branch-25.02' into fix-sparse-utilities
viclafargue Jan 25, 2025
92e4af1
check style
viclafargue Jan 25, 2025
dc72acc
compilation fix
viclafargue Jan 27, 2025
8e58d29
fix tests
viclafargue Jan 27, 2025
c75cd87
FIX style fixes
dantegd Jan 28, 2025
694d371
changes so far
viclafargue Jan 29, 2025
4881313
Exposing more templates for sparse types
cjnolet Jan 29, 2025
ccb3b93
Adding explicit types to COO object
cjnolet Jan 29, 2025
b831e40
Updating COO object and sparse primitives to include an NNZ type
cjnolet Jan 29, 2025
aed7686
More updates
cjnolet Jan 29, 2025
786b9ce
Merge branch 'branch-25.02' into fix-sparse-utilities
divyegala Jan 30, 2025
a58ffcb
completing change
viclafargue Jan 30, 2025
1d10f72
fixing issues
viclafargue Jan 30, 2025
2d6f2dc
some updates
divyegala Jan 30, 2025
eb919d1
merge
divyegala Jan 30, 2025
cbae315
working through updates for cuvs
divyegala Jan 30, 2025
485d042
working through updates for cuvs
divyegala Jan 30, 2025
d377a2c
lanczos tests updates
divyegala Jan 31, 2025
46db162
missing ;
divyegala Jan 31, 2025
dd39780
tons of updates to lanczos/eigen
divyegala Feb 1, 2025
c4a3497
Merge branch 'branch-25.02' into fix-sparse-utilities
divyegala Feb 1, 2025
060426c
Merge branch 'branch-25.04' into fix-sparse-utilities
divyegala Feb 1, 2025
f17e921
some fixes for cuml
divyegala Feb 3, 2025
594080c
Merge remote-tracking branch 'upstream/branch-25.04' into fix-sparse-…
divyegala Feb 3, 2025
04aaf9a
Merge branch 'fix-sparse-utilities' of github.com:viclafargue/raft in…
divyegala Feb 3, 2025
5b30389
fixes for cuml
divyegala Feb 3, 2025
a444f90
more cuml fixes
divyegala Feb 3, 2025
b1ebf87
Merge remote-tracking branch 'upstream/branch-25.04' into fix-sparse-…
divyegala Feb 4, 2025
20 changes: 10 additions & 10 deletions cpp/include/raft/cluster/detail/connectivities.cuh
@@ -43,8 +43,8 @@ template <raft::cluster::LinkageDistance dist_type, typename value_idx, typename
struct distance_graph_impl {
void run(raft::resources const& handle,
const value_t* X,
- size_t m,
- size_t n,
+ value_idx m,
+ value_idx n,
raft::distance::DistanceType metric,
rmm::device_uvector<value_idx>& indptr,
rmm::device_uvector<value_idx>& indices,
@@ -61,8 +61,8 @@ template <typename value_idx, typename value_t>
struct distance_graph_impl<raft::cluster::LinkageDistance::KNN_GRAPH, value_idx, value_t> {
void run(raft::resources const& handle,
const value_t* X,
- size_t m,
- size_t n,
+ value_idx m,
+ value_idx n,
raft::distance::DistanceType metric,
rmm::device_uvector<value_idx>& indptr,
rmm::device_uvector<value_idx>& indices,
@@ -130,8 +130,8 @@ RAFT_KERNEL fill_indices2(value_idx* indices, size_t m, size_t nnz)
template <typename value_idx, typename value_t>
void pairwise_distances(const raft::resources& handle,
const value_t* X,
- size_t m,
- size_t n,
+ value_idx m,
+ value_idx n,
raft::distance::DistanceType metric,
value_idx* indptr,
value_idx* indices,
@@ -178,8 +178,8 @@ template <typename value_idx, typename value_t>
struct distance_graph_impl<raft::cluster::LinkageDistance::PAIRWISE, value_idx, value_t> {
void run(const raft::resources& handle,
const value_t* X,
- size_t m,
- size_t n,
+ value_idx m,
+ value_idx n,
raft::distance::DistanceType metric,
rmm::device_uvector<value_idx>& indptr,
rmm::device_uvector<value_idx>& indices,
@@ -216,8 +216,8 @@ struct distance_graph_impl<raft::cluster::LinkageDistance::PAIRWISE, value_idx,
template <typename value_idx, typename value_t, raft::cluster::LinkageDistance dist_type>
void get_distance_graph(raft::resources const& handle,
const value_t* X,
- size_t m,
- size_t n,
+ value_idx m,
+ value_idx n,
raft::distance::DistanceType metric,
rmm::device_uvector<value_idx>& indptr,
rmm::device_uvector<value_idx>& indices,
8 changes: 4 additions & 4 deletions cpp/include/raft/cluster/single_linkage.cuh
@@ -52,8 +52,8 @@ template <typename value_idx,
LinkageDistance dist_type = LinkageDistance::KNN_GRAPH>
[[deprecated("Use cuVS instead")]] void single_linkage(raft::resources const& handle,
const value_t* X,
- size_t m,
- size_t n,
+ value_idx m,
+ value_idx n,
raft::distance::DistanceType metric,
linkage_output<value_idx>* out,
int c,
@@ -103,8 +103,8 @@ template <typename value_t, typename idx_t, LinkageDistance dist_type = LinkageD
raft::cluster::single_linkage<idx_t, value_t, dist_type>(
handle,
X.data_handle(),
- static_cast<std::size_t>(X.extent(0)),
- static_cast<std::size_t>(X.extent(1)),
+ X.extent(0),
+ X.extent(1),
metric,
&out_arrs,
c.has_value() ? c.value() : DEFAULT_CONST_C,
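Usage sketch (editor's note, not part of the diff): after this change the deprecated pointer-based overload takes the matrix dimensions in value_idx rather than size_t. The linkage_output member wiring and the trailing c/n_clusters arguments below are assumptions about the surrounding RAFT API rather than something this hunk shows; treat it as a sketch, not the canonical call.

```cpp
// Hedged sketch: assumes linkage_output exposes labels/children pointers and that the
// deprecated overload ends with c and n_clusters arguments, as in the existing RAFT API.
#include <raft/cluster/single_linkage.cuh>
#include <raft/core/resource/cuda_stream.hpp>
#include <raft/core/resources.hpp>
#include <raft/distance/distance_types.hpp>

#include <rmm/device_uvector.hpp>

#include <cstdint>

void single_linkage_sketch(raft::resources const& handle,
                           const float* d_X,  // row-major m x n matrix, already on device
                           int64_t m,
                           int64_t n)
{
  auto stream = raft::resource::get_cuda_stream(handle);

  rmm::device_uvector<int64_t> labels(m, stream);
  rmm::device_uvector<int64_t> children(2 * (m - 1), stream);

  raft::cluster::linkage_output<int64_t> out;
  out.labels   = labels.data();    // assumed member names
  out.children = children.data();  // assumed member names

  // m and n are now passed as value_idx (int64_t here) instead of size_t.
  raft::cluster::single_linkage<int64_t, float>(handle,
                                                d_X,
                                                m,
                                                n,
                                                raft::distance::DistanceType::L2SqrtExpanded,
                                                &out,
                                                /*c=*/15,
                                                /*n_clusters=*/5);
}
```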
19 changes: 10 additions & 9 deletions cpp/include/raft/linalg/detail/lanczos.cuh
@@ -745,10 +745,10 @@ static int lanczosRestart(raft::resources const& handle,
* @param seed random seed.
* @return error flag.
*/
- template <typename index_type_t, typename value_type_t>
+ template <typename index_type_t, typename value_type_t, typename nnz_type_t>
int computeSmallestEigenvectors(
raft::resources const& handle,
- spectral::matrix::sparse_matrix_t<index_type_t, value_type_t> const* A,
+ spectral::matrix::sparse_matrix_t<index_type_t, value_type_t, nnz_type_t> const* A,
index_type_t nEigVecs,
index_type_t maxIter,
index_type_t restartIter,
@@ -986,10 +986,10 @@ int computeSmallestEigenvectors(
return 0;
}

- template <typename index_type_t, typename value_type_t>
+ template <typename index_type_t, typename value_type_t, typename nnz_type_t>
int computeSmallestEigenvectors(
raft::resources const& handle,
- spectral::matrix::sparse_matrix_t<index_type_t, value_type_t> const& A,
+ spectral::matrix::sparse_matrix_t<index_type_t, value_type_t, nnz_type_t> const& A,
index_type_t nEigVecs,
index_type_t maxIter,
index_type_t restartIter,
@@ -1004,7 +1004,8 @@ int computeSmallestEigenvectors(
index_type_t n = A.nrows_;

// Check that parameters are valid
- RAFT_EXPECTS(nEigVecs > 0 && nEigVecs <= n, "Invalid number of eigenvectors.");
+ RAFT_EXPECTS(nEigVecs > 0 && static_cast<uint64_t>(nEigVecs) <= n,
+              "Invalid number of eigenvectors.");
RAFT_EXPECTS(restartIter > 0, "Invalid restartIter.");
RAFT_EXPECTS(tol > 0, "Invalid tolerance.");
RAFT_EXPECTS(maxIter >= nEigVecs, "Invalid maxIter.");
@@ -1089,10 +1090,10 @@ int computeSmallestEigenvectors(
* @param seed random seed.
* @return error flag.
*/
- template <typename index_type_t, typename value_type_t>
+ template <typename index_type_t, typename value_type_t, typename nnz_type_t>
int computeLargestEigenvectors(
raft::resources const& handle,
- spectral::matrix::sparse_matrix_t<index_type_t, value_type_t> const* A,
+ spectral::matrix::sparse_matrix_t<index_type_t, value_type_t, nnz_type_t> const* A,
index_type_t nEigVecs,
index_type_t maxIter,
index_type_t restartIter,
@@ -1333,10 +1334,10 @@ int computeLargestEigenvectors(
return 0;
}

- template <typename index_type_t, typename value_type_t>
+ template <typename index_type_t, typename value_type_t, typename nnz_type_t>
int computeLargestEigenvectors(
raft::resources const& handle,
- spectral::matrix::sparse_matrix_t<index_type_t, value_type_t> const& A,
+ spectral::matrix::sparse_matrix_t<index_type_t, value_type_t, nnz_type_t> const& A,
index_type_t nEigVecs,
index_type_t maxIter,
index_type_t restartIter,
6 changes: 3 additions & 3 deletions cpp/include/raft/sparse/convert/coo.cuh
@@ -32,11 +32,11 @@ namespace convert {
* @param nnz: size of output COO row array
* @param stream: cuda stream to use
*/
- template <typename value_idx = int>
+ template <typename value_idx = int, typename nnz_t>
void csr_to_coo(
- const value_idx* row_ind, value_idx m, value_idx* coo_rows, value_idx nnz, cudaStream_t stream)
+ const value_idx* row_ind, value_idx m, value_idx* coo_rows, nnz_t nnz, cudaStream_t stream)
{
- detail::csr_to_coo<value_idx, 32>(row_ind, m, coo_rows, nnz, stream);
+ detail::csr_to_coo<value_idx, nnz_t, 32>(row_ind, m, coo_rows, nnz, stream);
}

}; // end NAMESPACE convert
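Usage sketch (editor's note, not part of the diff): the element count of csr_to_coo is now a separate nnz_t template parameter, so a CSR matrix with more than 2^31 - 1 entries can be expanded into COO rows while the row ids stay 32-bit. A minimal sketch with hypothetical buffer names:

```cpp
#include <raft/core/resource/cuda_stream.hpp>
#include <raft/core/resources.hpp>
#include <raft/sparse/convert/coo.cuh>

#include <rmm/device_uvector.hpp>

#include <cstdint>

void csr_to_coo_sketch(raft::resources const& handle,
                       rmm::device_uvector<int> const& indptr,  // length n_rows + 1
                       int n_rows,
                       uint64_t nnz)  // may exceed INT32_MAX
{
  auto stream = raft::resource::get_cuda_stream(handle);
  rmm::device_uvector<int> coo_rows(nnz, stream);

  // value_idx is deduced as int from row_ind and nnz_t as uint64_t from nnz;
  // only the element count is widened, not the row ids.
  raft::sparse::convert::csr_to_coo(indptr.data(), n_rows, coo_rows.data(), nnz, stream);
}
```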
8 changes: 4 additions & 4 deletions cpp/include/raft/sparse/convert/csr.cuh
@@ -54,8 +54,8 @@ void coo_to_csr(raft::resources const& handle,
* @param m: number of rows in dense matrix
* @param stream: cuda stream to use
*/
- template <typename T>
- void sorted_coo_to_csr(const T* rows, int nnz, T* row_ind, int m, cudaStream_t stream)
+ template <typename T, typename nnz_type, typename outT>
+ void sorted_coo_to_csr(const T* rows, nnz_type nnz, outT* row_ind, int m, cudaStream_t stream)
{
detail::sorted_coo_to_csr(rows, nnz, row_ind, m, stream);
}
@@ -67,8 +67,8 @@ void sorted_coo_to_csr(const T* rows, int nnz, T* row_ind, int m, cudaStream_t s
* @param row_ind: output row indices array
* @param stream: cuda stream to use
*/
- template <typename T>
- void sorted_coo_to_csr(COO<T>* coo, int* row_ind, cudaStream_t stream)
+ template <typename T, typename outT>
+ void sorted_coo_to_csr(COO<T>* coo, outT* row_ind, cudaStream_t stream)
{
detail::sorted_coo_to_csr(coo->rows(), coo->nnz, row_ind, coo->n_rows, stream);
}
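Usage sketch (editor's note, not part of the diff): sorted_coo_to_csr now takes the element count as nnz_type and writes the compressed row pointer through a caller-chosen outT, so offsets stay valid once the running sum of row counts passes INT32_MAX. A minimal sketch with hypothetical buffers:

```cpp
#include <raft/core/resource/cuda_stream.hpp>
#include <raft/core/resources.hpp>
#include <raft/sparse/convert/csr.cuh>

#include <rmm/device_uvector.hpp>

#include <cstdint>

void sorted_coo_to_csr_sketch(raft::resources const& handle,
                              rmm::device_uvector<int> const& coo_rows,  // sorted COO row ids
                              int n_rows,
                              uint64_t nnz)
{
  auto stream = raft::resource::get_cuda_stream(handle);

  // outT = uint64_t: the row offsets are an exclusive scan of per-row counts and can
  // exceed INT32_MAX for large matrices.
  rmm::device_uvector<uint64_t> indptr(n_rows, stream);

  raft::sparse::convert::sorted_coo_to_csr(coo_rows.data(), nnz, indptr.data(), n_rows, stream);
}
```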
4 changes: 2 additions & 2 deletions cpp/include/raft/sparse/convert/detail/coo.cuh
@@ -60,9 +60,9 @@ RAFT_KERNEL csr_to_coo_kernel(const value_idx* row_ind,
* @param nnz: size of output COO row array
* @param stream: cuda stream to use
*/
- template <typename value_idx = int, int TPB_X = 32>
+ template <typename value_idx = int, typename nnz_t, int TPB_X = 32>
void csr_to_coo(
- const value_idx* row_ind, value_idx m, value_idx* coo_rows, value_idx nnz, cudaStream_t stream)
+ const value_idx* row_ind, value_idx m, value_idx* coo_rows, nnz_t nnz, cudaStream_t stream)
{
// @TODO: Use cusparse for this.
dim3 grid(raft::ceildiv(m, (value_idx)TPB_X), 1, 1);
13 changes: 6 additions & 7 deletions cpp/include/raft/sparse/convert/detail/csr.cuh
@@ -84,18 +84,17 @@ void coo_to_csr(raft::resources const& handle,
* @param m: number of rows in dense matrix
* @param stream: cuda stream to use
*/
- template <typename T>
- void sorted_coo_to_csr(const T* rows, int nnz, T* row_ind, int m, cudaStream_t stream)
+ template <typename T, typename outT>
+ void sorted_coo_to_csr(const T* rows, uint64_t nnz, outT* row_ind, int m, cudaStream_t stream)
{
- rmm::device_uvector<T> row_counts(m, stream);
-
- RAFT_CUDA_TRY(cudaMemsetAsync(row_counts.data(), 0, m * sizeof(T), stream));
+ rmm::device_uvector<outT> row_counts(m, stream);
+ RAFT_CUDA_TRY(cudaMemsetAsync(row_counts.data(), 0, (uint64_t)m * sizeof(outT), stream));

linalg::coo_degree(rows, nnz, row_counts.data(), stream);

// create csr compressed row index from row counts
- thrust::device_ptr<T> row_counts_d = thrust::device_pointer_cast(row_counts.data());
- thrust::device_ptr<T> c_ind_d = thrust::device_pointer_cast(row_ind);
+ thrust::device_ptr<outT> row_counts_d = thrust::device_pointer_cast(row_counts.data());
+ thrust::device_ptr<outT> c_ind_d = thrust::device_pointer_cast(row_ind);
exclusive_scan(rmm::exec_policy(stream), row_counts_d, row_counts_d + m, c_ind_d);
}

4 changes: 2 additions & 2 deletions cpp/include/raft/sparse/coo.hpp
@@ -39,8 +39,8 @@ namespace sparse {
* @tparam value_idx: the type of index array
*
*/
- template <typename value_t, typename value_idx = int>
- using COO = detail::COO<value_t, value_idx>;
+ template <typename value_t, typename value_idx = int, typename nnz_t = uint64_t>
+ using COO = detail::COO<value_t, value_idx, nnz_t>;

}; // namespace sparse
}; // namespace raft
29 changes: 16 additions & 13 deletions cpp/include/raft/sparse/detail/coo.cuh
@@ -44,15 +44,19 @@ namespace detail {
* @tparam Index_Type: the type of index array
*
*/
- template <typename T, typename Index_Type = int>
+ template <typename T, typename Index_Type = int, typename nnz_type = uint64_t>
class COO {
protected:
rmm::device_uvector<Index_Type> rows_arr;
rmm::device_uvector<Index_Type> cols_arr;
rmm::device_uvector<T> vals_arr;

public:
- Index_Type nnz;
+ using value_t = T;
+ using index_t = Index_Type;
+ using nnz_t = nnz_type;
+
+ nnz_type nnz;
Index_Type n_rows;
Index_Type n_cols;

@@ -75,7 +79,7 @@ class COO {
COO(rmm::device_uvector<Index_Type>& rows,
rmm::device_uvector<Index_Type>& cols,
rmm::device_uvector<T>& vals,
- Index_Type nnz,
+ nnz_type nnz,
Index_Type n_rows = 0,
Index_Type n_cols = 0)
: rows_arr(rows), cols_arr(cols), vals_arr(vals), nnz(nnz), n_rows(n_rows), n_cols(n_cols)
@@ -90,7 +94,7 @@ class COO {
* @param init: initialize arrays with zeros
*/
COO(cudaStream_t stream,
- Index_Type nnz,
+ nnz_type nnz,
Index_Type n_rows = 0,
Index_Type n_cols = 0,
bool init = true)
@@ -121,7 +125,7 @@ class COO {
*/
bool validate_size() const
{
- if (this->nnz < 0 || n_rows < 0 || n_cols < 0) return false;
+ if (this->nnz <= 0 || n_rows <= 0 || n_cols <= 0) return false;
return true;
}

@@ -156,7 +160,7 @@ class COO {
/**
* @brief Send human-readable state information to output stream
*/
- friend std::ostream& operator<<(std::ostream& out, const COO<T, Index_Type>& c)
+ friend std::ostream& operator<<(std::ostream& out, const COO<T, Index_Type, nnz_type>& c)
{
if (c.validate_size() && c.validate_mem()) {
cudaStream_t stream;
@@ -204,7 +208,7 @@ class COO {
* @param init: should values be initialized to 0?
* @param stream: CUDA stream to use
*/
- void allocate(Index_Type nnz, bool init, cudaStream_t stream)
+ void allocate(nnz_type nnz, bool init, cudaStream_t stream)
{
this->allocate(nnz, 0, init, stream);
}
@@ -216,7 +220,7 @@ class COO {
* @param init: should values be initialized to 0?
* @param stream: CUDA stream to use
*/
- void allocate(Index_Type nnz, Index_Type size, bool init, cudaStream_t stream)
+ void allocate(nnz_type nnz, Index_Type size, bool init, cudaStream_t stream)
{
this->allocate(nnz, size, size, init, stream);
}
@@ -229,16 +233,15 @@ class COO {
* @param init: should values be initialized to 0?
* @param stream: stream to use for init
*/
- void allocate(
- Index_Type nnz, Index_Type n_rows, Index_Type n_cols, bool init, cudaStream_t stream)
+ void allocate(nnz_type nnz, Index_Type n_rows, Index_Type n_cols, bool init, cudaStream_t stream)
{
this->n_rows = n_rows;
this->n_cols = n_cols;
this->nnz = nnz;

- this->rows_arr.resize(this->nnz, stream);
- this->cols_arr.resize(this->nnz, stream);
- this->vals_arr.resize(this->nnz, stream);
+ this->rows_arr.resize(nnz, stream);
+ this->cols_arr.resize(nnz, stream);
+ this->vals_arr.resize(nnz, stream);

if (init) init_arrays(stream);
}
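Editor's note: combined with the public alias in coo.hpp above, the container can now describe more than 2^31 - 1 nonzeros while keeping 32-bit row and column ids. A hedged construction sketch; the stream-only constructor is assumed from the existing class, and the sizes are hypothetical:

```cpp
#include <raft/core/resource/cuda_stream.hpp>
#include <raft/core/resources.hpp>
#include <raft/sparse/coo.hpp>

#include <cstdint>

void large_coo_sketch(raft::resources const& handle)
{
  auto stream = raft::resource::get_cuda_stream(handle);

  // value_t = float, value_idx = int, nnz_t defaults to uint64_t.
  raft::sparse::COO<float, int> coo(stream);

  // ~3 billion nonzeros: representable now that nnz is nnz_type (uint64_t);
  // the previous Index_Type nnz member would have overflowed a 32-bit int.
  uint64_t nnz = 3'000'000'000ULL;
  int n_rows   = 200'000;
  int n_cols   = 200'000;

  coo.allocate(nnz, n_rows, n_cols, /*init=*/false, stream);
}
```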
6 changes: 3 additions & 3 deletions cpp/include/raft/sparse/detail/utils.h
@@ -103,10 +103,10 @@ void iota_fill(value_idx* indices, value_idx nrows, value_idx ncols, cudaStream_
iota_fill_block_kernel<<<nrows, blockdim, 0, stream>>>(indices, ncols);
}

- template <typename T>
- __device__ int get_stop_idx(T row, T m, T nnz, const T* ind)
+ template <typename T, typename indT>
+ __device__ indT get_stop_idx(T row, T m, indT nnz, const indT* ind)
{
- int stop_idx = 0;
+ indT stop_idx = 0;
if (row < (m - 1))
stop_idx = ind[row + 1];
else
18 changes: 9 additions & 9 deletions cpp/include/raft/sparse/linalg/degree.cuh
@@ -33,8 +33,8 @@ namespace linalg {
* @param results: output result array
* @param stream: cuda stream to use
*/
- template <typename T = int>
- void coo_degree(const T* rows, int nnz, T* results, cudaStream_t stream)
+ template <typename T = int, typename nnz_type, typename outT>
+ void coo_degree(const T* rows, nnz_type nnz, outT* results, cudaStream_t stream)
{
detail::coo_degree<64, T>(rows, nnz, results, stream);
}
@@ -47,8 +47,8 @@ void coo_degree(const T* rows, int nnz, T* results, cudaStream_t stream)
* @param results: output array with row counts (size=in->n_rows)
* @param stream: cuda stream to use
*/
- template <typename T>
- void coo_degree(COO<T>* in, int* results, cudaStream_t stream)
+ template <typename T, typename outT>
+ void coo_degree(COO<T>* in, outT* results, cudaStream_t stream)
{
coo_degree(in->rows(), in->nnz, results, stream);
}
@@ -64,11 +64,11 @@ void coo_degree(COO<T>* in, int* results, cudaStream_t stream)
* @param results: output row counts
* @param stream: cuda stream to use
*/
- template <typename T>
+ template <typename T, typename nnz_type, typename outT>
void coo_degree_scalar(
- const int* rows, const T* vals, int nnz, T scalar, int* results, cudaStream_t stream = 0)
+ const int* rows, const T* vals, nnz_type nnz, T scalar, outT* results, cudaStream_t stream = 0)
{
- detail::coo_degree_scalar<64>(rows, vals, nnz, scalar, results, stream);
+ detail::coo_degree_scalar<64>(rows, vals, (uint64_t)nnz, scalar, results, stream);
}

/**
@@ -80,8 +80,8 @@ void coo_degree_scalar(
* @param results: output row counts
* @param stream: cuda stream to use
*/
- template <typename T>
- void coo_degree_scalar(COO<T>* in, T scalar, int* results, cudaStream_t stream)
+ template <typename T, typename outT>
+ void coo_degree_scalar(COO<T>* in, T scalar, outT* results, cudaStream_t stream)
{
coo_degree_scalar(in->rows(), in->vals(), in->nnz, scalar, results, stream);
}
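Usage sketch (editor's note, not part of the diff): coo_degree now takes the element count as nnz_type and deduces the result type from the output pointer, so a 64-bit output can hold per-row counts for matrices whose total nnz exceeds INT32_MAX. A minimal sketch with hypothetical buffers:

```cpp
#include <raft/core/resource/cuda_stream.hpp>
#include <raft/core/resources.hpp>
#include <raft/sparse/linalg/degree.cuh>

#include <rmm/device_uvector.hpp>

#include <cstdint>

void coo_degree_sketch(raft::resources const& handle,
                       rmm::device_uvector<int> const& coo_rows,  // COO row ids, length nnz
                       int n_rows,
                       uint64_t nnz)
{
  auto stream = raft::resource::get_cuda_stream(handle);

  // outT is deduced as uint64_t from the results pointer.
  rmm::device_uvector<uint64_t> row_degrees(n_rows, stream);

  raft::sparse::linalg::coo_degree(coo_rows.data(), nnz, row_degrees.data(), stream);
}
```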