Skip to content

Commit

Permalink
Merge branch 'branch-25.04' into bm25-tfidf
Browse files Browse the repository at this point in the history
  • Loading branch information
cjnolet authored Feb 19, 2025
2 parents 4175a4d + 68d412a commit faef8ab
Show file tree
Hide file tree
Showing 16 changed files with 424 additions and 46 deletions.
22 changes: 8 additions & 14 deletions ci/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,35 +3,29 @@

set -euo pipefail

rapids-logger "Downloading artifacts from previous jobs"
CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)

rapids-logger "Create test conda environment"
. /opt/conda/etc/profile.d/conda.sh

RAPIDS_VERSION="$(rapids-version)"
RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
export RAPIDS_VERSION_MAJOR_MINOR

rapids-dependency-file-generator \
--output conda \
--file-key docs \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" \
--prepend-channel "${CPP_CHANNEL}" \
--prepend-channel "${PYTHON_CHANNEL}" \
| tee env.yaml

rapids-mamba-retry env create --yes -f env.yaml -n docs
conda activate docs

rapids-print-env

rapids-logger "Downloading artifacts from previous jobs"
CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)

rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
"libraft=${RAPIDS_VERSION}" \
"libraft-headers=${RAPIDS_VERSION}" \
"pylibraft=${RAPIDS_VERSION}" \
"raft-dask=${RAPIDS_VERSION}"

RAPIDS_DOCS_DIR="$(mktemp -d)"
export RAPIDS_DOCS_DIR

Expand Down
8 changes: 4 additions & 4 deletions cpp/include/raft/cluster/detail/kmeans.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@

#include <cuda.h>
#include <thrust/fill.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/transform.h>

#include <algorithm>
Expand Down Expand Up @@ -443,13 +444,12 @@ void kmeans_fit_main(raft::resources const& handle,
params.batch_centroids,
workspace);

// Using TransformInputIteratorT to dereference an array of
// Using thrust::transform_iterator to dereference an array of
// raft::KeyValuePair and converting them to just return the Key to be used
// in reduce_rows_by_key prims
detail::KeyValueIndexOp<IndexT, DataT> conversion_op;
cub::TransformInputIterator<IndexT,
detail::KeyValueIndexOp<IndexT, DataT>,
raft::KeyValuePair<IndexT, DataT>*>
thrust::transform_iterator<detail::KeyValueIndexOp<IndexT, DataT>,
raft::KeyValuePair<IndexT, DataT>*>
itr(minClusterAndDistance.data_handle(), conversion_op);

update_centroids(handle,
Expand Down
7 changes: 5 additions & 2 deletions cpp/include/raft/cluster/detail/kmeans_balanced.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
#include <rmm/resource_ref.hpp>

#include <thrust/gather.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/transform.h>

#include <limits>
Expand Down Expand Up @@ -288,7 +289,8 @@ void calc_centers_and_sizes(const raft::resources& handle,
dataset, dim, labels, nullptr, n_rows, dim, n_clusters, centers, stream, reset_counters);
} else {
// todo(lsugy): use iterator from KV output of fusedL2NN
cub::TransformInputIterator<MathT, MappingOpT, const T*> mapping_itr(dataset, mapping_op);
thrust::transform_iterator<MappingOpT, const T*, thrust::use_default, MathT> mapping_itr(
dataset, mapping_op);
raft::linalg::reduce_rows_by_key(
mapping_itr, dim, labels, nullptr, n_rows, dim, n_clusters, centers, stream, reset_counters);
}
Expand Down Expand Up @@ -894,7 +896,8 @@ auto build_fine_clusters(const raft::resources& handle,
"Number of fine clusters must be non-zero for a non-empty mesocluster");
}

cub::TransformInputIterator<MathT, MappingOpT, const T*> mapping_itr(dataset_mptr, mapping_op);
thrust::transform_iterator<MappingOpT, const T*, thrust::use_default, MathT> mapping_itr(
dataset_mptr, mapping_op);
raft::matrix::gather(mapping_itr, dim, n_rows, mc_trainset_ids, k, mc_trainset, stream);
if (params.metric == raft::distance::DistanceType::L2Expanded ||
params.metric == raft::distance::DistanceType::L2SqrtExpanded) {
Expand Down
12 changes: 6 additions & 6 deletions cpp/include/raft/cluster/detail/kmeans_common.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include <cuda.h>
#include <thrust/fill.h>
#include <thrust/for_each.h>
#include <thrust/iterator/transform_iterator.h>

#include <algorithm>
#include <cmath>
Expand Down Expand Up @@ -199,8 +200,8 @@ void computeClusterCost(raft::resources const& handle,
{
cudaStream_t stream = resource::get_cuda_stream(handle);

cub::TransformInputIterator<OutputT, MainOpT, InputT*> itr(minClusterDistance.data_handle(),
main_op);
thrust::transform_iterator<MainOpT, InputT*, thrust::use_default, OutputT> itr(
minClusterDistance.data_handle(), main_op);

size_t temp_storage_bytes = 0;
RAFT_CUDA_TRY(cub::DeviceReduce::Reduce(nullptr,
Expand Down Expand Up @@ -641,13 +642,12 @@ void countSamplesInCluster(raft::resources const& handle,
params.batch_centroids,
workspace);

// Using TransformInputIteratorT to dereference an array of raft::KeyValuePair
// Using thrust::transform_iterator to dereference an array of raft::KeyValuePair
// and converting them to just return the Key to be used in reduce_rows_by_key
// prims
detail::KeyValueIndexOp<IndexT, DataT> conversion_op;
cub::TransformInputIterator<IndexT,
detail::KeyValueIndexOp<IndexT, DataT>,
raft::KeyValuePair<IndexT, DataT>*>
thrust::transform_iterator<detail::KeyValueIndexOp<IndexT, DataT>,
raft::KeyValuePair<IndexT, DataT>*>
itr(minClusterAndDistance.data_handle(), conversion_op);

// count # of samples in each cluster
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/raft/core/sparse_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ class sparse_matrix {
row_type n_rows,
col_type n_cols,
nnz_type nnz = 0) noexcept(std::is_nothrow_default_constructible_v<container_type>)
: structure_{handle, n_rows, n_cols, nnz}, cp_{}, c_elements_{cp_.create(handle, 0)} {};
: structure_{handle, n_rows, n_cols, nnz}, cp_{}, c_elements_{cp_.create(handle, nnz)} {};

// Constructor that owns the data but not the structure
// This constructor is only callable with a `structure_type == *_structure_view`
Expand Down
5 changes: 3 additions & 2 deletions cpp/include/raft/neighbors/detail/ivf_pq_build.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@

#include <cuda_fp16.h>
#include <thrust/extrema.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/scan.h>

#include <memory>
Expand Down Expand Up @@ -180,8 +181,8 @@ void select_residuals(raft::resources const& handle,
rmm::device_uvector<float> tmp(size_t(n_rows) * size_t(dim), stream, device_memory);
// Note: the number of rows of the input dataset isn't actually n_rows, but matrix::gather doesn't
// need to know it, any strictly positive number would work.
cub::TransformInputIterator<float, utils::mapping<float>, const T*> mapping_itr(
dataset, utils::mapping<float>{});
thrust::transform_iterator<utils::mapping<float>, const T*> mapping_itr(dataset,
utils::mapping<float>{});
raft::matrix::gather(mapping_itr, (IdxT)dim, n_rows, row_ids, n_rows, tmp.data(), stream);

raft::matrix::linewise_op(handle,
Expand Down
118 changes: 118 additions & 0 deletions cpp/include/raft/sparse/linalg/detail/laplacian.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/*
* Copyright (c) 2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <raft/core/detail/macros.hpp>
#include <raft/core/device_csr_matrix.hpp>
#include <raft/core/resources.hpp>

#include <type_traits>

namespace raft {
namespace sparse {
namespace linalg {
namespace detail {

/* Compute the graph Laplacian of an adjacency matrix
 *
 * This kernel implements the necessary logic for computing a graph
 * Laplacian for an adjacency matrix in CSR format. A custom kernel is
 * required because cusparse does not conveniently implement matrix subtraction with 64-bit
 * indices. The custom kernel also allows the computation to be completed
 * with no extra allocations or compute.
 *
 * Launch layout: 1D grid of 1D blocks. Each thread processes whole rows in a
 * grid-stride loop, so any 1D launch configuration covers all `dim` rows. No
 * shared memory is used.
 *
 * Preconditions implied by the indexing below:
 *  - The output arrays must be sized for (input nnz + dim) values/indices and
 *    (dim + 1) row offsets, because every row gains exactly one entry.
 *  - Column indices within each input row must be in ascending order. The
 *    output position of an entry is derived purely from which side of the
 *    diagonal it lies on, so unsorted rows would produce colliding writes.
 *  - The input must not contain explicit diagonal entries; such an entry would
 *    be emitted in addition to the degree entry, yielding a duplicate column
 *    index in that output row.
 *
 * @param[out] output_values  values of the Laplacian
 * @param[out] output_indices column indices of the Laplacian
 * @param[out] output_indptr  row offsets of the Laplacian (dim + 1 entries)
 * @param[in]  dim            number of rows (== number of columns) of the matrix
 * @param[in]  adj_values     values of the adjacency matrix
 * @param[in]  adj_indices    column indices of the adjacency matrix
 * @param[in]  adj_indptr     row offsets of the adjacency matrix (dim + 1 entries)
 */
template <typename ElementType, typename IndptrType, typename IndicesType>
RAFT_KERNEL compute_graph_laplacian_kernel(ElementType* output_values,
                                           IndicesType* output_indices,
                                           IndptrType* output_indptr,
                                           IndptrType dim,
                                           ElementType const* adj_values,
                                           IndicesType const* adj_indices,
                                           IndptrType const* adj_indptr)
{
  /* The graph Laplacian L of an adjacency matrix A is given by:
   * L = D - A
   * where D is the degree matrix of A. The degree matrix is itself defined
   * as the sum of each row of A and represents the degree of the node
   * indicated by the index of the row. */

  // The built-in thread/block indices are 32-bit unsigned. Do the grid-stride
  // arithmetic in 64-bit so the row counter can neither wrap for very large
  // matrices nor overflow a narrow IndptrType while stepping past `dim`.
  auto const stride = static_cast<long long>(blockDim.x) * static_cast<long long>(gridDim.x);
  auto const n_rows = static_cast<long long>(dim);
  for (auto linear_row = static_cast<long long>(threadIdx.x) +
                         static_cast<long long>(blockIdx.x) * static_cast<long long>(blockDim.x);
       linear_row < n_rows;
       linear_row += stride) {
    // linear_row < dim here, so it is representable as IndptrType
    auto const row = static_cast<IndptrType>(linear_row);
    auto row_begin = adj_indptr[row];
    auto row_end   = adj_indptr[row + 1];
    // All output indexes will need to be offset by the row, since every row will
    // gain exactly one new non-zero element. degree_output_index is the index
    // where we will store the degree of each row
    auto degree_output_index = row_begin + row;
    auto degree_value        = ElementType{};
    // value_index indicates the index of the current value in the original
    // adjacency matrix
    for (auto value_index = row_begin; value_index < row_end; ++value_index) {
      auto col_index         = adj_indices[value_index];
      auto is_lower_diagonal = col_index < row;
      // Entries at or right of the diagonal are shifted by one extra slot to
      // leave room for the degree entry of this row
      auto output_index = value_index + row + !is_lower_diagonal;
      auto input_value  = adj_values[value_index];
      degree_value += input_value;
      output_values[output_index]  = ElementType{-1} * input_value;
      output_indices[output_index] = col_index;
      // Increment the index where we will store the degree for every non-zero
      // element before we reach the diagonal
      degree_output_index += is_lower_diagonal;
    }
    output_values[degree_output_index]  = degree_value;
    output_indices[degree_output_index] = row;
    // Both offsets are written by this thread; the neighbouring row's thread
    // writes the identical value to the shared boundary, so the overlap is benign
    output_indptr[row]     = row_begin + row;
    output_indptr[row + 1] = row_end + row + 1;
  }
}

/* Build the graph Laplacian (L = D - A) of a square CSR adjacency matrix.
 *
 * Allocates a new device CSR matrix with (input nnz + dim) non-zeros and
 * launches compute_graph_laplacian_kernel on the CUDA stream obtained from
 * `res` to fill it. No stream synchronization is performed here, so the
 * contents of the returned matrix are only valid in stream order.
 *
 * @param[in] res   raft resources providing the CUDA stream (and used for allocation)
 * @param[in] input view of the adjacency matrix; must be square
 * @return an owning device CSR matrix holding the Laplacian
 *
 * Fails via RAFT_EXPECTS if the input is not square.
 */
template <typename ElementType, typename IndptrType, typename IndicesType, typename NZType>
auto compute_graph_laplacian(
  raft::resources const& res,
  device_csr_matrix_view<ElementType, IndptrType, IndicesType, NZType> input)
{
  auto input_structure = input.structure_view();
  auto dim             = input_structure.get_n_rows();
  RAFT_EXPECTS(dim == input_structure.get_n_cols(),
               "The graph Laplacian can only be computed on a square adjacency matrix");
  auto result = make_device_csr_matrix<std::remove_const_t<ElementType>,
                                       std::remove_const_t<IndptrType>,
                                       std::remove_const_t<IndicesType>,
                                       std::remove_const_t<NZType>>(
    res,
    dim,
    dim,
    /* The nnz for the result will be the dimension of the (square) input matrix plus the number of
     * non-zero elements in the original matrix, since we introduce non-zero elements along the
     * diagonal to represent the degree of each node. */
    input_structure.get_nnz() + dim);
  auto result_structure                         = result.structure_view();
  auto static constexpr const threads_per_block = 256;
  auto static constexpr const max_blocks        = 65535;
  // Compute and clamp the block count in 64-bit before narrowing to int, so a
  // very large `dim` cannot be truncated to a bogus (possibly zero) grid size.
  // At least one block is always requested so that an empty matrix does not
  // produce an invalid (zero-sized) launch configuration; the kernel's
  // grid-stride loop simply does nothing in that case.
  auto const wanted_blocks =
    (static_cast<long long>(dim) + threads_per_block - 1) / threads_per_block;
  auto const blocks = static_cast<int>(
    std::max(1LL, std::min(wanted_blocks, static_cast<long long>(max_blocks))));
  auto stream = resource::get_cuda_stream(res);
  // Execution configuration order is <<<grid, block, shared_mem, stream>>>.
  // Passing the block count as the block size would exceed the per-block
  // thread limit for large matrices and make the launch fail.
  // NOTE(review): the launch result is not checked here; consider adding the
  // project's usual launch-error check once the required header is in scope.
  detail::compute_graph_laplacian_kernel<<<blocks, threads_per_block, 0, stream>>>(
    result.get_elements().data(),
    result_structure.get_indices().data(),
    result_structure.get_indptr().data(),
    dim,
    input.get_elements().data(),
    input_structure.get_indices().data(),
    input_structure.get_indptr().data());
  return result;
}

} // namespace detail
} // namespace linalg
} // namespace sparse
} // namespace raft
39 changes: 39 additions & 0 deletions cpp/include/raft/sparse/linalg/laplacian.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Copyright (c) 2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <raft/core/device_csr_matrix.hpp>
#include <raft/core/resources.hpp>
#include <raft/sparse/linalg/detail/laplacian.cuh>

namespace raft {
namespace sparse {
namespace linalg {

/** Compute the graph Laplacian of a CSR adjacency matrix
 *
 * This is the public entry point; it forwards directly to the implementation
 * in the `detail` namespace and returns whatever that implementation returns.
 *
 * Note that for non-symmetric matrices, the out-degree Laplacian is returned.
 *
 * @param[in] res   raft resources handle passed through to the implementation
 * @param[in] input view of the adjacency matrix in CSR format
 * @return the Laplacian as produced by detail::compute_graph_laplacian
 */
template <typename ElementType, typename IndptrType, typename IndicesType, typename NZType>
auto compute_graph_laplacian(
  raft::resources const& res,
  device_csr_matrix_view<ElementType, IndptrType, IndicesType, NZType> input)
{
  // Fully qualified so it is obvious this is not a recursive call
  return raft::sparse::linalg::detail::compute_graph_laplacian(res, input);
}

} // namespace linalg
} // namespace sparse
} // namespace raft
16 changes: 16 additions & 0 deletions cpp/include/raft/spectral/detail/matrix_wrappers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
*/
#pragma once

#include <raft/core/device_csr_matrix.hpp>
#include <raft/core/device_span.hpp>
#include <raft/core/resource/cublas_handle.hpp>
#include <raft/core/resource/cuda_stream.hpp>
#include <raft/core/resource/cusparse_handle.hpp>
Expand All @@ -33,6 +35,7 @@
#include <thrust/system/cuda/execution_policy.h>

#include <algorithm>
#include <cstddef>

// =========================================================
// Useful macros
Expand Down Expand Up @@ -181,6 +184,19 @@ struct sparse_matrix_t {
{
}

auto to_csr_matrix_view() const
{
// The usage of sparse_matrix_t prior to introduction of this method
// assumed that all data was strictly on device. We will make the same
// assumption for construction of the csr_matrix_view
return device_csr_matrix_view<value_type const, index_type const, index_type const, index_type>{
device_span<value_type const>{values_, std::uint64_t(nnz_)},
device_compressed_structure_view<index_type const, index_type const, index_type>{
device_span<index_type const>{row_offsets_, std::uint64_t(nrows_ + 1)},
device_span<index_type const>{col_indices_, std::uint64_t(nnz_)},
ncols_}};
}

virtual ~sparse_matrix_t(void) =
default; // virtual because used as base for following matrix types

Expand Down
8 changes: 5 additions & 3 deletions cpp/include/raft/spectral/detail/partition.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <raft/core/resource/cublas_handle.hpp>
#include <raft/core/resource/cuda_stream.hpp>
#include <raft/linalg/detail/cublas_wrappers.hpp>
#include <raft/sparse/linalg/laplacian.cuh>
#include <raft/spectral/cluster_solvers.cuh>
#include <raft/spectral/detail/spectral_util.cuh>
#include <raft/spectral/eigen_solvers.cuh>
Expand Down Expand Up @@ -97,14 +98,15 @@ std::tuple<vertex_t, weight_t, vertex_t> partition(
// Compute eigenvectors of Laplacian

// Initialize Laplacian
/// sparse_matrix_t<vertex_t, weight_t> A{handle, graph};
spectral::matrix::laplacian_matrix_t<vertex_t, weight_t, nnz_t> L{handle, csr_m};
auto laplacian =
raft::sparse::linalg::compute_graph_laplacian(handle, csr_m.to_csr_matrix_view());

auto eigen_config = eigen_solver.get_config();
auto nEigVecs = eigen_config.n_eigVecs;

// Compute smallest eigenvalues and eigenvectors
std::get<0>(stats) = eigen_solver.solve_smallest_eigenvectors(handle, L, eigVals, eigVecs);
std::get<0>(stats) =
eigen_solver.solve_smallest_eigenvectors(handle, laplacian.view(), eigVals, eigVecs);

// Whiten eigenvector matrix
transform_eigen_matrix(handle, n, nEigVecs, eigVecs);
Expand Down
Loading

0 comments on commit faef8ab

Please sign in to comment.