Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix UMAP issues with large inputs #6245

Open
wants to merge 30 commits into
base: branch-25.04
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
58dea2c
Fix UMAP issues with large inputs
viclafargue Jan 22, 2025
ea9a476
Re-enable coo_sort before removing zeroes
viclafargue Jan 22, 2025
c8db94b
updates
viclafargue Jan 23, 2025
fb26681
fix issue
viclafargue Jan 23, 2025
038f31e
answering review
viclafargue Jan 24, 2025
fb70daf
fix small issue
viclafargue Jan 24, 2025
ca86392
typos
viclafargue Jan 24, 2025
72a83ab
typos
viclafargue Jan 24, 2025
a7134dd
compilation fix
viclafargue Jan 27, 2025
9abe842
changes so far
viclafargue Jan 29, 2025
6ff906b
completing change
viclafargue Jan 30, 2025
5650fc1
point to raft and cuvs branch
divyegala Feb 3, 2025
2e2bfbf
Merge branch 'branch-25.02' into fix-umap-large-inputs
divyegala Feb 3, 2025
3151144
Merge branch 'branch-25.04' into fix-umap-large-inputs
divyegala Feb 3, 2025
e013cb7
cleanup merge
divyegala Feb 3, 2025
cb80c5b
cleanup merge
divyegala Feb 3, 2025
72361ec
run pre-commit
divyegala Feb 3, 2025
edfb14b
treelite back to normal version
divyegala Feb 3, 2025
7a7229b
fix cmake args in get_cuvs
divyegala Feb 3, 2025
2c46ad1
fix cmake args in get_cuvs
divyegala Feb 3, 2025
c51a1e9
bump
divyegala Feb 3, 2025
53d276c
bump
divyegala Feb 4, 2025
0aaeac8
Merge branch 'branch-25.04' into fix-umap-large-inputs
divyegala Feb 4, 2025
f7ce445
Merge branch-25.04
viclafargue Feb 5, 2025
b0d4ef1
answering reviews
viclafargue Feb 6, 2025
bd94ddf
Merge branch 'branch-25.04' into fix-umap-large-inputs
viclafargue Feb 6, 2025
04732d3
bumping treelite version again
viclafargue Feb 7, 2025
dea4a16
Restore FastIntDiv use in UMAP optimization kernel
viclafargue Feb 7, 2025
671a06c
Restore thirdparty CMake files
viclafargue Feb 7, 2025
d81eb00
Merge branch 'branch-25.04' into fix-umap-large-inputs
viclafargue Feb 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions cpp/include/cuml/manifold/common.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,6 +16,8 @@

#pragma once

#include <stdint.h>

namespace ML {

// Dense input uses int64_t until FAISS is updated
Expand Down Expand Up @@ -55,8 +57,8 @@ struct knn_graph {
template <typename T>
struct manifold_inputs_t {
T* y;
int n;
int d;
uint64_t n;
viclafargue marked this conversation as resolved.
Show resolved Hide resolved
uint64_t d;

manifold_inputs_t(T* y_, int n_, int d_) : y(y_), n(n_), d(d_) {}

Expand Down
3 changes: 2 additions & 1 deletion cpp/src/tsne/tsne_runner.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

#include <cuvs/distance/distance.hpp>
#include <pca/pca.cuh>
#include <stdint.h>

namespace ML {

Expand Down Expand Up @@ -167,7 +168,7 @@ class TSNE_runner {
{
distance_and_perplexity();

const auto NNZ = COO_Matrix.nnz;
const auto NNZ = value_idx{COO_Matrix.nnz};
auto* VAL = COO_Matrix.vals();
const auto* COL = COO_Matrix.cols();
const auto* ROW = COO_Matrix.rows();
Expand Down
33 changes: 18 additions & 15 deletions cpp/src/umap/fuzzy_simpl_set/naive.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

#include <cuda_runtime.h>

#include <stdint.h>
#include <stdio.h>

#include <string>
Expand Down Expand Up @@ -91,8 +92,8 @@ CUML_KERNEL void smooth_knn_dist_kernel(const value_t* knn_dists,
float bandwidth = 1.0)
{
// row-based matrix 1 thread per row
int row = (blockIdx.x * TPB_X) + threadIdx.x;
int i = row * n_neighbors; // each thread processes one row of the dist matrix
int row = (blockIdx.x * TPB_X) + threadIdx.x;
uint64_t i = uint64_t{row} * n_neighbors; // each thread processes one row of the dist matrix

if (row < n) {
float target = __log2f(n_neighbors) * bandwidth;
Expand Down Expand Up @@ -190,7 +191,7 @@ CUML_KERNEL void smooth_knn_dist_kernel(const value_t* knn_dists,
*
* Descriptions adapted from: https://github.com/lmcinnes/umap/blob/master/umap/umap_.py
*/
template <int TPB_X, typename value_idx, typename value_t>
template <uint64_t TPB_X, typename value_idx, typename value_t>
CUML_KERNEL void compute_membership_strength_kernel(
const value_idx* knn_indices,
const float* knn_dists, // nn outputs
Expand All @@ -199,14 +200,14 @@ CUML_KERNEL void compute_membership_strength_kernel(
value_t* vals,
int* rows,
int* cols, // result coo
int n,
int n_neighbors)
int n_neighbors,
uint64_t to_process)
{ // model params

// row-based matrix is best
int idx = (blockIdx.x * TPB_X) + threadIdx.x;
uint64_t idx = (blockIdx.x * TPB_X) + threadIdx.x;

if (idx < n * n_neighbors) {
if (idx < to_process) {
int row = idx / n_neighbors; // one neighbor per thread

double cur_rho = rhos[row];
Expand Down Expand Up @@ -237,8 +238,8 @@ CUML_KERNEL void compute_membership_strength_kernel(
/*
* Sets up and runs the knn dist smoothing
*/
template <int TPB_X, typename value_idx, typename value_t>
void smooth_knn_dist(int n,
template <uint64_t TPB_X, typename value_idx, typename value_t>
void smooth_knn_dist(uint64_t n,
const value_idx* knn_indices,
const float* knn_dists,
value_t* rhos,
Expand All @@ -253,7 +254,8 @@ void smooth_knn_dist(int n,

rmm::device_uvector<value_t> dist_means_dev(n_neighbors, stream);

raft::stats::mean(dist_means_dev.data(), knn_dists, 1, n_neighbors * n, false, false, stream);
raft::stats::mean(
dist_means_dev.data(), knn_dists, uint64_t{1}, n * n_neighbors, false, false, stream);
RAFT_CUDA_TRY(cudaPeekAtLastError());

value_t mean_dist = 0.0;
Expand Down Expand Up @@ -284,8 +286,8 @@ void smooth_knn_dist(int n,
* @param params UMAPParams config object
* @param stream cuda stream to use for device operations
*/
template <int TPB_X, typename value_idx, typename value_t>
void launcher(int n,
template <uint64_t TPB_X, typename value_idx, typename value_t>
void launcher(uint64_t n,
const value_idx* knn_indices,
const value_t* knn_dists,
int n_neighbors,
Expand Down Expand Up @@ -328,7 +330,8 @@ void launcher(int n,
* Compute graph of membership strengths
*/

dim3 grid_elm(raft::ceildiv(n * n_neighbors, TPB_X), 1, 1);
uint64_t to_process = {in.n_rows} * n_neighbors;
dim3 grid_elm(raft::ceildiv(to_process, TPB_X), 1, 1);
viclafargue marked this conversation as resolved.
Show resolved Hide resolved
dim3 blk_elm(TPB_X, 1, 1);

compute_membership_strength_kernel<TPB_X><<<grid_elm, blk_elm, 0, stream>>>(knn_indices,
Expand All @@ -338,8 +341,8 @@ void launcher(int n,
in.vals(),
in.rows(),
in.cols(),
in.n_rows,
n_neighbors);
n_neighbors,
to_process);
RAFT_CUDA_TRY(cudaPeekAtLastError());

if (ML::default_logger().should_log(ML::level_enum::debug)) {
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/umap/init_embed/random_algo.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -20,14 +20,16 @@

#include <raft/random/rng.cuh>

#include <stdint.h>

namespace UMAPAlgo {
namespace InitEmbed {
namespace RandomInit {

using namespace ML;

template <typename T>
void launcher(int n, int d, UMAPParams* params, T* embedding, cudaStream_t stream)
void launcher(uint64_t n, int d, UMAPParams* params, T* embedding, cudaStream_t stream)
{
uint64_t seed = params->random_state;

Expand Down
6 changes: 4 additions & 2 deletions cpp/src/umap/init_embed/spectral_algo.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -29,6 +29,8 @@
#include <thrust/execution_policy.h>
#include <thrust/extrema.h>

#include <stdint.h>

#include <iostream>

namespace UMAPAlgo {
Expand All @@ -44,7 +46,7 @@ using namespace ML;
*/
template <typename T>
void launcher(const raft::handle_t& handle,
int n,
uint64_t n,
viclafargue marked this conversation as resolved.
Show resolved Hide resolved
int d,
raft::sparse::COO<float>* coo,
UMAPParams* params,
Expand Down
8 changes: 5 additions & 3 deletions cpp/src/umap/knn_graph/algo.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -35,6 +35,7 @@

#include <cuvs/distance/distance.hpp>
#include <cuvs/neighbors/brute_force.hpp>
#include <stdint.h>

#include <iostream>

Expand Down Expand Up @@ -126,11 +127,12 @@ inline void launcher(const raft::handle_t& handle,

RAFT_EXPECTS(graph.distances().has_value(),
"return_distances for nn descent should be set to true to be used for UMAP");
auto out_knn_dists_view = raft::make_device_matrix_view(out.knn_dists, inputsA.n, n_neighbors);
auto out_knn_dists_view =
raft::make_device_matrix_view(out.knn_dists, inputsA.n, uint64_t{n_neighbors});
raft::matrix::slice<float, int64_t, raft::row_major>(
handle, raft::make_const_mdspan(graph.distances().value()), out_knn_dists_view, coords);
auto out_knn_indices_view =
raft::make_device_matrix_view(out.knn_indices, inputsA.n, n_neighbors);
raft::make_device_matrix_view(out.knn_indices, inputsA.n, uint64_t{n_neighbors});
raft::matrix::slice<int64_t, int64_t, raft::row_major>(
handle, raft::make_const_mdspan(indices_d.view()), out_knn_indices_view, coords);
}
Expand Down
18 changes: 8 additions & 10 deletions cpp/src/umap/runner.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
#include <thrust/scan.h>
#include <thrust/system/cuda/execution_policy.h>

#include <stdint.h>

#include <memory>

namespace UMAPAlgo {
Expand Down Expand Up @@ -348,7 +350,7 @@ void _fit_supervised(const raft::handle_t& handle,
/**
*
*/
template <typename value_idx, typename value_t, typename umap_inputs, int TPB_X>
template <typename value_idx, typename value_t, typename umap_inputs, uint64_t TPB_X>
void _transform(const raft::handle_t& handle,
const umap_inputs& inputs,
umap_inputs& orig_x_inputs,
Expand Down Expand Up @@ -425,7 +427,7 @@ void _transform(const raft::handle_t& handle,
* Compute graph of membership strengths
*/

int nnz = inputs.n * params->n_neighbors;
uint64_t nnz = uint64_t{inputs.n} * params->n_neighbors;
viclafargue marked this conversation as resolved.
Show resolved Hide resolved

dim3 grid_nnz(raft::ceildiv(nnz, TPB_X), 1, 1);

Expand All @@ -437,6 +439,7 @@ void _transform(const raft::handle_t& handle,

raft::sparse::COO<value_t> graph_coo(stream, nnz, inputs.n, inputs.n);

uint64_t to_process = uint64_t{graph_coo.n_rows} * params->n_neighbors;
FuzzySimplSetImpl::compute_membership_strength_kernel<TPB_X>
<<<grid_nnz, blk, 0, stream>>>(knn_graph.knn_indices,
knn_graph.knn_dists,
Expand All @@ -445,15 +448,13 @@ void _transform(const raft::handle_t& handle,
graph_coo.vals(),
graph_coo.rows(),
graph_coo.cols(),
graph_coo.n_rows,
params->n_neighbors);
params->n_neighbors,
to_process);
RAFT_CUDA_TRY(cudaPeekAtLastError());

rmm::device_uvector<int> row_ind(inputs.n, stream);
rmm::device_uvector<int> ia(inputs.n, stream);
rmm::device_uvector<uint64_t> row_ind(inputs.n, stream);

raft::sparse::convert::sorted_coo_to_csr(&graph_coo, row_ind.data(), stream);
raft::sparse::linalg::coo_degree(&graph_coo, ia.data(), stream);
viclafargue marked this conversation as resolved.
Show resolved Hide resolved

rmm::device_uvector<value_t> vals_normed(graph_coo.nnz, stream);
RAFT_CUDA_TRY(cudaMemsetAsync(vals_normed.data(), 0, graph_coo.nnz * sizeof(value_t), stream));
Expand All @@ -471,9 +472,6 @@ void _transform(const raft::handle_t& handle,
params->n_components,
transformed,
params->n_neighbors);
RAFT_CUDA_TRY(cudaPeekAtLastError());

RAFT_CUDA_TRY(cudaMemsetAsync(ia.data(), 0.0, ia.size() * sizeof(int), stream));

RAFT_CUDA_TRY(cudaPeekAtLastError());

Expand Down
Loading
Loading