From 074055fa75e179a7e125863abfacb40790413a39 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 14 Jun 2024 09:42:35 -0500 Subject: [PATCH 1/2] resolve dependency-file-generator warning, other rapids-build-backend followup (#2360) Contributes to https://github.com/rapidsai/build-planning/issues/31 Contributes to https://github.com/rapidsai/dependency-file-generator/issues/89 Since #2331 was merged, we've made some small adjustments to the approach for `rapids-build-backend`. This catches `raft` up with those changes: * consolidates version-handling in `ci/` scripts * uses `--file-key` instead of `--file_key` in `rapids-dependency-file-generator` calls * reduces duplication of `rapids-build-backend` version constraint in `dependencies.yaml`, and renames some lists there to match the patterns used in other projects (e.g. `rapids_build_skbuild` instead of `build`) Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Kyle Edwards (https://github.com/KyleFromNVIDIA) URL: https://github.com/rapidsai/raft/pull/2360 --- ci/build_cpp.sh | 4 +--- ci/build_docs.sh | 2 +- ci/check_style.sh | 2 +- ci/test_cpp.sh | 2 +- ci/test_python.sh | 2 +- dependencies.yaml | 20 ++++++++++---------- 6 files changed, 15 insertions(+), 17 deletions(-) diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh index 2778c2a7d7..c456bcae80 100755 --- a/ci/build_cpp.sh +++ b/ci/build_cpp.sh @@ -13,10 +13,8 @@ export CMAKE_GENERATOR=Ninja rapids-print-env -version=$(rapids-generate-version) - rapids-logger "Begin cpp build" -RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild conda/recipes/libraft +RAPIDS_PACKAGE_VERSION=$(rapids-generate-version) rapids-conda-retry mambabuild conda/recipes/libraft rapids-upload-conda-to-s3 cpp diff --git a/ci/build_docs.sh b/ci/build_docs.sh index f0313f27d1..a2447f5f06 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -8,7 +8,7 @@ rapids-logger "Create test conda environment" rapids-dependency-file-generator \ --output conda \ - --file_key docs \ + --file-key docs \ --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml rapids-mamba-retry env create --yes -f env.yaml -n docs diff --git a/ci/check_style.sh b/ci/check_style.sh index d7baa88e8f..d7ba4cae25 100755 --- a/ci/check_style.sh +++ b/ci/check_style.sh @@ -8,7 +8,7 @@ rapids-logger "Create checks conda environment" rapids-dependency-file-generator \ --output conda \ - --file_key checks \ + --file-key checks \ --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml rapids-mamba-retry env create --yes -f env.yaml -n checks diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index f83ddf616d..05323e4f5d 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -11,7 +11,7 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ rapids-logger "Generate C++ testing dependencies" rapids-dependency-file-generator \ --output conda \ - --file_key test_cpp \ + --file-key test_cpp \ --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee env.yaml rapids-mamba-retry env create --yes -f env.yaml -n test diff --git a/ci/test_python.sh b/ci/test_python.sh index 59da1f0bc4..01e5ac9456 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -11,7 +11,7 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ rapids-logger "Generate Python testing dependencies" rapids-dependency-file-generator \ --output conda \ - --file_key test_python \ + --file-key test_python \ --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml rapids-mamba-retry env create --yes -f env.yaml -n test diff --git a/dependencies.yaml b/dependencies.yaml index 3d5de9bdc2..0f0e39bb1b 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -6,7 +6,6 @@ files: cuda: ["11.8", "12.2"] arch: [x86_64, aarch64] includes: - - build - rapids_build - build_pylibraft - cuda @@ -15,9 +14,10 @@ files: - depends_on_distributed_ucxx - develop - checks - - build_wheels - test_libraft - docs + - rapids_build_setuptools + - rapids_build_skbuild - run_raft_dask - run_pylibraft - test_python_common @@ -28,13 +28,13 @@ files: cuda: ["11.8", "12.0"] arch: [x86_64, aarch64] includes: - - build - rapids_build - cuda - cuda_version - develop - nn_bench - nn_bench_python + - rapids_build_skbuild test_cpp: output: none includes: @@ -67,7 +67,7 @@ files: extras: table: build-system includes: - - build + - rapids_build_skbuild py_rapids_build_pylibraft: output: pyproject pyproject_dir: python/pylibraft @@ -100,7 +100,7 @@ files: extras: table: build-system includes: - - build + - rapids_build_skbuild py_rapids_build_raft_dask: output: pyproject pyproject_dir: python/raft-dask @@ -132,7 +132,7 @@ files: extras: table: build-system includes: - - build_wheels + - rapids_build_setuptools py_run_raft_ann_bench: output: pyproject pyproject_dir: python/raft-ann-bench @@ -147,11 +147,11 @@ channels: - conda-forge - nvidia dependencies: - build: + rapids_build_skbuild: common: - output_types: [conda, requirements, pyproject] packages: - - rapids-build-backend>=0.3.0,<0.4.0.dev0 + - &rapids_build_backend rapids-build-backend>=0.3.0,<0.4.0.dev0 - output_types: [conda] packages: - scikit-build-core>=0.7.0 @@ -404,13 +404,13 @@ dependencies: - recommonmark - sphinx-copybutton - sphinx-markdown-tables - build_wheels: + rapids_build_setuptools: common: - output_types: [requirements, pyproject] packages: - wheel - setuptools - - rapids-build-backend>=0.3.0,<0.4.0.dev0 + - *rapids_build_backend py_version: specific: - output_types: conda From 877644a423c0268746af62cecb7150afa65d8386 Mon Sep 17 00:00:00 2001 From: Jinsol Park Date: Fri, 14 Jun 2024 11:01:25 -0700 Subject: [PATCH 2/2] [REVIEW] Enable distance return for NN Descent (#2345) - Enable NN Descent to return the distances array as well (previously only returning indices array) - Added a `return_distances` flag in `index_params`. When set to 1 (true), allocates a distance array to return. - Test for checking distances recall compared to naive knn Authors: - Jinsol Park (https://github.com/jinsolp) Approvers: - Divye Gala (https://github.com/divyegala) URL: https://github.com/rapidsai/raft/pull/2345 --- .../raft/neighbors/detail/nn_descent.cuh | 45 ++++++++++++++++--- .../raft/neighbors/nn_descent_types.hpp | 38 ++++++++++++++-- cpp/test/neighbors/ann_nn_descent.cuh | 33 +++++++++++--- 3 files changed, 102 insertions(+), 14 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/nn_descent.cuh b/cpp/include/raft/neighbors/detail/nn_descent.cuh index cd3c6f3947..cdfb9d9931 100644 --- a/cpp/include/raft/neighbors/detail/nn_descent.cuh +++ b/cpp/include/raft/neighbors/detail/nn_descent.cuh @@ -45,6 +45,7 @@ #include #include +#include #include #include @@ -217,6 +218,7 @@ struct BuildConfig { // If internal_node_degree == 0, the value of node_degree will be assigned to it size_t max_iterations{50}; float termination_threshold{0.0001}; + size_t output_graph_degree{32}; }; template @@ -345,7 +347,11 @@ class GNND { GNND(const GNND&) = delete; GNND& operator=(const GNND&) = delete; - void build(Data_t* data, const Index_t nrow, Index_t* output_graph); + void build(Data_t* data, + const Index_t nrow, + Index_t* output_graph, + bool return_distances, + DistData_t* output_distances); ~GNND() = default; using ID_t = InternalID_t; @@ -1212,7 +1218,11 @@ void GNND::local_join(cudaStream_t stream) } template -void GNND::build(Data_t* data, const Index_t nrow, Index_t* output_graph) +void GNND::build(Data_t* data, + const Index_t nrow, + Index_t* output_graph, + bool return_distances, + DistData_t* output_distances) { using input_t = typename std::remove_const::type; @@ -1338,6 +1348,16 @@ void GNND::build(Data_t* data, const Index_t nrow, Index_t* out // Reuse graph_.h_dists as the buffer for shrink the lists in graph static_assert(sizeof(decltype(*(graph_.h_dists.data_handle()))) >= sizeof(Index_t)); + + if (return_distances) { + for (size_t i = 0; i < (size_t)nrow_; i++) { + raft::copy(output_distances + i * build_config_.output_graph_degree, + graph_.h_dists.data_handle() + i * build_config_.node_degree, + build_config_.output_graph_degree, + raft::resource::get_cuda_stream(res)); + } + } + Index_t* graph_shrink_buffer = (Index_t*)graph_.h_dists.data_handle(); #pragma omp parallel for @@ -1410,10 +1430,24 @@ void build(raft::resources const& res, .node_degree = extended_graph_degree, .internal_node_degree = extended_intermediate_degree, .max_iterations = params.max_iterations, - .termination_threshold = params.termination_threshold}; + .termination_threshold = params.termination_threshold, + .output_graph_degree = params.graph_degree}; GNND nnd(res, build_config); - nnd.build(dataset.data_handle(), dataset.extent(0), int_graph.data_handle()); + + if (idx.distances().has_value() || !params.return_distances) { + nnd.build(dataset.data_handle(), + dataset.extent(0), + int_graph.data_handle(), + params.return_distances, + idx.distances() + .value_or(raft::make_device_matrix(res, 0, 0).view()) + .data_handle()); + } else { + RAFT_EXPECTS(!params.return_distances, + "Distance view not allocated. Using return_distances set to true requires " + "distance view to be allocated."); + } #pragma omp parallel for for (size_t i = 0; i < static_cast(dataset.extent(0)); i++) { @@ -1444,7 +1478,8 @@ index build(raft::resources const& res, graph_degree = intermediate_degree; } - index idx{res, dataset.extent(0), static_cast(graph_degree)}; + index idx{ + res, dataset.extent(0), static_cast(graph_degree), params.return_distances}; build(res, params, dataset, idx); diff --git a/cpp/include/raft/neighbors/nn_descent_types.hpp b/cpp/include/raft/neighbors/nn_descent_types.hpp index e1fc96878a..5d23ff2c2e 100644 --- a/cpp/include/raft/neighbors/nn_descent_types.hpp +++ b/cpp/include/raft/neighbors/nn_descent_types.hpp @@ -18,6 +18,8 @@ #include "ann_types.hpp" +#include +#include #include #include #include @@ -25,6 +27,8 @@ #include #include +#include + namespace raft::neighbors::experimental::nn_descent { /** * @ingroup nn-descent @@ -51,6 +55,7 @@ struct index_params : ann::index_params { size_t intermediate_graph_degree = 128; // Degree of input graph for pruning. size_t max_iterations = 20; // Number of nn-descent iterations. float termination_threshold = 0.0001; // Termination threshold of nn-descent. + bool return_distances = false; // return distances if true }; /** @@ -79,14 +84,20 @@ struct index : ann::index { * @param res raft::resources is an object mangaging resources * @param n_rows number of rows in knn-graph * @param n_cols number of cols in knn-graph + * @param return_distances whether to allocate and get distances information */ - index(raft::resources const& res, int64_t n_rows, int64_t n_cols) + index(raft::resources const& res, int64_t n_rows, int64_t n_cols, bool return_distances = false) : ann::index(), res_{res}, metric_{raft::distance::DistanceType::L2Expanded}, graph_{raft::make_host_matrix(n_rows, n_cols)}, - graph_view_{graph_.view()} + graph_view_{graph_.view()}, + return_distances_(return_distances) { + if (return_distances) { + distances_ = raft::make_device_matrix(res_, n_rows, n_cols); + distances_view_ = distances_.value().view(); + } } /** @@ -98,14 +109,23 @@ struct index : ann::index { * * @param res raft::resources is an object mangaging resources * @param graph_view raft::host_matrix_view for storing knn-graph + * @param distances_view std::optional> for + * storing knn-graph distances + * @param return_distances whether to allocate and get distances information */ index(raft::resources const& res, - raft::host_matrix_view graph_view) + raft::host_matrix_view graph_view, + std::optional> distances_view = + std::nullopt, + bool return_distances = false) : ann::index(), res_{res}, metric_{raft::distance::DistanceType::L2Expanded}, graph_{raft::make_host_matrix(0, 0)}, - graph_view_{graph_view} + distances_{raft::make_device_matrix(res_, 0, 0)}, + graph_view_{graph_view}, + distances_view_(distances_view), + return_distances_(return_distances) { } @@ -133,6 +153,13 @@ struct index : ann::index { return graph_view_; } + /** neighborhood graph distances [size, graph-degree] */ + [[nodiscard]] inline auto distances() noexcept + -> std::optional> + { + return distances_view_; + } + // Don't allow copying the index for performance reasons (try avoiding copying data) index(const index&) = delete; index(index&&) = default; @@ -144,8 +171,11 @@ struct index : ann::index { raft::resources const& res_; raft::distance::DistanceType metric_; raft::host_matrix graph_; // graph to return for non-int IdxT + std::optional> distances_; raft::host_matrix_view graph_view_; // view of graph for user provided matrix + std::optional> distances_view_; + bool return_distances_; }; /** @} */ diff --git a/cpp/test/neighbors/ann_nn_descent.cuh b/cpp/test/neighbors/ann_nn_descent.cuh index 495af081f1..f74cadb415 100644 --- a/cpp/test/neighbors/ann_nn_descent.cuh +++ b/cpp/test/neighbors/ann_nn_descent.cuh @@ -15,11 +15,11 @@ */ #pragma once -#include "../test_utils.cuh" #include "ann_utils.cuh" #include #include +#include #include #include @@ -65,7 +65,9 @@ class AnnNNDescentTest : public ::testing::TestWithParam { { size_t queries_size = ps.n_rows * ps.graph_degree; std::vector indices_NNDescent(queries_size); + std::vector distances_NNDescent(queries_size); std::vector indices_naive(queries_size); + std::vector distances_naive(queries_size); { rmm::device_uvector distances_naive_dev(queries_size, stream_); @@ -81,6 +83,7 @@ class AnnNNDescentTest : public ::testing::TestWithParam { ps.graph_degree, ps.metric); update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); + update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); resource::sync_stream(handle_); } @@ -91,6 +94,7 @@ class AnnNNDescentTest : public ::testing::TestWithParam { index_params.graph_degree = ps.graph_degree; index_params.intermediate_graph_degree = 2 * ps.graph_degree; index_params.max_iterations = 100; + index_params.return_distances = true; auto database_view = raft::make_device_matrix_view( (const DataT*)database.data(), ps.n_rows, ps.dim); @@ -102,20 +106,39 @@ class AnnNNDescentTest : public ::testing::TestWithParam { auto database_host_view = raft::make_host_matrix_view( (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); auto index = nn_descent::build(handle_, index_params, database_host_view); - update_host( + raft::copy( indices_NNDescent.data(), index.graph().data_handle(), queries_size, stream_); + if (index.distances().has_value()) { + raft::copy(distances_NNDescent.data(), + index.distances().value().data_handle(), + queries_size, + stream_); + } + } else { auto index = nn_descent::build(handle_, index_params, database_view); - update_host( + raft::copy( indices_NNDescent.data(), index.graph().data_handle(), queries_size, stream_); + if (index.distances().has_value()) { + raft::copy(distances_NNDescent.data(), + index.distances().value().data_handle(), + queries_size, + stream_); + } }; } resource::sync_stream(handle_); } double min_recall = ps.min_recall; - EXPECT_TRUE(eval_recall( - indices_naive, indices_NNDescent, ps.n_rows, ps.graph_degree, 0.001, min_recall)); + EXPECT_TRUE(eval_neighbours(indices_naive, + indices_NNDescent, + distances_naive, + distances_NNDescent, + ps.n_rows, + ps.graph_degree, + 0.001, + min_recall)); } }