Merge branch 'rapidsai:branch-24.10' into hdbscan-nndescent
jinsolp committed Sep 21, 2024
2 parents 517f7ab + 7de8831, commit fea3dc8
Showing 31 changed files with 366 additions and 159 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -9,7 +9,7 @@ repos:
files: python/.*
args: [--config, python/cuml/pyproject.toml]
- repo: https://github.com/PyCQA/flake8
-rev: 5.0.4
+rev: 7.1.1
hooks:
- id: flake8
args: [--config=python/cuml/.flake8]
@@ -60,7 +60,7 @@ repos:
pass_filenames: false
language: python
- repo: https://github.com/rapidsai/pre-commit-hooks
-rev: v0.3.1
+rev: v0.4.0
hooks:
- id: verify-copyright
files: |
2 changes: 1 addition & 1 deletion BUILD.md
@@ -18,7 +18,7 @@ To install cuML from source, ensure the following dependencies are met:
It is recommended to use conda for environment/package management. If doing so, development environment .yaml files are located in `conda/environments/all_*.yaml`. These files contain most of the dependencies mentioned above (notable exceptions are `gcc` and `zlib`). To create a development environment named `cuml_dev`, you can use the following commands:

```bash
-conda create -n cuml_dev python=3.11
+conda create -n cuml_dev python=3.12
conda env update -n cuml_dev --file=conda/environments/all_cuda-118_arch-x86_64.yaml
conda activate cuml_dev
```
2 changes: 1 addition & 1 deletion build.sh
@@ -168,7 +168,7 @@ while true; do
CMAKE_LOG_LEVEL=VERBOSE
;;
-g | --debug )
-BUILD_TYPE=Debug
+BUILD_TYPE=RelWithDebInfo
;;
-n | --no-install )
INSTALL_TARGET=""
8 changes: 5 additions & 3 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -45,19 +45,20 @@ dependencies:
- ninja
- nltk
- numba>=0.57
+- numpy>=1.23,<3.0a0
- numpydoc
- nvcc_linux-64=11.8
- packaging
- pip
- pydata-sphinx-theme!=0.14.2
- pylibraft==24.10.*,>=0.0.0a0
-- pynndescent==0.5.8
+- pynndescent
- pytest-benchmark
- pytest-cases
- pytest-cov
- pytest-xdist
- pytest==7.*
-- python>=3.9,<3.12
+- python>=3.10,<3.13
- raft-dask==24.10.*,>=0.0.0a0
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- rapids-dask-dependency==24.10.*,>=0.0.0a0
@@ -67,13 +68,14 @@ dependencies:
- scikit-learn==1.5
- scipy>=1.8.0
- seaborn
+- setuptools
- sphinx-copybutton
- sphinx-markdown-tables
- sphinx<6
- statsmodels
- sysroot_linux-64==2.17
- treelite==4.3.0
-- umap-learn==0.5.3
+- umap-learn==0.5.6
- pip:
- dask-glm==0.3.0
name: all_cuda-118_arch-x86_64
8 changes: 5 additions & 3 deletions conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -42,18 +42,19 @@ dependencies:
- ninja
- nltk
- numba>=0.57
+- numpy>=1.23,<3.0a0
- numpydoc
- packaging
- pip
- pydata-sphinx-theme!=0.14.2
- pylibraft==24.10.*,>=0.0.0a0
-- pynndescent==0.5.8
+- pynndescent
- pytest-benchmark
- pytest-cases
- pytest-cov
- pytest-xdist
- pytest==7.*
-- python>=3.9,<3.12
+- python>=3.10,<3.13
- raft-dask==24.10.*,>=0.0.0a0
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- rapids-dask-dependency==24.10.*,>=0.0.0a0
@@ -63,13 +64,14 @@ dependencies:
- scikit-learn==1.5
- scipy>=1.8.0
- seaborn
+- setuptools
- sphinx-copybutton
- sphinx-markdown-tables
- sphinx<6
- statsmodels
- sysroot_linux-64==2.17
- treelite==4.3.0
-- umap-learn==0.5.3
+- umap-learn==0.5.6
- pip:
- dask-glm==0.3.0
name: all_cuda-125_arch-x86_64
4 changes: 2 additions & 2 deletions conda/recipes/cuml-cpu/meta.yaml
@@ -31,11 +31,11 @@ requirements:
- rapids-build-backend>=0.3.0,<0.4.0.dev0
run:
- python x.x
-- numpy>=1.23,<2.0a0
+- numpy>=1.23,<3.0a0
- pandas
- scikit-learn=1.2
- hdbscan>=0.8.38,<0.8.39
-- umap-learn=0.5.3
+- umap-learn=0.5.6
- nvtx

tests: # [linux64]
1 change: 1 addition & 0 deletions conda/recipes/cuml/meta.yaml
@@ -85,6 +85,7 @@ requirements:
- joblib >=0.11
- libcuml ={{ version }}
- libcumlprims ={{ minor_version }}
+- numpy >=1.23,<3.0a0
- pylibraft ={{ minor_version }}
- python x.x
- raft-dask ={{ minor_version }}
2 changes: 1 addition & 1 deletion cpp/include/cuml/experimental/fil/README.md
@@ -39,7 +39,7 @@ similar load methods for each of the serialization formats it supports.

```cpp
auto filename = "xgboost.json";
-auto tl_model = treelite::frontend::LoadXGBoostModel(filename);
+auto tl_model = treelite::model_loader::LoadXGBoostModelJSON(filename, "{}");
```

We then import the Treelite model into FIL via the
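For context on the change above: Treelite 4.x moved model loading out of the `treelite::frontend` namespace into `treelite::model_loader`, and the JSON loader takes a second argument carrying parser options as a JSON string. A minimal sketch, assuming Treelite 4.x headers and that `LoadXGBoostModelJSON` returns a `std::unique_ptr<treelite::Model>`:

```cpp
// Sketch only (not part of this commit); assumes Treelite 4.x.
#include <treelite/model_loader.h>
#include <treelite/tree.h>

#include <memory>

int main()
{
  auto filename = "xgboost.json";
  // The second argument carries parser options as a JSON string; "{}" requests defaults.
  std::unique_ptr<treelite::Model> tl_model =
    treelite::model_loader::LoadXGBoostModelJSON(filename, "{}");
  return tl_model ? 0 : 1;
}
```

The single-argument `treelite::frontend::LoadXGBoostModel` call was removed in Treelite 4.x, which appears to be what this README update tracks.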
21 changes: 21 additions & 0 deletions cpp/include/cuml/manifold/umap.hpp
@@ -84,6 +84,27 @@ void refine(const raft::handle_t& handle,
UMAPParams* params,
float* embeddings);

+/**
+ * Initializes embeddings and performs a UMAP fit on them, which enables
+ * iterative fitting without callbacks.
+ *
+ * @param[in] handle: raft::handle_t
+ * @param[in] X: pointer to input array
+ * @param[in] n: n_samples of input array
+ * @param[in] d: n_features of input array
+ * @param[in] graph: pointer to raft::sparse::COO object computed using ML::UMAP::get_graph
+ * @param[in] params: pointer to ML::UMAPParams object
+ * @param[out] embeddings: pointer to current embedding with shape n * n_components, stores updated
+ * embeddings on executing refine
+ */
+void init_and_refine(const raft::handle_t& handle,
+                     float* X,
+                     int n,
+                     int d,
+                     raft::sparse::COO<float, int>* graph,
+                     UMAPParams* params,
+                     float* embeddings);
+
/**
* Dense fit
*
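To make the intended call sequence concrete, here is a hypothetical usage sketch (not part of the diff): build the fuzzy simplicial set once with `ML::UMAP::get_graph`, whose signature is assumed here from this branch, then call `init_and_refine` to initialize and optimize the embedding without re-running the kNN/graph stage. The RMM buffer handling is likewise an assumption:

```cpp
// Hypothetical sketch; get_graph's signature and the RMM buffer handling are assumptions.
#include <cuml/manifold/umap.hpp>
#include <cuml/manifold/umapparams.h>

#include <raft/core/handle.hpp>
#include <rmm/device_uvector.hpp>

void embed_with_init_and_refine(const raft::handle_t& handle, float* X, int n, int d)
{
  ML::UMAPParams params;  // defaults; params.init selects random vs. spectral initialization

  // Fuzzy simplicial set over X (no labels, no precomputed kNN).
  auto graph = ML::UMAP::get_graph(
    handle, X, /*y=*/nullptr, n, d, /*knn_indices=*/nullptr, /*knn_dists=*/nullptr, &params);

  // Output buffer of shape n x n_components, as the doc comment above describes.
  rmm::device_uvector<float> embeddings(
    static_cast<std::size_t>(n) * params.n_components, handle.get_stream());

  // Initialize the embedding and run simplicial set embedding in one call.
  ML::UMAP::init_and_refine(handle, X, n, d, graph.get(), &params, embeddings.data());
}
```

Under these assumptions, the same `graph` can be reused across repeated `refine`/`init_and_refine` calls with different optimization settings, which is the iterative-fitting use case the new doc comment describes.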
8 changes: 5 additions & 3 deletions cpp/src/fil/treelite_import.cu
@@ -490,10 +490,11 @@ void tl2fil_common(forest_params_t* params,
ASSERT(model.num_target == 1, "FIL does not support multi-target models");

// assuming either all leaves use the .leaf_vector() or all leaves use .leaf_value()
-size_t leaf_vec_size = tl_leaf_vector_size(model);
+std::size_t leaf_vec_size = tl_leaf_vector_size(model);
std::string pred_transform(model.postprocessor);
if (leaf_vec_size > 0) {
-ASSERT(leaf_vec_size == model.num_class[0], "treelite model inconsistent");
+ASSERT(leaf_vec_size == static_cast<std::size_t>(model.num_class[0]),
+       "treelite model inconsistent");
params->num_classes = leaf_vec_size;
params->leaf_algo = leaf_algo_t::VECTOR_LEAF;

@@ -513,7 +514,8 @@ void tl2fil_common(forest_params_t* params,
// Ensure that the trees follow the grove-per-class layout.
for (size_t tree_id = 0; tree_id < model_preset.trees.size(); ++tree_id) {
ASSERT(model.target_id[tree_id] == 0, "FIL does not support multi-target models");
-ASSERT(model.class_id[tree_id] == tree_id % static_cast<size_t>(model.num_class[0]),
+ASSERT(static_cast<std::size_t>(model.class_id[tree_id]) ==
+         tree_id % static_cast<size_t>(model.num_class[0]),
"The tree model is not compatible with FIL; the trees must be laid out "
"such that tree i's output contributes towards class (i %% num_class).");
}
19 changes: 19 additions & 0 deletions cpp/src/umap/runner.cuh
@@ -247,12 +247,31 @@ void _refine(const raft::handle_t& handle,
value_t* embeddings)
{
cudaStream_t stream = handle.get_stream();
+ML::Logger::get().setLevel(params->verbosity);

/**
* Run simplicial set embedding to approximate low-dimensional representation
*/
SimplSetEmbed::run<TPB_X, value_t>(inputs.n, inputs.d, graph, params, embeddings, stream);
}

+template <typename value_idx, typename value_t, typename umap_inputs, int TPB_X>
+void _init_and_refine(const raft::handle_t& handle,
+                      const umap_inputs& inputs,
+                      UMAPParams* params,
+                      raft::sparse::COO<value_t>* graph,
+                      value_t* embeddings)
+{
+  cudaStream_t stream = handle.get_stream();
+  ML::Logger::get().setLevel(params->verbosity);
+
+  // Initialize embeddings
+  InitEmbed::run(handle, inputs.n, inputs.d, graph, params, embeddings, stream, params->init);
+
+  // Run simplicial set embedding
+  SimplSetEmbed::run<TPB_X, value_t>(inputs.n, inputs.d, graph, params, embeddings, stream);
+}

template <typename value_idx, typename value_t, typename umap_inputs, int TPB_X>
void _fit(const raft::handle_t& handle,
const umap_inputs& inputs,
14 changes: 14 additions & 0 deletions cpp/src/umap/umap.cu
@@ -92,6 +92,20 @@ void refine(const raft::handle_t& handle,
handle, inputs, params, graph, embeddings);
}

+void init_and_refine(const raft::handle_t& handle,
+                     float* X,
+                     int n,
+                     int d,
+                     raft::sparse::COO<float>* graph,
+                     UMAPParams* params,
+                     float* embeddings)
+{
+  CUML_LOG_DEBUG("Calling UMAP::init_and_refine() with precomputed KNN");
+  manifold_dense_inputs_t<float> inputs(X, nullptr, n, d);
+  UMAPAlgo::_init_and_refine<knn_indices_dense_t, float, manifold_dense_inputs_t<float>, TPB_X>(
+    handle, inputs, params, graph, embeddings);
+}

void fit(const raft::handle_t& handle,
float* X,
float* y,
16 changes: 9 additions & 7 deletions dependencies.yaml
@@ -229,6 +229,7 @@ dependencies:
- dask-cuda==24.10.*,>=0.0.0a0
- joblib>=0.11
- numba>=0.57
+- numpy>=1.23,<3.0a0
# TODO: Is scipy really a hard dependency, or should
# we make it optional (i.e. an extra for pip
# installation/run_constrained for conda)?
@@ -462,10 +463,6 @@ dependencies:
specific:
- output_types: conda
matrices:
-- matrix:
-py: "3.9"
-packages:
-- python=3.9
- matrix:
py: "3.10"
packages:
@@ -474,9 +471,13 @@
py: "3.11"
packages:
- python=3.11
+- matrix:
+py: "3.12"
+packages:
+- python=3.12
- matrix:
packages:
-- python>=3.9,<3.12
+- python>=3.10,<3.13
test_libcuml:
common:
- output_types: conda
@@ -512,8 +513,9 @@ dependencies:
- seaborn
- *scikit_learn
- statsmodels
-- umap-learn==0.5.3
-- pynndescent==0.5.8
+- umap-learn==0.5.6
+- pynndescent
+- setuptools # Needed on Python 3.12 for dask-glm, which requires pkg_resources but Python 3.12 doesn't have setuptools by default
- output_types: conda
packages:
- pip
10 changes: 4 additions & 6 deletions python/cuml/cuml/common/kernel_utils.py
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2019-2023, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -101,11 +101,9 @@ def cuda_kernel_factory(nvrtc_kernel_str, dtypes, kernel_name=None):
"{%d}" % idx, dtype_strs[idx]
)

-kernel_name = f"""{uuid1()
-if kernel_name is None
-else kernel_name}_{
-"".join(dtype_strs).replace(" ", "_")
-}"""
+kernel_name_prefix = uuid1() if kernel_name is None else kernel_name
+kernel_name_suffix = "".join(dtype_strs).replace(" ", "_")
+kernel_name = f"{kernel_name_prefix}_{kernel_name_suffix}"

nvrtc_kernel_str = "%s\nvoid %s%s" % (
extern_prefix,
6 changes: 2 additions & 4 deletions python/cuml/cuml/dask/manifold/umap.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -83,9 +83,7 @@ class UMAP(BaseEstimator, DelayedTransformMixin):
In addition to these missing features, you should expect to see
the final embeddings differing between `cuml.umap` and the reference
-UMAP. In particular, the reference UMAP uses an approximate kNN
-algorithm for large data sizes while cuml.umap always uses exact
-kNN.
+UMAP.
**Known issue:** If a UMAP model has not yet been fit, it cannot be pickled
8 changes: 2 additions & 6 deletions python/cuml/cuml/internals/available_devices.py
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -16,12 +16,8 @@
from cuml.internals.device_support import GPU_ENABLED
from cuml.internals.safe_imports import gpu_only_import_from, UnavailableError

-try:
-    from functools import cache  # requires Python >= 3.9
-except ImportError:
-    from functools import lru_cache

-    cache = lru_cache(maxsize=None)
+from functools import cache


def gpu_available_no_context_creation():