Merge branch-25.02 into branch-25.04

rapidsai · Feb 4, 2025 · ce49f50 · ce49f50
2 parents 68072dd + def265e
commit ce49f50
Show file tree

Hide file tree

Showing 18 changed files with 167 additions and 82 deletions.
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
@@ -13,6 +13,7 @@ jobs:
   # Please keep pr-builder as the top job here
   pr-builder:
     needs:
+      - check-nightly-ci
       - changed-files
       - checks
       - clang-tidy
@@ -43,6 +44,18 @@ jobs:
       - name: Telemetry setup
         if: ${{ vars.TELEMETRY_ENABLED == 'true' }}
         uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main
+  check-nightly-ci:
+    # Switch to ubuntu-latest once it defaults to a version of Ubuntu that
+    # provides at least Python 3.11 (see
+    # https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat)
+    runs-on: ubuntu-24.04
+    env:
+      RAPIDS_GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+    steps:
+      - name: Check if nightly CI is passing
+        uses: rapidsai/shared-actions/check_nightly_success/dispatch@main
+        with:
+          repo: cuml
   changed-files:
     secrets: inherit
     needs: telemetry-setup

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -76,7 +76,7 @@ dependencies:
 - sphinx-markdown-tables
 - statsmodels
 - sysroot_linux-64==2.28
-- treelite==4.3.0
+- treelite==4.4.1
 - umap-learn==0.5.6
 - xgboost>=2.1.0
 name: all_cuda-118_arch-x86_64
diff --git a/conda/environments/all_cuda-128_arch-x86_64.yaml b/conda/environments/all_cuda-128_arch-x86_64.yaml
@@ -72,7 +72,7 @@ dependencies:
 - sphinx-markdown-tables
 - statsmodels
 - sysroot_linux-64==2.28
-- treelite==4.3.0
+- treelite==4.4.1
 - umap-learn==0.5.6
 - xgboost>=2.1.0
 name: all_cuda-128_arch-x86_64
diff --git a/conda/recipes/cuml/conda_build_config.yaml b/conda/recipes/cuml/conda_build_config.yaml
@@ -20,4 +20,4 @@ c_stdlib_version:
   - "=2.28"
 
 treelite_version:
-  - "=4.3.0"
+  - "=4.4.1"
diff --git a/conda/recipes/libcuml/conda_build_config.yaml b/conda/recipes/libcuml/conda_build_config.yaml
@@ -26,7 +26,7 @@ spdlog_version:
   - ">=1.14.1,<1.15"
 
 treelite_version:
-  - "=4.3.0"
+  - "=4.4.1"
 
 # The CTK libraries below are missing from the conda-forge::cudatoolkit package
 # for CUDA 11. The "*_host_*" version specifiers correspond to `11.8` packages

diff --git a/cpp/cmake/thirdparty/get_treelite.cmake b/cpp/cmake/thirdparty/get_treelite.cmake
@@ -79,7 +79,7 @@ function(find_and_configure_treelite)
     rapids_export_find_package_root(BUILD Treelite [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cuml-exports)
 endfunction()
 
-find_and_configure_treelite(VERSION     4.3.0
-                        PINNED_TAG  575e4208f2b18e40d818c338ecb95d7a26e69aab
+find_and_configure_treelite(VERSION     4.4.1
+                        PINNED_TAG  386bd0de99f5a66584c7e58221ee38ce606ad1ae
                         EXCLUDE_FROM_ALL  ${CUML_EXCLUDE_TREELITE_FROM_ALL}
                         BUILD_STATIC_LIBS ${CUML_USE_TREELITE_STATIC})
diff --git a/dependencies.yaml b/dependencies.yaml
@@ -256,7 +256,7 @@ dependencies:
       - output_types: [conda, requirements, pyproject]
         packages:
           - &cython cython>=3.0.0
-          - &treelite treelite==4.3.0
+          - &treelite treelite==4.4.1
 
   py_run_cuml:
     common:

diff --git a/python/cuml/cuml/cluster/kmeans.pyx b/python/cuml/cuml/cluster/kmeans.pyx
@@ -21,6 +21,7 @@ np = cpu_only_import('numpy')
 from cuml.internals.safe_imports import gpu_only_import
 rmm = gpu_only_import('rmm')
 from cuml.internals.safe_imports import safe_import_from, return_false
+from cuml.internals.utils import check_random_seed
 import typing
 
 IF GPUBUILD == 1:
@@ -209,8 +210,11 @@ class KMeans(UniversalBase,
             params.init = self._params_init
             params.max_iter = <int>self.max_iter
             params.tol = <double>self.tol
+            # After transferring from one device to another `_seed` might not be set
+            # so we need to pass a dummy value here. Its value does not matter as the
+            # seed is only used during fitting
+            params.rng_state.seed = <int>getattr(self, "_seed", 0)
             params.verbosity = <raft_level_enum>(<int>self.verbose)
-            params.rng_state.seed = self.random_state
             params.metric = DistanceType.L2Expanded   # distance metric as squared L2: @todo - support other metrics # noqa: E501
             params.batch_samples = <int>self.max_samples_per_batch
             params.oversampling_factor = <double>self.oversampling_factor
@@ -307,6 +311,7 @@ class KMeans(UniversalBase,
                                                   else None),
                                 check_dtype=check_dtype)
 
+        self._seed = check_random_seed(self.random_state)
         self.feature_names_in_ = _X_m.index
 
         IF GPUBUILD == 1:

diff --git a/python/cuml/cuml/cluster/kmeans_mg.pyx b/python/cuml/cuml/cluster/kmeans_mg.pyx
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -32,6 +32,7 @@ from cuml.common import input_to_cuml_array
 
 from cuml.cluster import KMeans
 from cuml.cluster.kmeans_utils cimport params as KMeansParams
+from cuml.internals.utils import check_random_seed
 
 
 cdef extern from "cuml/cluster/kmeans_mg.hpp" \
@@ -129,6 +130,8 @@ class KMeansMG(KMeans):
 
         cdef uintptr_t sample_weight_ptr = sample_weight_m.ptr
 
+        self._seed = check_random_seed(self.random_state)
+
         if (self.init in ['scalable-k-means++', 'k-means||', 'random']):
             self.cluster_centers_ = CumlArray.zeros(shape=(self.n_clusters,
                                                            self.n_cols),

diff --git a/python/cuml/cuml/decomposition/pca.pyx b/python/cuml/cuml/decomposition/pca.pyx
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -209,9 +209,6 @@ class PCA(UniversalBase,
 
             ``n_components = min(n_samples, n_features)``
 
-    random_state : int / None (default = None)
-        If you want results to be the same when you restart Python, select a
-        state.
     svd_solver : 'full' or 'jacobi' or 'auto' (default = 'full')
         Full uses a eigendecomposition of the covariance matrix then discards
         components.
@@ -292,7 +289,7 @@ class PCA(UniversalBase,
 
     @device_interop_preparation
     def __init__(self, *, copy=True, handle=None, iterated_power=15,
-                 n_components=None, random_state=None, svd_solver='auto',
+                 n_components=None, svd_solver='auto',
                  tol=1e-7, verbose=False, whiten=False,
                  output_type=None):
         # parameters
@@ -302,7 +299,6 @@ class PCA(UniversalBase,
         self.copy = copy
         self.iterated_power = iterated_power
         self.n_components = n_components
-        self.random_state = random_state
         self.svd_solver = svd_solver
         self.tol = tol
         self.whiten = whiten
@@ -739,7 +735,7 @@ class PCA(UniversalBase,
     def _get_param_names(cls):
         return super()._get_param_names() + \
             ["copy", "iterated_power", "n_components", "svd_solver", "tol",
-                "whiten", "random_state"]
+                "whiten"]
 
     def _check_is_fitted(self, attr):
         if not hasattr(self, attr) or (getattr(self, attr) is None):

diff --git a/python/cuml/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/cuml/ensemble/randomforestclassifier.pyx
@@ -33,6 +33,7 @@ import cuml.internals
 from cuml.common.doc_utils import generate_docstring
 from cuml.common.doc_utils import insert_into_docstring
 from cuml.common import input_to_cuml_array
+from cuml.internals.utils import check_random_seed
 
 from cuml.internals.logger cimport level_enum
 from cuml.ensemble.randomforest_common import BaseRandomForestModel
@@ -451,7 +452,7 @@ class RandomForestClassifier(BaseRandomForestModel,
         if self.random_state is None:
             seed_val = <uintptr_t>NULL
         else:
-            seed_val = <uintptr_t>self.random_state
+            seed_val = <uintptr_t>check_random_seed(self.random_state)
 
         rf_params = set_rf_params(<int> self.max_depth,
                                   <int> self.max_leaves,

diff --git a/python/cuml/cuml/ensemble/randomforestregressor.pyx b/python/cuml/cuml/ensemble/randomforestregressor.pyx
@@ -34,6 +34,7 @@ from cuml.internals.logger cimport level_enum
 from cuml.common.doc_utils import generate_docstring
 from cuml.common.doc_utils import insert_into_docstring
 from cuml.common import input_to_cuml_array
+from cuml.internals.utils import check_random_seed
 
 from cuml.ensemble.randomforest_common import BaseRandomForestModel
 from cuml.ensemble.randomforest_common import _obtain_fil_model
@@ -438,7 +439,7 @@ class RandomForestRegressor(BaseRandomForestModel,
         if self.random_state is None:
             seed_val = <uintptr_t>NULL
         else:
-            seed_val = <uintptr_t>self.random_state
+            seed_val = <uintptr_t>check_random_seed(self.random_state)
 
         rf_params = set_rf_params(<int> self.max_depth,
                                   <int> self.max_leaves,

diff --git a/python/cuml/cuml/internals/utils.py b/python/cuml/cuml/internals/utils.py
@@ -0,0 +1,39 @@
+#
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import numbers
+import numpy as np
+
+
+def check_random_seed(seed):
+    """Convert a ``random_state``-style value into an integer seed.
+
+    Parameters
+    ----------
+    seed : None | int | instance of RandomState
+        If seed is None, draw a random seed from a freshly created,
+        unseeded ``np.random.RandomState``.
+        If seed is an int (any ``numbers.Integral``), return it unchanged.
+        If seed is a RandomState instance, draw a seed from it.
+
+    Returns
+    -------
+    seed : int or np.uint32
+        The integer that was passed in, or a ``np.uint32`` value drawn
+        with ``randint`` from ``[0, np.iinfo(np.uint32).max)``.
+
+    Raises
+    ------
+    ValueError
+        If seed is not None, an integer, or a RandomState instance.
+    """
+    if seed is None:
+        # Replace None with an unseeded RandomState; it is then handled by
+        # the RandomState branch below.
+        seed = np.random.RandomState(None)
+
+    if isinstance(seed, numbers.Integral):
+        return seed
+    if isinstance(seed, np.random.RandomState):
+        # `high` is exclusive in `randint`, so the largest uint32 value
+        # itself is never returned.
+        return seed.randint(
+            low=0, high=np.iinfo(np.uint32).max, dtype=np.uint32
+        )
+    raise ValueError("%r cannot be used to create a seed." % seed)
diff --git a/python/cuml/cuml/manifold/t_sne.pyx b/python/cuml/cuml/manifold/t_sne.pyx
@@ -31,10 +31,10 @@ from cuml.internals.base import UniversalBase
 from pylibraft.common.handle cimport handle_t
 from cuml.internals.api_decorators import device_interop_preparation
 from cuml.internals.api_decorators import enable_device_interop
+from cuml.internals.utils import check_random_seed
 from cuml.internals import logger
 from cuml.internals cimport logger
 
-
 from cuml.internals.array import CumlArray
 from cuml.internals.array_sparse import SparseCumlArray
 from cuml.common.sparse_utils import is_sparse
@@ -596,7 +596,7 @@ class TSNE(UniversalBase,
     def _build_tsne_params(self, algo):
         cdef long long seed = -1
         if self.random_state is not None:
-            seed = self.random_state
+            seed = check_random_seed(self.random_state)
 
         cdef TSNEParams* params = new TSNEParams()
         params.dim = <int> self.n_components