Skip to content

Commit

Permalink
Merge branch-25.02 into branch-25.04
Browse files Browse the repository at this point in the history
  • Loading branch information
dantegd committed Feb 4, 2025
2 parents 68072dd + def265e commit ce49f50
Show file tree
Hide file tree
Showing 18 changed files with 167 additions and 82 deletions.
13 changes: 13 additions & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ jobs:
# Please keep pr-builder as the top job here
pr-builder:
needs:
- check-nightly-ci
- changed-files
- checks
- clang-tidy
Expand Down Expand Up @@ -43,6 +44,18 @@ jobs:
- name: Telemetry setup
if: ${{ vars.TELEMETRY_ENABLED == 'true' }}
uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main
check-nightly-ci:
# Switch to ubuntu-latest once it defaults to a version of Ubuntu that
# provides at least Python 3.11 (see
# https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat)
runs-on: ubuntu-24.04
env:
RAPIDS_GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- name: Check if nightly CI is passing
uses: rapidsai/shared-actions/check_nightly_success/dispatch@main
with:
repo: cuml
changed-files:
secrets: inherit
needs: telemetry-setup
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ dependencies:
- sphinx-markdown-tables
- statsmodels
- sysroot_linux-64==2.28
- treelite==4.3.0
- treelite==4.4.1
- umap-learn==0.5.6
- xgboost>=2.1.0
name: all_cuda-118_arch-x86_64
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-128_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ dependencies:
- sphinx-markdown-tables
- statsmodels
- sysroot_linux-64==2.28
- treelite==4.3.0
- treelite==4.4.1
- umap-learn==0.5.6
- xgboost>=2.1.0
name: all_cuda-128_arch-x86_64
2 changes: 1 addition & 1 deletion conda/recipes/cuml/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ c_stdlib_version:
- "=2.28"

treelite_version:
- "=4.3.0"
- "=4.4.1"
2 changes: 1 addition & 1 deletion conda/recipes/libcuml/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ spdlog_version:
- ">=1.14.1,<1.15"

treelite_version:
- "=4.3.0"
- "=4.4.1"

# The CTK libraries below are missing from the conda-forge::cudatoolkit package
# for CUDA 11. The "*_host_*" version specifiers correspond to `11.8` packages
Expand Down
4 changes: 2 additions & 2 deletions cpp/cmake/thirdparty/get_treelite.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ function(find_and_configure_treelite)
rapids_export_find_package_root(BUILD Treelite [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cuml-exports)
endfunction()

find_and_configure_treelite(VERSION 4.3.0
PINNED_TAG 575e4208f2b18e40d818c338ecb95d7a26e69aab
find_and_configure_treelite(VERSION 4.4.1
PINNED_TAG 386bd0de99f5a66584c7e58221ee38ce606ad1ae
EXCLUDE_FROM_ALL ${CUML_EXCLUDE_TREELITE_FROM_ALL}
BUILD_STATIC_LIBS ${CUML_USE_TREELITE_STATIC})
2 changes: 1 addition & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ dependencies:
- output_types: [conda, requirements, pyproject]
packages:
- &cython cython>=3.0.0
- &treelite treelite==4.3.0
- &treelite treelite==4.4.1

py_run_cuml:
common:
Expand Down
7 changes: 6 additions & 1 deletion python/cuml/cuml/cluster/kmeans.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ np = cpu_only_import('numpy')
from cuml.internals.safe_imports import gpu_only_import
rmm = gpu_only_import('rmm')
from cuml.internals.safe_imports import safe_import_from, return_false
from cuml.internals.utils import check_random_seed
import typing

IF GPUBUILD == 1:
Expand Down Expand Up @@ -209,8 +210,11 @@ class KMeans(UniversalBase,
params.init = self._params_init
params.max_iter = <int>self.max_iter
params.tol = <double>self.tol
# After transferring from one device to another, `_seed` might not be set,
# so we pass a dummy value here. Its value does not matter because the
# seed is only used during fitting.
params.rng_state.seed = <int>getattr(self, "_seed", 0)
params.verbosity = <raft_level_enum>(<int>self.verbose)
params.rng_state.seed = self.random_state
params.metric = DistanceType.L2Expanded # distance metric as squared L2: @todo - support other metrics # noqa: E501
params.batch_samples = <int>self.max_samples_per_batch
params.oversampling_factor = <double>self.oversampling_factor
Expand Down Expand Up @@ -307,6 +311,7 @@ class KMeans(UniversalBase,
else None),
check_dtype=check_dtype)

self._seed = check_random_seed(self.random_state)
self.feature_names_in_ = _X_m.index

IF GPUBUILD == 1:
Expand Down
5 changes: 4 additions & 1 deletion python/cuml/cuml/cluster/kmeans_mg.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Copyright (c) 2019-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -32,6 +32,7 @@ from cuml.common import input_to_cuml_array

from cuml.cluster import KMeans
from cuml.cluster.kmeans_utils cimport params as KMeansParams
from cuml.internals.utils import check_random_seed


cdef extern from "cuml/cluster/kmeans_mg.hpp" \
Expand Down Expand Up @@ -129,6 +130,8 @@ class KMeansMG(KMeans):

cdef uintptr_t sample_weight_ptr = sample_weight_m.ptr

self._seed = check_random_seed(self.random_state)

if (self.init in ['scalable-k-means++', 'k-means||', 'random']):
self.cluster_centers_ = CumlArray.zeros(shape=(self.n_clusters,
self.n_cols),
Expand Down
10 changes: 3 additions & 7 deletions python/cuml/cuml/decomposition/pca.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Copyright (c) 2019-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -209,9 +209,6 @@ class PCA(UniversalBase,
``n_components = min(n_samples, n_features)``
random_state : int / None (default = None)
If you want results to be the same when you restart Python, select a
state.
svd_solver : 'full' or 'jacobi' or 'auto' (default = 'full')
Full uses a eigendecomposition of the covariance matrix then discards
components.
Expand Down Expand Up @@ -292,7 +289,7 @@ class PCA(UniversalBase,

@device_interop_preparation
def __init__(self, *, copy=True, handle=None, iterated_power=15,
n_components=None, random_state=None, svd_solver='auto',
n_components=None, svd_solver='auto',
tol=1e-7, verbose=False, whiten=False,
output_type=None):
# parameters
Expand All @@ -302,7 +299,6 @@ class PCA(UniversalBase,
self.copy = copy
self.iterated_power = iterated_power
self.n_components = n_components
self.random_state = random_state
self.svd_solver = svd_solver
self.tol = tol
self.whiten = whiten
Expand Down Expand Up @@ -739,7 +735,7 @@ class PCA(UniversalBase,
def _get_param_names(cls):
return super()._get_param_names() + \
["copy", "iterated_power", "n_components", "svd_solver", "tol",
"whiten", "random_state"]
"whiten"]

def _check_is_fitted(self, attr):
if not hasattr(self, attr) or (getattr(self, attr) is None):
Expand Down
3 changes: 2 additions & 1 deletion python/cuml/cuml/ensemble/randomforestclassifier.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import cuml.internals
from cuml.common.doc_utils import generate_docstring
from cuml.common.doc_utils import insert_into_docstring
from cuml.common import input_to_cuml_array
from cuml.internals.utils import check_random_seed

from cuml.internals.logger cimport level_enum
from cuml.ensemble.randomforest_common import BaseRandomForestModel
Expand Down Expand Up @@ -451,7 +452,7 @@ class RandomForestClassifier(BaseRandomForestModel,
if self.random_state is None:
seed_val = <uintptr_t>NULL
else:
seed_val = <uintptr_t>self.random_state
seed_val = <uintptr_t>check_random_seed(self.random_state)

rf_params = set_rf_params(<int> self.max_depth,
<int> self.max_leaves,
Expand Down
3 changes: 2 additions & 1 deletion python/cuml/cuml/ensemble/randomforestregressor.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ from cuml.internals.logger cimport level_enum
from cuml.common.doc_utils import generate_docstring
from cuml.common.doc_utils import insert_into_docstring
from cuml.common import input_to_cuml_array
from cuml.internals.utils import check_random_seed

from cuml.ensemble.randomforest_common import BaseRandomForestModel
from cuml.ensemble.randomforest_common import _obtain_fil_model
Expand Down Expand Up @@ -438,7 +439,7 @@ class RandomForestRegressor(BaseRandomForestModel,
if self.random_state is None:
seed_val = <uintptr_t>NULL
else:
seed_val = <uintptr_t>self.random_state
seed_val = <uintptr_t>check_random_seed(self.random_state)

rf_params = set_rf_params(<int> self.max_depth,
<int> self.max_leaves,
Expand Down
39 changes: 39 additions & 0 deletions python/cuml/cuml/internals/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import numbers
import numpy as np


def check_random_seed(seed):
    """Turn ``seed`` into an integer usable as a random seed.

    Parameters
    ----------
    seed : None | int | instance of RandomState
        If seed is None, return a random int as seed.
        If seed is an int, return it.
        If seed is a RandomState instance, derive a seed from it.
        Otherwise raise ValueError.

    Returns
    -------
    seed : integer
        ``seed`` itself when it is already an integer; otherwise a value
        drawn with ``dtype=np.uint32`` from the given (or freshly created)
        RandomState. Drawing advances the state of that RandomState.

    Raises
    ------
    ValueError
        If ``seed`` is not None, an integer or a RandomState instance.
    """
    if seed is None:
        # No seed requested: let NumPy initialise a fresh generator and
        # derive the seed from it below.
        seed = np.random.RandomState(None)

    if isinstance(seed, numbers.Integral):
        return seed
    if isinstance(seed, np.random.RandomState):
        return seed.randint(
            low=0, high=np.iinfo(np.uint32).max, dtype=np.uint32
        )
    # Use ``!r`` instead of ``"%r" % seed``: the ``%`` operator treats a
    # tuple on its right-hand side as the argument list, so a tuple-valued
    # seed would raise TypeError (or print the wrong value) instead of the
    # documented ValueError.
    raise ValueError(f"{seed!r} cannot be used to create a seed.")
4 changes: 2 additions & 2 deletions python/cuml/cuml/manifold/t_sne.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ from cuml.internals.base import UniversalBase
from pylibraft.common.handle cimport handle_t
from cuml.internals.api_decorators import device_interop_preparation
from cuml.internals.api_decorators import enable_device_interop
from cuml.internals.utils import check_random_seed
from cuml.internals import logger
from cuml.internals cimport logger


from cuml.internals.array import CumlArray
from cuml.internals.array_sparse import SparseCumlArray
from cuml.common.sparse_utils import is_sparse
Expand Down Expand Up @@ -596,7 +596,7 @@ class TSNE(UniversalBase,
def _build_tsne_params(self, algo):
cdef long long seed = -1
if self.random_state is not None:
seed = self.random_state
seed = check_random_seed(self.random_state)

cdef TSNEParams* params = new TSNEParams()
params.dim = <int> self.n_components
Expand Down
Loading

0 comments on commit ce49f50

Please sign in to comment.