Skip to content

Commit

Permalink
Make get_param_names a class method on single GPU estimators to match Scikit-learn closer (#6101)
Browse files Browse the repository at this point in the history

A small difference between our estimators and Scikit-learn's is that `get_param_names` is a classmethod in sklearn but not in ours. This can cause a few corner cases to fail when our estimators are used where Scikit-learn-like estimators are expected. This PR fixes that.

**Note:** This will not include dask-based estimators for the time being since they depend on introspection at object creation time.

Authors:
  - Dante Gama Dessavre (https://github.com/dantegd)
  - Divye Gala (https://github.com/divyegala)

Approvers:
  - William Hicks (https://github.com/wphicks)

URL: #6101
  • Loading branch information
dantegd authored Nov 12, 2024
1 parent 22c3ee8 commit 8e195fb
Show file tree
Hide file tree
Showing 54 changed files with 238 additions and 174 deletions.
35 changes: 21 additions & 14 deletions python/cuml/cuml/_thirdparty/sklearn/preprocessing/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,8 +326,9 @@ def _reset(self):
del self.data_max_
del self.data_range_

def get_param_names(self):
return super().get_param_names() + [
@classmethod
def _get_param_names(cls):
return super()._get_param_names() + [
"feature_range",
"copy"
]
Expand Down Expand Up @@ -651,8 +652,9 @@ def _reset(self):
del self.mean_
del self.var_

def get_param_names(self):
return super().get_param_names() + [
@classmethod
def _get_param_names(cls):
return super()._get_param_names() + [
"with_mean",
"with_std",
"copy"
Expand Down Expand Up @@ -955,8 +957,9 @@ def _reset(self):
del self.n_samples_seen_
del self.max_abs_

def get_param_names(self):
return super().get_param_names() + [
@classmethod
def _get_param_names(cls):
return super()._get_param_names() + [
"copy"
]

Expand Down Expand Up @@ -1205,8 +1208,9 @@ def __init__(self, *, with_centering=True, with_scaling=True,
self.quantile_range = quantile_range
self.copy = copy

def get_param_names(self):
return super().get_param_names() + [
@classmethod
def _get_param_names(cls):
return super()._get_param_names() + [
"with_centering",
"with_scaling",
"quantile_range",
Expand Down Expand Up @@ -1478,8 +1482,9 @@ def __init__(self, degree=2, *, interaction_only=False, include_bias=True,
self.include_bias = include_bias
self.order = order

def get_param_names(self):
return super().get_param_names() + [
@classmethod
def _get_param_names(cls):
return super()._get_param_names() + [
"degree",
"interaction_only",
"include_bias",
Expand Down Expand Up @@ -2273,8 +2278,9 @@ def __init__(self, *, n_quantiles=1000, output_distribution='uniform',
self.random_state = random_state
self.copy = copy

def get_param_names(self):
return super().get_param_names() + [
@classmethod
def _get_param_names(cls):
return super()._get_param_names() + [
"n_quantiles",
"output_distribution",
"ignore_implicit_zeros",
Expand Down Expand Up @@ -2797,8 +2803,9 @@ def __init__(self, method='yeo-johnson', *, standardize=True, copy=True):
self.standardize = standardize
self.copy = copy

def get_param_names(self):
return super().get_param_names() + [
@classmethod
def _get_param_names(cls):
return super()._get_param_names() + [
"method",
"standardize",
"copy"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,9 @@ def __init__(self, n_bins=5, *, encode='onehot', strategy='quantile'):
self.encode = encode
self.strategy = strategy

def get_param_names(self):
return super().get_param_names() + [
@classmethod
def _get_param_names(cls):
return super()._get_param_names() + [
"n_bins",
"encode",
"strategy"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,9 @@ def __init__(self, *, missing_values=np.nan, strategy="mean",
self.fill_value = fill_value
self.copy = copy

def get_param_names(self):
return super().get_param_names() + [
@classmethod
def _get_param_names(cls):
return super()._get_param_names() + [
"strategy",
"fill_value",
"verbose",
Expand Down Expand Up @@ -544,8 +545,9 @@ def __init__(self, *, missing_values=np.nan, features="missing-only",
self.sparse = sparse
self.error_on_new = error_on_new

def get_param_names(self):
return super().get_param_names() + [
@classmethod
def _get_param_names(cls):
return super()._get_param_names() + [
"missing_values",
"features",
"sparse",
Expand Down
5 changes: 3 additions & 2 deletions python/cuml/cuml/cluster/agglomerative.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -279,8 +279,9 @@ class AgglomerativeClustering(Base, ClusterMixin, CMajorInputTagMixin):
"""
return self.fit(X).labels_

def get_param_names(self):
return super().get_param_names() + [
@classmethod
def _get_param_names(cls):
return super()._get_param_names() + [
"n_clusters",
"affinity",
"metric",
Expand Down
5 changes: 3 additions & 2 deletions python/cuml/cuml/cluster/dbscan.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -466,8 +466,9 @@ class DBSCAN(UniversalBase,
self.fit(X, out_dtype, sample_weight)
return self.labels_

def get_param_names(self):
return super().get_param_names() + [
@classmethod
def _get_param_names(cls):
return super()._get_param_names() + [
"eps",
"min_samples",
"max_mbytes_per_batch",
Expand Down
5 changes: 3 additions & 2 deletions python/cuml/cuml/cluster/hdbscan/hdbscan.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1112,8 +1112,9 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):

self._cpu_to_gpu_interop_prepped = True

def get_param_names(self):
return super().get_param_names() + [
@classmethod
def _get_param_names(cls):
return super()._get_param_names() + [
"metric",
"min_cluster_size",
"max_cluster_size",
Expand Down
5 changes: 3 additions & 2 deletions python/cuml/cuml/cluster/kmeans.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -696,8 +696,9 @@ class KMeans(UniversalBase,
self.fit(X, sample_weight=sample_weight)
return self.transform(X, convert_dtype=convert_dtype)

def get_param_names(self):
return super().get_param_names() + \
@classmethod
def _get_param_names(cls):
return super()._get_param_names() + \
['n_init', 'oversampling_factor', 'max_samples_per_batch',
'init', 'max_iter', 'n_clusters', 'random_state',
'tol', "convert_dtype"]
Expand Down
4 changes: 2 additions & 2 deletions python/cuml/cuml/dask/cluster/dbscan.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -160,5 +160,5 @@ def fit_predict(self, X, out_dtype="int32"):
self.fit(X, out_dtype)
return self.get_combined_model().labels_

def get_param_names(self):
def _get_param_names(self):
return list(self.kwargs.keys())
2 changes: 1 addition & 1 deletion python/cuml/cuml/dask/cluster/kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,5 +302,5 @@ def score(self, X, sample_weight=None):
cp.asarray(self.client.compute(scores, sync=True)) * -1.0
)

def get_param_names(self):
def _get_param_names(self):
return list(self.kwargs.keys())
4 changes: 2 additions & 2 deletions python/cuml/cuml/dask/decomposition/pca.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -215,7 +215,7 @@ def inverse_transform(self, X, delayed=True):
"""
return self._inverse_transform(X, n_dims=2, delayed=delayed)

def get_param_names(self):
def _get_param_names(self):
return list(self.kwargs.keys())

@staticmethod
Expand Down
4 changes: 2 additions & 2 deletions python/cuml/cuml/dask/decomposition/tsvd.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -177,7 +177,7 @@ def inverse_transform(self, X, delayed=True):
"""
return self._inverse_transform(X, n_dims=2, delayed=delayed)

def get_param_names(self):
def _get_param_names(self):
return list(self.kwargs.keys())

@staticmethod
Expand Down
4 changes: 2 additions & 2 deletions python/cuml/cuml/dask/linear_model/linear_regression.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -106,7 +106,7 @@ def predict(self, X, delayed=True):
"""
return self._predict(X, delayed=delayed)

def get_param_names(self):
def _get_param_names(self):
return list(self.kwargs.keys())

@staticmethod
Expand Down
4 changes: 2 additions & 2 deletions python/cuml/cuml/dask/linear_model/ridge.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -114,7 +114,7 @@ def predict(self, X, delayed=True):
"""
return self._predict(X, delayed=delayed)

def get_param_names(self):
def _get_param_names(self):
return list(self.kwargs.keys())

@staticmethod
Expand Down
11 changes: 8 additions & 3 deletions python/cuml/cuml/decomposition/incremental_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,6 @@ def __init__(
output_type=output_type,
)
self.batch_size = batch_size
self._hyperparams = ["n_components", "whiten", "copy", "batch_size"]
self._sparse_model = True

def fit(self, X, y=None, convert_dtype=True) -> "IncrementalPCA":
Expand Down Expand Up @@ -449,9 +448,15 @@ def transform(self, X, convert_dtype=False) -> CumlArray:
else:
return super().transform(X)

def get_param_names(self):
@classmethod
def _get_param_names(cls):
# Skip super() since we don't pass any extra parameters in __init__
return Base.get_param_names(self) + self._hyperparams
return Base._get_param_names() + [
"n_components",
"whiten",
"copy",
"batch_size",
]


def _validate_sparse_input(X):
Expand Down
5 changes: 3 additions & 2 deletions python/cuml/cuml/decomposition/pca.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -725,8 +725,9 @@ class PCA(UniversalBase,

return t_input_data

def get_param_names(self):
return super().get_param_names() + \
@classmethod
def _get_param_names(cls):
return super()._get_param_names() + \
["copy", "iterated_power", "n_components", "svd_solver", "tol",
"whiten", "random_state"]

Expand Down
5 changes: 3 additions & 2 deletions python/cuml/cuml/decomposition/tsvd.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -482,8 +482,9 @@ class TruncatedSVD(UniversalBase,

return t_input_data

def get_param_names(self):
return super().get_param_names() + \
@classmethod
def _get_param_names(cls):
return super()._get_param_names() + \
["algorithm", "n_components", "n_iter", "random_state", "tol"]

def get_attr_names(self):
Expand Down
5 changes: 3 additions & 2 deletions python/cuml/cuml/ensemble/randomforest_common.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -382,8 +382,9 @@ class BaseRandomForestModel(Base):
preds = tl_to_fil_model.predict(X)
return preds

def get_param_names(self):
return super().get_param_names() + BaseRandomForestModel._param_names
@classmethod
def _get_param_names(cls):
return super()._get_param_names() + BaseRandomForestModel._param_names

def set_params(self, **params):
self.treelite_serialized_model = None
Expand Down
5 changes: 3 additions & 2 deletions python/cuml/cuml/experimental/linear_model/lars.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,8 @@ class Lars(Base, RegressorMixin):

return preds

def get_param_names(self):
return super().get_param_names() + \
@classmethod
def _get_param_names(cls):
return super()._get_param_names() + \
['copy_X', 'fit_intercept', 'fit_path', 'n_nonzero_coefs',
'normalize', 'precompute', 'eps']
7 changes: 4 additions & 3 deletions python/cuml/cuml/feature_extraction/_tfidf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -301,8 +301,9 @@ def idf_(self, value):
(value, 0), shape=(n_features, n_features), dtype=cp.float32
)

def get_param_names(self):
return super().get_param_names() + [
@classmethod
def _get_param_names(cls):
return super()._get_param_names() + [
"norm",
"use_idf",
"smooth_idf",
Expand Down
16 changes: 9 additions & 7 deletions python/cuml/cuml/internals/base.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -182,7 +182,8 @@ class Base(TagsMixin,
self._check_output_type(data)
# inference logic goes here
def get_param_names(self):
@classmethod
def _get_param_names(cls):
# return a list of hyperparam names supported by this algo
# stream and handle example:
Expand Down Expand Up @@ -270,7 +271,8 @@ class Base(TagsMixin,
output += ' <sk_model_ attribute used>'
return output

def get_param_names(self):
@classmethod
def _get_param_names(cls):
"""
Returns a list of hyperparameter names owned by this class. It is
expected that every child class overrides this method and appends its
Expand All @@ -282,12 +284,12 @@ class Base(TagsMixin,
def get_params(self, deep=True):
"""
Returns a dict of all params owned by this class. If the child class
has appropriately overridden the `get_param_names` method and does not
has appropriately overridden the `_get_param_names` method and does not
need anything other than what is there in this method, then it doesn't
have to override this method
"""
params = dict()
variables = self.get_param_names()
variables = self._get_param_names()
for key in variables:
var_value = getattr(self, key, None)
params[key] = var_value
Expand All @@ -297,12 +299,12 @@ class Base(TagsMixin,
"""
Accepts a dict of params and updates the corresponding ones owned by
this class. If the child class has appropriately overridden the
`get_param_names` method and does not need anything other than what is,
`_get_param_names` method and does not need anything other than what is,
there in this method, then it doesn't have to override this method
"""
if not params:
return self
variables = self.get_param_names()
variables = self._get_param_names()
for key, value in params.items():
if key not in variables:
raise ValueError("Bad param '%s' passed to set_params" % key)
Expand Down
Loading

0 comments on commit 8e195fb

Please sign in to comment.