Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[python-package] deprecate support for H2O 'datatable' #6670

Merged
merged 4 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 22 additions & 6 deletions python-package/lightgbm/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,15 @@ class LGBMDeprecationWarning(FutureWarning):
pass


def _emit_datatable_deprecation_warning() -> None:
msg = (
"Support for 'datatable' in LightGBM is deprecated, and will be removed in a future release. "
"To avoid this warning, convert 'datatable' inputs to a supported format "
"(for example, use the 'to_numpy()' method)."
)
warnings.warn(msg, category=LGBMDeprecationWarning, stacklevel=2)


class _ConfigAliases:
# lazy evaluation to allow import without dynamic library, e.g., for docs generation
aliases = None
Expand Down Expand Up @@ -1086,7 +1095,7 @@ def predict(

Parameters
----------
data : str, pathlib.Path, numpy array, pandas DataFrame, pyarrow Table, H2O DataTable's Frame or scipy.sparse
data : str, pathlib.Path, numpy array, pandas DataFrame, pyarrow Table, H2O DataTable's Frame (deprecated) or scipy.sparse
Data source for prediction.
If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM).
start_iteration : int, optional (default=0)
Expand Down Expand Up @@ -1200,6 +1209,7 @@ def predict(
predict_type=predict_type,
)
elif isinstance(data, dt_DataTable):
_emit_datatable_deprecation_warning()
preds, nrow = self.__pred_for_np2d(
mat=data.to_numpy(),
start_iteration=start_iteration,
Expand Down Expand Up @@ -1766,7 +1776,7 @@ def __init__(

Parameters
----------
data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequence, list of numpy array or pyarrow Table
data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame (deprecated), scipy.sparse, Sequence, list of Sequence, list of numpy array or pyarrow Table
Data source of Dataset.
If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM) or a LightGBM Dataset binary file.
label : list, numpy 1-D array, pandas Series / one-column DataFrame, pyarrow Array, pyarrow ChunkedArray or None, optional (default=None)
Expand Down Expand Up @@ -2172,6 +2182,7 @@ def _lazy_init(
elif isinstance(data, Sequence):
self.__init_from_seqs([data], ref_dataset)
elif isinstance(data, dt_DataTable):
_emit_datatable_deprecation_warning()
self.__init_from_np2d(data.to_numpy(), params_str, ref_dataset)
else:
try:
Expand Down Expand Up @@ -2598,7 +2609,7 @@ def create_valid(

Parameters
----------
data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequence or list of numpy array
data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame (deprecated), scipy.sparse, Sequence, list of Sequence or list of numpy array
Data source of Dataset.
If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM) or a LightGBM Dataset binary file.
label : list, numpy 1-D array, pandas Series / one-column DataFrame, pyarrow Array, pyarrow ChunkedArray or None, optional (default=None)
Expand Down Expand Up @@ -3255,7 +3266,7 @@ def get_data(self) -> Optional[_LGBM_TrainDataType]:

Returns
-------
data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequence or list of numpy array or None
data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame (deprecated), scipy.sparse, Sequence, list of Sequence or list of numpy array or None
Raw data used in the Dataset construction.
"""
if self._handle is None:
Expand All @@ -3268,6 +3279,7 @@ def get_data(self) -> Optional[_LGBM_TrainDataType]:
elif isinstance(self.data, pd_DataFrame):
self.data = self.data.iloc[self.used_indices].copy()
elif isinstance(self.data, dt_DataTable):
_emit_datatable_deprecation_warning()
self.data = self.data[self.used_indices, :]
elif isinstance(self.data, Sequence):
self.data = self.data[self.used_indices]
Expand Down Expand Up @@ -3456,6 +3468,7 @@ def add_features_from(self, other: "Dataset") -> "Dataset":
elif isinstance(other.data, pd_DataFrame):
self.data = np.hstack((self.data, other.data.values))
elif isinstance(other.data, dt_DataTable):
_emit_datatable_deprecation_warning()
self.data = np.hstack((self.data, other.data.to_numpy()))
else:
self.data = None
Expand All @@ -3466,6 +3479,7 @@ def add_features_from(self, other: "Dataset") -> "Dataset":
elif isinstance(other.data, pd_DataFrame):
self.data = scipy.sparse.hstack((self.data, other.data.values), format=sparse_format)
elif isinstance(other.data, dt_DataTable):
_emit_datatable_deprecation_warning()
self.data = scipy.sparse.hstack((self.data, other.data.to_numpy()), format=sparse_format)
else:
self.data = None
Expand All @@ -3483,10 +3497,12 @@ def add_features_from(self, other: "Dataset") -> "Dataset":
elif isinstance(other.data, pd_DataFrame):
self.data = concat((self.data, other.data), axis=1, ignore_index=True)
elif isinstance(other.data, dt_DataTable):
_emit_datatable_deprecation_warning()
self.data = concat((self.data, pd_DataFrame(other.data.to_numpy())), axis=1, ignore_index=True)
else:
self.data = None
elif isinstance(self.data, dt_DataTable):
_emit_datatable_deprecation_warning()
if isinstance(other.data, np.ndarray):
self.data = dt_DataTable(np.hstack((self.data.to_numpy(), other.data)))
elif isinstance(other.data, scipy.sparse.spmatrix):
Expand Down Expand Up @@ -4688,7 +4704,7 @@ def predict(

Parameters
----------
data : str, pathlib.Path, numpy array, pandas DataFrame, pyarrow Table, H2O DataTable's Frame or scipy.sparse
data : str, pathlib.Path, numpy array, pandas DataFrame, pyarrow Table, H2O DataTable's Frame (deprecated) or scipy.sparse
Data source for prediction.
If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM).
start_iteration : int, optional (default=0)
Expand Down Expand Up @@ -4769,7 +4785,7 @@ def refit(

Parameters
----------
data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequence or list of numpy array
data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame (deprecated), scipy.sparse, Sequence, list of Sequence or list of numpy array
Data source for refit.
If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM).
label : list, numpy 1-D array, pandas Series / one-column DataFrame, pyarrow Array or pyarrow ChunkedArray
Expand Down
6 changes: 3 additions & 3 deletions python-package/lightgbm/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -976,7 +976,7 @@ def fit(

fit.__doc__ = (
_lgbmmodel_doc_fit.format(
X_shape="numpy array, pandas DataFrame, H2O DataTable's Frame , scipy.sparse, list of lists of int or float of shape = [n_samples, n_features]",
X_shape="numpy array, pandas DataFrame, H2O DataTable's Frame (deprecated), scipy.sparse, list of lists of int or float of shape = [n_samples, n_features]",
y_shape="numpy array, pandas DataFrame, pandas Series, list of int or float of shape = [n_samples]",
sample_weight_shape="numpy array, pandas Series, list of int or float of shape = [n_samples] or None, optional (default=None)",
init_score_shape="numpy array, pandas DataFrame, pandas Series, list of int or float of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task) or shape = [n_samples, n_classes] (for multi-class task) or None, optional (default=None)",
Expand Down Expand Up @@ -1046,7 +1046,7 @@ def predict(

predict.__doc__ = _lgbmmodel_doc_predict.format(
description="Return the predicted value for each sample.",
X_shape="numpy array, pandas DataFrame, H2O DataTable's Frame , scipy.sparse, list of lists of int or float of shape = [n_samples, n_features]",
X_shape="numpy array, pandas DataFrame, H2O DataTable's Frame (deprecated), scipy.sparse, list of lists of int or float of shape = [n_samples, n_features]",
output_name="predicted_result",
predicted_result_shape="array-like of shape = [n_samples] or shape = [n_samples, n_classes]",
X_leaves_shape="array-like of shape = [n_samples, n_trees] or shape = [n_samples, n_trees * n_classes]",
Expand Down Expand Up @@ -1372,7 +1372,7 @@ def predict_proba(

predict_proba.__doc__ = _lgbmmodel_doc_predict.format(
description="Return the predicted probability for each class for each sample.",
X_shape="numpy array, pandas DataFrame, H2O DataTable's Frame , scipy.sparse, list of lists of int or float of shape = [n_samples, n_features]",
X_shape="numpy array, pandas DataFrame, H2O DataTable's Frame (deprecated), scipy.sparse, list of lists of int or float of shape = [n_samples, n_features]",
output_name="predicted_probability",
predicted_result_shape="array-like of shape = [n_samples] or shape = [n_samples, n_classes]",
X_leaves_shape="array-like of shape = [n_samples, n_trees] or shape = [n_samples, n_trees * n_classes]",
Expand Down
Loading