Skip to content

Commit

Permalink
Check __cuda_array_interface__ instead of cupy class. (#9971)
Browse files Browse the repository at this point in the history

* Now XGBoost can directly consume CUDA data from torch.
  • Loading branch information
trivialfis authored Jan 9, 2024
1 parent 2f57bbd commit 01c4711
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 29 deletions.
2 changes: 2 additions & 0 deletions doc/python/python_intro.rst
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ Support Matrix
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
| cupy.ndarray | T | T | T | T | T | T |
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
| torch.Tensor | T | T | T | T | T | T |
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
| dlpack | CPA | CPA | | CPA | FF | FF |
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
| datatable.Frame | T | FF | | NPA | FF | |
Expand Down
4 changes: 2 additions & 2 deletions python-package/xgboost/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,9 @@ def concat(value: Sequence[_T]) -> _T: # pylint: disable=too-many-return-statem
from cudf import concat as CUDF_concat # pylint: disable=import-error

return CUDF_concat(value, axis=0)
from .data import _is_cupy_array
from .data import _is_cupy_alike

if _is_cupy_array(value[0]):
if _is_cupy_alike(value[0]):
import cupy # pylint: disable=import-error

# pylint: disable=c-extension-no-member,no-member
Expand Down
19 changes: 11 additions & 8 deletions python-package/xgboost/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,10 +357,13 @@ def _numpy2ctypes_type(dtype: Type[np.number]) -> Type[CNumeric]:
return _NUMPY_TO_CTYPES_MAPPING[dtype]


def _array_hasobject(data: DataType) -> bool:
return hasattr(data.dtype, "hasobject") and data.dtype.hasobject


def _cuda_array_interface(data: DataType) -> bytes:
assert (
data.dtype.hasobject is False
), "Input data contains `object` dtype. Expecting numeric data."
if _array_hasobject(data):
raise ValueError("Input data contains `object` dtype. Expecting numeric data.")
interface = data.__cuda_array_interface__
if "mask" in interface:
interface["mask"] = interface["mask"].__cuda_array_interface__
Expand Down Expand Up @@ -2102,7 +2105,7 @@ def boost(
_array_interface,
_cuda_array_interface,
_ensure_np_dtype,
_is_cupy_array,
_is_cupy_alike,
)

self._assign_dmatrix_features(dtrain)
Expand All @@ -2116,7 +2119,7 @@ def array_interface(array: NumpyOrCupy) -> bytes:
"Expecting `np.ndarray` or `cupy.ndarray` for gradient and hessian."
f" Got: {type(array)}"
)
if not isinstance(array, np.ndarray) and not _is_cupy_array(array):
if not isinstance(array, np.ndarray) and not _is_cupy_alike(array):
raise TypeError(msg)

n_samples = dtrain.num_row()
Expand All @@ -2131,7 +2134,7 @@ def array_interface(array: NumpyOrCupy) -> bytes:
if isinstance(array, np.ndarray):
array, _ = _ensure_np_dtype(array, array.dtype)
interface = _array_interface(array)
elif _is_cupy_array(array):
elif _is_cupy_alike(array):
interface = _cuda_array_interface(array)
else:
raise TypeError(msg)
Expand Down Expand Up @@ -2461,7 +2464,7 @@ def inplace_predict(
_arrow_transform,
_is_arrow,
_is_cudf_df,
_is_cupy_array,
_is_cupy_alike,
_is_list,
_is_np_array_like,
_is_pandas_df,
Expand Down Expand Up @@ -2543,7 +2546,7 @@ def inplace_predict(
)
)
return _prediction_output(shape, dims, preds, False)
if _is_cupy_array(data):
if _is_cupy_alike(data):
from .data import _transform_cupy_array

data = _transform_cupy_array(data)
Expand Down
4 changes: 2 additions & 2 deletions python-package/xgboost/dask/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
_deprecate_positional_args,
_expect,
)
from xgboost.data import _is_cudf_ser, _is_cupy_array
from xgboost.data import _is_cudf_ser, _is_cupy_alike
from xgboost.sklearn import (
XGBClassifier,
XGBClassifierBase,
Expand Down Expand Up @@ -1909,7 +1909,7 @@ async def _fit_async(
self.classes_ = await self.client.compute(y.drop_duplicates())
if _is_cudf_ser(self.classes_):
self.classes_ = self.classes_.to_cupy()
if _is_cupy_array(self.classes_):
if _is_cupy_alike(self.classes_):
self.classes_ = self.classes_.get()
self.classes_ = numpy.array(self.classes_)
self.n_classes_ = len(self.classes_)
Expand Down
25 changes: 11 additions & 14 deletions python-package/xgboost/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
DataIter,
DataSplitMode,
DMatrix,
_array_hasobject,
_check_call,
_cuda_array_interface,
_ProxyDMatrix,
Expand Down Expand Up @@ -77,9 +78,8 @@ def is_scipy_csr(data: DataType) -> bool:


def _array_interface_dict(data: np.ndarray) -> dict:
assert (
data.dtype.hasobject is False
), "Input data contains `object` dtype. Expecting numeric data."
if _array_hasobject(data):
raise ValueError("Input data contains `object` dtype. Expecting numeric data.")
interface = data.__array_interface__
if "mask" in interface:
interface["mask"] = interface["mask"].__array_interface__
Expand Down Expand Up @@ -219,7 +219,7 @@ def _is_np_array_like(data: DataType) -> bool:
def _ensure_np_dtype(
data: DataType, dtype: Optional[NumpyDType]
) -> Tuple[np.ndarray, Optional[NumpyDType]]:
if data.dtype.hasobject or data.dtype in [np.float16, np.bool_]:
if _array_hasobject(data) or data.dtype in [np.float16, np.bool_]:
dtype = np.float32
data = data.astype(dtype, copy=False)
if not data.flags.aligned:
Expand Down Expand Up @@ -998,19 +998,16 @@ def _is_cudf_ser(data: DataType) -> bool:
return lazy_isinstance(data, "cudf.core.series", "Series")


def _is_cupy_array(data: DataType) -> bool:
    """Return whether ``data`` is an ``ndarray`` from a known cupy module path.

    The module paths are tried in order through ``lazy_isinstance``; the first
    match short-circuits the search.
    """
    candidate_modules = ("cupy.core.core", "cupy", "cupy._core.core")
    for module_name in candidate_modules:
        if lazy_isinstance(data, module_name, "ndarray"):
            return True
    return False
def _is_cupy_alike(data: DataType) -> bool:
return hasattr(data, "__cuda_array_interface__")


def _transform_cupy_array(data: DataType) -> CupyT:
import cupy # pylint: disable=import-error

if not hasattr(data, "__cuda_array_interface__") and hasattr(data, "__array__"):
data = cupy.array(data, copy=False)
if data.dtype.hasobject or data.dtype in [cupy.bool_]:
if _array_hasobject(data) or data.dtype in [cupy.bool_]:
data = data.astype(cupy.float32, copy=False)
return data

Expand Down Expand Up @@ -1222,7 +1219,7 @@ def dispatch_data_backend(
return _from_cudf_df(
data, missing, threads, feature_names, feature_types, enable_categorical
)
if _is_cupy_array(data):
if _is_cupy_alike(data):
return _from_cupy_array(data, missing, threads, feature_names, feature_types)
if _is_cupy_csr(data):
raise TypeError("cupyx CSR is not supported yet.")
Expand Down Expand Up @@ -1354,7 +1351,7 @@ def dispatch_meta_backend(
data = _transform_dlpack(data)
_meta_from_cupy_array(data, name, handle)
return
if _is_cupy_array(data):
if _is_cupy_alike(data):
_meta_from_cupy_array(data, name, handle)
return
if _is_cudf_ser(data):
Expand Down Expand Up @@ -1419,7 +1416,7 @@ def _proxy_transform(
return _transform_cudf_df(
data, feature_names, feature_types, enable_categorical
)
if _is_cupy_array(data):
if _is_cupy_alike(data):
data = _transform_cupy_array(data)
return data, None, feature_names, feature_types
if _is_dlpack(data):
Expand Down Expand Up @@ -1470,7 +1467,7 @@ def dispatch_proxy_set_data(
# pylint: disable=W0212
proxy._set_data_from_cuda_columnar(data, cast(List, cat_codes))
return
if _is_cupy_array(data):
if _is_cupy_alike(data):
proxy._set_data_from_cuda_interface(data) # pylint: disable=W0212
return
if _is_dlpack(data):
Expand Down
6 changes: 3 additions & 3 deletions python-package/xgboost/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
_deprecate_positional_args,
_parse_eval_str,
)
from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array, _is_pandas_df
from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_alike, _is_pandas_df
from .training import train


Expand Down Expand Up @@ -1177,7 +1177,7 @@ def predict(
base_margin=base_margin,
validate_features=validate_features,
)
if _is_cupy_array(predts):
if _is_cupy_alike(predts):
import cupy # pylint: disable=import-error

predts = cupy.asnumpy(predts) # ensure numpy array is used.
Expand Down Expand Up @@ -1458,7 +1458,7 @@ def fit(
classes = cp.unique(y.values)
self.n_classes_ = len(classes)
expected_classes = cp.array(self.classes_)
elif _is_cupy_array(y):
elif _is_cupy_alike(y):
import cupy as cp # pylint: disable=E0401

classes = cp.unique(y)
Expand Down

0 comments on commit 01c4711

Please sign in to comment.