Skip to content

Commit

Permalink
Add QuantileDMatrix support (#279)
Browse files Browse the repository at this point in the history
* add quantile matrix

Signed-off-by: Zhi Lin <[email protected]>

* revert unrelated changes

Signed-off-by: Zhi Lin <[email protected]>

* add helper function

Signed-off-by: Zhi Lin <[email protected]>

* format

Signed-off-by: Zhi Lin <[email protected]>

---------

Signed-off-by: Zhi Lin <[email protected]>
  • Loading branch information
kira-lin authored Apr 21, 2023
1 parent 3a3123e commit 648d6dd
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 14 deletions.
37 changes: 23 additions & 14 deletions xgboost_ray/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def inner_f(*args, **kwargs):

from xgboost_ray.matrix import RayDMatrix, combine_data, \
RayDeviceQuantileDMatrix, RayDataIter, concat_dataframes, \
LEGACY_MATRIX
LEGACY_MATRIX, QUANTILE_AVAILABLE, RayQuantileDMatrix
from xgboost_ray.session import init_session, put_queue, \
set_session_queue, get_rabit_rank

Expand Down Expand Up @@ -320,7 +320,28 @@ def _set_omp_num_threads():
return int(float(os.environ.get("OMP_NUM_THREADS", "0.0")))


def _prepare_dmatrix_params(param: Dict) -> Dict:
    """Build DMatrix keyword arguments from sharded inputs.

    Each of the fields listed below is looked up in ``param`` and passed
    through ``concat_dataframes``; the results are collected under the same
    key names.

    Args:
        param: Mapping that must contain every field named in
            ``sharded_fields``; a missing key propagates as ``KeyError``.

    Returns:
        A new dict holding only the concatenated fields. ``param`` itself is
        not modified.
    """
    # Order matches the original literal so lookups happen in the same
    # sequence.
    sharded_fields = (
        "data",
        "label",
        "weight",
        "feature_weights",
        "qid",
        "base_margin",
        "label_lower_bound",
        "label_upper_bound",
    )
    return {
        field: concat_dataframes(param[field])
        for field in sharded_fields
    }


def _get_dmatrix(data: RayDMatrix, param: Dict) -> xgb.DMatrix:
if QUANTILE_AVAILABLE and isinstance(data, RayQuantileDMatrix):
if isinstance(param["data"], list):
qdm_param = _prepare_dmatrix_params(param)
param.update(qdm_param)
if data.enable_categorical is not None:
param["enable_categorical"] = data.enable_categorical
matrix = xgb.QuantileDMatrix(**param)
if not LEGACY_MATRIX and isinstance(data, RayDeviceQuantileDMatrix):
# If we only got a single data shard, create a list so we can
# iterate over it
Expand Down Expand Up @@ -355,18 +376,7 @@ def _get_dmatrix(data: RayDMatrix, param: Dict) -> xgb.DMatrix:
matrix = xgb.DeviceQuantileDMatrix(it, **dm_param)
else:
if isinstance(param["data"], list):
dm_param = {
"data": concat_dataframes(param["data"]),
"label": concat_dataframes(param["label"]),
"weight": concat_dataframes(param["weight"]),
"feature_weights": concat_dataframes(param["feature_weights"]),
"qid": concat_dataframes(param["qid"]),
"base_margin": concat_dataframes(param["base_margin"]),
"label_lower_bound": concat_dataframes(
param["label_lower_bound"]),
"label_upper_bound": concat_dataframes(
param["label_upper_bound"]),
}
dm_param = _prepare_dmatrix_params(param)
param.update(dm_param)

ll = param.pop("label_lower_bound", None)
Expand Down Expand Up @@ -669,7 +679,6 @@ def _train():
for deval, name in evals:
local_evals.append((_get_dmatrix(
deval, self._data[deval]), name))

if LEGACY_CALLBACK:
for xgb_callback in kwargs.get("callbacks", []):
if isinstance(xgb_callback, TrainingCallback):
Expand Down
12 changes: 12 additions & 0 deletions xgboost_ray/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ class RayDataset:
DataIter = object
LEGACY_MATRIX = True

# Optional dependency probe: ``QuantileDMatrix`` is only present in newer
# xgboost releases. When the import fails we fall back to ``object`` so the
# name can still be referenced, and record the absence in
# ``QUANTILE_AVAILABLE`` for callers to branch on.
try:
    # The class is spelled ``QuantileDMatrix`` (capital "M"). The previous
    # spelling ``QuantileDmatrix`` does not exist in xgboost, so the import
    # always failed and the feature flag could never become True.
    from xgboost.core import QuantileDMatrix
    QUANTILE_AVAILABLE = True
except ImportError:
    QuantileDMatrix = object
    QUANTILE_AVAILABLE = False

# Backward-compatible alias for code that imported the old misspelled name.
QuantileDmatrix = QuantileDMatrix

if TYPE_CHECKING:
from xgboost_ray.xgb import xgboost as xgb

Expand Down Expand Up @@ -875,6 +882,11 @@ def __eq__(self, other):
return self.__hash__() == other.__hash__()


class RayQuantileDMatrix(RayDMatrix):
    """Marker subclass of ``RayDMatrix`` that adds no behavior of its own.

    It exists so that ``isinstance`` checks can tell a quantile matrix
    request apart from a plain ``RayDMatrix``.
    """


class RayDeviceQuantileDMatrix(RayDMatrix):
"""Currently just a thin wrapper for type detection"""

Expand Down

0 comments on commit 648d6dd

Please sign in to comment.