Skip to content

Commit

Permalink
Supports XGBRFClassifier and XGBRFRegressor (#665)
Browse files Browse the repository at this point in the history
* Supports XGBRFClassifier and XGBRFRegressor

Signed-off-by: Xavier Dupre <[email protected]>

* simplify CI script

Signed-off-by: Xavier Dupre <[email protected]>

* lint

Signed-off-by: Xavier Dupre <[email protected]>

* black

Signed-off-by: Xavier Dupre <[email protected]>

---------

Signed-off-by: Xavier Dupre <[email protected]>
  • Loading branch information
xadupre authored Nov 21, 2023
1 parent 59ed379 commit 0b5bcf1
Show file tree
Hide file tree
Showing 8 changed files with 160 additions and 32 deletions.
30 changes: 20 additions & 10 deletions .azure-pipelines/linux-conda-CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,39 +15,49 @@ jobs:
strategy:
matrix:

Python311-1140-RT1160-xgb175-lgbm40:
Python311-1150-RT1160-xgb175-lgbm40:
python.version: '3.11'
ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
ONNXRT_PATH: 'onnxruntime==1.16.0'
ONNX_PATH: 'onnx==1.15.0'
ONNXRT_PATH: 'onnxruntime==1.16.2'
COREML_PATH: NONE
lightgbm.version: '>=4.0'
xgboost.version: '>=1.7.5'
numpy.version: ''
scipy.version: ''

Python310-1140-RT1151-xgb175:
Python311-1141-RT1162-xgb175-lgbm40:
python.version: '3.11'
ONNX_PATH: 'onnx==1.14.1'
ONNXRT_PATH: 'onnxruntime==1.16.2'
COREML_PATH: NONE
lightgbm.version: '>=4.0'
xgboost.version: '>=1.7.5'
numpy.version: ''
scipy.version: ''

Python310-1141-RT1151-xgb175:
python.version: '3.10'
ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
ONNX_PATH: 'onnx==1.14.1'
ONNXRT_PATH: 'onnxruntime==1.15.1'
COREML_PATH: NONE
lightgbm.version: '<4.0'
xgboost.version: '>=1.7.5'
numpy.version: ''
scipy.version: ''

Python310-1140-RT1140-xgb175:
Python310-1141-RT1140-xgb175:
python.version: '3.10'
ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
ONNXRT_PATH: onnxruntime==1.14.0 #'-i https://test.pypi.org/simple/ ort-nightly==1.11.0.dev20220311003'
ONNX_PATH: 'onnx==1.14.1'
ONNXRT_PATH: 'onnxruntime==1.14.0'
COREML_PATH: NONE
lightgbm.version: '<4.0'
xgboost.version: '>=1.7.5'
numpy.version: ''
scipy.version: ''

Python39-1140-RT1151-xgb175-scipy180:
Python39-1141-RT1151-xgb175-scipy180:
python.version: '3.9'
ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
ONNX_PATH: 'onnx==1.14.1'
ONNXRT_PATH: 'onnxruntime==1.15.1'
COREML_PATH: NONE
lightgbm.version: '>=4.0'
Expand Down
26 changes: 20 additions & 6 deletions .azure-pipelines/win32-conda-CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,38 @@ jobs:
strategy:
matrix:

Python311-1141-RT1160:
Python311-1150-RT1162:
python.version: '3.11'
ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
ONNXRT_PATH: 'onnxruntime==1.16.0'
ONNX_PATH: 'onnx==1.15.0'
ONNXRT_PATH: 'onnxruntime==1.16.2'
COREML_PATH: NONE
numpy.version: ''

Python311-1141-RT1162:
python.version: '3.11'
ONNX_PATH: 'onnx==1.14.1'
ONNXRT_PATH: 'onnxruntime==1.16.2'
COREML_PATH: NONE
numpy.version: ''

Python310-1141-RT1151:
python.version: '3.10'
ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
ONNX_PATH: 'onnx==1.14.1'
ONNXRT_PATH: 'onnxruntime==1.15.1'
COREML_PATH: NONE
numpy.version: ''

Python310-1141-RT1140:
python.version: '3.10'
ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
ONNXRT_PATH: onnxruntime==1.14.0 #'-i https://test.pypi.org/simple/ ort-nightly==1.11.0.dev20220311003'
ONNX_PATH: 'onnx==1.14.1'
ONNXRT_PATH: onnxruntime==1.14.0
COREML_PATH: NONE
numpy.version: ''

Python39-1141-RT1140:
python.version: '3.9'
ONNX_PATH: 'onnx==1.14.1'
ONNXRT_PATH: onnxruntime==1.14.0
COREML_PATH: NONE
numpy.version: ''

Expand Down
22 changes: 18 additions & 4 deletions onnxmltools/convert/xgboost/_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
from packaging.version import Version
import numpy as np
from xgboost import XGBRegressor, XGBClassifier, __version__

try:
from xgboost import XGBRFRegressor, XGBRFClassifier
except ImportError:
# old version of xgboost
XGBRFRegressor, XGBRFClassifier = None, None
from onnxconverter_common.data_types import FloatTensorType
from ..common._container import XGBoostModelContainer
from ..common._topology import Topology
Expand All @@ -19,6 +25,15 @@
XGBRegressor: "XGBRegressor",
}

# The random-forest estimators are only bound when the installed xgboost
# provides them (the guarded import above sets both names to None otherwise).
if XGBRFClassifier is not None:
    # Map each estimator class to the operator name used for registration.
    xgboost_operator_name_map[XGBRFClassifier] = "XGBRFClassifier"
    xgboost_operator_name_map[XGBRFRegressor] = "XGBRFRegressor"
    # Only the classifier variant belongs in the classifier list.
    xgboost_classifier_list.append(XGBRFClassifier)


def _append_covers(node):
res = []
Expand Down Expand Up @@ -161,10 +176,9 @@ def _parse_xgboost_simple_model(scope, model, inputs):
)
this_operator.inputs = inputs

if (
type(model) in xgboost_classifier_list
or getattr(model, "operator_name", None) == "XGBClassifier"
):
if type(model) in xgboost_classifier_list or getattr(
model, "operator_name", None
) in ("XGBClassifier", "XGBRFClassifier"):
# For classifiers, we may have two outputs, one for label and
# the other one for probabilities of all classes.
# Notice that their types here are not necessarily correct
Expand Down
14 changes: 10 additions & 4 deletions onnxmltools/convert/xgboost/operator_converters/XGBoost.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
import numpy as np
from onnx import TensorProto
from xgboost import XGBClassifier

try:
    from xgboost import XGBRFClassifier
except ImportError:
    # Older xgboost releases do not provide XGBRFClassifier. Binding the name
    # to None would break its use as an isinstance() target in
    # convert_xgboost: isinstance(x, (XGBClassifier, None)) raises TypeError
    # for every x that is not an XGBClassifier (e.g. any regressor).
    # A private placeholder type keeps the check valid and never matches.
    class XGBRFClassifier:
        """Placeholder used when xgboost has no XGBRFClassifier.

        No model is ever an instance of this class, so isinstance() checks
        against it simply evaluate to False.
        """
from ...common._registration import register_converter
from ..common import get_xgb_params

Expand Down Expand Up @@ -390,10 +395,9 @@ def convert(scope, operator, container):

def convert_xgboost(scope, operator, container):
xgb_node = operator.raw_operator
if (
isinstance(xgb_node, XGBClassifier)
or getattr(xgb_node, "operator_name", None) == "XGBClassifier"
):
if isinstance(xgb_node, (XGBClassifier, XGBRFClassifier)) or getattr(
xgb_node, "operator_name", None
) in ("XGBClassifier", "XGBRFClassifier"):
cls = XGBClassifierConverter
else:
cls = XGBRegressorConverter
Expand All @@ -402,4 +406,6 @@ def convert_xgboost(scope, operator, container):


register_converter("XGBClassifier", convert_xgboost)
register_converter("XGBRFClassifier", convert_xgboost)
register_converter("XGBRegressor", convert_xgboost)
register_converter("XGBRFRegressor", convert_xgboost)
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,4 @@ def calculate_xgboost_classifier_output_shapes(operator):


register_shape_calculator("XGBClassifier", calculate_xgboost_classifier_output_shapes)
register_shape_calculator("XGBRFClassifier", calculate_xgboost_classifier_output_shapes)
1 change: 1 addition & 0 deletions onnxmltools/convert/xgboost/shape_calculators/Regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
from ...common.shape_calculator import calculate_linear_regressor_output_shapes

register_shape_calculator("XGBRegressor", calculate_linear_regressor_output_shapes)
register_shape_calculator("XGBRFRegressor", calculate_linear_regressor_output_shapes)
16 changes: 8 additions & 8 deletions tests/xgboost/test_xgboost_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def test_xgb_regressor(self):
initial_types=[("input", FloatTensorType(shape=[None, None]))],
target_opset=TARGET_OPSET,
)
self.assertTrue(conv_model is not None)

dump_data_and_model(
x_test.astype("float32"),
xgb,
Expand All @@ -96,7 +96,7 @@ def test_xgb_classifier(self):
initial_types=[("input", FloatTensorType(shape=[None, None]))],
target_opset=TARGET_OPSET,
)
self.assertTrue(conv_model is not None)

dump_data_and_model(x_test, xgb, conv_model, basename="SklearnXGBClassifier")

def test_xgb_classifier_uint8(self):
Expand All @@ -106,7 +106,7 @@ def test_xgb_classifier_uint8(self):
initial_types=[("input", FloatTensorType(shape=["None", "None"]))],
target_opset=TARGET_OPSET,
)
self.assertTrue(conv_model is not None)

dump_data_and_model(x_test, xgb, conv_model, basename="SklearnXGBClassifier")

def test_xgb_classifier_multi(self):
Expand All @@ -116,7 +116,7 @@ def test_xgb_classifier_multi(self):
initial_types=[("input", FloatTensorType(shape=[None, None]))],
target_opset=TARGET_OPSET,
)
self.assertTrue(conv_model is not None)

dump_data_and_model(
x_test, xgb, conv_model, basename="SklearnXGBClassifierMulti"
)
Expand All @@ -130,7 +130,7 @@ def test_xgb_classifier_multi_reglog(self):
initial_types=[("input", FloatTensorType(shape=[None, None]))],
target_opset=TARGET_OPSET,
)
self.assertTrue(conv_model is not None)

dump_data_and_model(
x_test, xgb, conv_model, basename="SklearnXGBClassifierMultiRegLog"
)
Expand All @@ -144,7 +144,7 @@ def test_xgb_classifier_reglog(self):
initial_types=[("input", FloatTensorType(shape=[None, None]))],
target_opset=TARGET_OPSET,
)
self.assertTrue(conv_model is not None)

dump_data_and_model(
x_test, xgb, conv_model, basename="SklearnXGBClassifierRegLog"
)
Expand All @@ -163,7 +163,7 @@ def test_xgb_classifier_multi_discrete_int_labels(self):
initial_types=[("input", FloatTensorType(shape=[None, None]))],
target_opset=TARGET_OPSET,
)
self.assertTrue(conv_model is not None)

dump_data_and_model(
x_test.astype("float32"),
xgb,
Expand Down Expand Up @@ -631,7 +631,7 @@ def test_xgb_classifier_601(self):
initial_types=[("input", FloatTensorType(shape=[None, None]))],
target_opset=TARGET_OPSET,
)
self.assertTrue(conv_model is not None)

dump_data_and_model(x_test, xgb, conv_model, basename="SklearnXGBClassifier601")

def test_xgb_classifier_hinge(self):
Expand Down
82 changes: 82 additions & 0 deletions tests/xgboost/test_xgboost_converters_rf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# SPDX-License-Identifier: Apache-2.0

import unittest
import numpy as np
from sklearn.datasets import load_diabetes, make_classification
from sklearn.model_selection import train_test_split
from xgboost import XGBRFRegressor, XGBRFClassifier
from onnx.defs import onnx_opset_version
from onnxconverter_common.onnx_ex import DEFAULT_OPSET_NUMBER
from onnxmltools.convert import convert_xgboost
from onnxmltools.convert.common.data_types import FloatTensorType
from onnxmltools.utils import dump_data_and_model


# Opset used by every conversion in this module: the lower of
# onnxconverter_common's DEFAULT_OPSET_NUMBER and the value returned by
# onnx.defs.onnx_opset_version() for the installed onnx package.
TARGET_OPSET = min(DEFAULT_OPSET_NUMBER, onnx_opset_version())


def fct_cl2(y):
    """Relabel every entry equal to 2 as 0, in place, and return ``y``.

    ``y`` must support boolean-mask assignment (e.g. a NumPy array); the
    same object that was passed in is returned.
    """
    is_two = y == 2
    y[is_two] = 0
    return y


def fct_cl3(y):
    """Relabel every entry equal to 0 as 6, in place, and return ``y``.

    ``y`` must support boolean-mask assignment (e.g. a NumPy array); the
    same object that was passed in is returned.
    """
    is_zero = y == 0
    y[is_zero] = 6
    return y


def fct_id(y):
    """Identity transform: return ``y`` itself, untouched."""
    return y


def _fit_classification_model(model, n_classes, is_str=False, dtype=None):
    """Fit ``model`` on a synthetic classification dataset.

    The dataset comes from ``make_classification`` (1000 samples,
    100 features, 7 informative, ``random_state=42``) and is split in half
    with ``train_test_split`` (``random_state=42``).

    :param model: estimator exposing ``fit(X, y)``
    :param n_classes: number of classes to generate
    :param is_str: cast labels to ``np.str_`` when true, ``np.int64`` otherwise
    :param dtype: optional dtype applied to the training labels only
    :return: ``(model, x_test)`` where ``x_test`` is the held-out half of the
        features cast to ``np.float32``
    """
    features, labels = make_classification(
        n_classes=n_classes,
        n_features=100,
        n_samples=1000,
        random_state=42,
        n_informative=7,
    )
    if is_str:
        labels = labels.astype(np.str_)
    else:
        labels = labels.astype(np.int64)
    train_x, test_x, train_y, _ = train_test_split(
        features, labels, test_size=0.5, random_state=42
    )
    # The optional cast is applied after the split, to training labels only.
    if dtype is not None:
        train_y = train_y.astype(dtype)
    model.fit(train_x, train_y)
    return model, test_x.astype(np.float32)


class TestXGBoostRFModels(unittest.TestCase):
    """Conversion tests for the XGBoost random-forest estimators."""

    def test_xgbrf_regressor(self):
        # Fit XGBRFRegressor on half of the diabetes dataset, convert it,
        # then pass the held-out half, the model and its ONNX conversion
        # to dump_data_and_model.
        diabetes = load_diabetes()
        x_train, x_test, y_train, _ = train_test_split(
            diabetes.data, diabetes.target, test_size=0.5, random_state=42
        )
        regressor = XGBRFRegressor()
        regressor.fit(x_train, y_train)
        input_spec = [("input", FloatTensorType(shape=[None, None]))]
        onnx_model = convert_xgboost(
            regressor,
            initial_types=input_spec,
            target_opset=TARGET_OPSET,
        )
        dump_data_and_model(
            x_test.astype("float32"),
            regressor,
            onnx_model,
            basename="SklearnXGBRFRegressor-Dec3",
        )

    def test_xgbrf_classifier(self):
        # Binary problem: fit XGBRFClassifier on synthetic data, convert it,
        # then pass the test features and both models to dump_data_and_model.
        classifier, x_test = _fit_classification_model(XGBRFClassifier(), 2)
        input_spec = [("input", FloatTensorType(shape=[None, None]))]
        onnx_model = convert_xgboost(
            classifier,
            initial_types=input_spec,
            target_opset=TARGET_OPSET,
        )
        dump_data_and_model(
            x_test, classifier, onnx_model, basename="SklearnXGBRFClassifier"
        )


if __name__ == "__main__":
    # Run every test case defined in this module with verbose reporting.
    unittest.main(verbosity=2)

0 comments on commit 0b5bcf1

Please sign in to comment.