Supports XGBRFClassifier and XGBRFRegressor (#665)

* Supports XGBRFClassifier and XGBRFRegressor

  Signed-off-by: Xavier Dupre <[email protected]>

* simplify CI script

  Signed-off-by: Xavier Dupre <[email protected]>

* lint

  Signed-off-by: Xavier Dupre <[email protected]>

* black

  Signed-off-by: Xavier Dupre <[email protected]>

---------

Signed-off-by: Xavier Dupre <[email protected]>
onnx · Nov 21, 2023 · 0b5bcf1 · 0b5bcf1
1 parent 59ed379
commit 0b5bcf1
Show file tree

Hide file tree

Showing 8 changed files with 160 additions and 32 deletions.
diff --git a/.azure-pipelines/linux-conda-CI.yml b/.azure-pipelines/linux-conda-CI.yml
@@ -15,39 +15,49 @@ jobs:
   strategy:
     matrix:
 
-      Python311-1140-RT1160-xgb175-lgbm40:
+      # Job name mirrors the pins below: onnx 1.15.0, onnxruntime 1.16.2.
+      Python311-1150-RT1162-xgb175-lgbm40:
         python.version: '3.11'
-        ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
-        ONNXRT_PATH: 'onnxruntime==1.16.0'
+        ONNX_PATH: 'onnx==1.15.0'
+        ONNXRT_PATH: 'onnxruntime==1.16.2'
         COREML_PATH: NONE
         lightgbm.version: '>=4.0'
         xgboost.version: '>=1.7.5'
         numpy.version: ''
         scipy.version: ''
 
-      Python310-1140-RT1151-xgb175:
+      Python311-1141-RT1162-xgb175-lgbm40:
+        python.version: '3.11'
+        ONNX_PATH: 'onnx==1.14.1'
+        ONNXRT_PATH: 'onnxruntime==1.16.2'
+        COREML_PATH: NONE
+        lightgbm.version: '>=4.0'
+        xgboost.version: '>=1.7.5'
+        numpy.version: ''
+        scipy.version: ''
+
+      Python310-1141-RT1151-xgb175:
         python.version: '3.10'
-        ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
+        ONNX_PATH: 'onnx==1.14.1'
         ONNXRT_PATH: 'onnxruntime==1.15.1'
         COREML_PATH: NONE
         lightgbm.version: '<4.0'
         xgboost.version: '>=1.7.5'
         numpy.version: ''
         scipy.version: ''
 
-      Python310-1140-RT1140-xgb175:
+      Python310-1141-RT1140-xgb175:
         python.version: '3.10'
-        ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
-        ONNXRT_PATH: onnxruntime==1.14.0 #'-i https://test.pypi.org/simple/ ort-nightly==1.11.0.dev20220311003'
+        ONNX_PATH: 'onnx==1.14.1'
+        ONNXRT_PATH: 'onnxruntime==1.14.0'
         COREML_PATH: NONE
         lightgbm.version: '<4.0'
         xgboost.version: '>=1.7.5'
         numpy.version: ''
         scipy.version: ''
 
-      Python39-1140-RT1151-xgb175-scipy180:
+      Python39-1141-RT1151-xgb175-scipy180:
         python.version: '3.9'
-        ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
+        ONNX_PATH: 'onnx==1.14.1'
         ONNXRT_PATH: 'onnxruntime==1.15.1'
         COREML_PATH: NONE
         lightgbm.version: '>=4.0'

diff --git a/.azure-pipelines/win32-conda-CI.yml b/.azure-pipelines/win32-conda-CI.yml
@@ -15,24 +15,38 @@ jobs:
   strategy:
     matrix:
 
-      Python311-1141-RT1160:
+      Python311-1150-RT1162:
         python.version: '3.11'
-        ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
-        ONNXRT_PATH: 'onnxruntime==1.16.0'
+        ONNX_PATH: 'onnx==1.15.0'
+        ONNXRT_PATH: 'onnxruntime==1.16.2'
+        COREML_PATH: NONE
+        numpy.version: ''
+
+      Python311-1141-RT1162:
+        python.version: '3.11'
+        ONNX_PATH: 'onnx==1.14.1'
+        ONNXRT_PATH: 'onnxruntime==1.16.2'
         COREML_PATH: NONE
         numpy.version: ''
 
       Python310-1141-RT1151:
         python.version: '3.10'
-        ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
+        ONNX_PATH: 'onnx==1.14.1'
         ONNXRT_PATH: 'onnxruntime==1.15.1'
         COREML_PATH: NONE
         numpy.version: ''
 
       Python310-1141-RT1140:
         python.version: '3.10'
-        ONNX_PATH: 'onnx==1.14.1' #'-i https://test.pypi.org/simple/ onnx==1.14.0rc3'
-        ONNXRT_PATH: onnxruntime==1.14.0 #'-i https://test.pypi.org/simple/ ort-nightly==1.11.0.dev20220311003'
+        ONNX_PATH: 'onnx==1.14.1'
+        # Quoted like every other *_PATH value in this matrix.
+        ONNXRT_PATH: 'onnxruntime==1.14.0'
+        COREML_PATH: NONE
+        numpy.version: ''
+
+      Python39-1141-RT1140:
+        python.version: '3.9'
+        ONNX_PATH: 'onnx==1.14.1'
+        # Quoted like every other *_PATH value in this matrix.
+        ONNXRT_PATH: 'onnxruntime==1.14.0'
         COREML_PATH: NONE
         numpy.version: ''
 

diff --git a/onnxmltools/convert/xgboost/_parse.py b/onnxmltools/convert/xgboost/_parse.py
@@ -6,6 +6,12 @@
 from packaging.version import Version
 import numpy as np
 from xgboost import XGBRegressor, XGBClassifier, __version__
+
+try:
+    from xgboost import XGBRFRegressor, XGBRFClassifier
+except ImportError:
+    # old version of xgboost
+    XGBRFRegressor, XGBRFClassifier = None, None
 from onnxconverter_common.data_types import FloatTensorType
 from ..common._container import XGBoostModelContainer
 from ..common._topology import Topology
@@ -19,6 +25,15 @@
     XGBRegressor: "XGBRegressor",
 }
 
+if XGBRFClassifier is not None:
+    # The random-forest wrappers were importable: map each class to its
+    # operator name and treat XGBRFClassifier as a classifier.
+    xgboost_operator_name_map[XGBRFClassifier] = "XGBRFClassifier"
+    xgboost_operator_name_map[XGBRFRegressor] = "XGBRFRegressor"
+    xgboost_classifier_list.append(XGBRFClassifier)
+
 
 def _append_covers(node):
     res = []
@@ -161,10 +176,9 @@ def _parse_xgboost_simple_model(scope, model, inputs):
     )
     this_operator.inputs = inputs
 
-    if (
-        type(model) in xgboost_classifier_list
-        or getattr(model, "operator_name", None) == "XGBClassifier"
-    ):
+    if type(model) in xgboost_classifier_list or getattr(
+        model, "operator_name", None
+    ) in ("XGBClassifier", "XGBRFClassifier"):
         # For classifiers, we may have two outputs, one for label and
         # the other one for probabilities of all classes.
         # Notice that their types here are not necessarily correct

diff --git a/onnxmltools/convert/xgboost/operator_converters/XGBoost.py b/onnxmltools/convert/xgboost/operator_converters/XGBoost.py
@@ -4,6 +4,11 @@
 import numpy as np
 from onnx import TensorProto
 from xgboost import XGBClassifier
+
+try:
+    from xgboost import XGBRFClassifier
+except ImportError:
+    # Old xgboost without the random-forest wrappers.  Alias the name to
+    # XGBClassifier instead of None: it is used inside an isinstance() tuple
+    # in convert_xgboost, and isinstance(obj, (XGBClassifier, None)) raises
+    # TypeError for every model that is not an XGBClassifier (e.g. any
+    # regressor).  With the alias the check degrades to the plain
+    # XGBClassifier test.
+    XGBRFClassifier = XGBClassifier
 from ...common._registration import register_converter
 from ..common import get_xgb_params
 
@@ -390,10 +395,9 @@ def convert(scope, operator, container):
 
 def convert_xgboost(scope, operator, container):
     xgb_node = operator.raw_operator
-    if (
-        isinstance(xgb_node, XGBClassifier)
-        or getattr(xgb_node, "operator_name", None) == "XGBClassifier"
-    ):
+    if isinstance(xgb_node, (XGBClassifier, XGBRFClassifier)) or getattr(
+        xgb_node, "operator_name", None
+    ) in ("XGBClassifier", "XGBRFClassifier"):
         cls = XGBClassifierConverter
     else:
         cls = XGBRegressorConverter
@@ -402,4 +406,6 @@ def convert_xgboost(scope, operator, container):
 
 
+# Each random-forest operator name is registered with the same converter
+# function as its boosted counterpart.
 register_converter("XGBClassifier", convert_xgboost)
+register_converter("XGBRFClassifier", convert_xgboost)
 register_converter("XGBRegressor", convert_xgboost)
+register_converter("XGBRFRegressor", convert_xgboost)
diff --git a/onnxmltools/convert/xgboost/shape_calculators/Classifier.py b/onnxmltools/convert/xgboost/shape_calculators/Classifier.py
@@ -42,3 +42,4 @@ def calculate_xgboost_classifier_output_shapes(operator):
 
 
+# XGBRFClassifier reuses the XGBClassifier output-shape calculator.
 register_shape_calculator("XGBClassifier", calculate_xgboost_classifier_output_shapes)
+register_shape_calculator("XGBRFClassifier", calculate_xgboost_classifier_output_shapes)
diff --git a/onnxmltools/convert/xgboost/shape_calculators/Regressor.py b/onnxmltools/convert/xgboost/shape_calculators/Regressor.py
@@ -4,3 +4,4 @@
 from ...common.shape_calculator import calculate_linear_regressor_output_shapes
 
+# XGBRFRegressor reuses the shape calculator registered for XGBRegressor.
 register_shape_calculator("XGBRegressor", calculate_linear_regressor_output_shapes)
+register_shape_calculator("XGBRFRegressor", calculate_linear_regressor_output_shapes)
diff --git a/tests/xgboost/test_xgboost_converters.py b/tests/xgboost/test_xgboost_converters.py
@@ -81,7 +81,7 @@ def test_xgb_regressor(self):
             initial_types=[("input", FloatTensorType(shape=[None, None]))],
             target_opset=TARGET_OPSET,
         )
-        self.assertTrue(conv_model is not None)
+
         dump_data_and_model(
             x_test.astype("float32"),
             xgb,
@@ -96,7 +96,7 @@ def test_xgb_classifier(self):
             initial_types=[("input", FloatTensorType(shape=[None, None]))],
             target_opset=TARGET_OPSET,
         )
-        self.assertTrue(conv_model is not None)
+
         dump_data_and_model(x_test, xgb, conv_model, basename="SklearnXGBClassifier")
 
     def test_xgb_classifier_uint8(self):
@@ -106,7 +106,7 @@ def test_xgb_classifier_uint8(self):
             initial_types=[("input", FloatTensorType(shape=["None", "None"]))],
             target_opset=TARGET_OPSET,
         )
-        self.assertTrue(conv_model is not None)
+
         dump_data_and_model(x_test, xgb, conv_model, basename="SklearnXGBClassifier")
 
     def test_xgb_classifier_multi(self):
@@ -116,7 +116,7 @@ def test_xgb_classifier_multi(self):
             initial_types=[("input", FloatTensorType(shape=[None, None]))],
             target_opset=TARGET_OPSET,
         )
-        self.assertTrue(conv_model is not None)
+
         dump_data_and_model(
             x_test, xgb, conv_model, basename="SklearnXGBClassifierMulti"
         )
@@ -130,7 +130,7 @@ def test_xgb_classifier_multi_reglog(self):
             initial_types=[("input", FloatTensorType(shape=[None, None]))],
             target_opset=TARGET_OPSET,
         )
-        self.assertTrue(conv_model is not None)
+
         dump_data_and_model(
             x_test, xgb, conv_model, basename="SklearnXGBClassifierMultiRegLog"
         )
@@ -144,7 +144,7 @@ def test_xgb_classifier_reglog(self):
             initial_types=[("input", FloatTensorType(shape=[None, None]))],
             target_opset=TARGET_OPSET,
         )
-        self.assertTrue(conv_model is not None)
+
         dump_data_and_model(
             x_test, xgb, conv_model, basename="SklearnXGBClassifierRegLog"
         )
@@ -163,7 +163,7 @@ def test_xgb_classifier_multi_discrete_int_labels(self):
             initial_types=[("input", FloatTensorType(shape=[None, None]))],
             target_opset=TARGET_OPSET,
         )
-        self.assertTrue(conv_model is not None)
+
         dump_data_and_model(
             x_test.astype("float32"),
             xgb,
@@ -631,7 +631,7 @@ def test_xgb_classifier_601(self):
             initial_types=[("input", FloatTensorType(shape=[None, None]))],
             target_opset=TARGET_OPSET,
         )
-        self.assertTrue(conv_model is not None)
+
         dump_data_and_model(x_test, xgb, conv_model, basename="SklearnXGBClassifier601")
 
     def test_xgb_classifier_hinge(self):

diff --git a/tests/xgboost/test_xgboost_converters_rf.py b/tests/xgboost/test_xgboost_converters_rf.py
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: Apache-2.0
+
+import unittest
+import numpy as np
+from sklearn.datasets import load_diabetes, make_classification
+from sklearn.model_selection import train_test_split
+from xgboost import XGBRFRegressor, XGBRFClassifier
+from onnx.defs import onnx_opset_version
+from onnxconverter_common.onnx_ex import DEFAULT_OPSET_NUMBER
+from onnxmltools.convert import convert_xgboost
+from onnxmltools.convert.common.data_types import FloatTensorType
+from onnxmltools.utils import dump_data_and_model
+
+
+TARGET_OPSET = min(DEFAULT_OPSET_NUMBER, onnx_opset_version())
+
+
+def fct_cl2(y):
+    """Overwrite every entry of ``y`` equal to 2 with 0.
+
+    ``y`` is modified in place and the same object is returned.
+    """
+    is_two = y == 2
+    y[is_two] = 0
+    return y
+
+
+def fct_cl3(y):
+    """Overwrite every entry of ``y`` equal to 0 with 6.
+
+    ``y`` is modified in place and the same object is returned.
+    """
+    is_zero = y == 0
+    y[is_zero] = 6
+    return y
+
+
+def fct_id(y):
+    """Return ``y`` unchanged."""
+    return y
+
+
+def _fit_classification_model(model, n_classes, is_str=False, dtype=None):
+    """Fit ``model`` on a synthetic classification problem.
+
+    The data comes from ``make_classification`` (1000 samples, 100 features,
+    7 informative, ``random_state=42``) and is split in half into train and
+    test parts.
+
+    :param model: estimator exposing ``fit(X, y)``
+    :param n_classes: number of classes to generate
+    :param is_str: cast the labels to ``np.str_`` instead of ``np.int64``
+    :param dtype: if not None, the training labels are cast to this dtype
+        just before fitting
+    :return: tuple ``(fitted model, test features as float32)``
+    """
+    features, labels = make_classification(
+        n_classes=n_classes,
+        n_features=100,
+        n_samples=1000,
+        random_state=42,
+        n_informative=7,
+    )
+    if is_str:
+        labels = labels.astype(np.str_)
+    else:
+        labels = labels.astype(np.int64)
+    train_x, test_x, train_y, _ = train_test_split(
+        features, labels, test_size=0.5, random_state=42
+    )
+    if dtype is not None:
+        train_y = train_y.astype(dtype)
+    model.fit(train_x, train_y)
+    return model, test_x.astype(np.float32)
+
+
+class TestXGBoostRFModels(unittest.TestCase):
+    """Conversion tests for XGBRFRegressor and XGBRFClassifier."""
+
+    def test_xgbrf_regressor(self):
+        # Fit on half of the diabetes dataset, convert, then hand the held-out
+        # half, the model and its ONNX conversion to dump_data_and_model.
+        diabetes = load_diabetes()
+        x_train, x_test, y_train, _ = train_test_split(
+            diabetes.data, diabetes.target, test_size=0.5, random_state=42
+        )
+        regressor = XGBRFRegressor()
+        regressor.fit(x_train, y_train)
+        input_types = [("input", FloatTensorType(shape=[None, None]))]
+        onnx_model = convert_xgboost(
+            regressor, initial_types=input_types, target_opset=TARGET_OPSET
+        )
+        dump_data_and_model(
+            x_test.astype("float32"),
+            regressor,
+            onnx_model,
+            basename="SklearnXGBRFRegressor-Dec3",
+        )
+
+    def test_xgbrf_classifier(self):
+        # Binary problem produced by the shared fitting helper.
+        classifier, x_test = _fit_classification_model(XGBRFClassifier(), 2)
+        input_types = [("input", FloatTensorType(shape=[None, None]))]
+        onnx_model = convert_xgboost(
+            classifier, initial_types=input_types, target_opset=TARGET_OPSET
+        )
+        dump_data_and_model(
+            x_test, classifier, onnx_model, basename="SklearnXGBRFClassifier"
+        )
+
+
+if __name__ == "__main__":
+    # Run every test of this module with verbose output.
+    unittest.main(verbosity=2)