diff --git a/CHANGELOGS.md b/CHANGELOGS.md
index f76df176b..85e954fc5 100644
--- a/CHANGELOGS.md
+++ b/CHANGELOGS.md
@@ -2,6 +2,12 @@
## 1.17.0 (development)
+* Upgrade the maximum supported opset to 21 and
+ update requirements to scikit-learn>=1.1;
+ older versions are no longer tested
+ [#1098](https://github.com/onnx/sklearn-onnx/pull/1098)
+* Support infrequent categories for OneHotEncoder
+ [#1029](https://github.com/onnx/sklearn-onnx/pull/1029)
* Support kernel Matern in Gaussian Process
[#978](https://github.com/onnx/sklearn-onnx/pull/978)
* Fix for multidimensional gaussian process
diff --git a/README.md b/README.md
index 2d5852f98..0059fe74b 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ including models or transformers coming from external libraries.
## Documentation
Full documentation including tutorials is available at [https://onnx.ai/sklearn-onnx/](https://onnx.ai/sklearn-onnx/).
[Supported scikit-learn Models](https://onnx.ai/sklearn-onnx/supported.html)
-Last supported opset is 19.
+Last supported opset is 21.
You may also find answers in [existing issues](https://github.com/onnx/sklearn-onnx/issues?utf8=%E2%9C%93&q=is%3Aissue)
or submit a new one.
diff --git a/docs/api_summary.rst b/docs/api_summary.rst
index 24a0e2c89..e22d6c80e 100644
--- a/docs/api_summary.rst
+++ b/docs/api_summary.rst
@@ -124,6 +124,6 @@ Topology
--------
.. autoclass:: skl2onnx.common._topology.Topology
- :members: compile, topological_operator_iterator
+ :members:
.. autofunction:: skl2onnx.common._topology.convert_topology
diff --git a/docs/exts/sphinx_skl2onnx_extension.py b/docs/exts/sphinx_skl2onnx_extension.py
index f7cf24cae..d05f97110 100644
--- a/docs/exts/sphinx_skl2onnx_extension.py
+++ b/docs/exts/sphinx_skl2onnx_extension.py
@@ -174,6 +174,19 @@ def make_ref(name):
rows.append("")
for name in sorted_keys:
+ if name in {
+ "OnnxDecorrelateTransformer",
+ "OnnxGrowthCalculator",
+ "OnnxPredictableTSNE",
+ "OnnxSklearnLGBMClassifier",
+ "OnnxSklearnLGBMRegressor",
+ "OnnxSklearnXGBClassifier",
+ "OnnxSklearnXGBRegressor",
+ "OnnxSklearnPipeline",
+ "OnnxSklearnColumnTransformer",
+ "OnnxSklearnFeatureUnion",
+ }:
+ continue
rows = []
cl = cls[name]
rows.append(".. _l-sklops-{}:".format(cl.__name__))
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 0da5e225f..2cf99182f 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,5 +1,6 @@
# tests
black
+jinja2
onnxruntime-extensions
onnxscript
pandas
diff --git a/requirements.txt b/requirements.txt
index cca532fb2..0954921e1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
onnx>=1.2.1
-scikit-learn>=0.19
+scikit-learn>=1.1
onnxconverter-common>=1.7.0
diff --git a/skl2onnx/__init__.py b/skl2onnx/__init__.py
index f81feebc2..3db90b766 100644
--- a/skl2onnx/__init__.py
+++ b/skl2onnx/__init__.py
@@ -9,7 +9,7 @@
__producer_version__ = __version__
__domain__ = "ai.onnx"
__model_version__ = 0
-__max_supported_opset__ = 19 # Converters are tested up to this version.
+__max_supported_opset__ = 21 # Converters are tested up to this version.
from .convert import convert_sklearn, to_onnx, wrap_as_onnx_mixin # noqa
diff --git a/skl2onnx/algebra/_cache/__init__.py b/skl2onnx/algebra/_cache/__init__.py
deleted file mode 100644
index 4d0f7c497..000000000
--- a/skl2onnx/algebra/_cache/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-import os
-
-
-def cache_folder():
- return os.path.abspath(os.path.dirname(__file__))
diff --git a/skl2onnx/algebra/automation.py b/skl2onnx/algebra/automation.py
index 187be25c5..a96c8a951 100644
--- a/skl2onnx/algebra/automation.py
+++ b/skl2onnx/algebra/automation.py
@@ -3,7 +3,6 @@
import textwrap
import onnx
import onnx.defs # noqa
-from onnx.defs import OpSchema
def _get_doc_template():
@@ -115,154 +114,6 @@ def get_domain_list():
)
-def get_rst_doc(op_name=None):
- """
- Returns a documentation in RST format
- for all :class:`OnnxOperator`.
-
- :param op_name: operator name of None for all
- :return: string
-
- The function relies on module *jinja2* or replaces it
- with a simple rendering if not present.
- """
- if op_name is None:
- schemas = onnx.defs.get_all_schemas_with_history()
- elif isinstance(op_name, str):
- schemas = [
- schema
- for schema in onnx.defs.get_all_schemas_with_history()
- if schema.name == op_name
- ]
- if len(schemas) > 1:
- raise RuntimeError(
- "Multiple operators have the same name '{}'.".format(op_name)
- )
- elif not isinstance(op_name, list):
- schemas = [op_name]
- if len(schemas) == 0:
- raise ValueError("Unable to find any operator with name '{}'.".format(op_name))
-
- # from onnx.backend.sample.ops import collect_sample_implementations
- # from onnx.backend.test.case import collect_snippets
- # SNIPPETS = collect_snippets()
- # SAMPLE_IMPLEMENTATIONS = collect_sample_implementations()
- def format_name_with_domain(sch):
- if sch.domain:
- return "{} ({})".format(sch.name, sch.domain)
- return sch.name
-
- def get_is_homogeneous(obj):
- try:
- return obj.is_homogeneous
- except AttributeError:
- try:
- return obj.isHomogeneous
- except AttributeError:
- return False
-
- def format_option(obj):
- opts = []
- if OpSchema.FormalParameterOption.Optional == obj.option:
- opts.append("optional")
- elif OpSchema.FormalParameterOption.Variadic == obj.option:
- opts.append("variadic")
- if get_is_homogeneous(obj):
- opts.append("heterogeneous")
- if opts:
- return " (%s)" % ", ".join(opts)
- return ""
-
- def getconstraint(const, ii):
- if const.type_param_str:
- name = const.type_param_str
- else:
- name = str(ii)
- if const.allowed_type_strs:
- name += " " + ", ".join(const.allowed_type_strs)
- return name
-
- def getname(obj, i):
- name = obj.name
- if len(name) == 0:
- return str(i)
- return name
-
- def process_documentation(doc):
- if doc is None:
- doc = ""
- doc = textwrap.dedent(doc)
- main_docs_url = "https://github.com/onnx/onnx/blob/main/"
- rep = {
- "[the doc](IR.md)": "`ONNX <{0}docs/IR.md>`_",
- "[the doc](Broadcasting.md)": (
- "`Broadcasting in ONNX <{0}docs/Broadcasting.md>`_"
- ),
- "<dl>": "",
- "</dl>": "",
- "<dt>": "* ",
- "<dd>": " ",
- "</dt>": "",
- "</dd>": "",
- "<tt>": "``",
- "</tt>": "``",
- "<br>": "\n",
- }
- for k, v in rep.items():
- doc = doc.replace(k, v.format(main_docs_url))
- move = 0
- lines = []
- for line in doc.split("\n"):
- if line.startswith("```"):
- if move > 0:
- move -= 4
- lines.append("\n")
- else:
- lines.append("::\n")
- move += 4
- elif move > 0:
- lines.append(" " * move + line)
- else:
- lines.append(line)
- return "\n".join(lines)
-
- def build_doc_url(sch):
- doc_url = "https://github.com/onnx/onnx/blob/main/docs/Operators"
- if "ml" in sch.domain:
- doc_url += "-ml"
- doc_url += ".md"
- doc_url += "#"
- if sch.domain not in (None, "", "ai.onnx"):
- doc_url += sch.domain + "."
- return doc_url
-
- def get_type_str(inou):
- try:
- return inou.type_str
- except AttributeError:
- return inou.typeStr
-
- fnwd = format_name_with_domain
- tmpl = _template_operator
- docs = tmpl.render(
- schemas=schemas,
- OpSchema=OpSchema,
- len=len,
- getattr=getattr,
- sorted=sorted,
- format_option=format_option,
- getconstraint=getconstraint,
- getname=getname,
- enumerate=enumerate,
- format_name_with_domain=fnwd,
- process_documentation=process_documentation,
- build_doc_url=build_doc_url,
- str=str,
- get_type_str=get_type_str,
- )
- return docs
-
-
def _get_doc_template_sklearn():
try:
from jinja2 import Template
diff --git a/skl2onnx/algebra/onnx_ops.py b/skl2onnx/algebra/onnx_ops.py
index 4517e1963..fb270a7af 100644
--- a/skl2onnx/algebra/onnx_ops.py
+++ b/skl2onnx/algebra/onnx_ops.py
@@ -4,7 +4,6 @@
Place holder for all ONNX operators.
"""
import sys
-import os
import numpy as np
try:
@@ -14,8 +13,6 @@
import onnx
from ..common.data_types import DataType
from ..common._topology import Variable
-from .automation import get_rst_doc
-from ._cache import cache_folder
def ClassFactory(
@@ -169,7 +166,6 @@ def dynamic_class_creation(cache=False):
`_.
"""
- cache_dir = cache_folder()
res = {}
for schema in onnx.defs.get_all_schemas_with_history():
if schema.support_level == schema.SupportType.EXPERIMENTAL:
@@ -199,22 +195,8 @@ def _c(obj, label, i):
outputs = [_c(o, "O", i) for i, o in enumerate(schema.outputs)]
args = [p for p in schema.attributes]
- if "_" in name:
- class_name = "Onnx" + name
- else:
- class_name = "Onnx" + schema.name
-
- filename = os.path.join(
- cache_dir, schema.name + "_" + str(schema.since_version) + ".rst"
- )
- if not cache and os.path.exists(filename):
- with open(filename, "r", encoding="utf-8") as f:
- doc = f.read()
- else:
- doc = get_rst_doc(schema)
- if cache:
- with open(filename, "w", encoding="utf-8") as f:
- f.write(doc)
+ class_name = "Onnx" + (name if "_" in name else schema.name)
+ doc = f"See `{name} `_."
cl = ClassFactory(
class_name,
diff --git a/skl2onnx/common/_topology.py b/skl2onnx/common/_topology.py
index 1be1db166..4518ac1aa 100644
--- a/skl2onnx/common/_topology.py
+++ b/skl2onnx/common/_topology.py
@@ -63,6 +63,7 @@ def _default_OPSET_TO_IR_VERSION():
18: 8,
19: 9,
20: 9,
+ 21: 10,
}
diff --git a/skl2onnx/operator_converters/one_hot_encoder.py b/skl2onnx/operator_converters/one_hot_encoder.py
index 95eb8b7b6..27b78b73c 100644
--- a/skl2onnx/operator_converters/one_hot_encoder.py
+++ b/skl2onnx/operator_converters/one_hot_encoder.py
@@ -45,13 +45,6 @@ def convert_sklearn_one_hot_encoder(
enum_cats = []
index_inputs = 0
- to_drop = ohe_op._drop_idx_after_grouping
- if to_drop is not None:
- # raise NotImplementedError(
- # f"The converter is not implemented when "
- # f"_drop_idx_after_grouping is not None: {to_drop}."
- # )
- pass
for index, cats in enumerate(ohe_op.categories_):
filtered_cats = ohe_op._compute_transformed_categories(index)
diff --git a/tests/test_algebra_onnx_doc.py b/tests/test_algebra_onnx_doc.py
index da70a236c..25f3e821d 100644
--- a/tests/test_algebra_onnx_doc.py
+++ b/tests/test_algebra_onnx_doc.py
@@ -6,7 +6,7 @@
from numpy.testing import assert_almost_equal
import onnx
from skl2onnx.algebra.onnx_ops import dynamic_class_creation
-from skl2onnx.algebra.automation import get_rst_doc_sklearn, get_rst_doc
+from skl2onnx.algebra.automation import get_rst_doc_sklearn
from test_utils import TARGET_OPSET
@@ -43,27 +43,16 @@ def test_transpose2(self):
res = self.predict_with_onnxruntime(model_def, X)
assert_almost_equal(res["Y"], X)
- @unittest.skipIf(
- sys.platform.startswith("win"), reason="onnx schema are incorrect on Windows"
- )
- @unittest.skipIf(TARGET_OPSET <= 20, reason="not available")
- def test_doc_onnx(self):
- rst = get_rst_doc()
- assert "**Summary**" in rst
-
@unittest.skipIf(
sys.platform.startswith("win"), reason="onnx schema are incorrect on Windows"
)
@unittest.skipIf(TARGET_OPSET <= 20, reason="not available")
def test_doc_sklearn(self):
- try:
- rst = get_rst_doc_sklearn()
- assert ".. _l-sklops-OnnxSklearnBernoulliNB:" in rst
- except KeyError as e:
- assert "SklearnGaussianProcessRegressor" in str(
- e
- ) or "SklearnGaussianProcessClassifier" in str(e)
+ rst = get_rst_doc_sklearn()
+ assert (
+ ".. _l-sklops-OnnxSklearnBernoulliNB:" in rst
+ ), f"Unable to find a substring in {rst}"
if __name__ == "__main__":
- unittest.main()
+ unittest.main(verbosity=2)
diff --git a/tests/test_sklearn_one_hot_encoder_converter.py b/tests/test_sklearn_one_hot_encoder_converter.py
index ec705fb9d..6696cf888 100644
--- a/tests/test_sklearn_one_hot_encoder_converter.py
+++ b/tests/test_sklearn_one_hot_encoder_converter.py
@@ -472,6 +472,7 @@ def test_shape_inference_onnx(self):
def test_shape_inference_onnxruntime(self):
self._shape_inference("onnxruntime")
+ @unittest.skipIf(not skl12(), reason="sparse output not available")
def test_min_frequency(self):
data = pandas.DataFrame(
[
diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py
index bb9385e7e..3e9f67d09 100644
--- a/tests/test_utils/__init__.py
+++ b/tests/test_utils/__init__.py
@@ -60,6 +60,8 @@ def create_tensor(N, C, H=None, W=None):
def _get_ir_version(opv):
+ if opv >= 21:
+ return 10
if opv >= 19:
return 9
if opv >= 15:
@@ -83,7 +85,11 @@ def max_onnxruntime_opset():
`_.
"""
vi = pv.Version(ort_version.split("+")[0])
- if vi >= pv.Version("1.16.0"):
+ if vi >= pv.Version("1.18.0"):
+ return 21
+ if vi >= pv.Version("1.17.0"):
+ return 20
+ if vi >= pv.Version("1.15.0"):
return 19
if vi >= pv.Version("1.14.0"):
return 18
@@ -120,6 +126,7 @@ def max_onnxruntime_opset():
)
)
+# opset-ml == 4 still not implemented in onnxruntime
value_ml = 3
if TARGET_OPSET <= 16:
# TreeEnsemble* for opset-ml == 3 is implemented in onnxruntime==1.12.0