diff --git a/CHANGELOGS.md b/CHANGELOGS.md index f76df176b..85e954fc5 100644 --- a/CHANGELOGS.md +++ b/CHANGELOGS.md @@ -2,6 +2,12 @@ ## 1.17.0 (development) +* Upgrade the maximum supported opset to 21, + update requirements to scikit-learn>=1.1, + older versions are not tested anymore, + [#1098](https://github.com/onnx/sklearn-onnx/pull/1098) +* Support infrequent categories for OneHotEncoder + [#1029](https://github.com/onnx/sklearn-onnx/pull/1029) * Support kernel Matern in Gaussian Process [#978](https://github.com/onnx/sklearn-onnx/pull/978) * Fix for multidimensional gaussian process diff --git a/README.md b/README.md index 2d5852f98..0059fe74b 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ including models or transformers coming from external libraries. ## Documentation Full documentation including tutorials is available at [https://onnx.ai/sklearn-onnx/](https://onnx.ai/sklearn-onnx/). [Supported scikit-learn Models](https://onnx.ai/sklearn-onnx/supported.html) -Last supported opset is 19. +Last supported opset is 21. You may also find answers in [existing issues](https://github.com/onnx/sklearn-onnx/issues?utf8=%E2%9C%93&q=is%3Aissue) or submit a new one. diff --git a/docs/api_summary.rst b/docs/api_summary.rst index 24a0e2c89..e22d6c80e 100644 --- a/docs/api_summary.rst +++ b/docs/api_summary.rst @@ -124,6 +124,6 @@ Topology -------- .. autoclass:: skl2onnx.common._topology.Topology - :members: compile, topological_operator_iterator + :members: .. 
autofunction:: skl2onnx.common._topology.convert_topology diff --git a/docs/exts/sphinx_skl2onnx_extension.py b/docs/exts/sphinx_skl2onnx_extension.py index f7cf24cae..d05f97110 100644 --- a/docs/exts/sphinx_skl2onnx_extension.py +++ b/docs/exts/sphinx_skl2onnx_extension.py @@ -174,6 +174,19 @@ def make_ref(name): rows.append("") for name in sorted_keys: + if name in { + "OnnxDecorrelateTransformer", + "OnnxGrowthCalculator", + "OnnxPredictableTSNE", + "OnnxSklearnLGBMClassifier", + "OnnxSklearnLGBMRegressor", + "OnnxSklearnXGBClassifier", + "OnnxSklearnXGBRegressor", + "OnnxSklearnPipeline", + "OnnxSklearnColumnTransformer", + "OnnxSklearnFeatureUnion", + }: + continue rows = [] cl = cls[name] rows.append(".. _l-sklops-{}:".format(cl.__name__)) diff --git a/requirements-dev.txt b/requirements-dev.txt index 0da5e225f..2cf99182f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,5 +1,6 @@ # tests black +jinja2 onnxruntime-extensions onnxscript pandas diff --git a/requirements.txt b/requirements.txt index cca532fb2..0954921e1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ onnx>=1.2.1 -scikit-learn>=0.19 +scikit-learn>=1.1 onnxconverter-common>=1.7.0 diff --git a/skl2onnx/__init__.py b/skl2onnx/__init__.py index f81feebc2..3db90b766 100644 --- a/skl2onnx/__init__.py +++ b/skl2onnx/__init__.py @@ -9,7 +9,7 @@ __producer_version__ = __version__ __domain__ = "ai.onnx" __model_version__ = 0 -__max_supported_opset__ = 19 # Converters are tested up to this version. +__max_supported_opset__ = 21 # Converters are tested up to this version. 
from .convert import convert_sklearn, to_onnx, wrap_as_onnx_mixin # noqa diff --git a/skl2onnx/algebra/_cache/__init__.py b/skl2onnx/algebra/_cache/__init__.py deleted file mode 100644 index 4d0f7c497..000000000 --- a/skl2onnx/algebra/_cache/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -import os - - -def cache_folder(): - return os.path.abspath(os.path.dirname(__file__)) diff --git a/skl2onnx/algebra/automation.py b/skl2onnx/algebra/automation.py index 187be25c5..a96c8a951 100644 --- a/skl2onnx/algebra/automation.py +++ b/skl2onnx/algebra/automation.py @@ -3,7 +3,6 @@ import textwrap import onnx import onnx.defs # noqa -from onnx.defs import OpSchema def _get_doc_template(): @@ -115,154 +114,6 @@ def get_domain_list(): ) -def get_rst_doc(op_name=None): - """ - Returns a documentation in RST format - for all :class:`OnnxOperator`. - - :param op_name: operator name of None for all - :return: string - - The function relies on module *jinja2* or replaces it - with a simple rendering if not present. 
- """ - if op_name is None: - schemas = onnx.defs.get_all_schemas_with_history() - elif isinstance(op_name, str): - schemas = [ - schema - for schema in onnx.defs.get_all_schemas_with_history() - if schema.name == op_name - ] - if len(schemas) > 1: - raise RuntimeError( - "Multiple operators have the same name '{}'.".format(op_name) - ) - elif not isinstance(op_name, list): - schemas = [op_name] - if len(schemas) == 0: - raise ValueError("Unable to find any operator with name '{}'.".format(op_name)) - - # from onnx.backend.sample.ops import collect_sample_implementations - # from onnx.backend.test.case import collect_snippets - # SNIPPETS = collect_snippets() - # SAMPLE_IMPLEMENTATIONS = collect_sample_implementations() - def format_name_with_domain(sch): - if sch.domain: - return "{} ({})".format(sch.name, sch.domain) - return sch.name - - def get_is_homogeneous(obj): - try: - return obj.is_homogeneous - except AttributeError: - try: - return obj.isHomogeneous - except AttributeError: - return False - - def format_option(obj): - opts = [] - if OpSchema.FormalParameterOption.Optional == obj.option: - opts.append("optional") - elif OpSchema.FormalParameterOption.Variadic == obj.option: - opts.append("variadic") - if get_is_homogeneous(obj): - opts.append("heterogeneous") - if opts: - return " (%s)" % ", ".join(opts) - return "" - - def getconstraint(const, ii): - if const.type_param_str: - name = const.type_param_str - else: - name = str(ii) - if const.allowed_type_strs: - name += " " + ", ".join(const.allowed_type_strs) - return name - - def getname(obj, i): - name = obj.name - if len(name) == 0: - return str(i) - return name - - def process_documentation(doc): - if doc is None: - doc = "" - doc = textwrap.dedent(doc) - main_docs_url = "https://github.com/onnx/onnx/blob/main/" - rep = { - "[the doc](IR.md)": "`ONNX <{0}docs/IR.md>`_", - "[the doc](Broadcasting.md)": ( - "`Broadcasting in ONNX <{0}docs/Broadcasting.md>`_" - ), - "<dl>": "", - "</dl>": "", - "<dt>": "* ", - "<dd>": " ", - "</dt>": "", - "</dd>": "", - "<tt>": "``", - "</tt>": "``", - "<br>": "\n", - } - for k, v in rep.items(): - doc = doc.replace(k, v.format(main_docs_url)) - move = 0 - lines = [] - for line in doc.split("\n"): - if line.startswith("```"): - if move > 0: - move -= 4 - lines.append("\n") - else: - lines.append("::\n") - move += 4 - elif move > 0: - lines.append(" " * move + line) - else: - lines.append(line) - return "\n".join(lines) - - def build_doc_url(sch): - doc_url = "https://github.com/onnx/onnx/blob/main/docs/Operators" - if "ml" in sch.domain: - doc_url += "-ml" - doc_url += ".md" - doc_url += "#" - if sch.domain not in (None, "", "ai.onnx"): - doc_url += sch.domain + "." - return doc_url - - def get_type_str(inou): - try: - return inou.type_str - except AttributeError: - return inou.typeStr - - fnwd = format_name_with_domain - tmpl = _template_operator - docs = tmpl.render( - schemas=schemas, - OpSchema=OpSchema, - len=len, - getattr=getattr, - sorted=sorted, - format_option=format_option, - getconstraint=getconstraint, - getname=getname, - enumerate=enumerate, - format_name_with_domain=fnwd, - process_documentation=process_documentation, - build_doc_url=build_doc_url, - str=str, - get_type_str=get_type_str, - ) - return docs - - def _get_doc_template_sklearn(): try: from jinja2 import Template diff --git a/skl2onnx/algebra/onnx_ops.py b/skl2onnx/algebra/onnx_ops.py index 4517e1963..fb270a7af 100644 --- a/skl2onnx/algebra/onnx_ops.py +++ b/skl2onnx/algebra/onnx_ops.py @@ -4,7 +4,6 @@ Place holder for all ONNX operators. """ import sys -import os import numpy as np try: @@ -14,8 +13,6 @@ import onnx from ..common.data_types import DataType from ..common._topology import Variable -from .automation import get_rst_doc -from ._cache import cache_folder def ClassFactory( @@ -169,7 +166,6 @@ def dynamic_class_creation(cache=False): `_. 
""" - cache_dir = cache_folder() res = {} for schema in onnx.defs.get_all_schemas_with_history(): if schema.support_level == schema.SupportType.EXPERIMENTAL: @@ -199,22 +195,8 @@ def _c(obj, label, i): outputs = [_c(o, "O", i) for i, o in enumerate(schema.outputs)] args = [p for p in schema.attributes] - if "_" in name: - class_name = "Onnx" + name - else: - class_name = "Onnx" + schema.name - - filename = os.path.join( - cache_dir, schema.name + "_" + str(schema.since_version) + ".rst" - ) - if not cache and os.path.exists(filename): - with open(filename, "r", encoding="utf-8") as f: - doc = f.read() - else: - doc = get_rst_doc(schema) - if cache: - with open(filename, "w", encoding="utf-8") as f: - f.write(doc) + class_name = "Onnx" + (name if "_" in name else schema.name) + doc = f"See `{name} `_." cl = ClassFactory( class_name, diff --git a/skl2onnx/common/_topology.py b/skl2onnx/common/_topology.py index 1be1db166..4518ac1aa 100644 --- a/skl2onnx/common/_topology.py +++ b/skl2onnx/common/_topology.py @@ -63,6 +63,7 @@ def _default_OPSET_TO_IR_VERSION(): 18: 8, 19: 9, 20: 9, + 21: 10, } diff --git a/skl2onnx/operator_converters/one_hot_encoder.py b/skl2onnx/operator_converters/one_hot_encoder.py index 95eb8b7b6..27b78b73c 100644 --- a/skl2onnx/operator_converters/one_hot_encoder.py +++ b/skl2onnx/operator_converters/one_hot_encoder.py @@ -45,13 +45,6 @@ def convert_sklearn_one_hot_encoder( enum_cats = [] index_inputs = 0 - to_drop = ohe_op._drop_idx_after_grouping - if to_drop is not None: - # raise NotImplementedError( - # f"The converter is not implemented when " - # f"_drop_idx_after_grouping is not None: {to_drop}." 
- # ) - pass for index, cats in enumerate(ohe_op.categories_): filtered_cats = ohe_op._compute_transformed_categories(index) diff --git a/tests/test_algebra_onnx_doc.py b/tests/test_algebra_onnx_doc.py index da70a236c..25f3e821d 100644 --- a/tests/test_algebra_onnx_doc.py +++ b/tests/test_algebra_onnx_doc.py @@ -6,7 +6,7 @@ from numpy.testing import assert_almost_equal import onnx from skl2onnx.algebra.onnx_ops import dynamic_class_creation -from skl2onnx.algebra.automation import get_rst_doc_sklearn, get_rst_doc +from skl2onnx.algebra.automation import get_rst_doc_sklearn from test_utils import TARGET_OPSET @@ -43,27 +43,16 @@ def test_transpose2(self): res = self.predict_with_onnxruntime(model_def, X) assert_almost_equal(res["Y"], X) - @unittest.skipIf( - sys.platform.startswith("win"), reason="onnx schema are incorrect on Windows" - ) - @unittest.skipIf(TARGET_OPSET <= 20, reason="not available") - def test_doc_onnx(self): - rst = get_rst_doc() - assert "**Summary**" in rst - @unittest.skipIf( sys.platform.startswith("win"), reason="onnx schema are incorrect on Windows" ) @unittest.skipIf(TARGET_OPSET <= 20, reason="not available") def test_doc_sklearn(self): - try: - rst = get_rst_doc_sklearn() - assert ".. _l-sklops-OnnxSklearnBernoulliNB:" in rst - except KeyError as e: - assert "SklearnGaussianProcessRegressor" in str( - e - ) or "SklearnGaussianProcessClassifier" in str(e) + rst = get_rst_doc_sklearn() + assert ( + ".. 
_l-sklops-OnnxSklearnBernoulliNB:" in rst + ), f"Unable to find a substring in {rst}" if __name__ == "__main__": - unittest.main() + unittest.main(verbosity=2) diff --git a/tests/test_sklearn_one_hot_encoder_converter.py b/tests/test_sklearn_one_hot_encoder_converter.py index ec705fb9d..6696cf888 100644 --- a/tests/test_sklearn_one_hot_encoder_converter.py +++ b/tests/test_sklearn_one_hot_encoder_converter.py @@ -472,6 +472,7 @@ def test_shape_inference_onnx(self): def test_shape_inference_onnxruntime(self): self._shape_inference("onnxruntime") + @unittest.skipIf(not skl12(), reason="sparse output not available") def test_min_frequency(self): data = pandas.DataFrame( [ diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py index bb9385e7e..3e9f67d09 100644 --- a/tests/test_utils/__init__.py +++ b/tests/test_utils/__init__.py @@ -60,6 +60,8 @@ def create_tensor(N, C, H=None, W=None): def _get_ir_version(opv): + if opv >= 21: + return 10 if opv >= 19: return 9 if opv >= 15: @@ -83,7 +85,11 @@ def max_onnxruntime_opset(): `_. """ vi = pv.Version(ort_version.split("+")[0]) - if vi >= pv.Version("1.16.0"): + if vi >= pv.Version("1.18.0"): + return 21 + if vi >= pv.Version("1.17.0"): + return 20 + if vi >= pv.Version("1.15.0"): return 19 if vi >= pv.Version("1.14.0"): return 18 @@ -120,6 +126,7 @@ def max_onnxruntime_opset(): ) ) +# opset-ml == 4 still not implemented in onnxruntime value_ml = 3 if TARGET_OPSET <= 16: # TreeEnsemble* for opset-ml == 3 is implemented in onnxruntime==1.12.0