From 855ea624e6deab2c7b988a1f705f4f8761ffce12 Mon Sep 17 00:00:00 2001 From: Brett Date: Thu, 28 Sep 2023 10:54:38 -0400 Subject: [PATCH 1/5] index converters by type when no converter is found This changes how converters with class paths/names as types are handled by asdf. Prior to this commit, when converting a class instance (foo) of class (Foo) the class path of the instance was inspected with ``get_class_name`` and the ``_converters_by_type`` index was searched using the class name. This index contained both classes and class paths (as converters could use both/either) with classes being preferred. This creates issues when a module has a different class path from the typical 'public' path for the class as the module might move the private implementation without changing the 'public' path with the expectation that this will not break downstream code. For converters that use class paths these types of moves will break the converter. This commit changes the handling of class paths used for converters so that the 'public' path can be used. If a converter is not found for a class in ``_converters_by_type`` (which now only contains classes as keys) then ``_converters_by_class_path`` is indexed by checking what classes referred to by the paths are already imported (to avoid importing every class supported by every extension). If the class is imported it is added to ``_converters_by_type`` and removed from ``_converters_by_class_path``. --- asdf/extension/_manager.py | 97 ++++++++++++++++++++++++++++++++------ 1 file changed, 82 insertions(+), 15 deletions(-) diff --git a/asdf/extension/_manager.py b/asdf/extension/_manager.py index 8094ef701..7acc843c6 100644 --- a/asdf/extension/_manager.py +++ b/asdf/extension/_manager.py @@ -1,3 +1,4 @@ +import sys from functools import lru_cache from asdf.tagged import Tagged @@ -23,8 +24,35 @@ def __init__(self, extensions): self._tag_defs_by_tag = {} self._converters_by_tag = {} - # This dict has both str and type keys: + + # To optimize performance converters can be registered using either: + # - the class/type they convert + # - the name/path (string) of the class/type they convert + # This allows the registration to continue without importing + # every module for every extension (which would be needed to turn + # the class paths into proper classes). Using class paths can be + # complicated by packages that have private implementations of + # classes that are exposed at a different 'public' location. + # These private classes are may change between minor versions + # and would break converters that are registered using the private + # class path. However, often libraries to not modify the module + # of the 'public' class (so inspecting the class path returns + # the private class path). One example of this in asdf is + # Converter (exposed as ``asdf.extension.Converter`` but with + # a class path of ``asdf.extension._converter.Converter``). + # To allow converters to be registered with the public location + # we will need to attempt to import the public class path + # and then register the private class path after the class is + # imported. We don't want to do this unnecessarily and since + # class instances do not contain the public class path + # we adopt a strategy of checking class paths and only + # registering those that have already been imported. Thiss + # is ok because asdf will only use the converter type + # when attempting to serialize an object in memory (so the + # public class path will already be imported at the time + # the converter is needed). self._converters_by_type = {} + self._converters_by_class_path = {} validators = set() @@ -38,13 +66,16 @@ def __init__(self, extensions): self._converters_by_tag[tag] = converter for typ in converter.types: if isinstance(typ, str): - if typ not in self._converters_by_type: - self._converters_by_type[typ] = converter + if typ not in self._converters_by_class_path: + self._converters_by_class_path[typ] = converter else: type_class_name = get_class_name(typ, instance=False) - if typ not in self._converters_by_type and type_class_name not in self._converters_by_type: + if ( + typ not in self._converters_by_type + and type_class_name not in self._converters_by_class_path + ): self._converters_by_type[typ] = converter - self._converters_by_type[type_class_name] = converter + self._converters_by_class_path[type_class_name] = converter validators.update(extension.validators) @@ -90,7 +121,10 @@ def handles_type(self, typ): ------- bool """ - return typ in self._converters_by_type or get_class_name(typ, instance=False) in self._converters_by_type + if typ in self._converters_by_type: + return True + self._index_converters() + return typ in self._converters_by_type def handles_tag_definition(self, tag): """ @@ -172,18 +206,51 @@ def get_converter_for_type(self, typ): KeyError Unrecognized type. """ + if typ not in self._converters_by_type: + self._index_converters() try: return self._converters_by_type[typ] except KeyError: - class_name = get_class_name(typ, instance=False) - try: - return self._converters_by_type[class_name] - except KeyError: - msg = ( - f"No support available for Python type '{get_class_name(typ, instance=False)}'. " - "You may need to install or enable an extension." - ) - raise KeyError(msg) from None + msg = ( + f"No support available for Python type '{get_class_name(typ, instance=False)}'. " + "You may need to install or enable an extension." + ) + raise KeyError(msg) from None + + def _index_converters(self): + """ + Search _converters_by_class_path for paths (strings) that + refer to classes that are currently imported. For imported + classes, add them to _converters_by_class (if the class + doesn't already have a converter). + """ + # search class paths to find ones that are imported + for class_path in list(self._converters_by_class_path): + class_ = None + if "." not in class_path: + # this class path does not appear to include a module + if class_path in globals(): + class_ = globals()[class_path] + elif class_path in sys.modules: + class_ = sys.modules[class_path] + else: + continue + else: + # this class is part of a module + module_name, class_name = class_path.rsplit(".", maxsplit=1) + # if the module is not imported, don't index it + if module_name not in sys.modules: + continue + module = sys.modules[module_name] + if not hasattr(module, class_name): + # the imported module does not have this class, perhaps + # it is dynamically created so do not index it yet + continue + class_ = getattr(module, class_name) + if class_ is not None: + if class_ not in self._converters_by_type: + self._converters_by_type[class_] = self._converters_by_class_path[class_path] + del self._converters_by_class_path[class_path] @property def validator_manager(self): From 5a210941af7db77270bf848b7f62cad734dd53c4 Mon Sep 17 00:00:00 2001 From: Brett Date: Mon, 16 Oct 2023 16:11:55 -0400 Subject: [PATCH 2/5] update changelog --- CHANGES.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 0b19deecb..9b7e451c1 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -20,6 +20,10 @@ extension was created from a manifest registered with a uri that does not match the id in the manifest [#1785] +- Allow converters to provide types as strings that can + resolve to public classes (even if the class is implemented + in a private module). [#1654] + 3.2.0 (2024-04-05) ------------------ From 3c47c8dd033dcdeec7b9e3c608c430d133b91171 Mon Sep 17 00:00:00 2001 From: Brett Date: Mon, 16 Oct 2023 16:16:16 -0400 Subject: [PATCH 3/5] update docs describing new public class path for converter types --- docs/asdf/extending/converters.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/asdf/extending/converters.rst b/docs/asdf/extending/converters.rst index 9ac8c94e7..c2bc60b57 100644 --- a/docs/asdf/extending/converters.rst +++ b/docs/asdf/extending/converters.rst @@ -26,12 +26,12 @@ characters up to a ``/``, or ``**``, which matches any sequence of characters. The `~asdf.util.uri_match` method can be used to test URI patterns. `Converter.types` - a list of Python types or fully-qualified Python type names handled -by the converter. Note that a string name must reflect the actual location of the -class's implementation and not just a module where it is imported for convenience. -For example, if class ``Foo`` is implemented in ``example_package.foo.Foo`` but -imported as ``example_package.Foo`` for convenience, it is the former name that -must be used. The `~asdf.util.get_class_name` method will return the name that -`asdf` expects. +by the converter. For strings, the private or public path can be used. For example, +if class ``Foo`` is implemented in ``example_package.foo.Foo`` but imported +as ``example_package.Foo`` for convenience either ``example_package.foo.Foo`` +or ``example_package.Foo`` can be used. As most libraries do not consider moving +where a class is implemented it is preferred to use the "public" location +where the class is imported (in this example ``example_package.Foo``). The string type name is recommended over a type object for performance reasons, see :ref:`extending_converters_performance`. From 225305da73fa77a2d20bff3d5b3e572cbb1924e6 Mon Sep 17 00:00:00 2001 From: Brett Date: Wed, 22 Nov 2023 15:40:17 -0500 Subject: [PATCH 4/5] index converters after discovery this fixes an issue with extension order and class path. If extension 1 is found first, and registers a converter using a class path (with a module that hasn't been imported). Then, extension 2 registers the same class but uses the class (not the path). Prior to this commit (in this branch) asdf would use extension 2 for the type (because the class path registered with extension 1 was never indexed). With this commit all converter class paths/types are recorded and then indexed (in the order they're registered) fixing this issue. --- asdf/extension/_manager.py | 90 +++++++++++++++++++++----------------- 1 file changed, 50 insertions(+), 40 deletions(-) diff --git a/asdf/extension/_manager.py b/asdf/extension/_manager.py index 7acc843c6..634231b1e 100644 --- a/asdf/extension/_manager.py +++ b/asdf/extension/_manager.py @@ -7,6 +7,27 @@ from ._extension import ExtensionProxy +def _resolve_type(path): + if "." not in path: + # this path does not appear to include a module + if path in globals(): + return globals()[path] + elif path in sys.modules: + return sys.modules[path] + return None + # this type is part of a module + module_name, type_name = path.rsplit(".", maxsplit=1) + # if the module is not imported, don't index it + if module_name not in sys.modules: + return None + module = sys.modules[module_name] + if not hasattr(module, type_name): + # the imported module does not have this class, perhaps + # it is dynamically created so do not index it yet + return None + return getattr(module, type_name) + + class ExtensionManager: """ Wraps a list of extensions and indexes their converters @@ -33,9 +54,9 @@ def __init__(self, extensions): # the class paths into proper classes). Using class paths can be # complicated by packages that have private implementations of # classes that are exposed at a different 'public' location. - # These private classes are may change between minor versions + # These private classes may change between minor versions # and would break converters that are registered using the private - # class path. However, often libraries to not modify the module + # class path. However, often libraries do not modify the module # of the 'public' class (so inspecting the class path returns # the private class path). One example of this in asdf is # Converter (exposed as ``asdf.extension.Converter`` but with @@ -51,9 +72,10 @@ def __init__(self, extensions): # when attempting to serialize an object in memory (so the # public class path will already be imported at the time # the converter is needed). - self._converters_by_type = {} - self._converters_by_class_path = {} + # first we store the converters in the order they are discovered + # the key here can either be a class path (str) or class (type) + converters_by_type = {} validators = set() for extension in self._extensions: @@ -65,20 +87,27 @@ def __init__(self, extensions): if tag not in self._converters_by_tag: self._converters_by_tag[tag] = converter for typ in converter.types: - if isinstance(typ, str): - if typ not in self._converters_by_class_path: - self._converters_by_class_path[typ] = converter - else: - type_class_name = get_class_name(typ, instance=False) - if ( - typ not in self._converters_by_type - and type_class_name not in self._converters_by_class_path - ): - self._converters_by_type[typ] = converter - self._converters_by_class_path[type_class_name] = converter + if typ not in converters_by_type: + converters_by_type[typ] = converter validators.update(extension.validators) + self._converters_by_class_path = {} + self._converters_by_type = {} + + for type_or_path, converter in converters_by_type.items(): + if isinstance(type_or_path, str): + path = type_or_path + typ = _resolve_type(path) + if typ is None: + if path not in self._converters_by_class_path: + self._converters_by_class_path[path] = converter + continue + else: + typ = type_or_path + if typ not in self._converters_by_type: + self._converters_by_type[typ] = converter + self._validator_manager = _get_cached_validator_manager(tuple(validators)) @property @@ -226,31 +255,12 @@ def _index_converters(self): """ # search class paths to find ones that are imported for class_path in list(self._converters_by_class_path): - class_ = None - if "." not in class_path: - # this class path does not appear to include a module - if class_path in globals(): - class_ = globals()[class_path] - elif class_path in sys.modules: - class_ = sys.modules[class_path] - else: - continue - else: - # this class is part of a module - module_name, class_name = class_path.rsplit(".", maxsplit=1) - # if the module is not imported, don't index it - if module_name not in sys.modules: - continue - module = sys.modules[module_name] - if not hasattr(module, class_name): - # the imported module does not have this class, perhaps - # it is dynamically created so do not index it yet - continue - class_ = getattr(module, class_name) - if class_ is not None: - if class_ not in self._converters_by_type: - self._converters_by_type[class_] = self._converters_by_class_path[class_path] - del self._converters_by_class_path[class_path] + typ = _resolve_type(class_path) + if typ is None: + continue + if typ not in self._converters_by_type: + self._converters_by_type[typ] = self._converters_by_class_path[class_path] + del self._converters_by_class_path[class_path] @property def validator_manager(self): From 7bdf1a709c0ec2c1b79f479a3ef055e50e664024 Mon Sep 17 00:00:00 2001 From: Brett Date: Wed, 22 Nov 2023 15:56:23 -0500 Subject: [PATCH 5/5] add docs and unit test for _resolve_type --- asdf/_tests/test_extension.py | 65 +++++++++++++++++++++++++++++++++++ asdf/extension/_manager.py | 23 ++++++++++--- 2 files changed, 84 insertions(+), 4 deletions(-) diff --git a/asdf/_tests/test_extension.py b/asdf/_tests/test_extension.py index ec0280cfb..788451918 100644 --- a/asdf/_tests/test_extension.py +++ b/asdf/_tests/test_extension.py @@ -1,4 +1,5 @@ import fractions +import sys import pytest from packaging.specifiers import SpecifierSet @@ -18,6 +19,7 @@ Validator, get_cached_extension_manager, ) +from asdf.extension._manager import _resolve_type from asdf.testing.helpers import roundtrip_object @@ -982,3 +984,66 @@ def from_yaml_tree(self, *args): fn = tmp_path / "foo.asdf" with pytest.warns(AsdfManifestURIMismatchWarning): af.write_to(fn) + + +def test_resolve_type_not_imported(): + path = "mailbox.Mailbox" + + if "mailbox" in sys.modules: + del sys.modules["mailbox"] + + assert _resolve_type(path) is None + + import mailbox + + assert _resolve_type(path) is mailbox.Mailbox + + +@pytest.mark.parametrize( + "path, obj", (("sys", sys), ("asdf.AsdfFile", AsdfFile), ("asdf.Missing", None), ("not_a_module", None)) +) +def test_resolve_type(path, obj): + assert _resolve_type(path) is obj + + +def test_extension_converter_by_class_path(): + class MailboxConverter: + tags = ["asdf://example.com/tags/mailbox-1.0.0"] + types = ["mailbox.Mailbox"] + + def to_yaml_tree(self, obj, tag, ctx): + return {} + + def from_yaml_tree(self, node, tag, ctx): + return None + + class MailboxExtension: + tags = MailboxConverter.tags + converters = [MailboxConverter()] + extension_uri = "asdf://example.com/extensions/mailbox-1.0.0" + + # grab the type so we can use it for extension_manager.get_converter_for_type + import mailbox + + typ = mailbox.Mailbox + del sys.modules["mailbox"], mailbox + + with config_context() as cfg: + cfg.add_extension(MailboxExtension()) + extension_manager = AsdfFile().extension_manager + + # make sure that registering the extension did not load the module + assert "mailbox" not in sys.modules + + # as the module hasn't been loaded, the converter shouldn't be found + with pytest.raises(KeyError, match="No support available for Python type 'mailbox.Mailbox'"): + extension_manager.get_converter_for_type(typ) + + # make sure inspecting the type didn't import the module + assert "mailbox" not in sys.modules + + # finally, import the module and check that the converter can now be found + import mailbox + + converter = extension_manager.get_converter_for_type(mailbox.Mailbox) + assert isinstance(converter.delegate, MailboxConverter) diff --git a/asdf/extension/_manager.py b/asdf/extension/_manager.py index 634231b1e..cb6a10d2c 100644 --- a/asdf/extension/_manager.py +++ b/asdf/extension/_manager.py @@ -8,11 +8,26 @@ def _resolve_type(path): + """ + Convert a class path (like the string "asdf.AsdfFile") to a + class (``asdf.AsdfFile``) only if the module implementing the + class has already been imported. + + Parameters + ---------- + + path : str + Path/name of class (for example, "asdf.AsdfFile") + + Returns + ------- + + typ : class or None + The class (if it's already been imported) or None + """ if "." not in path: - # this path does not appear to include a module - if path in globals(): - return globals()[path] - elif path in sys.modules: + # check if this path is a module + if path in sys.modules: return sys.modules[path] return None # this type is part of a module