diff --git a/CHANGES.rst b/CHANGES.rst index 0b19deecb..9b7e451c1 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -20,6 +20,10 @@ extension was created from a manifest registered with a uri that does not match the id in the manifest [#1785] +- Allow converters to provide types as strings that can + resolve to public classes (even if the class is implemented + in a private module). [#1654] + 3.2.0 (2024-04-05) ------------------ diff --git a/asdf/_tests/test_extension.py b/asdf/_tests/test_extension.py index ec0280cfb..788451918 100644 --- a/asdf/_tests/test_extension.py +++ b/asdf/_tests/test_extension.py @@ -1,4 +1,5 @@ import fractions +import sys import pytest from packaging.specifiers import SpecifierSet @@ -18,6 +19,7 @@ Validator, get_cached_extension_manager, ) +from asdf.extension._manager import _resolve_type from asdf.testing.helpers import roundtrip_object @@ -982,3 +984,66 @@ def from_yaml_tree(self, *args): fn = tmp_path / "foo.asdf" with pytest.warns(AsdfManifestURIMismatchWarning): af.write_to(fn) + + +def test_resolve_type_not_imported(): + path = "mailbox.Mailbox" + + if "mailbox" in sys.modules: + del sys.modules["mailbox"] + + assert _resolve_type(path) is None + + import mailbox + + assert _resolve_type(path) is mailbox.Mailbox + + +@pytest.mark.parametrize( + "path, obj", (("sys", sys), ("asdf.AsdfFile", AsdfFile), ("asdf.Missing", None), ("not_a_module", None)) +) +def test_resolve_type(path, obj): + assert _resolve_type(path) is obj + + +def test_extension_converter_by_class_path(): + class MailboxConverter: + tags = ["asdf://example.com/tags/mailbox-1.0.0"] + types = ["mailbox.Mailbox"] + + def to_yaml_tree(self, obj, tag, ctx): + return {} + + def from_yaml_tree(self, node, tag, ctx): + return None + + class MailboxExtension: + tags = MailboxConverter.tags + converters = [MailboxConverter()] + extension_uri = "asdf://example.com/extensions/mailbox-1.0.0" + + # grab the type so we can use it for extension_manager.get_converter_for_type + import mailbox + + typ = mailbox.Mailbox + del sys.modules["mailbox"], mailbox + + with config_context() as cfg: + cfg.add_extension(MailboxExtension()) + extension_manager = AsdfFile().extension_manager + + # make sure that registering the extension did not load the module + assert "mailbox" not in sys.modules + + # as the module hasn't been loaded, the converter shouldn't be found + with pytest.raises(KeyError, match="No support available for Python type 'mailbox.Mailbox'"): + extension_manager.get_converter_for_type(typ) + + # make sure inspecting the type didn't import the module + assert "mailbox" not in sys.modules + + # finally, import the module and check that the converter can now be found + import mailbox + + converter = extension_manager.get_converter_for_type(mailbox.Mailbox) + assert isinstance(converter.delegate, MailboxConverter) diff --git a/asdf/extension/_manager.py b/asdf/extension/_manager.py index 8094ef701..cb6a10d2c 100644 --- a/asdf/extension/_manager.py +++ b/asdf/extension/_manager.py @@ -1,3 +1,4 @@ +import sys from functools import lru_cache from asdf.tagged import Tagged @@ -6,6 +7,42 @@ from ._extension import ExtensionProxy +def _resolve_type(path): + """ + Convert a class path (like the string "asdf.AsdfFile") to a + class (``asdf.AsdfFile``) only if the module implementing the + class has already been imported. + + Parameters + ---------- + + path : str + Path/name of class (for example, "asdf.AsdfFile") + + Returns + ------- + + typ : class or None + The class (if it's already been imported) or None + """ + if "." not in path: + # check if this path is a module + if path in sys.modules: + return sys.modules[path] + return None + # this type is part of a module + module_name, type_name = path.rsplit(".", maxsplit=1) + # if the module is not imported, don't index it + if module_name not in sys.modules: + return None + module = sys.modules[module_name] + if not hasattr(module, type_name): + # the imported module does not have this class, perhaps + # it is dynamically created so do not index it yet + return None + return getattr(module, type_name) + + class ExtensionManager: """ Wraps a list of extensions and indexes their converters @@ -23,9 +60,37 @@ def __init__(self, extensions): self._tag_defs_by_tag = {} self._converters_by_tag = {} - # This dict has both str and type keys: - self._converters_by_type = {} + # To optimize performance converters can be registered using either: + # - the class/type they convert + # - the name/path (string) of the class/type they convert + # This allows the registration to continue without importing + # every module for every extension (which would be needed to turn + # the class paths into proper classes). Using class paths can be + # complicated by packages that have private implementations of + # classes that are exposed at a different 'public' location. + # These private classes may change between minor versions + # and would break converters that are registered using the private + # class path. However, often libraries do not modify the module + # of the 'public' class (so inspecting the class path returns + # the private class path). One example of this in asdf is + # Converter (exposed as ``asdf.extension.Converter`` but with + # a class path of ``asdf.extension._converter.Converter``). + # To allow converters to be registered with the public location + # we will need to attempt to import the public class path + # and then register the private class path after the class is + # imported. We don't want to do this unnecessarily and since + # class instances do not contain the public class path + # we adopt a strategy of checking class paths and only + # registering those that have already been imported. Thiss + # is ok because asdf will only use the converter type + # when attempting to serialize an object in memory (so the + # public class path will already be imported at the time + # the converter is needed). + + # first we store the converters in the order they are discovered + # the key here can either be a class path (str) or class (type) + converters_by_type = {} validators = set() for extension in self._extensions: @@ -37,17 +102,27 @@ def __init__(self, extensions): if tag not in self._converters_by_tag: self._converters_by_tag[tag] = converter for typ in converter.types: - if isinstance(typ, str): - if typ not in self._converters_by_type: - self._converters_by_type[typ] = converter - else: - type_class_name = get_class_name(typ, instance=False) - if typ not in self._converters_by_type and type_class_name not in self._converters_by_type: - self._converters_by_type[typ] = converter - self._converters_by_type[type_class_name] = converter + if typ not in converters_by_type: + converters_by_type[typ] = converter validators.update(extension.validators) + self._converters_by_class_path = {} + self._converters_by_type = {} + + for type_or_path, converter in converters_by_type.items(): + if isinstance(type_or_path, str): + path = type_or_path + typ = _resolve_type(path) + if typ is None: + if path not in self._converters_by_class_path: + self._converters_by_class_path[path] = converter + continue + else: + typ = type_or_path + if typ not in self._converters_by_type: + self._converters_by_type[typ] = converter + self._validator_manager = _get_cached_validator_manager(tuple(validators)) @property @@ -90,7 +165,10 @@ def handles_type(self, typ): ------- bool """ - return typ in self._converters_by_type or get_class_name(typ, instance=False) in self._converters_by_type + if typ in self._converters_by_type: + return True + self._index_converters() + return typ in self._converters_by_type def handles_tag_definition(self, tag): """ @@ -172,18 +250,32 @@ def get_converter_for_type(self, typ): KeyError Unrecognized type. """ + if typ not in self._converters_by_type: + self._index_converters() try: return self._converters_by_type[typ] except KeyError: - class_name = get_class_name(typ, instance=False) - try: - return self._converters_by_type[class_name] - except KeyError: - msg = ( - f"No support available for Python type '{get_class_name(typ, instance=False)}'. " - "You may need to install or enable an extension." - ) - raise KeyError(msg) from None + msg = ( + f"No support available for Python type '{get_class_name(typ, instance=False)}'. " + "You may need to install or enable an extension." + ) + raise KeyError(msg) from None + + def _index_converters(self): + """ + Search _converters_by_class_path for paths (strings) that + refer to classes that are currently imported. For imported + classes, add them to _converters_by_class (if the class + doesn't already have a converter). + """ + # search class paths to find ones that are imported + for class_path in list(self._converters_by_class_path): + typ = _resolve_type(class_path) + if typ is None: + continue + if typ not in self._converters_by_type: + self._converters_by_type[typ] = self._converters_by_class_path[class_path] + del self._converters_by_class_path[class_path] @property def validator_manager(self): diff --git a/docs/asdf/extending/converters.rst b/docs/asdf/extending/converters.rst index 9ac8c94e7..c2bc60b57 100644 --- a/docs/asdf/extending/converters.rst +++ b/docs/asdf/extending/converters.rst @@ -26,12 +26,12 @@ characters up to a ``/``, or ``**``, which matches any sequence of characters. The `~asdf.util.uri_match` method can be used to test URI patterns. `Converter.types` - a list of Python types or fully-qualified Python type names handled -by the converter. Note that a string name must reflect the actual location of the -class's implementation and not just a module where it is imported for convenience. -For example, if class ``Foo`` is implemented in ``example_package.foo.Foo`` but -imported as ``example_package.Foo`` for convenience, it is the former name that -must be used. The `~asdf.util.get_class_name` method will return the name that -`asdf` expects. +by the converter. For strings, the private or public path can be used. For example, +if class ``Foo`` is implemented in ``example_package.foo.Foo`` but imported +as ``example_package.Foo`` for convenience either ``example_package.foo.Foo`` +or ``example_package.Foo`` can be used. As most libraries do not consider moving +where a class is implemented it is preferred to use the "public" location +where the class is imported (in this example ``example_package.Foo``). The string type name is recommended over a type object for performance reasons, see :ref:`extending_converters_performance`.