From 5b200af3bacacc92112a9ad8966ef2ac0f3bd820 Mon Sep 17 00:00:00 2001 From: Sachaa-Thanasius Date: Tue, 3 Sep 2024 01:39:35 +0530 Subject: [PATCH] Comment formatting within DeferredImportKey. Also improve documentation in README for examples and use cases. - Also, find another sporadic race condition! --- README.rst | 52 +++++++++++++++++++---------------- src/deferred/_core.py | 32 +++++++++++----------- tests/test_deferred.py | 61 +++++++++++++++++++++++++++++++++++++----- 3 files changed, 100 insertions(+), 45 deletions(-) diff --git a/README.rst b/README.rst index 7c98cc1..f4521c5 100644 --- a/README.rst +++ b/README.rst @@ -26,8 +26,25 @@ This can be installed via pip:: It can also easily be vendored, as it has zero dependencies and is less than 1,000 lines of code. +Usage +===== + +Setup +----- + +``deferred`` hooks into the Python import system with a path hook. That path hook needs to be registered before code using ``defer_imports_until_use`` is executed. To do that, include the following somewhere such that it will be executed before your code: + +.. code:: python + + import deferred + + deferred.install_defer_import_hook() + + Example -======= +------- + +Assuming the path hook has been registered, you can write something like this: .. code:: python @@ -41,40 +58,29 @@ Example # this import cost can be avoided entirely by making sure all annotations are strings. -Setup -===== - -``deferred`` hooks into the Python import system with a path hook. That path hook needs to be registered before code using ``defer_imports_until_use`` is executed. To do that, include the following somewhere such that it will be executed before your code: +Use Cases +--------- -.. code:: python - - import deferred - - deferred.install_defer_import_hook() - - -Documentation -============= - -See this README as well as docstrings and comments in the code. +- If imports are necessary to get symbols that are only used within annotations, but those would cause import chains. The current workaround for this is to perform the problematic imports within ``if typing.TYPE_CHECKING: ...`` blocks and then stringify the fake-imported symbols to prevent NameErrors at runtime from the symbols not existing; however, resulting annotations are difficult to introspect with standard library introspection tools, since they assume the symbols exist. With ``deferred``, however, those imports can be deferred, annotations can be stringified (or deferred under PEP 649 semantics), and the deferred imports would only occur when the annotations are introspected/evaluated for the sake of making the contained symbols exist at runtime, thus making the imports not circular and almost zero cost. +- If imports are expensive but only necessary for certain code paths that won't always be hit, e.g. in subcommands in CLI tools. Features --------- +======== -- Python implementation–agnostic, in theory +- Python implementation–agnostic, in theory. - - The main dependency is on ``locals()`` at module scope to maintain its current API: specifically, that its return value will be a read-through, *write-through*, dict-like view of the module locals. + - The library mainly depends on ``locals()`` at module scope to maintain its current API: specifically, that its return value will be a read-through, *write-through*, dict-like view of the module locals, and that a reference to that view can be passed around. - Supports all syntactically valid Python import statements. Caveats -------- +======== -- Doesn't support lazy importing in class or function scope -- Doesn't support wildcard imports -- (WIP) Has an initial setup cost that could be smaller. +- Doesn't support lazy importing in class or function scope. +- Doesn't support wildcard imports. +- (WIP) Has an initial setup cost that could be smaller. Benchmarks diff --git a/src/deferred/_core.py b/src/deferred/_core.py index 34c8f9e..759d96e 100644 --- a/src/deferred/_core.py +++ b/src/deferred/_core.py @@ -464,7 +464,8 @@ def __getattr__(self, name: str, /): elif name == self.defer_proxy_name.rpartition(".")[2]: sub_proxy.defer_proxy_sub = name else: - raise AttributeError(name) + msg = f"module {self.defer_proxy_name!r} has no attribute {name!r}" + raise AttributeError(msg) return sub_proxy @@ -475,7 +476,7 @@ class DeferredImportKey(str): When referenced, the key will replace itself in the namespace with the resolved import or the right name from it. """ - __slots__ = ("defer_key_str", "defer_key_proxy", "is_recursing", "_rlock") + __slots__ = ("defer_key_str", "defer_key_proxy", "is_resolving", "lock") def __new__(cls, key: str, proxy: DeferredImportProxy, /): return super().__new__(cls, key) @@ -483,9 +484,9 @@ def __new__(cls, key: str, proxy: DeferredImportProxy, /): def __init__(self, key: str, proxy: DeferredImportProxy, /) -> None: self.defer_key_str = str(key) self.defer_key_proxy = proxy - self.is_recursing = False - self._rlock = original_import.get()("threading").RLock() + self.is_resolving = False + self.lock = original_import.get()("threading").RLock() def __repr__(self) -> str: return f"" @@ -496,15 +497,12 @@ def __eq__(self, value: object, /) -> bool: if self.defer_key_str != value: return False - # NOTE: This RLock prevents a scenario where the proxy begins resolution in one thread, but before it completes - # resolution, a context switch occurs and another thread that tries to resolve the same proxy just gets - # the proxy back, since it hasn't been resolved and replaced yet. This also partially happens because - # is_recursing is a guard that is only intended for one thread, but other threads will see it without the - # RLock. - with self._rlock: - # This recursion guard allows self-referential imports within __init__.py files. - if not self.is_recursing: - self.is_recursing = True + # Only the first thread to grab the lock should resolve the deferred import. + with self.lock: + # Reentrant calls from the same thread shouldn't re-trigger the resolution. + # This can be caused by self-referential imports, e.g. within __init__.py files. + if not self.is_resolving: + self.is_resolving = True if not is_deferred.get(): self._resolve() @@ -526,19 +524,21 @@ def _resolve(self) -> None: module_vars = vars(module) for attr_key, attr_val in vars(proxy).items(): if isinstance(attr_val, DeferredImportProxy) and not hasattr(module, attr_key): - # NOTE: This could have used setattr() if pypy didn't normalize the attr name to a str, so we must - # resort to direct placement in the module's __dict__ to avoid that. + # This could have used setattr() if pypy didn't normalize the attr key type to str, so we resort to + # direct placement in the module's __dict__ to avoid that. module_vars[DeferredImportKey(attr_key, attr_val)] = attr_val + # Change the namespaces as well to make sure nested proxies are replaced in the right place. attr_val.defer_proxy_global_ns = attr_val.defer_proxy_local_ns = module_vars # Replace the proxy with the resolved module or module attribute in the relevant namespace. + # 1. Let the regular string key and the relevant namespace. key = self.defer_key_str namespace = proxy.defer_proxy_local_ns # 2. Replace the deferred version of the key to avoid it sticking around. - # NOTE: This is necessary to prevent recursive resolution for proxies, since __eq__ will be triggered again. + # This is_deferred usage is necessary to prevent recursive resolution, since __eq__ will be triggered again. _is_def_tok = is_deferred.set(True) try: namespace[key] = namespace.pop(key) diff --git a/tests/test_deferred.py b/tests/test_deferred.py index b88a62e..8c6fdef 100644 --- a/tests/test_deferred.py +++ b/tests/test_deferred.py @@ -824,16 +824,19 @@ def test_thread_safety(tmp_path: Path): import threading import time + _missing = type("Missing", (), {}) + class CapturingThread(threading.Thread): + """Thread subclass that captures a returned result or raised exception from the called target.""" + def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) - self.result = None - self.exc = None + self.result = _missing + self.exc = _missing def run(self) -> None: # pragma: no cover try: - if self._target is not None: # pyright: ignore - self.result = self._target(*self._args, **self._kwargs) # pyright: ignore + self.result = self._target(*self._args, **self._kwargs) # pyright: ignore except Exception as exc: # noqa: BLE001 self.exc = exc finally: @@ -852,8 +855,9 @@ def access_module_attr() -> object: for thread in threads: thread.join() + # FIXME: There's another race condition in here somehow. Hard to reproduce, so we'll handle it later. + assert thread.exc is _missing assert callable(thread.result) # pyright: ignore - assert thread.exc is None @pytest.mark.skip(reason="Leaking patch problem is currently out of scope.") @@ -883,7 +887,7 @@ def test_leaking_patch(tmp_path: Path): """, encoding="utf-8", ) - leaking_patch_pkg_path.joinpath("b.py").write_text('B = "original thing"') + leaking_patch_pkg_path.joinpath("b.py").write_text('B = "original thing"', encoding="utf-8") leaking_patch_pkg_path.joinpath("patching.py").write_text( """\ from unittest import mock @@ -913,3 +917,48 @@ def test_leaking_patch(tmp_path: Path): spec.loader.exec_module(module) exec(f"import {package_name}.patching; from {package_name}.b import B", vars(module)) assert module.B == "original thing" + + +@pytest.mark.skipif(sys.version_info < (3, 12), reason="type statements are only valid in 3.12+") +def test_type_statement_312(tmp_path: Path): + """Test that the loading still occurs when a type statement resolves in python 3.12+. + + The package has the following structure: + . + └───type_stmt_pkg + ├───__init__.py + └───exp.py + """ + + type_stmt_pkg_path = tmp_path / "type_stmt_pkg" + type_stmt_pkg_path.mkdir() + type_stmt_pkg_path.joinpath("__init__.py").write_text( + """\ +from deferred import defer_imports_until_use + +with defer_imports_until_use: + from .exp import Expensive + +type ManyExpensive = tuple[Expensive, ...] +""" + ) + type_stmt_pkg_path.joinpath("exp.py").write_text("class Expensive: ...", encoding="utf-8") + + package_name = "type_stmt_pkg" + package_init_path = str(type_stmt_pkg_path / "__init__.py") + + loader = DeferredFileLoader(package_name, package_init_path) + spec = importlib.util.spec_from_file_location( + package_name, + package_init_path, + loader=loader, + submodule_search_locations=[], # A signal that this is a package. + ) + assert spec + assert spec.loader + + module = importlib.util.module_from_spec(spec) + + with temp_cache_module(package_name, module): + spec.loader.exec_module(module) + assert str(module.ManyExpensive.__value__) == "tuple[type_stmt_pkg.exp.Expensive, ...]"