Skip to content

Add a lazy_import utility function (part one of our lazy import strategy) #330

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 125 additions & 1 deletion redisvl/utils/utils.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
import asyncio
import importlib
import inspect
import json
import logging
import sys
import warnings
from contextlib import contextmanager
from enum import Enum
from functools import wraps
from time import time
from typing import Any, Callable, Coroutine, Dict, Optional, Sequence
from typing import Any, Callable, Coroutine, Dict, Optional, Sequence, TypeVar, cast
from warnings import warn

from pydantic import BaseModel
from redis import Redis
from ulid import ULID

T = TypeVar("T")


def create_ulid() -> str:
"""Generate a unique identifier to group related Redis documents."""
Expand Down Expand Up @@ -233,3 +237,123 @@ def scan_by_pattern(
from redisvl.redis.utils import convert_bytes

return convert_bytes(list(redis_client.scan_iter(match=pattern)))


class _LazyModule:
    """Proxy that postpones importing ``module_path`` until it is first used.

    The dotted path may name a module (``"numpy"``), a submodule
    (``"os.path"``) or any attribute reachable from a module
    (``"math.sqrt"``). The import happens on the first attribute access or
    call, and the resolved object is cached afterwards. A failed resolution
    is not cached, so the next use tries again.
    """

    # Attributes owned by the proxy itself. They are never forwarded to the
    # target; see the guard at the top of ``__getattr__``.
    _OWN_ATTRS = frozenset(
        {"_module_path", "_parts", "_module", "_loaded", "_import_error", "_missing"}
    )

    def __init__(self, module_path: str):
        self._module_path = module_path
        self._parts = module_path.split(".")
        self._module = None
        # Separate flag so that a target whose value is legitimately ``None``
        # is not mistaken for "not resolved yet".
        self._loaded = False
        self._import_error = None  # ImportError raised by the last attempt
        self._missing = None  # (owner_path, attribute) that could not be found

    def _import_module(self) -> Any:
        """Resolve the dotted path on first use.

        Returns:
            The resolved object, or ``None`` when resolution failed. On
            failure either ``_import_error`` or ``_missing`` describes why.
        """
        if self._loaded:
            return self._module

        # Drop diagnostics from an earlier failed attempt so that a stale
        # error is never reported for the current one.
        self._import_error = None
        self._missing = None

        try:
            target = importlib.import_module(self._parts[0])
        except ImportError as exc:
            self._import_error = exc
            return None

        resolved = self._parts[0]
        for part in self._parts[1:]:
            qualified = f"{resolved}.{part}"
            try:
                target = getattr(target, part)
            except AttributeError:
                # A package does not necessarily import its submodules, so a
                # failed getattr on a package may just mean "not imported yet".
                if not hasattr(target, "__path__"):
                    self._missing = (resolved, part)
                    return None
                try:
                    target = importlib.import_module(qualified)
                except ImportError as exc:
                    if isinstance(exc, ModuleNotFoundError) and exc.name == qualified:
                        # The submodule itself does not exist.
                        self._missing = (resolved, part)
                    else:
                        # The submodule exists but one of its own imports failed.
                        self._import_error = exc
                    return None
            resolved = qualified

        self._module = target
        self._loaded = True
        return target

    def _resolve(self, missing_error: type, prefix: str = "") -> Any:
        """Return the resolved target or raise a descriptive error.

        Args:
            missing_error: Exception class raised when a path segment after
                the top-level module cannot be found.
            prefix: Text placed before the "has no attribute" detail.

        Raises:
            ImportError: If importing a module on the path failed.
            missing_error: If a path segment does not exist.
        """
        self._import_module()
        if self._loaded:
            return self._module

        if self._import_error is not None:
            raise ImportError(
                f"Failed to lazily import {self._module_path}: {self._import_error}"
            ) from self._import_error

        owner, attr = self._missing
        raise missing_error(f"{prefix}module '{owner}' has no attribute '{attr}'")

    def __getattr__(self, name: str) -> Any:
        # __getattr__ only runs when normal lookup fails. Being asked for one
        # of our own attributes therefore means the instance was created
        # without __init__ (e.g. by copy/pickle probing); forwarding would
        # recurse forever.
        if name in _LazyModule._OWN_ATTRS:
            raise AttributeError(name)

        target = self._resolve(AttributeError)
        try:
            return getattr(target, name)
        except AttributeError:
            # Use the "module '...' has no attribute" wording that a real
            # module produces, so callers see one consistent message.
            raise AttributeError(
                f"module '{self._module_path}' has no attribute '{name}'"
            ) from None

    def __call__(self, *args: Any, **kwargs: Any) -> Any:
        target = self._resolve(
            ImportError, prefix=f"Failed to find {self._module_path}: "
        )
        if callable(target):
            return target(*args, **kwargs)
        raise TypeError(f"{self._module_path} is not callable")


def lazy_import(module_path: str) -> Any:
    """
    Lazily import a module or object from a module only when it's actually used.

    This function helps reduce startup time and avoid unnecessary dependencies
    by only importing modules when they are actually needed.

    Args:
        module_path (str): The import path, e.g., "numpy" or "numpy.array"

    Returns:
        Any: The module itself when ``module_path`` is a plain module name
            that is already present in ``sys.modules``; otherwise a proxy
            that performs the import on first attribute access or call.

    Raises:
        ImportError: On first use of the proxy, if a module on the path
            cannot be imported, or (when the proxy is called) if an attribute
            on the path does not exist.
        AttributeError: On attribute access through the proxy, if the
            attribute or a segment of the path does not exist.
        TypeError: If the proxy is called but the resolved object is not
            callable.

    Examples:
        >>> np = lazy_import("numpy")
        >>> # numpy is not imported yet
        >>> array = np.array([1, 2, 3])  # numpy is imported here

        >>> array_func = lazy_import("numpy.array")
        >>> # numpy is not imported yet
        >>> arr = array_func([1, 2, 3])  # numpy is imported here
    """
    # Only a bare module name can be short-circuited; a dotted path still
    # needs the proxy to walk its attribute segments.
    if "." not in module_path and module_path in sys.modules:
        return sys.modules[module_path]

    return _LazyModule(module_path)
128 changes: 128 additions & 0 deletions tests/unit/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
denorm_cosine_distance,
deprecated_argument,
deprecated_function,
lazy_import,
norm_cosine_distance,
)

Expand Down Expand Up @@ -518,3 +519,130 @@ def test_logging_configuration_not_overridden(self):
assert (
has_date_pre == has_date_post
), f"Date format changed: was present before: {has_date_pre}, present after: {has_date_post}"


class TestLazyImport:
    """Unit tests for ``lazy_import``.

    Tests that need a module to be absent from ``sys.modules`` remove it via
    the ``monkeypatch`` fixture, so the original entry is restored on
    teardown and later tests keep seeing the same module objects.
    """

    def test_import_standard_library(self, monkeypatch):
        """Test lazy importing of a standard library module"""
        # Remove the module from sys.modules for the duration of this test
        monkeypatch.delitem(sys.modules, "json", raising=False)

        # Lazy import the module
        json = lazy_import("json")

        # Verify the module is not imported yet
        assert "json" not in sys.modules

        # Use the module, which should trigger the import
        result = json.dumps({"key": "value"})

        # Verify the module is now imported
        assert "json" in sys.modules
        assert result == '{"key": "value"}'

    def test_import_already_imported_module(self):
        """Test lazy importing of an already imported module"""
        # Make sure the module is imported
        import math

        assert "math" in sys.modules

        # Lazy import the module
        math_lazy = lazy_import("math")

        # Since the module is already imported, it should be returned directly
        assert math_lazy is sys.modules["math"]

        # Use the module
        assert math_lazy.sqrt(4) == 2.0

    def test_import_submodule(self, monkeypatch):
        """Test lazy importing of a submodule"""
        # Remove the modules from sys.modules for the duration of this test
        monkeypatch.delitem(sys.modules, "os.path", raising=False)
        monkeypatch.delitem(sys.modules, "os", raising=False)

        # Lazy import the submodule
        path = lazy_import("os.path")

        # Verify the module is not imported yet
        assert "os" not in sys.modules

        # Use the submodule, which should trigger the import
        result = path.join("dir", "file.txt")

        # Verify the module is now imported
        assert "os" in sys.modules
        assert (
            result == "dir/file.txt" or result == "dir\\file.txt"
        )  # Handle Windows paths

    def test_import_function(self, monkeypatch):
        """Test lazy importing of a function"""
        # Remove the module from sys.modules for the duration of this test
        monkeypatch.delitem(sys.modules, "math", raising=False)

        # Lazy import the function
        sqrt = lazy_import("math.sqrt")

        # Verify the module is not imported yet
        assert "math" not in sys.modules

        # Use the function, which should trigger the import
        result = sqrt(4)

        # Verify the module is now imported
        assert "math" in sys.modules
        assert result == 2.0

    def test_import_nonexistent_module(self):
        """Test lazy importing of a nonexistent module"""
        # Lazy import a nonexistent module
        nonexistent = lazy_import("nonexistent_module_xyz")

        # Accessing an attribute should raise ImportError
        with pytest.raises(ImportError) as excinfo:
            nonexistent.some_attribute

        assert "Failed to lazily import nonexistent_module_xyz" in str(excinfo.value)

    def test_import_nonexistent_attribute(self):
        """Test lazy importing of a nonexistent attribute"""
        # Lazy import a nonexistent attribute
        nonexistent_attr = lazy_import("math.nonexistent_attribute")

        # Calling the proxy should raise ImportError
        with pytest.raises(ImportError) as excinfo:
            nonexistent_attr()

        assert "module 'math' has no attribute 'nonexistent_attribute'" in str(
            excinfo.value
        )

    def test_import_noncallable(self):
        """Test calling a non-callable lazy imported object"""
        # Lazy import a non-callable attribute
        pi = lazy_import("math.pi")

        # Calling it should raise TypeError
        with pytest.raises(TypeError) as excinfo:
            pi()

        assert "math.pi is not callable" in str(excinfo.value)

    def test_attribute_error(self):
        """Test accessing a nonexistent attribute on a lazy imported module"""
        # Lazy import a module. When "math" is already in sys.modules,
        # lazy_import returns the real module rather than a proxy.
        math = lazy_import("math")

        # Accessing a nonexistent attribute should raise AttributeError
        with pytest.raises(AttributeError) as excinfo:
            math.nonexistent_attribute

        assert "module 'math' has no attribute 'nonexistent_attribute'" in str(
            excinfo.value
        )