Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REFACTOR] Make allow_list / KnownList optional #3643

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/databricks/labs/ucx/contexts/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ def verify_progress_tracking(self) -> VerifyProgressTracking:

@cached_property
def pip_resolver(self) -> PythonLibraryResolver:
return PythonLibraryResolver(self.allow_list)
return PythonLibraryResolver(allow_list=self.allow_list)

@cached_property
def notebook_loader(self) -> NotebookLoader:
Expand Down Expand Up @@ -572,7 +572,7 @@ def allow_list(self) -> KnownList:

@cached_property
def file_resolver(self) -> ImportFileResolver:
return ImportFileResolver(self.file_loader, self.allow_list)
return ImportFileResolver(self.file_loader, allow_list=self.allow_list)

@cached_property
def dependency_resolver(self) -> DependencyResolver:
Expand Down
4 changes: 2 additions & 2 deletions src/databricks/labs/ucx/source_code/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,10 @@ def __repr__(self):

class ImportFileResolver(BaseImportResolver, BaseFileResolver):

def __init__(self, file_loader: FileLoader, allow_list: KnownList):
def __init__(self, file_loader: FileLoader, *, allow_list: KnownList | None = None):
super().__init__()
self._allow_list = allow_list
self._file_loader = file_loader
self._allow_list = allow_list or KnownList()

def resolve_file(self, path_lookup, path: Path) -> MaybeDependency:
absolute_path = path_lookup.resolve(path)
Expand Down
13 changes: 6 additions & 7 deletions src/databricks/labs/ucx/source_code/python_libraries.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,10 @@
from databricks.labs.blueprint.entrypoint import is_in_debug

from databricks.labs.ucx.framework.utils import run_command
from databricks.labs.ucx.source_code.graph import (
LibraryResolver,
DependencyProblem,
)
from databricks.labs.ucx.source_code.path_lookup import PathLookup
from databricks.labs.ucx.source_code.graph import DependencyProblem, LibraryResolver
from databricks.labs.ucx.source_code.known import KnownList
from databricks.labs.ucx.source_code.path_lookup import PathLookup


logger = logging.getLogger(__name__)

Expand All @@ -26,10 +24,11 @@ class PythonLibraryResolver(LibraryResolver):

def __init__(
self,
allow_list: KnownList,
*,
allow_list: KnownList | None = None,
runner: Callable[[str | list[str]], tuple[int, str, str]] = run_command,
) -> None:
self._allow_list = allow_list
self._allow_list = allow_list or KnownList()
self._runner = runner

def register_library(self, path_lookup: PathLookup, *libraries: str) -> list[DependencyProblem]:
Expand Down
9 changes: 4 additions & 5 deletions tests/integration/source_code/test_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ def module_compatibility(self, name: str) -> Compatibility:
return super().module_compatibility(name)

allow_list = TestKnownList()
library_resolver = PythonLibraryResolver(allow_list)
library_resolver = PythonLibraryResolver(allow_list=allow_list)
notebook_resolver = NotebookResolver(NotebookLoader())
import_resolver = ImportFileResolver(FileLoader(), allow_list)
import_resolver = ImportFileResolver(FileLoader(), allow_list=allow_list)
path_lookup = PathLookup.from_sys_path(Path(__file__).parent)
dependency_resolver = DependencyResolver(
library_resolver, notebook_resolver, import_resolver, import_resolver, path_lookup
Expand All @@ -41,10 +41,9 @@ def module_compatibility(self, name: str) -> Compatibility:


def test_graph_imports_dynamic_import():
allow_list = KnownList()
library_resolver = PythonLibraryResolver(allow_list)
library_resolver = PythonLibraryResolver()
notebook_resolver = NotebookResolver(NotebookLoader())
import_resolver = ImportFileResolver(FileLoader(), allow_list)
import_resolver = ImportFileResolver(FileLoader())
path_lookup = PathLookup.from_sys_path(Path(__file__).parent)
dependency_resolver = DependencyResolver(
library_resolver, notebook_resolver, import_resolver, import_resolver, path_lookup
Expand Down
6 changes: 2 additions & 4 deletions tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from databricks.labs.ucx.hive_metastore import TablesCrawler
from databricks.labs.ucx.hive_metastore.tables import FasterTableScanCrawler
from databricks.labs.ucx.source_code.graph import BaseNotebookResolver, DependencyResolver
from databricks.labs.ucx.source_code.known import KnownList
from databricks.labs.ucx.source_code.files import FileLoader, ImportFileResolver
from databricks.labs.ucx.source_code.notebooks.loaders import NotebookResolver, NotebookLoader
from databricks.labs.ucx.source_code.path_lookup import PathLookup
Expand Down Expand Up @@ -252,10 +251,9 @@ def mock_pip_install_always_successful(_) -> tuple[int, str, str]:
"""
return 0, "", ""

allow_list = KnownList()
library_resolver = PythonLibraryResolver(allow_list, mock_pip_install_always_successful)
library_resolver = PythonLibraryResolver(runner=mock_pip_install_always_successful)
notebook_resolver = NotebookResolver(NotebookLoader())
import_resolver = ImportFileResolver(FileLoader(), allow_list)
import_resolver = ImportFileResolver(FileLoader())
return DependencyResolver(library_resolver, notebook_resolver, import_resolver, import_resolver, mock_path_lookup)


Expand Down
15 changes: 6 additions & 9 deletions tests/unit/source_code/linters/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from databricks.labs.ucx.source_code.notebooks.loaders import NotebookResolver, NotebookLoader
from databricks.labs.ucx.source_code.linters.files import NotebookMigrator
from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver
from databricks.labs.ucx.source_code.known import KnownList

from databricks.sdk.service.workspace import Language

Expand Down Expand Up @@ -105,10 +104,9 @@ def local_code_linter(mock_path_lookup, migration_index):
notebook_loader = NotebookLoader()
file_loader = FileLoader()
folder_loader = FolderLoader(notebook_loader, file_loader)
allow_list = KnownList()
pip_resolver = PythonLibraryResolver(allow_list)
pip_resolver = PythonLibraryResolver()
session_state = CurrentSessionState()
import_file_resolver = ImportFileResolver(file_loader, allow_list)
import_file_resolver = ImportFileResolver(file_loader)
resolver = DependencyResolver(
pip_resolver,
NotebookResolver(NotebookLoader()),
Expand Down Expand Up @@ -146,7 +144,7 @@ def test_linter_lints_children_in_context(mock_path_lookup, local_code_linter) -


def test_triple_dot_import() -> None:
file_resolver = ImportFileResolver(FileLoader(), KnownList())
file_resolver = ImportFileResolver(FileLoader())
path_lookup = create_autospec(PathLookup)
path_lookup.cwd.as_posix.return_value = '/some/path/to/folder'
path_lookup.resolve.return_value = Path('/some/path/foo.py')
Expand All @@ -159,7 +157,7 @@ def test_triple_dot_import() -> None:


def test_single_dot_import() -> None:
file_resolver = ImportFileResolver(FileLoader(), KnownList())
file_resolver = ImportFileResolver(FileLoader())
path_lookup = create_autospec(PathLookup)
path_lookup.cwd.as_posix.return_value = '/some/path/to/folder'
path_lookup.resolve.return_value = Path('/some/path/to/folder/foo.py')
Expand Down Expand Up @@ -192,10 +190,9 @@ def test_known_issues(path: Path, migration_index) -> None:
folder_loader = FolderLoader(notebook_loader, file_loader)
path_lookup = PathLookup.from_sys_path(Path.cwd())
session_state = CurrentSessionState()
allow_list = KnownList()
notebook_resolver = NotebookResolver(NotebookLoader())
import_resolver = ImportFileResolver(file_loader, allow_list)
pip_resolver = PythonLibraryResolver(allow_list)
import_resolver = ImportFileResolver(file_loader)
pip_resolver = PythonLibraryResolver()
resolver = DependencyResolver(pip_resolver, notebook_resolver, import_resolver, import_resolver, path_lookup)
linter = LocalCodeLinter(
notebook_loader,
Expand Down
11 changes: 4 additions & 7 deletions tests/unit/source_code/notebooks/test_cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
)
from databricks.labs.ucx.source_code.path_lookup import PathLookup
from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver
from databricks.labs.ucx.source_code.known import KnownList


def test_basic_cell_extraction() -> None:
Expand Down Expand Up @@ -127,9 +126,8 @@ def test_pip_cell_build_dependency_graph_reports_unknown_library(mock_path_looku
dependency = Dependency(FileLoader(), Path("test"))
notebook_loader = NotebookLoader()
notebook_resolver = NotebookResolver(notebook_loader)
allow_list = KnownList()
pip_resolver = PythonLibraryResolver(allow_list)
file_resolver = ImportFileResolver(FileLoader(), allow_list)
pip_resolver = PythonLibraryResolver()
file_resolver = ImportFileResolver(FileLoader())
dependency_resolver = DependencyResolver(
pip_resolver, notebook_resolver, file_resolver, file_resolver, mock_path_lookup
)
Expand All @@ -149,10 +147,9 @@ def test_pip_cell_build_dependency_graph_resolves_installed_library(mock_path_lo
dependency = Dependency(FileLoader(), Path("test"))
notebook_loader = NotebookLoader()
notebook_resolver = NotebookResolver(notebook_loader)
allow_list = KnownList()
file_loader = FileLoader()
pip_resolver = PythonLibraryResolver(allow_list)
import_resolver = ImportFileResolver(file_loader, allow_list)
pip_resolver = PythonLibraryResolver()
import_resolver = ImportFileResolver(file_loader)
dependency_resolver = DependencyResolver(
pip_resolver, notebook_resolver, import_resolver, import_resolver, mock_path_lookup
)
Expand Down
20 changes: 8 additions & 12 deletions tests/unit/source_code/test_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,9 @@
NotebookResolver,
)
from databricks.labs.ucx.source_code.path_lookup import PathLookup
from databricks.labs.ucx.source_code.known import KnownList
from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver
from tests.unit import (
locate_site_packages,
)

from tests.unit import locate_site_packages
from tests.unit.conftest import MockPathLookup


Expand Down Expand Up @@ -141,8 +139,8 @@ def test_dependency_resolver_terminates_at_known_libraries(
site_packages_path = locate_site_packages()
lookup.append_path(site_packages_path)
file_loader = FileLoader()
import_resolver = ImportFileResolver(file_loader, KnownList())
library_resolver = PythonLibraryResolver(KnownList())
import_resolver = ImportFileResolver(file_loader)
library_resolver = PythonLibraryResolver()
resolver = DependencyResolver(library_resolver, mock_notebook_resolver, import_resolver, import_resolver, lookup)
maybe = resolver.build_local_file_dependency_graph(Path("import-site-package.py"), CurrentSessionState())
assert not maybe.failed
Expand Down Expand Up @@ -174,9 +172,8 @@ def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> No
return None

file_loader = FailingFileLoader()
allow_list = KnownList()
import_resolver = ImportFileResolver(file_loader, allow_list)
pip_resolver = PythonLibraryResolver(allow_list)
import_resolver = ImportFileResolver(file_loader)
pip_resolver = PythonLibraryResolver()
resolver = DependencyResolver(
pip_resolver, mock_notebook_resolver, import_resolver, import_resolver, mock_path_lookup
)
Expand All @@ -196,9 +193,8 @@ def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> So

notebook_loader = FailingNotebookLoader()
notebook_resolver = NotebookResolver(notebook_loader)
known_list = KnownList()
pip_resolver = PythonLibraryResolver(known_list)
import_resolver = ImportFileResolver(FileLoader(), known_list)
pip_resolver = PythonLibraryResolver()
import_resolver = ImportFileResolver(FileLoader())
resolver = DependencyResolver(pip_resolver, notebook_resolver, import_resolver, import_resolver, mock_path_lookup)
maybe = resolver.build_notebook_dependency_graph(Path("root5.py"), CurrentSessionState())
assert list(maybe.problems) == [
Expand Down
6 changes: 2 additions & 4 deletions tests/unit/source_code/test_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from databricks.labs.ucx.source_code.base import CurrentSessionState
from databricks.labs.ucx.source_code.directfs_access import DirectFsAccessCrawler
from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver
from databricks.labs.ucx.source_code.known import KnownList
from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import NotFound
from databricks.sdk.service import compute, jobs, pipelines
Expand Down Expand Up @@ -45,10 +44,9 @@ def test_job_problem_as_message() -> None:
@pytest.fixture
def dependency_resolver(mock_path_lookup) -> DependencyResolver:
file_loader = FileLoader()
allow_list = KnownList()
import_file_resolver = ImportFileResolver(file_loader, allow_list)
import_file_resolver = ImportFileResolver(file_loader)
resolver = DependencyResolver(
PythonLibraryResolver(allow_list),
PythonLibraryResolver(),
NotebookResolver(NotebookLoader()),
import_file_resolver,
import_file_resolver,
Expand Down
5 changes: 2 additions & 3 deletions tests/unit/source_code/test_notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from databricks.labs.ucx.source_code.base import CurrentSessionState
from databricks.labs.ucx.source_code.graph import DependencyGraph, DependencyResolver
from databricks.labs.ucx.source_code.known import KnownList
from databricks.labs.ucx.source_code.files import FileLoader, ImportFileResolver
from databricks.labs.ucx.source_code.linters.imports import DbutilsPyLinter
from databricks.labs.ucx.source_code.python.python_ast import MaybeTree
Expand Down Expand Up @@ -132,8 +131,8 @@ def test_notebook_generates_runnable_cells(source: tuple[str, Language, list[str
def dependency_resolver(mock_path_lookup) -> DependencyResolver:
notebook_loader = NotebookLoader()
notebook_resolver = NotebookResolver(notebook_loader)
library_resolver = PythonLibraryResolver(KnownList())
import_resolver = ImportFileResolver(FileLoader(), KnownList())
library_resolver = PythonLibraryResolver()
import_resolver = ImportFileResolver(FileLoader())
return DependencyResolver(library_resolver, notebook_resolver, import_resolver, import_resolver, mock_path_lookup)


Expand Down
21 changes: 8 additions & 13 deletions tests/unit/source_code/test_path_lookup_simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from databricks.labs.ucx.source_code.path_lookup import PathLookup
from databricks.labs.ucx.source_code.graph import SourceContainer, DependencyResolver
from databricks.labs.ucx.source_code.notebooks.loaders import NotebookResolver, NotebookLoader
from databricks.labs.ucx.source_code.known import KnownList
from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver
from tests.unit import (
_samples_path,
Expand Down Expand Up @@ -45,9 +44,8 @@ def test_locates_notebooks(source: list[str], expected: int, mock_path_lookup) -
file_loader = FileLoader()
notebook_loader = NotebookLoader()
notebook_resolver = NotebookResolver(notebook_loader)
allow_list = KnownList()
import_resolver = ImportFileResolver(file_loader, allow_list)
pip_resolver = PythonLibraryResolver(allow_list)
import_resolver = ImportFileResolver(file_loader)
pip_resolver = PythonLibraryResolver()
dependency_resolver = DependencyResolver(
pip_resolver, notebook_resolver, import_resolver, import_resolver, mock_path_lookup
)
Expand All @@ -70,13 +68,12 @@ def test_locates_files(source: list[str], expected: int) -> None:
elems = [_samples_path(SourceContainer)]
elems.extend(source)
file_path = Path(*elems)
allow_list = KnownList()
lookup = PathLookup.from_sys_path(Path.cwd())
file_loader = FileLoader()
notebook_loader = NotebookLoader()
notebook_resolver = NotebookResolver(notebook_loader)
import_resolver = ImportFileResolver(file_loader, allow_list)
pip_resolver = PythonLibraryResolver(allow_list)
import_resolver = ImportFileResolver(file_loader)
pip_resolver = PythonLibraryResolver()
resolver = DependencyResolver(pip_resolver, notebook_resolver, import_resolver, import_resolver, lookup)
maybe = resolver.build_local_file_dependency_graph(file_path, CurrentSessionState())
assert not maybe.problems
Expand Down Expand Up @@ -113,9 +110,8 @@ def test_locates_notebooks_with_absolute_path() -> None:
notebook_loader = NotebookLoader()
notebook_resolver = NotebookResolver(notebook_loader)
file_loader = FileLoader()
allow_list = KnownList()
import_resolver = ImportFileResolver(file_loader, allow_list)
pip_resolver = PythonLibraryResolver(allow_list)
import_resolver = ImportFileResolver(file_loader)
pip_resolver = PythonLibraryResolver()
resolver = DependencyResolver(pip_resolver, notebook_resolver, import_resolver, import_resolver, lookup)
maybe = resolver.build_notebook_dependency_graph(parent_file_path, CurrentSessionState())
assert not maybe.problems
Expand Down Expand Up @@ -152,10 +148,9 @@ def func():
lookup = PathLookup.from_sys_path(Path.cwd())
notebook_loader = NotebookLoader()
notebook_resolver = NotebookResolver(notebook_loader)
allow_list = KnownList()
file_loader = FileLoader()
import_resolver = ImportFileResolver(file_loader, allow_list)
pip_resolver = PythonLibraryResolver(allow_list)
import_resolver = ImportFileResolver(file_loader)
pip_resolver = PythonLibraryResolver()
resolver = DependencyResolver(pip_resolver, notebook_resolver, import_resolver, import_resolver, lookup)
maybe = resolver.build_notebook_dependency_graph(parent_file_path, CurrentSessionState())
assert not maybe.problems
Expand Down
Loading