diff --git a/src/databricks/labs/ucx/contexts/application.py b/src/databricks/labs/ucx/contexts/application.py index fb0549e5e7..55bf3abc86 100644 --- a/src/databricks/labs/ucx/contexts/application.py +++ b/src/databricks/labs/ucx/contexts/application.py @@ -538,7 +538,7 @@ def verify_progress_tracking(self) -> VerifyProgressTracking: @cached_property def pip_resolver(self) -> PythonLibraryResolver: - return PythonLibraryResolver(self.allow_list) + return PythonLibraryResolver(allow_list=self.allow_list) @cached_property def notebook_loader(self) -> NotebookLoader: @@ -572,7 +572,7 @@ def allow_list(self) -> KnownList: @cached_property def file_resolver(self) -> ImportFileResolver: - return ImportFileResolver(self.file_loader, self.allow_list) + return ImportFileResolver(self.file_loader, allow_list=self.allow_list) @cached_property def dependency_resolver(self) -> DependencyResolver: diff --git a/src/databricks/labs/ucx/source_code/files.py b/src/databricks/labs/ucx/source_code/files.py index d35f6b138e..4b2dcbeb7e 100644 --- a/src/databricks/labs/ucx/source_code/files.py +++ b/src/databricks/labs/ucx/source_code/files.py @@ -102,10 +102,10 @@ def __repr__(self): class ImportFileResolver(BaseImportResolver, BaseFileResolver): - def __init__(self, file_loader: FileLoader, allow_list: KnownList): + def __init__(self, file_loader: FileLoader, *, allow_list: KnownList | None = None): super().__init__() - self._allow_list = allow_list self._file_loader = file_loader + self._allow_list = allow_list or KnownList() def resolve_file(self, path_lookup, path: Path) -> MaybeDependency: absolute_path = path_lookup.resolve(path) diff --git a/src/databricks/labs/ucx/source_code/python_libraries.py b/src/databricks/labs/ucx/source_code/python_libraries.py index d470cee209..83ad53ee77 100644 --- a/src/databricks/labs/ucx/source_code/python_libraries.py +++ b/src/databricks/labs/ucx/source_code/python_libraries.py @@ -11,12 +11,10 @@ from databricks.labs.blueprint.entrypoint import is_in_debug from databricks.labs.ucx.framework.utils import run_command -from databricks.labs.ucx.source_code.graph import ( - LibraryResolver, - DependencyProblem, -) -from databricks.labs.ucx.source_code.path_lookup import PathLookup +from databricks.labs.ucx.source_code.graph import DependencyProblem, LibraryResolver from databricks.labs.ucx.source_code.known import KnownList +from databricks.labs.ucx.source_code.path_lookup import PathLookup + logger = logging.getLogger(__name__) @@ -26,10 +24,11 @@ class PythonLibraryResolver(LibraryResolver): def __init__( self, - allow_list: KnownList, + *, + allow_list: KnownList | None = None, runner: Callable[[str | list[str]], tuple[int, str, str]] = run_command, ) -> None: - self._allow_list = allow_list + self._allow_list = allow_list or KnownList() self._runner = runner def register_library(self, path_lookup: PathLookup, *libraries: str) -> list[DependencyProblem]: diff --git a/tests/integration/source_code/test_graph.py b/tests/integration/source_code/test_graph.py index b131d94634..e4bad5b039 100644 --- a/tests/integration/source_code/test_graph.py +++ b/tests/integration/source_code/test_graph.py @@ -20,9 +20,9 @@ def module_compatibility(self, name: str) -> Compatibility: return super().module_compatibility(name) allow_list = TestKnownList() - library_resolver = PythonLibraryResolver(allow_list) + library_resolver = PythonLibraryResolver(allow_list=allow_list) notebook_resolver = NotebookResolver(NotebookLoader()) - import_resolver = ImportFileResolver(FileLoader(), allow_list) + import_resolver = ImportFileResolver(FileLoader(), allow_list=allow_list) path_lookup = PathLookup.from_sys_path(Path(__file__).parent) dependency_resolver = DependencyResolver( library_resolver, notebook_resolver, import_resolver, import_resolver, path_lookup @@ -41,10 +41,9 @@ def module_compatibility(self, name: str) -> Compatibility: def test_graph_imports_dynamic_import(): - allow_list = KnownList() - library_resolver = PythonLibraryResolver(allow_list) + library_resolver = PythonLibraryResolver() notebook_resolver = NotebookResolver(NotebookLoader()) - import_resolver = ImportFileResolver(FileLoader(), allow_list) + import_resolver = ImportFileResolver(FileLoader()) path_lookup = PathLookup.from_sys_path(Path(__file__).parent) dependency_resolver = DependencyResolver( library_resolver, notebook_resolver, import_resolver, import_resolver, path_lookup diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index dae0adbc7b..43bc03312e 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -13,7 +13,6 @@ from databricks.labs.ucx.hive_metastore import TablesCrawler from databricks.labs.ucx.hive_metastore.tables import FasterTableScanCrawler from databricks.labs.ucx.source_code.graph import BaseNotebookResolver, DependencyResolver -from databricks.labs.ucx.source_code.known import KnownList from databricks.labs.ucx.source_code.files import FileLoader, ImportFileResolver from databricks.labs.ucx.source_code.notebooks.loaders import NotebookResolver, NotebookLoader from databricks.labs.ucx.source_code.path_lookup import PathLookup @@ -252,10 +251,9 @@ def mock_pip_install_always_successful(_) -> tuple[int, str, str]: """ return 0, "", "" - allow_list = KnownList() - library_resolver = PythonLibraryResolver(allow_list, mock_pip_install_always_successful) + library_resolver = PythonLibraryResolver(runner=mock_pip_install_always_successful) notebook_resolver = NotebookResolver(NotebookLoader()) - import_resolver = ImportFileResolver(FileLoader(), allow_list) + import_resolver = ImportFileResolver(FileLoader()) return DependencyResolver(library_resolver, notebook_resolver, import_resolver, import_resolver, mock_path_lookup) diff --git a/tests/unit/source_code/linters/test_files.py b/tests/unit/source_code/linters/test_files.py index 3c2b145377..6aa18959a2 100644 --- a/tests/unit/source_code/linters/test_files.py +++ b/tests/unit/source_code/linters/test_files.py @@ -9,7 +9,6 @@ from databricks.labs.ucx.source_code.notebooks.loaders import NotebookResolver, NotebookLoader from databricks.labs.ucx.source_code.linters.files import NotebookMigrator from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver -from databricks.labs.ucx.source_code.known import KnownList from databricks.sdk.service.workspace import Language @@ -105,10 +104,9 @@ def local_code_linter(mock_path_lookup, migration_index): notebook_loader = NotebookLoader() file_loader = FileLoader() folder_loader = FolderLoader(notebook_loader, file_loader) - allow_list = KnownList() - pip_resolver = PythonLibraryResolver(allow_list) + pip_resolver = PythonLibraryResolver() session_state = CurrentSessionState() - import_file_resolver = ImportFileResolver(file_loader, allow_list) + import_file_resolver = ImportFileResolver(file_loader) resolver = DependencyResolver( pip_resolver, NotebookResolver(NotebookLoader()), @@ -146,7 +144,7 @@ def test_linter_lints_children_in_context(mock_path_lookup, local_code_linter) - def test_triple_dot_import() -> None: - file_resolver = ImportFileResolver(FileLoader(), KnownList()) + file_resolver = ImportFileResolver(FileLoader()) path_lookup = create_autospec(PathLookup) path_lookup.cwd.as_posix.return_value = '/some/path/to/folder' path_lookup.resolve.return_value = Path('/some/path/foo.py') @@ -159,7 +157,7 @@ def test_triple_dot_import() -> None: def test_single_dot_import() -> None: - file_resolver = ImportFileResolver(FileLoader(), KnownList()) + file_resolver = ImportFileResolver(FileLoader()) path_lookup = create_autospec(PathLookup) path_lookup.cwd.as_posix.return_value = '/some/path/to/folder' path_lookup.resolve.return_value = Path('/some/path/to/folder/foo.py') @@ -192,10 +190,9 @@ def test_known_issues(path: Path, migration_index) -> None: folder_loader = FolderLoader(notebook_loader, file_loader) path_lookup = PathLookup.from_sys_path(Path.cwd()) session_state = CurrentSessionState() - allow_list = KnownList() notebook_resolver = NotebookResolver(NotebookLoader()) - import_resolver = ImportFileResolver(file_loader, allow_list) - pip_resolver = PythonLibraryResolver(allow_list) + import_resolver = ImportFileResolver(file_loader) + pip_resolver = PythonLibraryResolver() resolver = DependencyResolver(pip_resolver, notebook_resolver, import_resolver, import_resolver, path_lookup) linter = LocalCodeLinter( notebook_loader, diff --git a/tests/unit/source_code/notebooks/test_cells.py b/tests/unit/source_code/notebooks/test_cells.py index 1c685b6aeb..25acbd5ed2 100644 --- a/tests/unit/source_code/notebooks/test_cells.py +++ b/tests/unit/source_code/notebooks/test_cells.py @@ -23,7 +23,6 @@ ) from databricks.labs.ucx.source_code.path_lookup import PathLookup from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver -from databricks.labs.ucx.source_code.known import KnownList def test_basic_cell_extraction() -> None: @@ -127,9 +126,8 @@ def test_pip_cell_build_dependency_graph_reports_unknown_library(mock_path_looku dependency = Dependency(FileLoader(), Path("test")) notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - allow_list = KnownList() - pip_resolver = PythonLibraryResolver(allow_list) - file_resolver = ImportFileResolver(FileLoader(), allow_list) + pip_resolver = PythonLibraryResolver() + file_resolver = ImportFileResolver(FileLoader()) dependency_resolver = DependencyResolver( pip_resolver, notebook_resolver, file_resolver, file_resolver, mock_path_lookup ) @@ -149,10 +147,9 @@ def test_pip_cell_build_dependency_graph_resolves_installed_library(mock_path_lo dependency = Dependency(FileLoader(), Path("test")) notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - allow_list = KnownList() file_loader = FileLoader() - pip_resolver = PythonLibraryResolver(allow_list) - import_resolver = ImportFileResolver(file_loader, allow_list) + pip_resolver = PythonLibraryResolver() + import_resolver = ImportFileResolver(file_loader) dependency_resolver = DependencyResolver( pip_resolver, notebook_resolver, import_resolver, import_resolver, mock_path_lookup ) diff --git a/tests/unit/source_code/test_dependencies.py b/tests/unit/source_code/test_dependencies.py index d5051a53de..8a7aa46948 100644 --- a/tests/unit/source_code/test_dependencies.py +++ b/tests/unit/source_code/test_dependencies.py @@ -13,11 +13,9 @@ NotebookResolver, ) from databricks.labs.ucx.source_code.path_lookup import PathLookup -from databricks.labs.ucx.source_code.known import KnownList from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver -from tests.unit import ( - locate_site_packages, -) + +from tests.unit import locate_site_packages from tests.unit.conftest import MockPathLookup @@ -141,8 +139,8 @@ def test_dependency_resolver_terminates_at_known_libraries( site_packages_path = locate_site_packages() lookup.append_path(site_packages_path) file_loader = FileLoader() - import_resolver = ImportFileResolver(file_loader, KnownList()) - library_resolver = PythonLibraryResolver(KnownList()) + import_resolver = ImportFileResolver(file_loader) + library_resolver = PythonLibraryResolver() resolver = DependencyResolver(library_resolver, mock_notebook_resolver, import_resolver, import_resolver, lookup) maybe = resolver.build_local_file_dependency_graph(Path("import-site-package.py"), CurrentSessionState()) assert not maybe.failed @@ -174,9 +172,8 @@ def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> No return None file_loader = FailingFileLoader() - allow_list = KnownList() - import_resolver = ImportFileResolver(file_loader, allow_list) - pip_resolver = PythonLibraryResolver(allow_list) + import_resolver = ImportFileResolver(file_loader) + pip_resolver = PythonLibraryResolver() resolver = DependencyResolver( pip_resolver, mock_notebook_resolver, import_resolver, import_resolver, mock_path_lookup ) @@ -196,9 +193,8 @@ def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> So notebook_loader = FailingNotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - known_list = KnownList() - pip_resolver = PythonLibraryResolver(known_list) - import_resolver = ImportFileResolver(FileLoader(), known_list) + pip_resolver = PythonLibraryResolver() + import_resolver = ImportFileResolver(FileLoader()) resolver = DependencyResolver(pip_resolver, notebook_resolver, import_resolver, import_resolver, mock_path_lookup) maybe = resolver.build_notebook_dependency_graph(Path("root5.py"), CurrentSessionState()) assert list(maybe.problems) == [ diff --git a/tests/unit/source_code/test_jobs.py b/tests/unit/source_code/test_jobs.py index 12f008017a..1da236f37c 100644 --- a/tests/unit/source_code/test_jobs.py +++ b/tests/unit/source_code/test_jobs.py @@ -14,7 +14,6 @@ from databricks.labs.ucx.source_code.base import CurrentSessionState from databricks.labs.ucx.source_code.directfs_access import DirectFsAccessCrawler from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver -from databricks.labs.ucx.source_code.known import KnownList from databricks.sdk import WorkspaceClient from databricks.sdk.errors import NotFound from databricks.sdk.service import compute, jobs, pipelines @@ -45,10 +44,9 @@ def test_job_problem_as_message() -> None: @pytest.fixture def dependency_resolver(mock_path_lookup) -> DependencyResolver: file_loader = FileLoader() - allow_list = KnownList() - import_file_resolver = ImportFileResolver(file_loader, allow_list) + import_file_resolver = ImportFileResolver(file_loader) resolver = DependencyResolver( - PythonLibraryResolver(allow_list), + PythonLibraryResolver(), NotebookResolver(NotebookLoader()), import_file_resolver, import_file_resolver, diff --git a/tests/unit/source_code/test_notebook.py b/tests/unit/source_code/test_notebook.py index 87e7e8c9d4..31eb78cde2 100644 --- a/tests/unit/source_code/test_notebook.py +++ b/tests/unit/source_code/test_notebook.py @@ -6,7 +6,6 @@ from databricks.labs.ucx.source_code.base import CurrentSessionState from databricks.labs.ucx.source_code.graph import DependencyGraph, DependencyResolver -from databricks.labs.ucx.source_code.known import KnownList from databricks.labs.ucx.source_code.files import FileLoader, ImportFileResolver from databricks.labs.ucx.source_code.linters.imports import DbutilsPyLinter from databricks.labs.ucx.source_code.python.python_ast import MaybeTree @@ -132,8 +131,8 @@ def test_notebook_generates_runnable_cells(source: tuple[str, Language, list[str def dependency_resolver(mock_path_lookup) -> DependencyResolver: notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - library_resolver = PythonLibraryResolver(KnownList()) - import_resolver = ImportFileResolver(FileLoader(), KnownList()) + library_resolver = PythonLibraryResolver() + import_resolver = ImportFileResolver(FileLoader()) return DependencyResolver(library_resolver, notebook_resolver, import_resolver, import_resolver, mock_path_lookup) diff --git a/tests/unit/source_code/test_path_lookup_simulation.py b/tests/unit/source_code/test_path_lookup_simulation.py index 7181998845..b042a1efcd 100644 --- a/tests/unit/source_code/test_path_lookup_simulation.py +++ b/tests/unit/source_code/test_path_lookup_simulation.py @@ -9,7 +9,6 @@ from databricks.labs.ucx.source_code.path_lookup import PathLookup from databricks.labs.ucx.source_code.graph import SourceContainer, DependencyResolver from databricks.labs.ucx.source_code.notebooks.loaders import NotebookResolver, NotebookLoader -from databricks.labs.ucx.source_code.known import KnownList from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver from tests.unit import ( _samples_path, @@ -45,9 +44,8 @@ def test_locates_notebooks(source: list[str], expected: int, mock_path_lookup) - file_loader = FileLoader() notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - allow_list = KnownList() - import_resolver = ImportFileResolver(file_loader, allow_list) - pip_resolver = PythonLibraryResolver(allow_list) + import_resolver = ImportFileResolver(file_loader) + pip_resolver = PythonLibraryResolver() dependency_resolver = DependencyResolver( pip_resolver, notebook_resolver, import_resolver, import_resolver, mock_path_lookup ) @@ -70,13 +68,12 @@ def test_locates_files(source: list[str], expected: int) -> None: elems = [_samples_path(SourceContainer)] elems.extend(source) file_path = Path(*elems) - allow_list = KnownList() lookup = PathLookup.from_sys_path(Path.cwd()) file_loader = FileLoader() notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - import_resolver = ImportFileResolver(file_loader, allow_list) - pip_resolver = PythonLibraryResolver(allow_list) + import_resolver = ImportFileResolver(file_loader) + pip_resolver = PythonLibraryResolver() resolver = DependencyResolver(pip_resolver, notebook_resolver, import_resolver, import_resolver, lookup) maybe = resolver.build_local_file_dependency_graph(file_path, CurrentSessionState()) assert not maybe.problems @@ -113,9 +110,8 @@ def test_locates_notebooks_with_absolute_path() -> None: notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) file_loader = FileLoader() - allow_list = KnownList() - import_resolver = ImportFileResolver(file_loader, allow_list) - pip_resolver = PythonLibraryResolver(allow_list) + import_resolver = ImportFileResolver(file_loader) + pip_resolver = PythonLibraryResolver() resolver = DependencyResolver(pip_resolver, notebook_resolver, import_resolver, import_resolver, lookup) maybe = resolver.build_notebook_dependency_graph(parent_file_path, CurrentSessionState()) assert not maybe.problems @@ -152,10 +148,9 @@ def func(): lookup = PathLookup.from_sys_path(Path.cwd()) notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - allow_list = KnownList() file_loader = FileLoader() - import_resolver = ImportFileResolver(file_loader, allow_list) - pip_resolver = PythonLibraryResolver(allow_list) + import_resolver = ImportFileResolver(file_loader) + pip_resolver = PythonLibraryResolver() resolver = DependencyResolver(pip_resolver, notebook_resolver, import_resolver, import_resolver, lookup) maybe = resolver.build_notebook_dependency_graph(parent_file_path, CurrentSessionState()) assert not maybe.problems diff --git a/tests/unit/source_code/test_python_libraries.py b/tests/unit/source_code/test_python_libraries.py index 895910ec02..6ac8e6a74f 100644 --- a/tests/unit/source_code/test_python_libraries.py +++ b/tests/unit/source_code/test_python_libraries.py @@ -2,7 +2,6 @@ from databricks.labs.ucx.source_code.path_lookup import PathLookup from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver -from databricks.labs.ucx.source_code.known import KnownList def test_python_library_resolver_resolves_library(mock_path_lookup) -> None: @@ -11,7 +10,7 @@ def mock_pip_install(command): assert command_str.startswith("pip --disable-pip-version-check install anything -t") return 0, "", "" - python_library_resolver = PythonLibraryResolver(KnownList(), mock_pip_install) + python_library_resolver = PythonLibraryResolver(runner=mock_pip_install) problems = python_library_resolver.register_library(mock_path_lookup, "anything") assert len(problems) == 0 @@ -21,7 +20,7 @@ def test_python_library_resolver_failing(mock_path_lookup) -> None: def mock_pip_install(_): return 1, "", "nope" - python_library_resolver = PythonLibraryResolver(KnownList(), mock_pip_install) + python_library_resolver = PythonLibraryResolver(runner=mock_pip_install) problems = python_library_resolver.register_library(mock_path_lookup, "anything") assert len(problems) == 1 @@ -36,7 +35,7 @@ def mock_pip_install(_): path_lookup = create_autospec(PathLookup) path_lookup.resolve.return_value = None - python_library_resolver = PythonLibraryResolver(KnownList(), mock_pip_install) + python_library_resolver = PythonLibraryResolver(runner=mock_pip_install) problems = python_library_resolver.register_library(path_lookup, "library") assert len(problems) == 0 @@ -51,7 +50,7 @@ def test_python_library_resolver_resolves_library_with_known_problems(mock_path_ def mock_pip_install(_): return 0, "", "" - python_library_resolver = PythonLibraryResolver(KnownList(), mock_pip_install) + python_library_resolver = PythonLibraryResolver(runner=mock_pip_install) problems = python_library_resolver.register_library(mock_path_lookup, "boto3==1.17.0") assert len(problems) == 1 @@ -62,14 +61,14 @@ def test_python_library_resolver_installs_with_command(mock_path_lookup) -> None def mock_pip_install(_): return 0, "", "" - python_library_resolver = PythonLibraryResolver(KnownList(), mock_pip_install) + python_library_resolver = PythonLibraryResolver(runner=mock_pip_install) problems = python_library_resolver.register_library(mock_path_lookup, "library.whl", "--verbose") assert len(problems) == 0 def test_python_library_resolver_installs_multiple_eggs(mock_path_lookup) -> None: - python_library_resolver = PythonLibraryResolver(KnownList()) + python_library_resolver = PythonLibraryResolver() problems = python_library_resolver.register_library(mock_path_lookup, "first.egg", "second.egg") assert len(problems) == 2 diff --git a/tests/unit/source_code/test_s3fs.py b/tests/unit/source_code/test_s3fs.py index db20a3d332..86575a2ba7 100644 --- a/tests/unit/source_code/test_s3fs.py +++ b/tests/unit/source_code/test_s3fs.py @@ -10,7 +10,6 @@ ) from databricks.labs.ucx.source_code.files import FileLoader, ImportFileResolver from databricks.labs.ucx.source_code.notebooks.loaders import NotebookLoader, NotebookResolver -from databricks.labs.ucx.source_code.known import KnownList from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver S3FS_DEPRECATION_MESSAGE = ( @@ -119,12 +118,11 @@ def test_detect_s3fs_import( sample = tmp_path / "test_detect_s3fs_import.py" sample.write_text(source) mock_path_lookup.append_path(tmp_path) - allow_list = KnownList() notebook_loader = NotebookLoader() file_loader = FileLoader() notebook_resolver = NotebookResolver(notebook_loader) - import_resolver = ImportFileResolver(file_loader, allow_list) - pip_resolver = PythonLibraryResolver(allow_list) + import_resolver = ImportFileResolver(file_loader) + pip_resolver = PythonLibraryResolver() dependency_resolver = DependencyResolver( pip_resolver, notebook_resolver, import_resolver, import_resolver, mock_path_lookup ) @@ -154,9 +152,8 @@ def test_detect_s3fs_import_in_dependencies( empty_index, expected: list[DependencyProblem], mock_path_lookup, mock_notebook_resolver ) -> None: file_loader = FileLoader() - allow_list = KnownList() - import_resolver = ImportFileResolver(file_loader, allow_list) - pip_resolver = PythonLibraryResolver(allow_list) + import_resolver = ImportFileResolver(file_loader) + pip_resolver = PythonLibraryResolver() dependency_resolver = DependencyResolver( pip_resolver, mock_notebook_resolver, import_resolver, import_resolver, mock_path_lookup )