Skip to content

Commit

Permalink
Merge branch 'main' into fix/file-loader-handles-files
Browse files Browse the repository at this point in the history
  • Loading branch information
JCZuurmond committed Feb 5, 2025
2 parents 36f1b31 + 5bb3a96 commit f2fbb18
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 24 deletions.
19 changes: 7 additions & 12 deletions src/databricks/labs/ucx/source_code/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
MaybeDependency,
)
from databricks.labs.ucx.source_code.known import KnownList
from databricks.labs.ucx.source_code.notebooks.cells import CellLanguage
from databricks.labs.ucx.source_code.path_lookup import PathLookup
from databricks.labs.ucx.source_code.linters.python import PythonCodeAnalyzer

Expand All @@ -28,39 +27,35 @@


class LocalFile(SourceContainer):
"""A container for accessing local files."""

def __init__(self, path: Path, source: str, language: Language):
self._path = path
self._original_code = source
# using CellLanguage so we can reuse the facilities it provides
self._language = CellLanguage.of_language(language)

@property
def path(self) -> Path:
return self._path
self._language = language

@property
def content(self) -> str:
"""The file content"""
return self._original_code

def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]:
if self._language is CellLanguage.PYTHON:
"""The dependency graph for the local file."""
if self._language == Language.PYTHON:
context = parent.new_dependency_graph_context()
analyzer = PythonCodeAnalyzer(context, self._original_code)
problems = analyzer.build_graph()
for idx, problem in enumerate(problems):
if problem.has_missing_path():
problems[idx] = dataclasses.replace(problem, source_path=self._path)
return problems
# supported language that does not generate dependencies
if self._language is CellLanguage.SQL:
if self._language == Language.SQL: # SQL cannot refer other dependencies
return []
logger.warning(f"Unsupported language: {self._language.language}")
logger.warning(f"Unsupported language: {self._language}")
return []

def build_inherited_context(self, graph: DependencyGraph, child_path: Path) -> InheritedContext:
if self._language is CellLanguage.PYTHON:
if self._language == Language.PYTHON:
context = graph.new_dependency_graph_context()
analyzer = PythonCodeAnalyzer(context, self._original_code)
inherited = analyzer.build_inherited_context(child_path)
Expand Down
108 changes: 96 additions & 12 deletions tests/unit/source_code/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,105 @@
import pytest
from databricks.sdk.service.workspace import Language

from databricks.labs.ucx.source_code.base import CurrentSessionState
from databricks.labs.ucx.source_code.files import FileLoader, LocalFile
from databricks.labs.ucx.source_code.graph import Dependency
from databricks.labs.ucx.source_code.path_lookup import PathLookup
from databricks.labs.ucx.source_code.graph import DependencyGraph, DependencyProblem


def test_local_file_content_is_accessible() -> None:
    """The source handed to a `LocalFile` is returned unchanged by `content`."""
    source = "print(1)"
    assert LocalFile(Path("test.py"), source, Language.PYTHON).content == source


def test_file_loader_loads_non_ascii_file(mock_path_lookup) -> None:
dependency = Dependency(FileLoader(), Path("nonascii.py"))
@pytest.mark.parametrize("language", [Language.SQL, Language.SCALA, Language.R])
def test_local_file_builds_dependency_graph_without_problems_independent_from_source(
    simple_dependency_resolver, mock_path_lookup, language: Language
) -> None:
    """SQL and the non-Python languages build a dependency graph that reports no problems."""
    file_path = Path("test.py")
    root = Dependency(FileLoader(), file_path)
    session_state = CurrentSessionState()
    parent_graph = DependencyGraph(root, None, simple_dependency_resolver, mock_path_lookup, session_state)
    # The source is a placeholder that is not valid code in any of these languages.
    problems = LocalFile(file_path, "does not matter", language).build_dependency_graph(parent_graph)
    assert not problems

local_file = dependency.load(mock_path_lookup)

# TODO: Test specific error while loading: https://github.com/databrickslabs/ucx/issues/3584
assert local_file is None
assert Path("nonascii.py") in mock_path_lookup.successfully_resolved_paths
def test_local_file_builds_dependency_graph_without_problems_for_python(
    simple_dependency_resolver, mock_path_lookup
) -> None:
    """Valid Python source builds a dependency graph that reports no problems."""
    file_path = Path("test.py")
    root = Dependency(FileLoader(), file_path)
    session_state = CurrentSessionState()
    parent_graph = DependencyGraph(root, None, simple_dependency_resolver, mock_path_lookup, session_state)
    problems = LocalFile(file_path, "print(1)", Language.PYTHON).build_dependency_graph(parent_graph)
    assert not problems


def test_local_file_builds_dependency_graph_with_problems_for_python(
    simple_dependency_resolver, mock_path_lookup
) -> None:
    """Unparsable Python source yields a single parse-error problem for the file."""
    file_path = Path("test.py")
    root = Dependency(FileLoader(), file_path)
    session_state = CurrentSessionState()
    parent_graph = DependencyGraph(root, None, simple_dependency_resolver, mock_path_lookup, session_state)
    # The closing parenthesis is left out on purpose to provoke the parse error.
    broken_file = LocalFile(file_path, "print(1", Language.PYTHON)
    expected = DependencyProblem(
        "python-parse-error",
        "Failed to parse code due to invalid syntax: print(1",
        Path("test.py"),
    )
    problems = broken_file.build_dependency_graph(parent_graph)
    assert problems == [expected]


@pytest.mark.parametrize("language", [Language.SQL, Language.SCALA, Language.R])
def test_local_file_builds_inherited_context_without_tree_found_and_problems_independent_from_source(
    simple_dependency_resolver, mock_path_lookup, language: Language
) -> None:
    """SQL and the non-Python languages yield an inherited context with no tree, found flag or problems."""
    file_path = Path("test")
    root = Dependency(FileLoader(), file_path)
    session_state = CurrentSessionState()
    parent_graph = DependencyGraph(root, None, simple_dependency_resolver, mock_path_lookup, session_state)
    # The source is a placeholder that is not valid code in any of these languages.
    context = LocalFile(file_path, "does not matter", language).build_inherited_context(parent_graph, Path("child"))
    assert not context.tree
    assert not context.found
    assert not context.problems


def test_local_file_builds_inherited_context_with_tree_without_found_and_problems(
    simple_dependency_resolver, mock_path_lookup
) -> None:
    """Valid Python source yields a tree, while the child is not found and no problems are reported."""
    file_path = Path("test.py")
    root = Dependency(FileLoader(), file_path)
    session_state = CurrentSessionState()
    parent_graph = DependencyGraph(root, None, simple_dependency_resolver, mock_path_lookup, session_state)
    python_file = LocalFile(file_path, "print(1)", Language.PYTHON)
    context = python_file.build_inherited_context(parent_graph, Path("child.py"))
    assert context.tree
    assert not context.found
    assert not context.problems


def test_local_file_builds_inherited_context_with_python_parse_error_problem(
    simple_dependency_resolver, mock_path_lookup
) -> None:
    """Unparsable Python source yields no tree and no found flag, only a parse-error problem."""
    file_path = Path("test.py")
    root = Dependency(FileLoader(), file_path)
    session_state = CurrentSessionState()
    parent_graph = DependencyGraph(root, None, simple_dependency_resolver, mock_path_lookup, session_state)
    # The closing parenthesis is left out on purpose to provoke the parse error.
    broken_file = LocalFile(file_path, "print(1", Language.PYTHON)
    expected = DependencyProblem(
        "python-parse-error",
        "Failed to parse code due to invalid syntax: print(1",
        Path("test.py"),
    )
    context = broken_file.build_inherited_context(parent_graph, Path("child.py"))
    assert not context.tree
    assert not context.found
    assert context.problems == [expected]


def test_file_loader_loads_non_existing_file() -> None:
def test_file_loader_loads_file_without_permission() -> None:
path = create_autospec(Path)
path.suffix = ".py"
path.open.side_effect = FileNotFoundError("No such file or directory: 'test.py'")
path.open.side_effect = PermissionError("Permission denied")
dependency = Dependency(FileLoader(), path)
path_lookup = create_autospec(PathLookup)
path_lookup.resolve.return_value = path
Expand All @@ -41,17 +116,26 @@ def test_file_loader_loads_non_existing_file() -> None:
path_lookup.resolve.assert_called_once_with(path)


def test_file_loader_loads_file_without_permission() -> None:
def test_file_loader_loads_non_ascii_file(mock_path_lookup) -> None:
    """Loading `nonascii.py` returns nothing, although the path lookup records it as resolved."""
    target = Path("nonascii.py")
    loaded = Dependency(FileLoader(), target).load(mock_path_lookup)

    # TODO: Test specific error while loading: https://github.com/databrickslabs/ucx/issues/3584
    assert loaded is None
    assert target in mock_path_lookup.successfully_resolved_paths


def test_file_loader_loads_non_existing_file() -> None:
path = create_autospec(Path)
path.suffix = ".py"
path.open.side_effect = PermissionError("Permission denied")
dependency = Dependency(FileLoader(), path)
path.open.side_effect = FileNotFoundError("No such file or directory: 'test.py'")
path_lookup = create_autospec(PathLookup)
path_lookup.resolve.return_value = path

dependency = Dependency(FileLoader(), path)
local_file = dependency.load(path_lookup)

# TODO: Test specific error while loading: https://github.com/databrickslabs/ucx/issues/3584
assert local_file is None
path.open.assert_called_once()
path_lookup.resolve.assert_called_once_with(path)
Expand Down

0 comments on commit f2fbb18

Please sign in to comment.