Skip to content
Permalink

Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories. View the default comparison for this range or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also . Learn more about diff comparisons here.
base repository: databrickslabs/ucx
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 8f00821cf1b46e6de17e29c166fe568c39053552
Choose a base ref
..
head repository: databrickslabs/ucx
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: bc9d691d2347daae5a0e1c2e5b0845ee6f5c7e77
Choose a head ref
18 changes: 11 additions & 7 deletions src/databricks/labs/ucx/source_code/python/python_ast.py
Original file line number Diff line number Diff line change
@@ -678,9 +678,7 @@ def __init__(
self._linters = linters
self._dfsa_collectors = dfsa_collectors
self._table_collectors = table_collectors

# This tree collects all the trees parsed from source code by attaching them as child trees
self._tree: Tree = Tree.new_module()
self._tree: Tree | None = None

def lint(self, code: str) -> Iterable[Advice]:
maybe_tree = self._parse_and_append(code)
@@ -703,22 +701,23 @@ def _parse_and_append(self, code: str) -> MaybeTree:
return maybe_tree

def append_tree(self, tree: Tree) -> None:
self._tree.attach_child_tree(tree)
self._make_tree().attach_child_tree(tree)

def append_nodes(self, nodes: list[NodeNG]) -> None:
self._tree.attach_nodes(nodes)
self._make_tree().attach_nodes(nodes)

def append_globals(self, globs: dict) -> None:
self._tree.extend_globals(globs)
self._make_tree().extend_globals(globs)

def process_child_cell(self, code: str) -> None:
this_tree = self._make_tree()
maybe_tree = Tree.maybe_normalized_parse(code)
if maybe_tree.failure:
# TODO: bubble up this error
logger.warning(maybe_tree.failure.message)
return
assert maybe_tree.tree is not None
self._tree.attach_child_tree(maybe_tree.tree)
this_tree.attach_child_tree(maybe_tree.tree)

def collect_dfsas(self, source_code: str) -> Iterable[DirectFsAccess]:
maybe_tree = self._parse_and_append(source_code)
@@ -745,3 +744,8 @@ def collect_tables(self, source_code: str) -> Iterable[UsedTable]:
def collect_tables_from_tree(self, tree: Tree) -> Iterable[UsedTableNode]:
for collector in self._table_collectors:
yield from collector.collect_tables_from_tree(tree)

def _make_tree(self) -> Tree:
if self._tree is None:
self._tree = Tree.new_module()
return self._tree
32 changes: 1 addition & 31 deletions tests/unit/source_code/notebooks/test_sources.py
Original file line number Diff line number Diff line change
@@ -4,11 +4,10 @@
from unittest.mock import create_autospec

import pytest
from databricks.sdk.service.workspace import Language

from databricks.labs.ucx.source_code.base import CurrentSessionState
from databricks.labs.ucx.source_code.linters.context import LinterContext
from databricks.labs.ucx.source_code.notebooks.sources import FileLinter, Notebook, NotebookLinter
from databricks.labs.ucx.source_code.notebooks.sources import FileLinter


@pytest.mark.parametrize("path, content", [("xyz.py", "a = 3"), ("xyz.sql", "select * from dual")])
@@ -114,32 +113,3 @@ def test_file_linter_lints_file_with_missing_read_permission(migration_index, mo
assert len(advices) == 1
assert advices[0].code == "file-permission"
assert advices[0].message == f"Missing read permission for {path}"


def test_notebook_linter_lints_source_yielding_no_advices(migration_index, mock_path_lookup) -> None:
linter = NotebookLinter.from_source(
migration_index,
mock_path_lookup,
CurrentSessionState(),
"# Databricks notebook source\nprint(1)",
Language.PYTHON,
)

advices = list(linter.lint())

assert not advices, "Expected no advices"


def test_notebook_linter_lints_parent_child_context_from_grand_parent(migration_index, mock_path_lookup) -> None:
path = Path(__file__).parent.parent / "samples" / "parent-child-context" / "grand_parent.py"
notebook = Notebook.parse(path, path.read_text(), Language.PYTHON)
linter = NotebookLinter(
LinterContext(migration_index),
mock_path_lookup.change_directory(path.parent),
CurrentSessionState(),
notebook,
)

advices = list(linter.lint())

assert not advices, "Expected no advices"
88 changes: 1 addition & 87 deletions tests/unit/source_code/python/test_python_ast.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
from collections.abc import Iterable

import pytest
import astroid # type: ignore
from astroid import Assign, AssignName, Attribute, Call, Const, Expr, Module, Name # type: ignore

from databricks.labs.ucx.source_code.base import Advice, Failure
from databricks.labs.ucx.source_code.python.python_ast import PythonLinter, PythonSequentialLinter, Tree, TreeHelper
from databricks.labs.ucx.source_code.python.python_ast import Tree, TreeHelper
from databricks.labs.ucx.source_code.python.python_infer import InferredValue


@@ -407,86 +404,3 @@ def test_renumber_fails() -> None:

def test_const_is_not_from_module() -> None:
assert not Tree(Const("xyz")).is_from_module("spark")


class DummyPythonLinter(PythonLinter):
"""Dummy python linter yielding dummy advices for testing purpose."""

def lint_tree(self, tree: Tree) -> Iterable[Advice]:
yield Advice("dummy", "dummy advice", 0, 0, 0, 0)
yield Advice("dummy", "dummy advice", 1, 1, 1, 1)


def test_dummy_python_linter_lint_lints_tree() -> None:
linter = DummyPythonLinter()
advices = list(linter.lint("print(1)"))
assert advices == [Advice("dummy", "dummy advice", 0, 0, 0, 0), Advice("dummy", "dummy advice", 1, 1, 1, 1)]


def test_dummy_python_linter_lint_yields_failure_due_to_parse_error() -> None:
linter = DummyPythonLinter()
advices = list(linter.lint("print(1")) # Closing parenthesis is missing on purpose
assert advices == [
Failure(
code="python-parse-error",
message="Failed to parse code due to invalid syntax: print(1",
start_line=0,
start_col=5,
end_line=0,
end_col=1,
)
]


def test_python_sequential_linter_lint_lints_tree() -> None:
linter = PythonSequentialLinter([DummyPythonLinter()], [], [])
advices = list(linter.lint("print(1)"))
assert advices == [Advice("dummy", "dummy advice", 0, 0, 0, 0), Advice("dummy", "dummy advice", 1, 1, 1, 1)]


class NodeGlobalsLinter(PythonLinter):
"""Get the node globals"""

def lint_tree(self, tree: Tree) -> Iterable[Advice]:
globs = ""
if isinstance(tree.node, Module):
globs = ",".join(tree.node.globals.keys())
yield Advice("globals", globs, 0, 0, 0, 0)


def test_python_sequential_linter_lint_has_no_globals() -> None:
linter = PythonSequentialLinter([NodeGlobalsLinter()], [], [])
advices = list(linter.lint("print(1)"))
assert advices == [Advice("globals", "", 0, 0, 0, 0)]


def test_python_sequential_linter_lint_has_one_global() -> None:
linter = PythonSequentialLinter([NodeGlobalsLinter()], [], [])
advices = list(linter.lint("a = 1"))
assert advices == [Advice("globals", "a", 0, 0, 0, 0)]


def test_python_sequential_linter_lint_has_two_globals() -> None:
linter = PythonSequentialLinter([NodeGlobalsLinter()], [], [])
advices = list(linter.lint("a = 1;b = 2"))
assert advices == [Advice("globals", "a,b", 0, 0, 0, 0)]


def test_python_sequential_linter_lint_is_stateless() -> None:
"""Globals from previous lint calls should not be part of later calls"""
linter = PythonSequentialLinter([NodeGlobalsLinter()], [], [])
list(linter.lint("a = 1"))
advices = list(linter.lint("b = 2"))
assert advices == [Advice("globals", "b", 0, 0, 0, 0)]


def test_python_sequential_linter_extend_globals() -> None:
linter = PythonSequentialLinter([NodeGlobalsLinter()], [], [])

node = astroid.extract_node("b = a + 2")
assign_name = next(node.get_children())
assert isinstance(assign_name, AssignName)
linter.append_globals({"b": [assign_name]})

advices = list(linter.lint("a = 1"))
assert advices == [Advice("globals", "a,b", 0, 0, 0, 0)]