From f8bf94cc5e51ab908f5c9d427175967b3cb4ad96 Mon Sep 17 00:00:00 2001 From: Cor Date: Mon, 3 Feb 2025 19:03:55 +0100 Subject: [PATCH] Make fixer diagnostic codes unique (#3582) ## Changes Make fixer diagnostic codes unique so that the right fixer can be found for code migration/fixing. ### Linked issues Progresses #3514 Breaks up #3520 ### Functionality - [x] modified existing command: `databricks labs ucx migrate-local-code` ### Tests - [ ] manually tested - [x] modified and added unit tests - [x] modified and added integration tests --- docs/ucx/docs/dev/contributing.mdx | 4 +- docs/ucx/docs/reference/linter_codes.mdx | 14 ++- src/databricks/labs/ucx/source_code/base.py | 7 +- .../labs/ucx/source_code/linters/context.py | 21 ++-- .../ucx/source_code/linters/from_table.py | 7 +- .../labs/ucx/source_code/linters/pyspark.py | 84 +++++++++++++--- .../integration/source_code/message_codes.py | 66 +++++++++---- .../unit/source_code/linters/test_context.py | 60 ++++++++++++ .../source_code/linters/test_from_table.py | 4 +- .../unit/source_code/linters/test_linters.py | 56 ----------- .../unit/source_code/linters/test_pyspark.py | 98 +++++++++---------- .../functional/file-access/direct-fs2.py | 4 +- .../catalog/spark-catalog-cache-table.py | 4 +- .../spark-catalog-create-external-table.py | 10 +- .../catalog/spark-catalog-create-table.py | 10 +- .../catalog/spark-catalog-get-table.py | 2 +- .../catalog/spark-catalog-is-cached.py | 2 +- .../catalog/spark-catalog-list-columns.py | 6 +- .../spark-catalog-recover-partitions.py | 2 +- .../catalog/spark-catalog-refresh-table.py | 2 +- .../catalog/spark-catalog-table-exists.py | 6 +- .../catalog/spark-catalog-uncache-table.py | 2 +- .../pyspark/dataframe-write-insert-into.py | 4 +- .../pyspark/dataframe-write-save-as-table.py | 4 +- .../samples/functional/pyspark/spark-table.py | 2 +- .../table-migration-notebook.py | 8 +- .../table-migration-notebook.sql | 12 +-- tests/unit/source_code/test_base.py | 31 ++++++ tests/unit/source_code/test_lsp_plugin.py | 2 +- tests/unit/source_code/test_queries.py | 2 +- 30 files changed, 334 insertions(+), 202 deletions(-) create mode 100644 tests/unit/source_code/linters/test_context.py delete mode 100644 tests/unit/source_code/linters/test_linters.py diff --git a/docs/ucx/docs/dev/contributing.mdx b/docs/ucx/docs/dev/contributing.mdx index ff6ad6dcdf..b6e922163f 100644 --- a/docs/ucx/docs/dev/contributing.mdx +++ b/docs/ucx/docs/dev/contributing.mdx @@ -302,7 +302,9 @@ rdd-in-shared-clusters spark-logging-in-shared-clusters sql-parse-error sys-path-cannot-compute-value -table-migrated-to-uc +table-migrated-to-uc-python +table-migrated-to-uc-python-sql +table-migrated-to-uc-sql to-json-in-shared-clusters unsupported-magic-line ``` diff --git a/docs/ucx/docs/reference/linter_codes.mdx b/docs/ucx/docs/reference/linter_codes.mdx index 23cde86806..2f73ad92c1 100644 --- a/docs/ucx/docs/reference/linter_codes.mdx +++ b/docs/ucx/docs/reference/linter_codes.mdx @@ -31,11 +31,11 @@ spark.table(f"foo_{some_table_name}") We even detect string constants when coming either from `dbutils.widgets.get` (via job named parameters) or through loop variables. If `old.things` table is migrated to `brand.new.stuff` in Unity Catalog, the following code will -trigger two messages: [`table-migrated-to-uc`](#table-migrated-to-uc) for the first query, as the contents are clearly -analysable, and `cannot-autofix-table-reference` for the second query. +trigger two messages: [`table-migrated-to-uc-{sql,python,python-sql}`](#table-migrated-to-uc-sqlpythonpython-sql) for +the first query, as the contents are clearly analysable, and `cannot-autofix-table-reference` for the second query. ```python -# ucx[table-migrated-to-uc:+4:4:+4:20] Table old.things is migrated to brand.new.stuff in Unity Catalog +# ucx[table-migrated-to-uc-python-sql:+4:4:+4:20] Table old.things is migrated to brand.new.stuff in Unity Catalog # ucx[cannot-autofix-table-reference:+3:4:+3:20] Can't migrate table_name argument in 'spark.sql(query)' because its value cannot be computed table_name = f"table_{index}" for query in ["SELECT * FROM old.things", f"SELECT * FROM {table_name}"]: @@ -247,12 +247,16 @@ analysis where the path is located. -## `table-migrated-to-uc` +## `table-migrated-to-uc-{sql,python,python-sql}` This message indicates that the linter has found a table that has been migrated to Unity Catalog. The user must ensure that the table is available in Unity Catalog. - +| Postfix | Explanation | +|------------|-------------------------------------------------| +| sql | Table reference in SparkSQL | +| python | Table reference in PySpark | +| python-sql | Table reference in SparkSQL called from PySpark | ## `to-json-in-shared-clusters` diff --git a/src/databricks/labs/ucx/source_code/base.py b/src/databricks/labs/ucx/source_code/base.py index 9eb2800ec9..e28f696a7c 100644 --- a/src/databricks/labs/ucx/source_code/base.py +++ b/src/databricks/labs/ucx/source_code/base.py @@ -161,7 +161,12 @@ class Fixer(ABC): @property @abstractmethod - def name(self) -> str: ... + def diagnostic_code(self) -> str: + """The diagnostic code that this fixer fixes.""" + + def is_supported(self, diagnostic_code: str) -> bool: + """Indicate if the diagnostic code is supported by this fixer.""" + return self.diagnostic_code is not None and diagnostic_code == self.diagnostic_code @abstractmethod def apply(self, code: str) -> str: ... diff --git a/src/databricks/labs/ucx/source_code/linters/context.py b/src/databricks/labs/ucx/source_code/linters/context.py index 27686b414c..800aefbad0 100644 --- a/src/databricks/labs/ucx/source_code/linters/context.py +++ b/src/databricks/labs/ucx/source_code/linters/context.py @@ -24,7 +24,8 @@ from databricks.labs.ucx.source_code.linters.imports import DbutilsPyLinter from databricks.labs.ucx.source_code.linters.pyspark import ( - SparkSqlPyLinter, + DirectFsAccessSqlPylinter, + FromTableSqlPyLinter, SparkTableNamePyLinter, SparkSqlTablePyCollector, ) @@ -57,7 +58,7 @@ def __init__( sql_linters.append(from_table) sql_fixers.append(from_table) sql_table_collectors.append(from_table) - spark_sql = SparkSqlPyLinter(from_table, from_table) + spark_sql = FromTableSqlPyLinter(from_table) python_linters.append(spark_sql) python_fixers.append(spark_sql) python_table_collectors.append(SparkSqlTablePyCollector(from_table)) @@ -75,7 +76,7 @@ def __init__( DBRv8d0PyLinter(dbr_version=session_state.dbr_version), SparkConnectPyLinter(session_state), DbutilsPyLinter(session_state), - SparkSqlPyLinter(sql_direct_fs, None), + DirectFsAccessSqlPylinter(sql_direct_fs), ] python_dfsa_collectors += [DirectFsAccessPyLinter(session_state, prevent_spark_duplicates=False)] @@ -112,10 +113,16 @@ def linter(self, language: Language) -> Linter: raise ValueError(f"Unsupported language: {language}") def fixer(self, language: Language, diagnostic_code: str) -> Fixer | None: - if language not in self._fixers: - return None - for fixer in self._fixers[language]: - if fixer.name == diagnostic_code: + """Get the fixer for a language that matches the code. + + The first fixer which name matches with the diagnostic code is returned. This logic assumes the fixers have + unique names. + + Returns : + Fixer | None : The fixer if a match is found, otherwise None. + """ + for fixer in self._fixers.get(language, []): + if fixer.is_supported(diagnostic_code): return fixer return None diff --git a/src/databricks/labs/ucx/source_code/linters/from_table.py b/src/databricks/labs/ucx/source_code/linters/from_table.py index 9d259a680a..b907695adb 100644 --- a/src/databricks/labs/ucx/source_code/linters/from_table.py +++ b/src/databricks/labs/ucx/source_code/linters/from_table.py @@ -45,8 +45,9 @@ def __init__(self, index: TableMigrationIndex, session_state: CurrentSessionStat self._session_state: CurrentSessionState = session_state @property - def name(self) -> str: - return 'table-migrate' + def diagnostic_code(self) -> str: + """The diagnostic codes that this fixer fixes.""" + return "table-migrated-to-uc-sql" @property def schema(self) -> str: @@ -58,7 +59,7 @@ def lint_expression(self, expression: Expression) -> Iterable[Deprecation]: if not dst: return yield Deprecation( - code='table-migrated-to-uc', + code="table-migrated-to-uc-sql", message=f"Table {info.schema_name}.{info.table_name} is migrated to {dst.destination()} in Unity Catalog", # SQLGlot does not propagate tokens yet. See https://github.com/tobymao/sqlglot/issues/3159 start_line=0, diff --git a/src/databricks/labs/ucx/source_code/linters/pyspark.py b/src/databricks/labs/ucx/source_code/linters/pyspark.py index 209d34f634..d973197b8d 100644 --- a/src/databricks/labs/ucx/source_code/linters/pyspark.py +++ b/src/databricks/labs/ucx/source_code/linters/pyspark.py @@ -1,3 +1,4 @@ +import dataclasses import logging from abc import ABC, abstractmethod from collections.abc import Iterable, Iterator @@ -18,7 +19,11 @@ TableSqlCollector, DfsaSqlCollector, ) -from databricks.labs.ucx.source_code.linters.directfs import DIRECT_FS_ACCESS_PATTERNS, DirectFsAccessNode +from databricks.labs.ucx.source_code.linters.directfs import ( + DIRECT_FS_ACCESS_PATTERNS, + DirectFsAccessNode, + DirectFsAccessSqlLinter, +) from databricks.labs.ucx.source_code.python.python_infer import InferredValue from databricks.labs.ucx.source_code.linters.from_table import FromTableSqlLinter from databricks.labs.ucx.source_code.python.python_ast import ( @@ -155,7 +160,7 @@ def lint( if dst is None: continue yield Deprecation.from_node( - code='table-migrated-to-uc', + code='table-migrated-to-uc-python', message=f"Table {used_table[0]} is migrated to {dst.destination()} in Unity Catalog", # SQLGlot does not propagate tokens yet. See https://github.com/tobymao/sqlglot/issues/3159 node=node, @@ -387,6 +392,14 @@ def matchers(self) -> dict[str, _TableNameMatcher]: class SparkTableNamePyLinter(PythonLinter, Fixer, TablePyCollector): + """Linter for table name references in PySpark + + Examples: + 1. Find table name referenceS + ``` python + spark.read.table("hive_metastore.schema.table") + ``` + """ def __init__( self, @@ -400,9 +413,9 @@ def __init__( self._spark_matchers = SparkTableNameMatchers(False).matchers @property - def name(self) -> str: - # this is the same fixer, just in a different language context - return self._from_table.name + def diagnostic_code(self) -> str: + """The diagnostic codes that this fixer fixes.""" + return "table-migrated-to-uc-python" def lint_tree(self, tree: Tree) -> Iterable[Advice]: for node in tree.walk(): @@ -461,28 +474,32 @@ def _visit_call_nodes(cls, tree: Tree) -> Iterable[tuple[Call, NodeNG]]: yield call_node, query -class SparkSqlPyLinter(_SparkSqlAnalyzer, PythonLinter, Fixer): +class _SparkSqlPyLinter(_SparkSqlAnalyzer, PythonLinter, Fixer): + """Linter for SparkSQL used within PySpark.""" def __init__(self, sql_linter: SqlLinter, sql_fixer: Fixer | None): self._sql_linter = sql_linter self._sql_fixer = sql_fixer - @property - def name(self) -> str: - return "" if self._sql_fixer is None else self._sql_fixer.name - def lint_tree(self, tree: Tree) -> Iterable[Advice]: + inferable_values = [] for call_node, query in self._visit_call_nodes(tree): for value in InferredValue.infer_from_node(query): - if not value.is_inferred(): + if value.is_inferred(): + inferable_values.append((call_node, value)) + else: yield Advisory.from_node( code="cannot-autofix-table-reference", message=f"Can't migrate table_name argument in '{query.as_string()}' because its value cannot be computed", node=call_node, ) - continue - for advice in self._sql_linter.lint(value.as_string()): - yield advice.replace_from_node(call_node) + for call_node, value in inferable_values: + for advice in self._sql_linter.lint(value.as_string()): + # Replacing the fixer code to indicate that the SparkSQL fixer is wrapped with PySpark + code = advice.code + if self._sql_fixer and code == self._sql_fixer.diagnostic_code: + code = self.diagnostic_code + yield dataclasses.replace(advice.replace_from_node(call_node), code=code) def apply(self, code: str) -> str: if not self._sql_fixer: @@ -503,6 +520,45 @@ def apply(self, code: str) -> str: return tree.node.as_string() +class FromTableSqlPyLinter(_SparkSqlPyLinter): + """Lint tables and views in Spark SQL wrapped by PySpark code. + + Examples: + 1. Find table name reference in SparkSQL: + ``` python + spark.sql("SELECT * FROM hive_metastore.schema.table").collect() + ``` + """ + + def __init__(self, sql_linter: FromTableSqlLinter): + super().__init__(sql_linter, sql_linter) + + @property + def diagnostic_code(self) -> str: + """The diagnostic codes that this fixer fixes.""" + return "table-migrated-to-uc-python-sql" + + +class DirectFsAccessSqlPylinter(_SparkSqlPyLinter): + """Lint direct file system access in Spark SQL wrapped by PySpark code. + + Examples: + 1. Find table name reference in SparkSQL: + ``` python + spark.sql("SELECT * FROM parquet.`/dbfs/path/to/table`").collect() + ``` + """ + + def __init__(self, sql_linter: DirectFsAccessSqlLinter): + # TODO: Implement fixer for direct filesystem access (https://github.com/databrickslabs/ucx/issues/2021) + super().__init__(sql_linter, None) + + @property + def diagnostic_code(self) -> str: + """The diagnostic codes that this fixer fixes.""" + return "direct-filesystem-access-python-sql" + + class SparkSqlDfsaPyCollector(_SparkSqlAnalyzer, DfsaPyCollector): def __init__(self, sql_collector: DfsaSqlCollector): diff --git a/tests/integration/source_code/message_codes.py b/tests/integration/source_code/message_codes.py index 48e4e64a9f..3aecb38a27 100644 --- a/tests/integration/source_code/message_codes.py +++ b/tests/integration/source_code/message_codes.py @@ -1,3 +1,6 @@ +from collections.abc import Iterable +from pathlib import Path + import astroid # type: ignore[import-untyped] from databricks.labs.blueprint.wheels import ProductInfo @@ -6,30 +9,55 @@ def main(): - # pylint: disable=too-many-nested-blocks - codes = set() + """Walk the UCX code base to find all diagnostic linting codes.""" + codes = set[str]() product_info = ProductInfo.from_class(Advice) source_code = product_info.version_file().parent - for file in source_code.glob("**/*.py"): - maybe_tree = MaybeTree.from_source_code(file.read_text()) - if not maybe_tree.tree: - continue - tree = maybe_tree.tree - # recursively detect values of "code" kwarg in calls - for node in tree.walk(): - if not isinstance(node, astroid.Call): - continue - for keyword in node.keywords: - name = keyword.arg - if name != "code": - continue - if not isinstance(keyword.value, astroid.Const): - continue - problem_code = keyword.value.value - codes.add(problem_code) + for path in source_code.glob("**/*.py"): + codes.update(_find_diagnostic_codes(path)) for code in sorted(codes): print(code) +def _find_diagnostic_codes(file: Path) -> Iterable[str]: + """Walk the Python ast tree to find the diagnostic codes.""" + maybe_tree = MaybeTree.from_source_code(file.read_text()) + if not maybe_tree.tree: + return + for node in maybe_tree.tree.walk(): + diagnostic_code = None + if isinstance(node, astroid.ClassDef): + diagnostic_code = _find_diagnostic_code_in_class_def(node) + elif isinstance(node, astroid.Call): + diagnostic_code = _find_diagnostic_code_in_call(node) + if diagnostic_code: + yield diagnostic_code + + +def _find_diagnostic_code_in_call(node: astroid.Call) -> str | None: + """Find the diagnostic code in a call node.""" + for keyword in node.keywords: + if keyword.arg == "code" and isinstance(keyword.value, astroid.Const): + problem_code = keyword.value.value + return problem_code + return None + + +def _find_diagnostic_code_in_class_def(node: astroid.ClassDef) -> str | None: + """Find the diagnostic code in a class definition node.""" + diagnostic_code_methods = [] + for child_node in node.body: + if isinstance(child_node, astroid.FunctionDef) and child_node.name == "diagnostic_code": + diagnostic_code_methods.append(child_node) + if diagnostic_code_methods and diagnostic_code_methods[0].body: + problem_code = diagnostic_code_methods[0].body[0].value.value + return problem_code + return None + + if __name__ == "__main__": main() + + +def test_main(): + main() diff --git a/tests/unit/source_code/linters/test_context.py b/tests/unit/source_code/linters/test_context.py new file mode 100644 index 0000000000..3895af64df --- /dev/null +++ b/tests/unit/source_code/linters/test_context.py @@ -0,0 +1,60 @@ +import pytest +from databricks.sdk.service.workspace import Language + +from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex +from databricks.labs.ucx.source_code.base import Fixer, Linter +from databricks.labs.ucx.source_code.linters.context import LinterContext + + +def test_linter_context_linter_returns_correct_analyser_for_python() -> None: + context = LinterContext(TableMigrationIndex([])) + linter = context.linter(Language.PYTHON) + assert isinstance(linter, Linter) + + +def test_linter_context_linter_returns_correct_analyser_for_sql() -> None: + context = LinterContext(TableMigrationIndex([])) + linter = context.linter(Language.SQL) + assert isinstance(linter, Linter) + + +def test_linter_context_linter_raises_error_for_unsupported_language() -> None: + context = LinterContext(TableMigrationIndex([])) + with pytest.raises(ValueError): + context.linter(Language.R) + + +def test_linter_context_fixer_returns_none_fixer_for_python() -> None: + context = LinterContext(TableMigrationIndex([])) + fixer = context.fixer(Language.PYTHON, "diagnostic_code") + assert fixer is None + + +def test_linter_context_fixer_returns_correct_fixer_for_python() -> None: + context = LinterContext(TableMigrationIndex([])) + fixer = context.fixer(Language.PYTHON, "table-migrated-to-uc-python") + assert isinstance(fixer, Fixer) + + +def test_linter_context_fixer_returns_none_fixer_for_sql() -> None: + context = LinterContext(TableMigrationIndex([])) + fixer = context.fixer(Language.SQL, "diagnostic_code") + assert fixer is None + + +def test_linter_context_fixer_returns_correct_fixer_for_sql() -> None: + context = LinterContext(TableMigrationIndex([])) + fixer = context.fixer(Language.SQL, "table-migrated-to-uc-sql") + assert isinstance(fixer, Fixer) + + +def test_linter_context_fixer_returns_none_for_unsupported_language() -> None: + context = LinterContext(TableMigrationIndex([])) + fixer = context.fixer(Language.SCALA, "diagnostic_code") + assert fixer is None + + +def test_linter_context_linter_apply_fixes_no_operation() -> None: + context = LinterContext(TableMigrationIndex([])) + fixed_code = context.apply_fixes(Language.PYTHON, "print(1)") + assert fixed_code == "print(1)" diff --git a/tests/unit/source_code/linters/test_from_table.py b/tests/unit/source_code/linters/test_from_table.py index 64167448a9..4367ab9952 100644 --- a/tests/unit/source_code/linters/test_from_table.py +++ b/tests/unit/source_code/linters/test_from_table.py @@ -19,7 +19,7 @@ def test_migrated_tables_trigger_messages(migration_index) -> None: assert [ Deprecation( - code='table-migrated-to-uc', + code='table-migrated-to-uc-sql', message='Table old.things is migrated to brand.new.stuff in Unity Catalog', start_line=0, start_col=0, @@ -27,7 +27,7 @@ def test_migrated_tables_trigger_messages(migration_index) -> None: end_col=1024, ), Deprecation( - code='table-migrated-to-uc', + code='table-migrated-to-uc-sql', message='Table other.matters is migrated to some.certain.issues in Unity Catalog', start_line=0, start_col=0, diff --git a/tests/unit/source_code/linters/test_linters.py b/tests/unit/source_code/linters/test_linters.py deleted file mode 100644 index e72704c0da..0000000000 --- a/tests/unit/source_code/linters/test_linters.py +++ /dev/null @@ -1,56 +0,0 @@ -import pytest -from databricks.sdk.service.workspace import Language - -from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex -from databricks.labs.ucx.source_code.base import Fixer, Linter -from databricks.labs.ucx.source_code.linters.context import LinterContext - -index = TableMigrationIndex([]) - - -def test_linter_returns_correct_analyser_for_python() -> None: - languages = LinterContext(index) - linter = languages.linter(Language.PYTHON) - assert isinstance(linter, Linter) - - -def test_linter_returns_correct_analyser_for_sql() -> None: - languages = LinterContext(index) - linter = languages.linter(Language.SQL) - assert isinstance(linter, Linter) - - -def test_linter_raises_error_for_unsupported_language() -> None: - languages = LinterContext(index) - with pytest.raises(ValueError): - languages.linter(Language.R) - - -def test_fixer_returns_none_fixer_for_python() -> None: - languages = LinterContext(index) - fixer = languages.fixer(Language.PYTHON, "diagnostic_code") - assert fixer is None - - -def test_fixer_returns_correct_fixer_for_python() -> None: - languages = LinterContext(index) - fixer = languages.fixer(Language.PYTHON, "table-migrate") - assert isinstance(fixer, Fixer) - - -def test_fixer_returns_none_fixer_for_sql() -> None: - languages = LinterContext(index) - fixer = languages.fixer(Language.SQL, "diagnostic_code") - assert fixer is None - - -def test_fixer_returns_correct_fixer_for_sql() -> None: - languages = LinterContext(index) - fixer = languages.fixer(Language.SQL, "table-migrate") - assert isinstance(fixer, Fixer) or fixer is None - - -def test_fixer_returns_none_for_unsupported_language() -> None: - languages = LinterContext(index) - fixer = languages.fixer(Language.SCALA, "diagnostic_code") - assert fixer is None diff --git a/tests/unit/source_code/linters/test_pyspark.py b/tests/unit/source_code/linters/test_pyspark.py index 4c6f9b8b20..5863b49a87 100644 --- a/tests/unit/source_code/linters/test_pyspark.py +++ b/tests/unit/source_code/linters/test_pyspark.py @@ -1,17 +1,15 @@ -from typing import cast - import pytest from astroid import Call, Const, Expr # type: ignore from databricks.sdk.service.workspace import Language -from databricks.labs.ucx.source_code.base import Deprecation, CurrentSessionState, SqlLinter +from databricks.labs.ucx.source_code.base import Deprecation, CurrentSessionState from databricks.labs.ucx.source_code.linters.context import LinterContext -from databricks.labs.ucx.source_code.python.python_ast import MaybeTree, TreeHelper -from databricks.labs.ucx.source_code.linters.pyspark import SparkSqlPyLinter +from databricks.labs.ucx.source_code.linters.pyspark import FromTableSqlPyLinter from databricks.labs.ucx.source_code.linters.from_table import FromTableSqlLinter from databricks.labs.ucx.source_code.linters.pyspark import SparkCallMatcher, SparkTableNamePyLinter +from databricks.labs.ucx.source_code.python.python_ast import MaybeTree, TreeHelper def test_spark_no_sql(empty_index) -> None: @@ -33,32 +31,29 @@ def test_spark_dynamic_sql(empty_index) -> None: assert not list(sqf.lint(source)) -def test_spark_sql_no_match(empty_index) -> None: +def test_linter_context_python_linter_lints_table_pending_migration_with_empty_index(empty_index) -> None: + source_code = """ + for i in range(10): + result = spark.sql("SELECT * FROM old.things").collect() + print(len(result)) + """ context = LinterContext(empty_index, CurrentSessionState()) - sql_linter = context.linter(Language.SQL) - spark_linter = SparkSqlPyLinter(cast(SqlLinter, sql_linter), None) + linter = context.linter(Language.PYTHON) + advices = list(linter.lint(source_code)) + assert not advices - old_code = """ -for i in range(10): - result = spark.sql("SELECT * FROM old.things").collect() - print(len(result)) -""" - assert not list(spark_linter.lint(old_code)) - - -def test_spark_sql_match(migration_index) -> None: +def test_linter_context_python_linter_lints_migrated_table_and_dfsa(migration_index) -> None: + source_code = """ + spark.sql("SELECT * FROM old.things") + spark.sql("SELECT * FROM csv.`s3://bucket/path`") + """ context = LinterContext(migration_index, CurrentSessionState()) - sql_linter = context.linter(Language.SQL) - spark_linter = SparkSqlPyLinter(cast(SqlLinter, sql_linter), None) - python_code = """ -spark.sql("SELECT * FROM old.things") -spark.sql("SELECT * FROM csv.`s3://bucket/path`") -""" - advices = list(spark_linter.lint(python_code)) - assert [ + linter = context.linter(Language.PYTHON) + advices = list(linter.lint(source_code)) + assert advices == [ Deprecation( - code='table-migrated-to-uc', + code='table-migrated-to-uc-python-sql', message='Table old.things is migrated to brand.new.stuff in Unity Catalog', start_line=1, start_col=0, @@ -73,26 +68,25 @@ def test_spark_sql_match(migration_index) -> None: end_line=2, end_col=49, ), - ] == advices + ] -def test_spark_sql_match_named(migration_index) -> None: +def test_linter_context_python_linter_lints_sql_query_parameter(migration_index) -> None: + source_code = """ + for i in range(10): + result = spark.sql(args=[1], sqlQuery = "SELECT * FROM old.things").collect() + print(len(result)) + """ context = LinterContext(migration_index, CurrentSessionState()) - sql_linter = context.linter(Language.SQL) - spark_linter = SparkSqlPyLinter(cast(SqlLinter, sql_linter), None) - old_code = """ -spark.read.csv("s3://bucket/path") -for i in range(10): - result = spark.sql(args=[1], sqlQuery = "SELECT * FROM old.things").collect() - print(len(result)) -""" - assert list(spark_linter.lint(old_code)) == [ + linter = context.linter(Language.PYTHON) + advices = list(linter.lint(source_code)) + assert advices == [ Deprecation( - code='table-migrated-to-uc', + code='table-migrated-to-uc-python-sql', message='Table old.things is migrated to brand.new.stuff in Unity Catalog', - start_line=3, + start_line=2, start_col=13, - end_line=3, + end_line=2, end_col=71, ), ] @@ -110,24 +104,24 @@ def test_spark_table_return_value_apply(migration_index) -> None: assert fixed_code.rstrip() == old_code.rstrip() -def test_spark_sql_tablename_fix(migration_index) -> None: - session_state = CurrentSessionState() - ftf = FromTableSqlLinter(migration_index, session_state) - spark_linter = SparkSqlPyLinter(ftf, ftf) - - old_code = """spark.read.csv("s3://bucket/path") +def test_from_table_sql_py_linter_fixes_migrated_table(migration_index) -> None: + source_code = """spark.read.csv("s3://bucket/path") for i in range(10): result = spark.sql("SELECT * FROM old.things").collect() print(len(result)) + """ - fixed_code = spark_linter.apply(old_code) - assert ( - fixed_code.rstrip() - == """spark.read.csv('s3://bucket/path') + expected_code = """spark.read.csv('s3://bucket/path') for i in range(10): result = spark.sql('SELECT * FROM brand.new.stuff').collect() - print(len(result))""" - ) + print(len(result)) + +""" + session_state = CurrentSessionState() + from_table = FromTableSqlLinter(migration_index, session_state) + spark_linter = FromTableSqlPyLinter(from_table) + fixed_code = spark_linter.apply(source_code) + assert fixed_code == expected_code @pytest.mark.parametrize( diff --git a/tests/unit/source_code/samples/functional/file-access/direct-fs2.py b/tests/unit/source_code/samples/functional/file-access/direct-fs2.py index 0be4ec4d7f..cd820c1895 100644 --- a/tests/unit/source_code/samples/functional/file-access/direct-fs2.py +++ b/tests/unit/source_code/samples/functional/file-access/direct-fs2.py @@ -1,14 +1,14 @@ # ucx[direct-filesystem-access:+1:0:+1:34] The use of direct filesystem references is deprecated: s3://bucket/path spark.read.csv("s3://bucket/path") for i in range(10): - # ucx[table-migrated-to-uc:+1:13:+1:50] Table old.things is migrated to brand.new.stuff in Unity Catalog + # ucx[table-migrated-to-uc-python-sql:+1:13:+1:50] Table old.things is migrated to brand.new.stuff in Unity Catalog result = spark.sql("SELECT * FROM old.things").collect() print(len(result)) index = 10 spark.sql(f"SELECT * FROM table_{index}").collect() table_name = f"table_{index}" spark.sql(f"SELECT * FROM {table_name}").collect() -# ucx[table-migrated-to-uc:+3:4:+3:20] Table old.things is migrated to brand.new.stuff in Unity Catalog +# ucx[table-migrated-to-uc-python-sql:+3:4:+3:20] Table old.things is migrated to brand.new.stuff in Unity Catalog table_name = f"table_{index}" for query in ["SELECT * FROM old.things", f"SELECT * FROM {table_name}"]: spark.sql(query).collect() diff --git a/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-cache-table.py b/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-cache-table.py index 1d8ff8c034..711e52c83b 100644 --- a/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-cache-table.py +++ b/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-cache-table.py @@ -1,5 +1,5 @@ ## Check a literal reference to a known table that is migrated. -# ucx[table-migrated-to-uc:+1:0:+1:38] Table old.things is migrated to brand.new.stuff in Unity Catalog +# ucx[table-migrated-to-uc-python:+1:0:+1:38] Table old.things is migrated to brand.new.stuff in Unity Catalog spark.catalog.cacheTable("old.things") ## Check a literal reference to an unknown table (that is not migrated); we expect no warning. @@ -9,7 +9,7 @@ spark.catalog.cacheTable("old.things", None, "extra-argument") ## Check a call with an out-of-position named argument referencing a table known to be migrated. -# ucx[table-migrated-to-uc:+1:0:+1:67] Table old.things is migrated to brand.new.stuff in Unity Catalog +# ucx[table-migrated-to-uc-python:+1:0:+1:67] Table old.things is migrated to brand.new.stuff in Unity Catalog spark.catalog.cacheTable(storageLevel=None, tableName="old.things") ## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table. diff --git a/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-create-external-table.py b/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-create-external-table.py index d6f2822b3d..4e9cd4ee43 100644 --- a/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-create-external-table.py +++ b/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-create-external-table.py @@ -3,26 +3,26 @@ for i in range(10): ## Check a literal reference to a known table that is migrated. - # ucx[table-migrated-to-uc:+3:9:+3:56] Table old.things is migrated to brand.new.stuff in Unity Catalog + # ucx[table-migrated-to-uc-python:+3:9:+3:56] Table old.things is migrated to brand.new.stuff in Unity Catalog # TODO: Implement missing migration warning (on the source argument): - # #ucx[table-migrated-to-uc:+1:9:+1:0] The default format changed in Databricks Runtime 8.0, from Parquet to Delta + # #ucx[table-migrated-to-uc-python:+1:9:+1:0] The default format changed in Databricks Runtime 8.0, from Parquet to Delta df = spark.catalog.createExternalTable("old.things") do_stuff_with(df) ## Check a literal reference to an unknown table (that is not migrated); we expect no warning. # TODO: Implement missing migration warning (on the source argument): - # #ucx[table-migrated-to-uc:+1:9:+1:0] The default format changed in Databricks Runtime 8.0, from Parquet to Delta + # #ucx[table-migrated-to-uc-python:+1:9:+1:0] The default format changed in Databricks Runtime 8.0, from Parquet to Delta df = spark.catalog.createExternalTable("table.we.know.nothing.about") do_stuff_with(df) ## Check that a call with too many positional arguments is ignored as (presumably) something else; we expect no warning. # FIXME: This is a false positive due to an error in the matching specification; only 4 positional args are allowed. - # ucx[table-migrated-to-uc:+1:9:+1:92] Table old.things is migrated to brand.new.stuff in Unity Catalog + # ucx[table-migrated-to-uc-python:+1:9:+1:92] Table old.things is migrated to brand.new.stuff in Unity Catalog df = spark.catalog.createExternalTable("old.things", None, None, None, "extra-argument") do_stuff_with(df) ## Check a call with an out-of-position named argument referencing a table known to be migrated. - # ucx[table-migrated-to-uc:+1:9:+1:94] Table old.things is migrated to brand.new.stuff in Unity Catalog + # ucx[table-migrated-to-uc-python:+1:9:+1:94] Table old.things is migrated to brand.new.stuff in Unity Catalog df = spark.catalog.createExternalTable(path="foo", tableName="old.things", source="delta") do_stuff_with(df) diff --git a/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-create-table.py b/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-create-table.py index d742c636f1..5b04955155 100644 --- a/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-create-table.py +++ b/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-create-table.py @@ -3,26 +3,26 @@ for i in range(10): ## Check a literal reference to a known table that is migrated. - # ucx[table-migrated-to-uc:+3:9:+3:48] Table old.things is migrated to brand.new.stuff in Unity Catalog + # ucx[table-migrated-to-uc-python:+3:9:+3:48] Table old.things is migrated to brand.new.stuff in Unity Catalog # TODO: Implement missing migration warning (on the source argument): - # #ucx[table-migrated-to-uc:+1:0:+1:0] The default format changed in Databricks Runtime 8.0, from Parquet to Delta + # #ucx[table-migrated-to-uc-python:+1:0:+1:0] The default format changed in Databricks Runtime 8.0, from Parquet to Delta df = spark.catalog.createTable("old.things") do_stuff_with(df) ## Check a literal reference to an unknown table (that is not migrated); we expect no warning. # TODO: Implement missing migration warning (on the source argument): - # #ucx[table-migrated-to-uc:+1:0:+1:0] The default format changed in Databricks Runtime 8.0, from Parquet to Delta + # #ucx[table-migrated-to-uc-python:+1:0:+1:0] The default format changed in Databricks Runtime 8.0, from Parquet to Delta df = spark.catalog.createTable("table.we.know.nothing.about") do_stuff_with(df) ## Check that a call with too many positional arguments is ignored as (presumably) something else; we expect no warning. # FIXME: This is a false positive due to an error in the matching specification; only 5 positional args are allowed. - # ucx[table-migrated-to-uc:+1:9:+1:90] Table old.things is migrated to brand.new.stuff in Unity Catalog + # ucx[table-migrated-to-uc-python:+1:9:+1:90] Table old.things is migrated to brand.new.stuff in Unity Catalog df = spark.catalog.createTable("old.things", None, None, None, None, "extra-argument") do_stuff_with(df) ## Check a call with an out-of-position named argument referencing a table known to be migrated. - # ucx[table-migrated-to-uc:+1:9:+1:86] Table old.things is migrated to brand.new.stuff in Unity Catalog + # ucx[table-migrated-to-uc-python:+1:9:+1:86] Table old.things is migrated to brand.new.stuff in Unity Catalog df = spark.catalog.createTable(path="foo", tableName="old.things", source="delta") do_stuff_with(df) diff --git a/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-get-table.py b/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-get-table.py index 1d9b1763be..1919741e7e 100644 --- a/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-get-table.py +++ b/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-get-table.py @@ -3,7 +3,7 @@ for i in range(10): ## Check a literal reference to a known table that is migrated. - # ucx[table-migrated-to-uc:+1:12:+1:48] Table old.things is migrated to brand.new.stuff in Unity Catalog + # ucx[table-migrated-to-uc-python:+1:12:+1:48] Table old.things is migrated to brand.new.stuff in Unity Catalog table = spark.catalog.getTable("old.things") do_stuff_with(table) diff --git a/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-is-cached.py b/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-is-cached.py index b59c7306fc..d4265c4074 100644 --- a/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-is-cached.py +++ b/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-is-cached.py @@ -3,7 +3,7 @@ for i in range(10): ## Check a literal reference to a known table that is migrated. - # ucx[table-migrated-to-uc:+1:24:+1:60] Table old.things is migrated to brand.new.stuff in Unity Catalog + # ucx[table-migrated-to-uc-python:+1:24:+1:60] Table old.things is migrated to brand.new.stuff in Unity Catalog cached_previously = spark.catalog.isCached("old.things") ## Check a literal reference to an unknown table (that is not migrated); we expect no warning. diff --git a/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-list-columns.py b/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-list-columns.py index c5287f0845..49245aeadb 100644 --- a/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-list-columns.py +++ b/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-list-columns.py @@ -3,10 +3,10 @@ for i in range(10): ## Check a literal reference to a known table that is migrated. - # ucx[table-migrated-to-uc:+1:14:+1:53] Table old.things is migrated to brand.new.stuff in Unity Catalog + # ucx[table-migrated-to-uc-python:+1:14:+1:53] Table old.things is migrated to brand.new.stuff in Unity Catalog columns = spark.catalog.listColumns("old.things") # TODO: Fix missing migration warning: - # #ucx[table-migrated-to-uc:+1:1:+1:0] Table old.things is migrated to brand.new.stuff in Unity Catalog + # #ucx[table-migrated-to-uc-python:+1:1:+1:0] Table old.things is migrated to brand.new.stuff in Unity Catalog columns = spark.catalog.listColumns("things", "old") ## Check a literal reference to an unknown table (that is not migrated); we expect no warning. @@ -19,7 +19,7 @@ ## Check a call with an out-of-position named argument referencing a table known to be migrated. # TODO: Fix missing migration warning: - # #ucx[table-migrated-to-uc:+1:1:+1:0] Table old.things is migrated to brand.new.stuff in Unity Catalog + # #ucx[table-migrated-to-uc-python:+1:1:+1:0] Table old.things is migrated to brand.new.stuff in Unity Catalog columns = spark.catalog.listColumns(dbName="old", name="things") ## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table. diff --git a/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-recover-partitions.py b/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-recover-partitions.py index ca60763674..50e9ac4c5c 100644 --- a/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-recover-partitions.py +++ b/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-recover-partitions.py @@ -3,7 +3,7 @@ for i in range(10): ## Check a literal reference to a known table that is migrated. - # ucx[table-migrated-to-uc:+1:4:+1:49] Table old.things is migrated to brand.new.stuff in Unity Catalog + # ucx[table-migrated-to-uc-python:+1:4:+1:49] Table old.things is migrated to brand.new.stuff in Unity Catalog spark.catalog.recoverPartitions("old.things") ## Check a literal reference to an unknown table (that is not migrated); we expect no warning. diff --git a/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-refresh-table.py b/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-refresh-table.py index 8d48caf6d8..43edcc62ff 100644 --- a/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-refresh-table.py +++ b/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-refresh-table.py @@ -3,7 +3,7 @@ for i in range(10): ## Check a literal reference to a known table that is migrated. - # ucx[table-migrated-to-uc:+1:4:+1:44] Table old.things is migrated to brand.new.stuff in Unity Catalog + # ucx[table-migrated-to-uc-python:+1:4:+1:44] Table old.things is migrated to brand.new.stuff in Unity Catalog spark.catalog.refreshTable("old.things") ## Check a literal reference to an unknown table (that is not migrated); we expect no warning. diff --git a/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-table-exists.py b/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-table-exists.py index e39f1e9553..4ecc21dbb3 100644 --- a/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-table-exists.py +++ b/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-table-exists.py @@ -3,11 +3,11 @@ for i in range(10): ## Check a literal reference to a known table that is migrated. - # ucx[table-migrated-to-uc:+1:7:+1:46] Table old.things is migrated to brand.new.stuff in Unity Catalog + # ucx[table-migrated-to-uc-python:+1:7:+1:46] Table old.things is migrated to brand.new.stuff in Unity Catalog if spark.catalog.tableExists("old.things"): pass # TODO: Fix missing migration warning: - # #ucx[table-migrated-to-uc:+1:0:+1:0] Table old.things is migrated to brand.new.stuff in Unity Catalog + # #ucx[table-migrated-to-uc-python:+1:0:+1:0] Table old.things is migrated to brand.new.stuff in Unity Catalog if spark.catalog.tableExists("things", "old"): pass @@ -21,7 +21,7 @@ ## Check a call with an out-of-position named argument referencing a table known to be migrated. # TODO: Fix missing migration warning - # # ucx[table-migrated-to-uc:+1:0:+1:0] Table old.things is migrated to brand.new.stuff in Unity Catalog + # # ucx[table-migrated-to-uc-python:+1:0:+1:0] Table old.things is migrated to brand.new.stuff in Unity Catalog if spark.catalog.tableExists(dbName="old", name="things"): pass diff --git a/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-uncache-table.py b/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-uncache-table.py index c20db72f95..275633e75c 100644 --- a/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-uncache-table.py +++ b/tests/unit/source_code/samples/functional/pyspark/catalog/spark-catalog-uncache-table.py @@ -3,7 +3,7 @@ for i in range(10): ## Check a literal reference to a known table that is migrated. - # ucx[table-migrated-to-uc:+1:4:+1:44] Table old.things is migrated to brand.new.stuff in Unity Catalog + # ucx[table-migrated-to-uc-python:+1:4:+1:44] Table old.things is migrated to brand.new.stuff in Unity Catalog spark.catalog.uncacheTable("old.things") ## Check a literal reference to an unknown table (that is not migrated); we expect no warning. diff --git a/tests/unit/source_code/samples/functional/pyspark/dataframe-write-insert-into.py b/tests/unit/source_code/samples/functional/pyspark/dataframe-write-insert-into.py index 53079026e3..2f67069355 100644 --- a/tests/unit/source_code/samples/functional/pyspark/dataframe-write-insert-into.py +++ b/tests/unit/source_code/samples/functional/pyspark/dataframe-write-insert-into.py @@ -3,7 +3,7 @@ for i in range(10): ## Check a literal reference to a known table that is migrated. - # ucx[table-migrated-to-uc:+1:4:+1:37] Table old.things is migrated to brand.new.stuff in Unity Catalog + # ucx[table-migrated-to-uc-python:+1:4:+1:37] Table old.things is migrated to brand.new.stuff in Unity Catalog df.write.insertInto("old.things") ## Check a literal reference to an unknown table (that is not migrated); we expect no warning. @@ -13,7 +13,7 @@ df.write.insertInto("old.things", None, "extra-argument") ## Check a call with an out-of-position named argument referencing a table known to be migrated. - # ucx[table-migrated-to-uc:+1:4:+1:63] Table old.things is migrated to brand.new.stuff in Unity Catalog + # ucx[table-migrated-to-uc-python:+1:4:+1:63] Table old.things is migrated to brand.new.stuff in Unity Catalog df.write.insertInto(overwrite=None, tableName="old.things") ## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table. diff --git a/tests/unit/source_code/samples/functional/pyspark/dataframe-write-save-as-table.py b/tests/unit/source_code/samples/functional/pyspark/dataframe-write-save-as-table.py index ebcef10c65..667be4b537 100644 --- a/tests/unit/source_code/samples/functional/pyspark/dataframe-write-save-as-table.py +++ b/tests/unit/source_code/samples/functional/pyspark/dataframe-write-save-as-table.py @@ -3,7 +3,7 @@ for i in range(10): ## Check a literal reference to a known table that is migrated. - # ucx[table-migrated-to-uc:+1:4:+1:54] Table old.things is migrated to brand.new.stuff in Unity Catalog + # ucx[table-migrated-to-uc-python:+1:4:+1:54] Table old.things is migrated to brand.new.stuff in Unity Catalog df.write.format("delta").saveAsTable("old.things") ## Check a literal reference to an unknown table (that is not migrated); we expect no warning. @@ -13,7 +13,7 @@ df.write.format("delta").saveAsTable("old.things", None, None, None, "extra-argument") ## Check a call with an out-of-position named argument referencing a table known to be migrated. - # ucx[table-migrated-to-uc:+1:4:+1:57] Table old.things is migrated to brand.new.stuff in Unity Catalog + # ucx[table-migrated-to-uc-python:+1:4:+1:57] Table old.things is migrated to brand.new.stuff in Unity Catalog df.write.saveAsTable(format="xyz", name="old.things") ## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table. diff --git a/tests/unit/source_code/samples/functional/pyspark/spark-table.py b/tests/unit/source_code/samples/functional/pyspark/spark-table.py index 7cfa27a76f..a6eb0bc231 100644 --- a/tests/unit/source_code/samples/functional/pyspark/spark-table.py +++ b/tests/unit/source_code/samples/functional/pyspark/spark-table.py @@ -3,7 +3,7 @@ for i in range(10): ## Check a literal reference to a known table that is migrated. - # ucx[table-migrated-to-uc:+1:9:+1:34] Table old.things is migrated to brand.new.stuff in Unity Catalog + # ucx[table-migrated-to-uc-python:+1:9:+1:34] Table old.things is migrated to brand.new.stuff in Unity Catalog df = spark.table("old.things") do_stuff_with(df) diff --git a/tests/unit/source_code/samples/functional/table-migration/table-migration-notebook.py b/tests/unit/source_code/samples/functional/table-migration/table-migration-notebook.py index 8c1255be6d..fcb48a25af 100644 --- a/tests/unit/source_code/samples/functional/table-migration/table-migration-notebook.py +++ b/tests/unit/source_code/samples/functional/table-migration/table-migration-notebook.py @@ -4,7 +4,7 @@ # COMMAND ---------- -# ucx[table-migrated-to-uc:+1:8:+1:29] Table people is migrated to cata4.nondefault.newpeople in Unity Catalog +# ucx[table-migrated-to-uc-python:+1:8:+1:29] Table people is migrated to cata4.nondefault.newpeople in Unity Catalog display(spark.table('people')) # we are looking at default.people table # COMMAND ---------- @@ -13,7 +13,7 @@ # COMMAND ---------- -# ucx[table-migrated-to-uc:+1:8:+1:30] Table persons is migrated to cata4.newsomething.persons in Unity Catalog +# ucx[table-migrated-to-uc-python:+1:8:+1:30] Table persons is migrated to cata4.newsomething.persons in Unity Catalog display(spark.table('persons')) # we are looking at something.persons table # COMMAND ---------- @@ -22,11 +22,11 @@ # COMMAND ---------- -# ucx[table-migrated-to-uc:+1:8:+1:30] Table kittens is migrated to cata4.felines.toms in Unity Catalog +# ucx[table-migrated-to-uc-python:+1:8:+1:30] Table kittens is migrated to cata4.felines.toms in Unity Catalog display(spark.table('kittens')) # we are looking at whatever.kittens table # COMMAND ---------- -# ucx[table-migrated-to-uc:+2:0:+2:38] Table numbers is migrated to cata4.counting.numbers in Unity Catalog +# ucx[table-migrated-to-uc-python:+2:0:+2:38] Table numbers is migrated to cata4.counting.numbers in Unity Catalog # ucx[default-format-changed-in-dbr8:+1:0:+1:38] The default format changed in Databricks Runtime 8.0, from Parquet to Delta spark.range(10).saveAsTable('numbers') # we are saving to whatever.numbers table. diff --git a/tests/unit/source_code/samples/functional/table-migration/table-migration-notebook.sql b/tests/unit/source_code/samples/functional/table-migration/table-migration-notebook.sql index d2c6acb4e8..971324d364 100644 --- a/tests/unit/source_code/samples/functional/table-migration/table-migration-notebook.sql +++ b/tests/unit/source_code/samples/functional/table-migration/table-migration-notebook.sql @@ -8,7 +8,7 @@ USE different_db -- COMMAND ---------- --- ucx[table-migrated-to-uc:+0:0:+0:1024] Table different_db.testtable is migrated to cata2.newspace.table in Unity Catalog +-- ucx[table-migrated-to-uc-sql:+0:0:+0:1024] Table different_db.testtable is migrated to cata2.newspace.table in Unity Catalog -- DBTITLE 1,A SQL cell that references tables SELECT * FROM testtable LIMIT 10 @@ -19,13 +19,13 @@ SELECT * FROM testtable LIMIT 10 USE old -- COMMAND ---------- --- ucx[table-migrated-to-uc:+0:0:+0:1024] Table old.testtable is migrated to cata3.newspace.table in Unity Catalog +-- ucx[table-migrated-to-uc-sql:+0:0:+0:1024] Table old.testtable is migrated to cata3.newspace.table in Unity Catalog -- DBTITLE 1,A SQL cell that references tables SELECT * FROM testtable LIMIT 10 -- COMMAND ---------- --- ucx[table-migrated-to-uc:+0:0:+0:1024] Table old.stuff is migrated to brand.new.things in Unity Catalog +-- ucx[table-migrated-to-uc-sql:+0:0:+0:1024] Table old.stuff is migrated to brand.new.things in Unity Catalog -- DBTITLE 1,A SQL cell that references tables SELECT * FROM stuff LIMIT 10 @@ -38,13 +38,13 @@ SELECT * FROM stuff LIMIT 10 spark.sql("use different_db") -- COMMAND ---------- --- ucx[table-migrated-to-uc:+0:0:+0:1024] Table different_db.testtable is migrated to cata2.newspace.table in Unity Catalog +-- ucx[table-migrated-to-uc-sql:+0:0:+0:1024] Table different_db.testtable is migrated to cata2.newspace.table in Unity Catalog -- DBTITLE 1,A SQL cell that references DBFS SELECT * FROM testtable LIMIT 10 -- COMMAND ---------- --- ucx[table-migrated-to-uc:+0:0:+0:1024] Table old.testtable is migrated to cata3.newspace.table in Unity Catalog +-- ucx[table-migrated-to-uc-sql:+0:0:+0:1024] Table old.testtable is migrated to cata3.newspace.table in Unity Catalog -- DBTITLE 1,A SQL cell that references DBFS SELECT * FROM old.testtable LIMIT 10 @@ -55,7 +55,7 @@ SELECT * FROM old.testtable LIMIT 10 USE default -- COMMAND ---------- --- ucx[table-migrated-to-uc:+0:0:+0:1024] Table default.testtable is migrated to cata.nondefault.table in Unity Catalog +-- ucx[table-migrated-to-uc-sql:+0:0:+0:1024] Table default.testtable is migrated to cata.nondefault.table in Unity Catalog -- DBTITLE 1,A SQL cell that references DBFS SELECT * FROM testtable LIMIT 10 diff --git a/tests/unit/source_code/test_base.py b/tests/unit/source_code/test_base.py index 9dc2450c42..9b993b2da8 100644 --- a/tests/unit/source_code/test_base.py +++ b/tests/unit/source_code/test_base.py @@ -10,6 +10,7 @@ Deprecation, LocatedAdvice, Failure, + Fixer, UsedTable, ) @@ -70,3 +71,33 @@ def test_located_advice_message() -> None: located_advice = LocatedAdvice(advice, Path("test.py")) assert str(located_advice) == "test.py:1:0: [code] message" + + +def test_fixer_is_supported_for_diagnostic_test_code() -> None: + class TestFixer(Fixer): + @property + def diagnostic_code(self) -> str: + return "test" + + def apply(self, code) -> str: + return "not-implemented" + + fixer = TestFixer() + + assert fixer.is_supported("test") + assert not fixer.is_supported("other-code") + + +def test_fixer_is_never_supported_for_diagnostic_empty_code() -> None: + class TestFixer(Fixer): + @property + def diagnostic_code(self) -> str: + return "" + + def apply(self, code) -> str: + return "not-implemented" + + fixer = TestFixer() + + assert not fixer.is_supported("test") + assert not fixer.is_supported("other-code") diff --git a/tests/unit/source_code/test_lsp_plugin.py b/tests/unit/source_code/test_lsp_plugin.py index 21379c3fc7..a3c89bc5fd 100644 --- a/tests/unit/source_code/test_lsp_plugin.py +++ b/tests/unit/source_code/test_lsp_plugin.py @@ -176,7 +176,7 @@ def test_with_migration_index(workspace, config) -> None: assert diagnostics == [ { 'range': {'end': {'character': 67, 'line': 0}, 'start': {'character': 9, 'line': 0}}, - 'code': 'table-migrated-to-uc', + 'code': 'table-migrated-to-uc-python-sql', 'source': 'databricks.labs.ucx', 'message': 'Table old.things is migrated to brand.new.stuff in Unity Catalog', 'severity': 2, diff --git a/tests/unit/source_code/test_queries.py b/tests/unit/source_code/test_queries.py index e09938db8f..ed9294f87f 100644 --- a/tests/unit/source_code/test_queries.py +++ b/tests/unit/source_code/test_queries.py @@ -84,7 +84,7 @@ def test_lints_queries(migration_index, mock_backend) -> None: query_id="qid", query_parent="qparent", query_name="qname", - code="table-migrated-to-uc", + code="table-migrated-to-uc-sql", message="Table old.things is migrated to brand.new.stuff in Unity Catalog", ) ]