From 4df120e40e82837b10b3c83f6c6acf763bf325e1 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Fri, 7 Jun 2024 17:22:05 -0400 Subject: [PATCH] Fix snapshot config to work in yaml files (#10275) --- .../unreleased/Fixes-20240607-134648.yaml | 6 ++ core/dbt/artifacts/resources/v1/snapshot.py | 24 +++---- core/dbt/parser/manifest.py | 11 +++ tests/functional/configs/test_configs.py | 3 +- tests/functional/simple_snapshot/fixtures.py | 2 - .../test_missing_strategy_snapshot.py | 11 +-- .../simple_snapshot/test_snapshot_config.py | 67 +++++++++++++++++++ .../unit/contracts/graph/test_nodes_parsed.py | 21 +++++- 8 files changed, 120 insertions(+), 25 deletions(-) create mode 100644 .changes/unreleased/Fixes-20240607-134648.yaml create mode 100644 tests/functional/simple_snapshot/test_snapshot_config.py diff --git a/.changes/unreleased/Fixes-20240607-134648.yaml b/.changes/unreleased/Fixes-20240607-134648.yaml new file mode 100644 index 00000000000..f40b98678f9 --- /dev/null +++ b/.changes/unreleased/Fixes-20240607-134648.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Fix snapshot config to work in yaml files +time: 2024-06-07T13:46:48.383215-04:00 +custom: + Author: gshank + Issue: "4000" diff --git a/core/dbt/artifacts/resources/v1/snapshot.py b/core/dbt/artifacts/resources/v1/snapshot.py index 6164d953184..dee235a19df 100644 --- a/core/dbt/artifacts/resources/v1/snapshot.py +++ b/core/dbt/artifacts/resources/v1/snapshot.py @@ -18,39 +18,35 @@ class SnapshotConfig(NodeConfig): # Not using Optional because of serialization issues with a Union of str and List[str] check_cols: Union[str, List[str], None] = None - @classmethod - def validate(cls, data): - super().validate(data) - # Note: currently you can't just set these keys in schema.yml because this validation - # will fail when parsing the snapshot node. - if not data.get("strategy") or not data.get("unique_key") or not data.get("target_schema"): + def final_validate(self): + if not self.strategy or not self.unique_key or not self.target_schema: raise ValidationError( "Snapshots must be configured with a 'strategy', 'unique_key', " "and 'target_schema'." ) - if data.get("strategy") == "check": - if not data.get("check_cols"): + if self.strategy == "check": + if not self.check_cols: raise ValidationError( "A snapshot configured with the check strategy must " "specify a check_cols configuration." ) - if isinstance(data["check_cols"], str) and data["check_cols"] != "all": + if isinstance(self.check_cols, str) and self.check_cols != "all": raise ValidationError( - f"Invalid value for 'check_cols': {data['check_cols']}. " + f"Invalid value for 'check_cols': {self.check_cols}. " "Expected 'all' or a list of strings." ) - elif data.get("strategy") == "timestamp": - if not data.get("updated_at"): + elif self.strategy == "timestamp": + if not self.updated_at: raise ValidationError( "A snapshot configured with the timestamp strategy " "must specify an updated_at configuration." ) - if data.get("check_cols"): + if self.check_cols: raise ValidationError("A 'timestamp' snapshot should not have 'check_cols'") # If the strategy is not 'check' or 'timestamp' it's a custom strategy, # formerly supported with GenericSnapshotConfig - if data.get("materialized") and data.get("materialized") != "snapshot": + if self.materialized and self.materialized != "snapshot": raise ValidationError("A snapshot must have a materialized value of 'snapshot'") # Called by "calculate_node_config_dict" in ContextConfigGenerator diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index 1ac2ca6acf2..74e8f226ab2 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -467,6 +467,7 @@ def load(self) -> Manifest: self.process_model_inferred_primary_keys() self.check_valid_group_config() self.check_valid_access_property() + self.check_valid_snapshot_config() semantic_manifest = SemanticManifest(self.manifest) if not semantic_manifest.validate(): @@ -1345,6 +1346,16 @@ def check_valid_access_property(self): materialization=node.get_materialization(), ) + def check_valid_snapshot_config(self): + # Snapshot config can be set in either SQL files or yaml files, + # so we need to validate afterward. + for node in self.manifest.nodes.values(): + if node.resource_type != NodeType.Snapshot: + continue + if node.created_at < self.started_at: + continue + node.config.final_validate() + def write_perf_info(self, target_path: str): path = os.path.join(target_path, PERF_INFO_FILE_NAME) write_file(path, json.dumps(self._perf_info, cls=dbt.utils.JSONEncoder, indent=4)) diff --git a/tests/functional/configs/test_configs.py b/tests/functional/configs/test_configs.py index 8d520f1ff80..2bbfac85c5c 100644 --- a/tests/functional/configs/test_configs.py +++ b/tests/functional/configs/test_configs.py @@ -2,7 +2,6 @@ import pytest -from dbt.exceptions import ParsingError from dbt.tests.util import ( check_relations_equal, run_dbt, @@ -120,7 +119,7 @@ def test_snapshots_materialization_proj_config(self, project): snapshots_dir = os.path.join(project.project_root, "snapshots") write_file(simple_snapshot, snapshots_dir, "mysnapshot.sql") - with pytest.raises(ParsingError): + with pytest.raises(ValidationError): run_dbt() diff --git a/tests/functional/simple_snapshot/fixtures.py b/tests/functional/simple_snapshot/fixtures.py index 04e4905d4cb..a94f0c04875 100644 --- a/tests/functional/simple_snapshot/fixtures.py +++ b/tests/functional/simple_snapshot/fixtures.py @@ -86,7 +86,6 @@ models__schema_yml = """ -version: 2 snapshots: - name: snapshot_actual data_tests: @@ -97,7 +96,6 @@ """ models__schema_with_target_schema_yml = """ -version: 2 snapshots: - name: snapshot_actual data_tests: diff --git a/tests/functional/simple_snapshot/test_missing_strategy_snapshot.py b/tests/functional/simple_snapshot/test_missing_strategy_snapshot.py index 407cd15439f..46543da8f4b 100644 --- a/tests/functional/simple_snapshot/test_missing_strategy_snapshot.py +++ b/tests/functional/simple_snapshot/test_missing_strategy_snapshot.py @@ -1,7 +1,7 @@ import pytest -from dbt.exceptions import ParsingError from dbt.tests.util import run_dbt +from dbt_common.dataclass_schema import ValidationError from tests.functional.simple_snapshot.fixtures import ( macros__test_no_overlaps_sql, models__ref_snapshot_sql, @@ -10,7 +10,7 @@ snapshots_invalid__snapshot_sql = """ {# make sure to never name this anything with `target_schema` in the name, or the test will be invalid! #} -{% snapshot missing_field_target_underscore_schema %} +{% snapshot snapshot_actual %} {# missing the mandatory target_schema parameter #} {{ config( @@ -44,7 +44,10 @@ def macros(): def test_missing_strategy(project): - with pytest.raises(ParsingError) as exc: + with pytest.raises(ValidationError) as exc: run_dbt(["compile"], expect_pass=False) - assert "Snapshots must be configured with a 'strategy'" in str(exc.value) + assert ( + "Snapshots must be configured with a 'strategy', 'unique_key', and 'target_schema'" + in str(exc.value) + ) diff --git a/tests/functional/simple_snapshot/test_snapshot_config.py b/tests/functional/simple_snapshot/test_snapshot_config.py new file mode 100644 index 00000000000..5124cf9c38b --- /dev/null +++ b/tests/functional/simple_snapshot/test_snapshot_config.py @@ -0,0 +1,67 @@ +import pytest + +from dbt.tests.util import run_dbt, write_file + +orders_sql = """ +select 1 as id, 101 as user_id, 'pending' as status +""" + +snapshot_sql = """ +{% snapshot orders_snapshot %} + +{{ + config( + target_schema=schema, + strategy='check', + unique_key='id', + check_cols=['status'], + ) +}} + +select * from {{ ref('orders') }} + +{% endsnapshot %} +""" + +snapshot_no_config_sql = """ +{% snapshot orders_snapshot %} + +select * from {{ ref('orders') }} + +{% endsnapshot %} +""" + +snapshot_schema_yml = """ +snapshots: + - name: orders_snapshot + config: + target_schema: test + strategy: check + unique_key: id + check_cols: ['status'] +""" + + +class TestSnapshotConfig: + @pytest.fixture(scope="class") + def models(self): + return {"orders.sql": orders_sql} + + @pytest.fixture(scope="class") + def snapshots(self): + return {"snapshot_orders.sql": snapshot_sql} + + def test_config(self, project): + run_dbt(["run"]) + results = run_dbt(["snapshot"]) + assert len(results) == 1 + + # try to parse with config in schema file + write_file( + snapshot_no_config_sql, project.project_root, "snapshots", "snapshot_orders.sql" + ) + write_file(snapshot_schema_yml, project.project_root, "snapshots", "snapshot.yml") + results = run_dbt(["parse"]) + + results = run_dbt(["snapshot"]) + assert len(results) == 1 diff --git a/tests/unit/contracts/graph/test_nodes_parsed.py b/tests/unit/contracts/graph/test_nodes_parsed.py index 4e8392cb3df..bce860e63d3 100644 --- a/tests/unit/contracts/graph/test_nodes_parsed.py +++ b/tests/unit/contracts/graph/test_nodes_parsed.py @@ -1333,7 +1333,7 @@ def test_invalid_missing_updated_at(basic_timestamp_snapshot_config_dict): bad_fields = basic_timestamp_snapshot_config_dict del bad_fields["updated_at"] bad_fields["check_cols"] = "all" - assert_fails_validation(bad_fields, SnapshotConfig) + assert_snapshot_config_fails_validation(bad_fields) @pytest.fixture @@ -1437,7 +1437,7 @@ def test_complex_snapshot_config( def test_invalid_check_wrong_strategy(basic_check_snapshot_config_dict): wrong_strategy = basic_check_snapshot_config_dict wrong_strategy["strategy"] = "timestamp" - assert_fails_validation(wrong_strategy, SnapshotConfig) + assert_snapshot_config_fails_validation(wrong_strategy) def test_invalid_missing_check_cols(basic_check_snapshot_config_dict): @@ -1445,6 +1445,8 @@ def test_invalid_missing_check_cols(basic_check_snapshot_config_dict): del wrong_fields["check_cols"] with pytest.raises(ValidationError, match=r"A snapshot configured with the check strategy"): SnapshotConfig.validate(wrong_fields) + cfg = SnapshotConfig.from_dict(wrong_fields) + cfg.final_validate() def test_missing_snapshot_configs(basic_check_snapshot_config_dict): @@ -1452,22 +1454,35 @@ def test_missing_snapshot_configs(basic_check_snapshot_config_dict): del wrong_fields["strategy"] with pytest.raises(ValidationError, match=r"Snapshots must be configured with a 'strategy'"): SnapshotConfig.validate(wrong_fields) + cfg = SnapshotConfig.from_dict(wrong_fields) + cfg.final_validate() wrong_fields["strategy"] = "timestamp" del wrong_fields["unique_key"] with pytest.raises(ValidationError, match=r"Snapshots must be configured with a 'strategy'"): SnapshotConfig.validate(wrong_fields) + cfg = SnapshotConfig.from_dict(wrong_fields) + cfg.final_validate() wrong_fields["unique_key"] = "id" del wrong_fields["target_schema"] with pytest.raises(ValidationError, match=r"Snapshots must be configured with a 'strategy'"): SnapshotConfig.validate(wrong_fields) + cfg = SnapshotConfig.from_dict(wrong_fields) + cfg.final_validate() + + +def assert_snapshot_config_fails_validation(dct): + with pytest.raises(ValidationError): + SnapshotConfig.validate(dct) + obj = SnapshotConfig.from_dict(dct) + obj.final_validate() def test_invalid_check_value(basic_check_snapshot_config_dict): invalid_check_type = basic_check_snapshot_config_dict invalid_check_type["check_cols"] = "some" - assert_fails_validation(invalid_check_type, SnapshotConfig) + assert_snapshot_config_fails_validation(invalid_check_type) @pytest.fixture