Improve validations for derived metrics (#294)
Resolves #270

### Description
Improve validations for derived metrics.
- Previously, if no input metrics were set, users would hit an
`AssertionError` that interrupted the semantic manifest validation
process. This is now reported as a more user-friendly validation error
(see the sketch below).
- Add a validation warning to ensure that `expr` is set and that it
references all of the input metrics.
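
To make the first point concrete, here is a toy model of the old failure mode. This is not code from this repo; the assertion mirrors the assumed behavior of the `input_metrics` property:

```python
from typing import List, Optional


class TypeParams:
    """Toy stand-in for a metric's type_params."""

    def __init__(self, metrics: Optional[List[str]] = None) -> None:
        self.metrics = metrics


class Metric:
    """Toy stand-in for a derived metric."""

    def __init__(self, type_params: TypeParams) -> None:
        self.type_params = type_params

    @property
    def input_metrics(self) -> List[str]:
        # Assumed old behavior: touching this property on a metric with no
        # inputs raises AssertionError, aborting the whole validation run.
        assert self.type_params.metrics, "derived metric has no input metrics"
        return self.type_params.metrics


metric = Metric(TypeParams(metrics=None))
print(metric.type_params.metrics or [])  # [] -- validation can keep going
# print(metric.input_metrics)            # AssertionError -- the old crash
```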

Note: The first commit here was previously merged and then reverted
because adding a `ValidationError` would be a breaking change. This PR
updates it to be a `ValidationWarning`.
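
As a sketch of what the new warning actually checks, here is a plain-Python reimplementation of the logic in `_validate_expr` below (not the library's API):

```python
from typing import List, Optional


def expr_warnings(expr: Optional[str], input_names: List[str]) -> List[str]:
    """Reimplements the new check: warn on a missing expr, or on any input
    metric whose name/alias never appears in the expr string."""
    if not expr:
        return ["No `expr` set for derived metric."]
    return [
        f"Input metric '{name}' is not used in `expr`: '{expr}'."
        for name in input_names
        if name not in expr
    ]


print(expr_warnings(None, ["orders"]))        # missing expr -> warning
print(expr_warnings("x", ["random_metric"]))  # unused input -> warning
print(expr_warnings("a + b", ["a", "b"]))     # clean -> []
```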

### Checklist

- [x] I have read [the contributing
guide](https://github.com/dbt-labs/dbt-semantic-interfaces/blob/main/CONTRIBUTING.md)
and understand what's expected of me
- [x] I have signed the
[CLA](https://docs.getdbt.com/docs/contributor-license-agreements)
- [x] This PR includes tests, or tests are not required/relevant for
this PR
- [x] I have run `changie new` to [create a changelog
entry](https://github.com/dbt-labs/dbt-semantic-interfaces/blob/main/CONTRIBUTING.md#adding-a-changelog-entry)
courtneyholcomb authored Jun 16, 2024
1 parent ee52b13 commit e3ce0c0
Showing 3 changed files with 87 additions and 14 deletions.
7 changes: 7 additions & 0 deletions .changes/unreleased/Fixes-20240524-092249.yaml
@@ -0,0 +1,7 @@
+kind: Fixes
+body: Improve validations for derived metrics. Improve error messaging if input metrics
+  are not set. Validate that expr is set and references all input metrics.
+time: 2024-05-24T09:22:49.247603-07:00
+custom:
+  Author: courtneyholcomb
+  Issue: "270"
62 changes: 54 additions & 8 deletions dbt_semantic_interfaces/validations/metrics.py
@@ -19,6 +19,7 @@
     SemanticManifestValidationRule,
     ValidationError,
     ValidationIssue,
+    ValidationWarning,
     generate_exception_issue,
     validate_safely,
 )
@@ -87,8 +88,9 @@ def _validate_alias_collision(metric: Metric) -> List[ValidationIssue]:
             file_context=FileContext.from_metadata(metadata=metric.metadata),
             metric=MetricModelReference(metric_name=metric.name),
         )
-        used_names = {input_metric.name for input_metric in metric.input_metrics}
-        for input_metric in metric.input_metrics:
+        input_metrics = metric.type_params.metrics or []
+        used_names = {input_metric.name for input_metric in input_metrics}
+        for input_metric in input_metrics:
             if input_metric.alias:
                 issues += UniqueAndValidNameRule.check_valid_name(input_metric.alias, metric_context)
                 if input_metric.alias in used_names:
@@ -109,15 +111,24 @@ def _validate_input_metrics_exist(semantic_manifest: SemanticManifest) -> List[ValidationIssue]:
 
         all_metrics = {m.name for m in semantic_manifest.metrics}
         for metric in semantic_manifest.metrics:
+            metric_context = MetricContext(
+                file_context=FileContext.from_metadata(metadata=metric.metadata),
+                metric=MetricModelReference(metric_name=metric.name),
+            )
             if metric.type == MetricType.DERIVED:
-                for input_metric in metric.input_metrics:
+                if not metric.type_params.metrics:
+                    issues.append(
+                        ValidationError(
+                            context=metric_context,
+                            message=f"No input metrics found for derived metric '{metric.name}'. "
+                            "Please add metrics to type_params.metrics.",
+                        )
+                    )
+                for input_metric in metric.type_params.metrics or []:
                     if input_metric.name not in all_metrics:
                         issues.append(
                             ValidationError(
-                                context=MetricContext(
-                                    file_context=FileContext.from_metadata(metadata=metric.metadata),
-                                    metric=MetricModelReference(metric_name=metric.name),
-                                ),
+                                context=metric_context,
                                 message=f"For metric: {metric.name}, input metric: '{input_metric.name}' does not "
                                 "exist as a configured metric in the model.",
                             )
@@ -129,7 +140,7 @@ def _validate_input_metrics_exist(semantic_manifest: SemanticManifest) -> List[ValidationIssue]:
     def _validate_time_offset_params(metric: Metric) -> List[ValidationIssue]:
         issues: List[ValidationIssue] = []
 
-        for input_metric in metric.input_metrics or []:
+        for input_metric in metric.type_params.metrics or []:
             if input_metric.offset_window and input_metric.offset_to_grain:
                 issues.append(
                     ValidationError(
@@ -144,6 +155,40 @@ def _validate_time_offset_params(metric: Metric) -> List[ValidationIssue]:
 
         return issues
 
+    @staticmethod
+    @validate_safely(whats_being_done="checking that the expr field uses the input metrics")
+    def _validate_expr(metric: Metric) -> List[ValidationIssue]:
+        issues: List[ValidationIssue] = []
+
+        if metric.type == MetricType.DERIVED:
+            if not metric.type_params.expr:
+                issues.append(
+                    ValidationWarning(
+                        context=MetricContext(
+                            file_context=FileContext.from_metadata(metadata=metric.metadata),
+                            metric=MetricModelReference(metric_name=metric.name),
+                        ),
+                        message=f"No `expr` set for derived metric {metric.name}. "
+                        "Please add an `expr` that references all input metrics.",
+                    )
+                )
+            else:
+                for input_metric in metric.type_params.metrics or []:
+                    name = input_metric.alias or input_metric.name
+                    if name not in metric.type_params.expr:
+                        issues.append(
+                            ValidationWarning(
+                                context=MetricContext(
+                                    file_context=FileContext.from_metadata(metadata=metric.metadata),
+                                    metric=MetricModelReference(metric_name=metric.name),
+                                ),
+                                message=f"Input metric '{name}' is not used in `expr`: '{metric.type_params.expr}' for "
+                                f"derived metric '{metric.name}'. Please update the `expr` or remove the input metric.",
+                            )
+                        )
+
+        return issues
+
     @staticmethod
     @validate_safely(
         whats_being_done="running model validation ensuring derived metrics properties are configured properly"
@@ -155,6 +200,7 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]:
         for metric in semantic_manifest.metrics or []:
             issues += DerivedMetricRule._validate_alias_collision(metric=metric)
             issues += DerivedMetricRule._validate_time_offset_params(metric=metric)
+            issues += DerivedMetricRule._validate_expr(metric=metric)
         return issues
 
 
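One caveat about `_validate_expr` above: the usage check is a plain substring test against the expr string, so an input metric whose name happens to appear inside a longer identifier is still counted as used. A minimal demonstration:

```python
# Mirrors the `name not in metric.type_params.expr` check above.
expr = "net_revenue - refunds"
print("revenue" in expr)  # True -- matches inside "net_revenue", so no warning
print("orders" in expr)   # False -- would draw the ValidationWarning
```
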
32 changes: 26 additions & 6 deletions tests/validations/test_metrics.py
@@ -266,6 +266,21 @@ def test_derived_metric() -> None:  # noqa: D
                         expr="notexist * 2", metrics=[PydanticMetricInput(name="notexist")]
                     ),
                 ),
+                metric_with_guaranteed_meta(
+                    name="no_expr",
+                    type=MetricType.DERIVED,
+                    type_params=PydanticMetricTypeParams(metrics=[PydanticMetricInput(name="random_metric")]),
+                ),
+                metric_with_guaranteed_meta(
+                    name="input_metric_not_in_expr",
+                    type=MetricType.DERIVED,
+                    type_params=PydanticMetricTypeParams(expr="x", metrics=[PydanticMetricInput(name="random_metric")]),
+                ),
+                metric_with_guaranteed_meta(
+                    name="no_input_metrics",
+                    type=MetricType.DERIVED,
+                    type_params=PydanticMetricTypeParams(expr="x"),
+                ),
                 metric_with_guaranteed_meta(
                     name="has_valid_time_window_params",
                     type=MetricType.DERIVED,
@@ -299,13 +314,18 @@ def test_derived_metric() -> None:  # noqa: D
             project_configuration=EXAMPLE_PROJECT_CONFIGURATION,
         )
     )
-    build_issues = validation_results.errors
-    assert len(build_issues) == 3
-    expected_substr1 = "is already being used. Please choose another alias"
-    expected_substr2 = "does not exist as a configured metric in the model"
-    expected_substr3 = "Both offset_window and offset_to_grain set"
+    build_issues = validation_results.all_issues
+    assert len(build_issues) == 6
+    expected_substrings = [
+        "is already being used. Please choose another alias",
+        "does not exist as a configured metric in the model",
+        "Both offset_window and offset_to_grain set",
+        "is not used in `expr`",
+        "No input metrics found for derived metric",
+        "No `expr` set for derived metric",
+    ]
     missing_error_strings = set()
-    for expected_str in [expected_substr1, expected_substr2, expected_substr3]:
+    for expected_str in expected_substrings:
         if not any(actual_str.as_readable_str().find(expected_str) != -1 for actual_str in build_issues):
             missing_error_strings.add(expected_str)
     assert len(missing_error_strings) == 0, (
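For context on how these issues reach callers, a sketch of validating a manifest end to end. Here `manifest` is assumed to be a `PydanticSemanticManifest` built like the one in this test, and the validator and result attributes follow their usage in the test above (import paths assumed from the library's layout):

```python
from dbt_semantic_interfaces.implementations.semantic_manifest import (
    PydanticSemanticManifest,
)
from dbt_semantic_interfaces.validations.semantic_manifest_validator import (
    SemanticManifestValidator,
)


def report_issues(manifest: PydanticSemanticManifest) -> None:
    results = SemanticManifestValidator[PydanticSemanticManifest]().validate_semantic_manifest(manifest)
    # The new expr checks are warnings: they inform without breaking builds...
    for warning in results.warnings:
        print("warning:", warning.as_readable_str())
    # ...while unknown or missing input metrics remain hard errors.
    for error in results.errors:
        print("error:", error.as_readable_str())
```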
