Improve validations for derived metrics (#294)
Resolves #270

### Description
Improve validations for derived metrics.
- Previously, if no input metrics were set, users would hit an
`AssertionError` that interrupted the semantic manifest validation
process. This is now reported as a more user-friendly validation error
(see the sketch below).
- Add a validation warning to ensure that `expr` is set and that it
references all of the input metrics.
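
To make the first point concrete, here is a toy model of the old failure mode. This is not code from this repo; the assertion mirrors the assumed behavior of the `input_metrics` property:

```python
from typing import List, Optional


class TypeParams:
    """Toy stand-in for a metric's type_params."""

    def __init__(self, metrics: Optional[List[str]] = None) -> None:
        self.metrics = metrics


class Metric:
    """Toy stand-in for a derived metric."""

    def __init__(self, type_params: TypeParams) -> None:
        self.type_params = type_params

    @property
    def input_metrics(self) -> List[str]:
        # Assumed old behavior: touching this property on a metric with no
        # inputs raises AssertionError, aborting the whole validation run.
        assert self.type_params.metrics, "derived metric has no input metrics"
        return self.type_params.metrics


metric = Metric(TypeParams(metrics=None))
print(metric.type_params.metrics or [])  # [] -- validation can keep going
# print(metric.input_metrics)            # AssertionError -- the old crash
```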

Note: The first commit here was previously merged and then reverted
because adding a `ValidationError` would be a breaking change. This PR
updates it to be a `ValidationWarning`.
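
As a sketch of what the new warning actually checks, here is a plain-Python reimplementation of the logic in `_validate_expr` below (not the library's API):

```python
from typing import List, Optional


def expr_warnings(expr: Optional[str], input_names: List[str]) -> List[str]:
    """Reimplements the new check: warn on a missing expr, or on any input
    metric whose name/alias never appears in the expr string."""
    if not expr:
        return ["No `expr` set for derived metric."]
    return [
        f"Input metric '{name}' is not used in `expr`: '{expr}'."
        for name in input_names
        if name not in expr
    ]


print(expr_warnings(None, ["orders"]))        # missing expr -> warning
print(expr_warnings("x", ["random_metric"]))  # unused input -> warning
print(expr_warnings("a + b", ["a", "b"]))     # clean -> []
```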

### Checklist

- [x] I have read [the contributing
guide](https://github.com/dbt-labs/dbt-semantic-interfaces/blob/main/CONTRIBUTING.md)
and understand what's expected of me
- [x] I have signed the
[CLA](https://docs.getdbt.com/docs/contributor-license-agreements)
- [x] This PR includes tests, or tests are not required/relevant for
this PR
- [x] I have run `changie new` to [create a changelog
entry](https://github.com/dbt-labs/dbt-semantic-interfaces/blob/main/CONTRIBUTING.md#adding-a-changelog-entry)
courtneyholcomb authored Jun 16, 2024
1 parent ee52b13 commit e3ce0c0
Showing 3 changed files with 87 additions and 14 deletions.
7 changes: 7 additions & 0 deletions .changes/unreleased/Fixes-20240524-092249.yaml
@@ -0,0 +1,7 @@
+kind: Fixes
+body: Improve validations for derived metrics. Improve error messaging if input metrics
+  are not set. Validate that expr is set and references all input metrics.
+time: 2024-05-24T09:22:49.247603-07:00
+custom:
+  Author: courtneyholcomb
+  Issue: "270"
62 changes: 54 additions & 8 deletions dbt_semantic_interfaces/validations/metrics.py
@@ -19,6 +19,7 @@
     SemanticManifestValidationRule,
     ValidationError,
     ValidationIssue,
+    ValidationWarning,
     generate_exception_issue,
     validate_safely,
 )
@@ -87,8 +88,9 @@ def _validate_alias_collision(metric: Metric) -> List[ValidationIssue]:
             file_context=FileContext.from_metadata(metadata=metric.metadata),
             metric=MetricModelReference(metric_name=metric.name),
         )
-        used_names = {input_metric.name for input_metric in metric.input_metrics}
-        for input_metric in metric.input_metrics:
+        input_metrics = metric.type_params.metrics or []
+        used_names = {input_metric.name for input_metric in input_metrics}
+        for input_metric in input_metrics:
             if input_metric.alias:
                 issues += UniqueAndValidNameRule.check_valid_name(input_metric.alias, metric_context)
                 if input_metric.alias in used_names:
@@ -109,15 +111,24 @@ def _validate_input_metrics_exist(semantic_manifest: SemanticManifest) -> List[ValidationIssue]:
 
         all_metrics = {m.name for m in semantic_manifest.metrics}
         for metric in semantic_manifest.metrics:
+            metric_context = MetricContext(
+                file_context=FileContext.from_metadata(metadata=metric.metadata),
+                metric=MetricModelReference(metric_name=metric.name),
+            )
             if metric.type == MetricType.DERIVED:
-                for input_metric in metric.input_metrics:
+                if not metric.type_params.metrics:
+                    issues.append(
+                        ValidationError(
+                            context=metric_context,
+                            message=f"No input metrics found for derived metric '{metric.name}'. "
+                            "Please add metrics to type_params.metrics.",
+                        )
+                    )
+                for input_metric in metric.type_params.metrics or []:
                     if input_metric.name not in all_metrics:
                         issues.append(
                             ValidationError(
-                                context=MetricContext(
-                                    file_context=FileContext.from_metadata(metadata=metric.metadata),
-                                    metric=MetricModelReference(metric_name=metric.name),
-                                ),
+                                context=metric_context,
                                 message=f"For metric: {metric.name}, input metric: '{input_metric.name}' does not "
                                 "exist as a configured metric in the model.",
                             )
@@ -129,7 +140,7 @@ def _validate_input_metrics_exist(semantic_manifest: SemanticManifest) -> List[ValidationIssue]:
     def _validate_time_offset_params(metric: Metric) -> List[ValidationIssue]:
         issues: List[ValidationIssue] = []
 
-        for input_metric in metric.input_metrics or []:
+        for input_metric in metric.type_params.metrics or []:
             if input_metric.offset_window and input_metric.offset_to_grain:
                 issues.append(
                     ValidationError(
@@ -144,6 +155,40 @@ def _validate_time_offset_params(metric: Metric) -> List[ValidationIssue]:
 
         return issues
 
+    @staticmethod
+    @validate_safely(whats_being_done="checking that the expr field uses the input metrics")
+    def _validate_expr(metric: Metric) -> List[ValidationIssue]:
+        issues: List[ValidationIssue] = []
+
+        if metric.type == MetricType.DERIVED:
+            if not metric.type_params.expr:
+                issues.append(
+                    ValidationWarning(
+                        context=MetricContext(
+                            file_context=FileContext.from_metadata(metadata=metric.metadata),
+                            metric=MetricModelReference(metric_name=metric.name),
+                        ),
+                        message=f"No `expr` set for derived metric {metric.name}. "
+                        "Please add an `expr` that references all input metrics.",
+                    )
+                )
+            else:
+                for input_metric in metric.type_params.metrics or []:
+                    name = input_metric.alias or input_metric.name
+                    if name not in metric.type_params.expr:
+                        issues.append(
+                            ValidationWarning(
+                                context=MetricContext(
+                                    file_context=FileContext.from_metadata(metadata=metric.metadata),
+                                    metric=MetricModelReference(metric_name=metric.name),
+                                ),
+                                message=f"Input metric '{name}' is not used in `expr`: '{metric.type_params.expr}' for "
+                                f"derived metric '{metric.name}'. Please update the `expr` or remove the input metric.",
+                            )
+                        )
+
+        return issues
+
     @staticmethod
     @validate_safely(
         whats_being_done="running model validation ensuring derived metrics properties are configured properly"
@@ -155,6 +200,7 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]:
         for metric in semantic_manifest.metrics or []:
             issues += DerivedMetricRule._validate_alias_collision(metric=metric)
             issues += DerivedMetricRule._validate_time_offset_params(metric=metric)
+            issues += DerivedMetricRule._validate_expr(metric=metric)
         return issues
 
 
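One caveat about `_validate_expr` above: the usage check is a plain substring test against the expr string, so an input metric whose name happens to appear inside a longer identifier is still counted as used. A minimal demonstration:

```python
# Mirrors the `name not in metric.type_params.expr` check above.
expr = "net_revenue - refunds"
print("revenue" in expr)  # True -- matches inside "net_revenue", so no warning
print("orders" in expr)   # False -- would draw the ValidationWarning
```
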
32 changes: 26 additions & 6 deletions tests/validations/test_metrics.py
@@ -266,6 +266,21 @@ def test_derived_metric() -> None:  # noqa: D
                         expr="notexist * 2", metrics=[PydanticMetricInput(name="notexist")]
                     ),
                 ),
+                metric_with_guaranteed_meta(
+                    name="no_expr",
+                    type=MetricType.DERIVED,
+                    type_params=PydanticMetricTypeParams(metrics=[PydanticMetricInput(name="random_metric")]),
+                ),
+                metric_with_guaranteed_meta(
+                    name="input_metric_not_in_expr",
+                    type=MetricType.DERIVED,
+                    type_params=PydanticMetricTypeParams(expr="x", metrics=[PydanticMetricInput(name="random_metric")]),
+                ),
+                metric_with_guaranteed_meta(
+                    name="no_input_metrics",
+                    type=MetricType.DERIVED,
+                    type_params=PydanticMetricTypeParams(expr="x"),
+                ),
                 metric_with_guaranteed_meta(
                     name="has_valid_time_window_params",
                     type=MetricType.DERIVED,
@@ -299,13 +314,18 @@ def test_derived_metric() -> None:  # noqa: D
             project_configuration=EXAMPLE_PROJECT_CONFIGURATION,
         )
     )
-    build_issues = validation_results.errors
-    assert len(build_issues) == 3
-    expected_substr1 = "is already being used. Please choose another alias"
-    expected_substr2 = "does not exist as a configured metric in the model"
-    expected_substr3 = "Both offset_window and offset_to_grain set"
+    build_issues = validation_results.all_issues
+    assert len(build_issues) == 6
+    expected_substrings = [
+        "is already being used. Please choose another alias",
+        "does not exist as a configured metric in the model",
+        "Both offset_window and offset_to_grain set",
+        "is not used in `expr`",
+        "No input metrics found for derived metric",
+        "No `expr` set for derived metric",
+    ]
     missing_error_strings = set()
-    for expected_str in [expected_substr1, expected_substr2, expected_substr3]:
+    for expected_str in expected_substrings:
         if not any(actual_str.as_readable_str().find(expected_str) != -1 for actual_str in build_issues):
             missing_error_strings.add(expected_str)
     assert len(missing_error_strings) == 0, (
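For context on how these issues reach callers, a sketch of validating a manifest end to end. Here `manifest` is assumed to be a `PydanticSemanticManifest` built like the one in this test, and the validator and result attributes follow their usage in the test above (import paths assumed from the library's layout):

```python
from dbt_semantic_interfaces.implementations.semantic_manifest import (
    PydanticSemanticManifest,
)
from dbt_semantic_interfaces.validations.semantic_manifest_validator import (
    SemanticManifestValidator,
)


def report_issues(manifest: PydanticSemanticManifest) -> None:
    results = SemanticManifestValidator[PydanticSemanticManifest]().validate_semantic_manifest(manifest)
    # The new expr checks are warnings: they inform without breaking builds...
    for warning in results.warnings:
        print("warning:", warning.as_readable_str())
    # ...while unknown or missing input metrics remain hard errors.
    for error in results.errors:
        print("error:", error.as_readable_str())
```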
