Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug fix: Apply metric_time filters to time spine when needed #1455

Merged
merged 9 commits into from
Oct 16, 2024
7 changes: 7 additions & 0 deletions .changes/unreleased/Fixes-20241009-171939.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Fixes
body: 'Bug fix: when querying a join_to_timespine metric with a metric_time filter
that is not included in the group by, unexpected output rows were included.'
time: 2024-10-09T17:19:39.244779-07:00
custom:
Author: courtneyholcomb
Issue: "1450"
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,17 @@ metric:
filter: "{{ Dimension('booking__is_instant') }}"
filter: "{{ Entity('listing') }} IS NOT NULL"
---
metric:
name: bookings_join_to_time_spine_with_tiered_filters
description: simple metric that joins to timespine and has metric_time filters at both the measure and metric level
type: simple
type_params:
measure:
name: bookings
join_to_timespine: true
filter: "{{ TimeDimension('metric_time', 'day') }} >= '2020-01-02'"
filter: "{{ TimeDimension('metric_time', 'day') }} <= '2020-01-02'"
---
metric:
name: every_two_days_bookers_fill_nulls_with_0
description: cumulative metric filling 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,3 +123,12 @@ metric:
measure:
name: archived_users
time_granularity: hour
---
metric:
name: archived_users_join_to_time_spine
description: subdaily metric joining to time spine
type: simple
type_params:
measure:
name: archived_users
join_to_timespine: true
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
'listing__booking__listing__bookings_fill_nulls_with_0',
'listing__booking__listing__bookings_fill_nulls_with_0_without_time_spine',
'listing__booking__listing__bookings_join_to_time_spine',
'listing__booking__listing__bookings_join_to_time_spine_with_tiered_filters',
'listing__booking__listing__bookings_per_booker',
'listing__booking__listing__bookings_per_dollar',
'listing__booking__listing__derived_bookings_0',
Expand Down Expand Up @@ -58,6 +59,7 @@
'listing__bookings_fill_nulls_with_0',
'listing__bookings_fill_nulls_with_0_without_time_spine',
'listing__bookings_join_to_time_spine',
'listing__bookings_join_to_time_spine_with_tiered_filters',
'listing__bookings_per_booker',
'listing__bookings_per_dollar',
'listing__bookings_per_listing',
Expand Down Expand Up @@ -194,6 +196,7 @@
'user__archived_at__week',
'user__archived_at__year',
'user__archived_users',
'user__archived_users_join_to_time_spine',
'user__bio_added_ts__day',
'user__bio_added_ts__extract_day',
'user__bio_added_ts__extract_day',
Expand Down Expand Up @@ -430,6 +433,7 @@
'user__listing__user__bookings_fill_nulls_with_0',
'user__listing__user__bookings_fill_nulls_with_0_without_time_spine',
'user__listing__user__bookings_join_to_time_spine',
'user__listing__user__bookings_join_to_time_spine_with_tiered_filters',
'user__listing__user__bookings_per_booker',
'user__listing__user__bookings_per_dollar',
'user__listing__user__bookings_per_listing',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ Model Join-Path Entity Links
('listings_latest',) ("('listing',)", "('booking', 'listing')") bookings_fill_nulls_with_0 ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") bookings_fill_nulls_with_0_without_time_spine ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") bookings_join_to_time_spine ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") bookings_join_to_time_spine_with_tiered_filters ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") bookings_per_booker ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") bookings_per_dollar ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('booking', 'listing')") derived_bookings_0 ['JOINED', 'METRIC']
Expand Down Expand Up @@ -82,6 +83,7 @@ Model Join-Path Entity Links
('listings_latest',) ("('listing',)", "('listing',)") bookings_fill_nulls_with_0 ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") bookings_fill_nulls_with_0_without_time_spine ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") bookings_join_to_time_spine ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") bookings_join_to_time_spine_with_tiered_filters ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") bookings_per_booker ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") bookings_per_dollar ['JOINED', 'METRIC']
('listings_latest',) ("('listing',)", "('listing',)") bookings_per_listing ['JOINED', 'METRIC']
Expand Down Expand Up @@ -143,6 +145,7 @@ Model Join-Path Entity Links
('listings_latest',) ("('user',)", "('listing', 'user')") bookings_fill_nulls_with_0 ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") bookings_fill_nulls_with_0_without_time_spine ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") bookings_join_to_time_spine ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") bookings_join_to_time_spine_with_tiered_filters ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") bookings_per_booker ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") bookings_per_dollar ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('listing', 'user')") bookings_per_listing ['JOINED', 'METRIC']
Expand Down Expand Up @@ -183,6 +186,7 @@ Model Join-Path Entity Links
('listings_latest',) ("('user',)", "('revenue_instance', 'user')") revenue_all_time ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('user',)") active_listings ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('user',)") archived_users ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('user',)") archived_users_join_to_time_spine ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('user',)") current_account_balance_by_user ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('user',)") identity_verifications ['JOINED', 'METRIC']
('listings_latest',) ("('user',)", "('user',)") largest_listing ['JOINED', 'METRIC']
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
'company__listing__user__company__bookings_fill_nulls_with_0',
'company__listing__user__company__bookings_fill_nulls_with_0_without_time_spine',
'company__listing__user__company__bookings_join_to_time_spine',
'company__listing__user__company__bookings_join_to_time_spine_with_tiered_filters',
'company__listing__user__company__bookings_per_booker',
'company__listing__user__company__bookings_per_dollar',
'company__listing__user__company__bookings_per_view',
Expand Down Expand Up @@ -94,6 +95,7 @@
'company__user',
'company__user__company__active_listings',
'company__user__company__archived_users',
'company__user__company__archived_users_join_to_time_spine',
'company__user__company__current_account_balance_by_user',
'company__user__company__identity_verifications',
'company__user__company__largest_listing',
Expand Down Expand Up @@ -139,6 +141,7 @@
'guest__booking__guest__bookings_fill_nulls_with_0',
'guest__booking__guest__bookings_fill_nulls_with_0_without_time_spine',
'guest__booking__guest__bookings_join_to_time_spine',
'guest__booking__guest__bookings_join_to_time_spine_with_tiered_filters',
'guest__booking__guest__bookings_per_booker',
'guest__booking__guest__bookings_per_dollar',
'guest__booking__guest__derived_bookings_0',
Expand Down Expand Up @@ -173,6 +176,7 @@
'guest__bookings_fill_nulls_with_0',
'guest__bookings_fill_nulls_with_0_without_time_spine',
'guest__bookings_join_to_time_spine',
'guest__bookings_join_to_time_spine_with_tiered_filters',
'guest__bookings_per_booker',
'guest__bookings_per_dollar',
'guest__derived_bookings_0',
Expand Down Expand Up @@ -218,6 +222,7 @@
'host__booking__host__bookings_fill_nulls_with_0',
'host__booking__host__bookings_fill_nulls_with_0_without_time_spine',
'host__booking__host__bookings_join_to_time_spine',
'host__booking__host__bookings_join_to_time_spine_with_tiered_filters',
'host__booking__host__bookings_per_booker',
'host__booking__host__bookings_per_dollar',
'host__booking__host__derived_bookings_0',
Expand Down Expand Up @@ -252,6 +257,7 @@
'host__bookings_fill_nulls_with_0',
'host__bookings_fill_nulls_with_0_without_time_spine',
'host__bookings_join_to_time_spine',
'host__bookings_join_to_time_spine_with_tiered_filters',
'host__bookings_per_booker',
'host__bookings_per_dollar',
'host__derived_bookings_0',
Expand Down Expand Up @@ -298,6 +304,7 @@
'listing__booking__listing__bookings_fill_nulls_with_0',
'listing__booking__listing__bookings_fill_nulls_with_0_without_time_spine',
'listing__booking__listing__bookings_join_to_time_spine',
'listing__booking__listing__bookings_join_to_time_spine_with_tiered_filters',
'listing__booking__listing__bookings_per_booker',
'listing__booking__listing__bookings_per_dollar',
'listing__booking__listing__derived_bookings_0',
Expand Down Expand Up @@ -333,6 +340,7 @@
'listing__bookings_fill_nulls_with_0',
'listing__bookings_fill_nulls_with_0_without_time_spine',
'listing__bookings_join_to_time_spine',
'listing__bookings_join_to_time_spine_with_tiered_filters',
'listing__bookings_per_booker',
'listing__bookings_per_dollar',
'listing__bookings_per_listing',
Expand Down Expand Up @@ -405,6 +413,7 @@
'lux_listing__listing__lux_listing__bookings_fill_nulls_with_0',
'lux_listing__listing__lux_listing__bookings_fill_nulls_with_0_without_time_spine',
'lux_listing__listing__lux_listing__bookings_join_to_time_spine',
'lux_listing__listing__lux_listing__bookings_join_to_time_spine_with_tiered_filters',
'lux_listing__listing__lux_listing__bookings_per_booker',
'lux_listing__listing__lux_listing__bookings_per_dollar',
'lux_listing__listing__lux_listing__bookings_per_listing',
Expand Down Expand Up @@ -471,6 +480,7 @@
'user__archived_at__extract_year',
'user__archived_at__hour',
'user__archived_users',
'user__archived_users_join_to_time_spine',
'user__bio_added_ts__extract_day',
'user__bio_added_ts__extract_dow',
'user__bio_added_ts__extract_doy',
Expand Down Expand Up @@ -546,6 +556,7 @@
'user__listing__user__bookings_fill_nulls_with_0',
'user__listing__user__bookings_fill_nulls_with_0_without_time_spine',
'user__listing__user__bookings_join_to_time_spine',
'user__listing__user__bookings_join_to_time_spine_with_tiered_filters',
'user__listing__user__bookings_per_booker',
'user__listing__user__bookings_per_dollar',
'user__listing__user__bookings_per_listing',
Expand Down
30 changes: 23 additions & 7 deletions metricflow/dataflow/builder/dataflow_plan_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -1738,7 +1738,7 @@ def _build_aggregated_measure_from_measure_source_node(
metric_input_measure_specs=(metric_input_measure_spec,),
)

# Joining to time spine after aggregation is for measures that specify `join_to_timespine` in the YAML spec.
# Joining to time spine after aggregation is for measures that specify `join_to_timespine: true` in the YAML spec.
after_aggregation_time_spine_join_description = (
metric_input_measure_spec.after_aggregation_time_spine_join_description
)
Expand All @@ -1747,6 +1747,20 @@ def _build_aggregated_measure_from_measure_source_node(
f"Expected {SqlJoinType.LEFT_OUTER} for joining to time spine after aggregation. Remove this if "
f"there's a new use case."
)
# Find filters that contain only metric_time or agg_time_dimension. They will be applied to the time spine table.
agg_time_only_filters: List[WhereFilterSpec] = []
non_agg_time_filters: List[WhereFilterSpec] = []
for filter_spec in metric_input_measure_spec.filter_spec_set.after_measure_aggregation_filter_specs:
included_agg_time_specs = filter_spec.linkable_spec_set.included_agg_time_dimension_specs_for_measure(
measure_reference=measure_spec.reference, semantic_model_lookup=self._semantic_model_lookup
)
if set(included_agg_time_specs) == set(filter_spec.linkable_spec_set.as_tuple):
agg_time_only_filters.append(filter_spec)
else:
non_agg_time_filters.append(filter_spec)

# TODO: split this node into TimeSpineSourceNode and JoinToTimeSpineNode - then can use standard nodes here
# like JoinToCustomGranularityNode, WhereConstraintNode, etc.
output_node: DataflowPlanNode = JoinToTimeSpineNode.create(
parent_node=aggregate_measures_node,
requested_agg_time_dimension_specs=queried_agg_time_dimension_specs,
Expand All @@ -1755,19 +1769,21 @@ def _build_aggregated_measure_from_measure_source_node(
time_range_constraint=predicate_pushdown_state.time_range_constraint,
offset_window=after_aggregation_time_spine_join_description.offset_window,
offset_to_grain=after_aggregation_time_spine_join_description.offset_to_grain,
time_spine_filters=agg_time_only_filters,
)

# Since new rows might have been added due to time spine join, re-apply constraints here. Only re-apply filters
# for specs that are also in the queried specs, since those are the only ones that might have changed after the
# time spine join.
queried_filter_specs = [
# for specs that are also in the queried specs, since those are the only ones that could change after the time
# spine join. Exclude filters that contain only metric_time or an agg time dimension, since were already applied
# to the time spine table.
queried_non_agg_time_filter_specs = [
filter_spec
for filter_spec in metric_input_measure_spec.filter_spec_set.after_measure_aggregation_filter_specs
for filter_spec in non_agg_time_filters
if set(filter_spec.linkable_specs).issubset(set(queried_linkable_specs.as_tuple))
]
if len(queried_filter_specs) > 0:
if len(queried_non_agg_time_filter_specs) > 0:
output_node = WhereConstraintNode.create(
parent_node=output_node, where_specs=queried_filter_specs, always_apply=True
parent_node=output_node, where_specs=queried_non_agg_time_filter_specs, always_apply=True
)

# TODO: this will break if you query by agg_time_dimension but apply a time constraint on metric_time.
Expand Down
Loading
Loading