Skip to content

Commit

Permalink
SQL rendering tests for metric queries with custom granularity (#1410)
Browse files Browse the repository at this point in the history
SQL rendering tests for querying metrics with custom granularities. Does
not include tests with filters (coming later). If you have suggestions
for additional metric scenarios that should be tested, please let me
know!
  • Loading branch information
courtneyholcomb authored Sep 24, 2024
1 parent 51d781b commit 727a380
Show file tree
Hide file tree
Showing 71 changed files with 11,335 additions and 0 deletions.
163 changes: 163 additions & 0 deletions tests_metricflow/query_rendering/test_custom_granularity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
"""Tests metric query rendering for granularity and date part operations.
This module runs query requests for various granularity/date part options and compares
the rendered output against snapshot files.
"""

from __future__ import annotations

import pytest
from _pytest.fixtures import FixtureRequest
from dbt_semantic_interfaces.references import EntityReference
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
from metricflow_semantics.specs.metric_spec import MetricSpec
from metricflow_semantics.specs.query_spec import MetricFlowQuerySpec
from metricflow_semantics.specs.time_dimension_spec import TimeDimensionSpec
from metricflow_semantics.test_helpers.config_helpers import MetricFlowTestConfiguration
from metricflow_semantics.time.granularity import ExpandedTimeGranularity

from metricflow.dataflow.builder.dataflow_plan_builder import DataflowPlanBuilder
from metricflow.plan_conversion.dataflow_to_sql import DataflowToSqlQueryPlanConverter
from metricflow.protocols.sql_client import SqlClient
from tests_metricflow.query_rendering.compare_rendered_query import render_and_check

# Custom grain shared by every spec below: "martian_day", built on top of the standard DAY grain.
_MARTIAN_DAY_GRAIN = ExpandedTimeGranularity(name="martian_day", base_granularity=TimeGranularity.DAY)

# `metric_time` (no entity links) at the custom grain.
metric_time_with_custom_grain = TimeDimensionSpec(
    element_name="metric_time",
    entity_links=(),
    time_granularity=_MARTIAN_DAY_GRAIN,
)

# `ds` linked through the `booking` entity, at the custom grain.
normal_time_dim_with_custom_grain1 = TimeDimensionSpec(
    element_name="ds",
    entity_links=(EntityReference("booking"),),
    time_granularity=_MARTIAN_DAY_GRAIN,
)

# `bio_added_ts` linked through the `user` entity, at the custom grain.
normal_time_dim_with_custom_grain2 = TimeDimensionSpec(
    element_name="bio_added_ts",
    entity_links=(EntityReference("user"),),
    time_granularity=_MARTIAN_DAY_GRAIN,
)


# TODO: subqueries in this test should be collapsed. Update optimizer
@pytest.mark.sql_engine_snapshot
def test_simple_metric_with_custom_granularity(
    request: FixtureRequest,
    mf_test_configuration: MetricFlowTestConfiguration,
    dataflow_plan_builder: DataflowPlanBuilder,
    dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter,
    sql_client: SqlClient,
) -> None:
    """Check the rendered SQL for the `bookings` metric grouped by a custom-grain time dimension.

    The query groups by `normal_time_dim_with_custom_grain1` and is handed to `render_and_check`
    together with the test fixtures.
    """
    render_and_check(
        request=request,
        mf_test_configuration=mf_test_configuration,
        dataflow_plan_builder=dataflow_plan_builder,
        dataflow_to_sql_converter=dataflow_to_sql_converter,
        sql_client=sql_client,
        query_spec=MetricFlowQuerySpec(
            metric_specs=(MetricSpec("bookings"),),
            time_dimension_specs=(normal_time_dim_with_custom_grain1,),
        ),
    )


@pytest.mark.sql_engine_snapshot
def test_cumulative_metric_with_custom_granularity(
    request: FixtureRequest,
    mf_test_configuration: MetricFlowTestConfiguration,
    dataflow_plan_builder: DataflowPlanBuilder,
    dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter,
    sql_client: SqlClient,
) -> None:
    """Check the rendered SQL for `trailing_2_months_revenue` grouped by custom-grain `metric_time`.

    The query groups by `metric_time_with_custom_grain` and is handed to `render_and_check`
    together with the test fixtures.
    """
    render_and_check(
        request=request,
        mf_test_configuration=mf_test_configuration,
        dataflow_plan_builder=dataflow_plan_builder,
        dataflow_to_sql_converter=dataflow_to_sql_converter,
        sql_client=sql_client,
        query_spec=MetricFlowQuerySpec(
            metric_specs=(MetricSpec("trailing_2_months_revenue"),),
            time_dimension_specs=(metric_time_with_custom_grain,),
        ),
    )


@pytest.mark.sql_engine_snapshot
def test_derived_metric_with_custom_granularity(
    request: FixtureRequest,
    mf_test_configuration: MetricFlowTestConfiguration,
    dataflow_plan_builder: DataflowPlanBuilder,
    dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter,
    sql_client: SqlClient,
) -> None:
    """Check the rendered SQL for `booking_fees_per_booker` grouped by a custom-grain time dimension.

    The query groups by `normal_time_dim_with_custom_grain1` and is handed to `render_and_check`
    together with the test fixtures.
    """
    render_and_check(
        request=request,
        mf_test_configuration=mf_test_configuration,
        dataflow_plan_builder=dataflow_plan_builder,
        dataflow_to_sql_converter=dataflow_to_sql_converter,
        sql_client=sql_client,
        query_spec=MetricFlowQuerySpec(
            metric_specs=(MetricSpec("booking_fees_per_booker"),),
            time_dimension_specs=(normal_time_dim_with_custom_grain1,),
        ),
    )


# TODO: subqueries in this test should be collapsed. Update optimizer
@pytest.mark.sql_engine_snapshot
def test_multiple_metrics_with_custom_granularity(
    request: FixtureRequest,
    mf_test_configuration: MetricFlowTestConfiguration,
    dataflow_plan_builder: DataflowPlanBuilder,
    dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter,
    sql_client: SqlClient,
) -> None:
    """Check the rendered SQL for `bookings` and `listings` queried together at a custom grain.

    Both metrics are grouped by `metric_time_with_custom_grain`; the query is handed to
    `render_and_check` together with the test fixtures.
    """
    render_and_check(
        request=request,
        mf_test_configuration=mf_test_configuration,
        dataflow_plan_builder=dataflow_plan_builder,
        dataflow_to_sql_converter=dataflow_to_sql_converter,
        sql_client=sql_client,
        query_spec=MetricFlowQuerySpec(
            metric_specs=(MetricSpec("bookings"), MetricSpec("listings")),
            time_dimension_specs=(metric_time_with_custom_grain,),
        ),
    )


# TODO: subqueries in this test should be collapsed. Update optimizer
@pytest.mark.sql_engine_snapshot
def test_metric_custom_granularity_joined_to_non_default_grain(
    request: FixtureRequest,
    mf_test_configuration: MetricFlowTestConfiguration,
    dataflow_plan_builder: DataflowPlanBuilder,
    dataflow_to_sql_converter: DataflowToSqlQueryPlanConverter,
    sql_client: SqlClient,
) -> None:
    """Check the rendered SQL for `listings` grouped by a custom grain and a standard MONTH grain.

    The query groups by `metric_time_with_custom_grain` and by `ds` (linked through the `listing`
    entity) at MONTH granularity, and is handed to `render_and_check` together with the test fixtures.
    """
    # Standard (non-custom) grain dimension queried alongside the custom-grain `metric_time`.
    listing_ds_by_month = TimeDimensionSpec(
        element_name="ds",
        entity_links=(EntityReference("listing"),),
        time_granularity=ExpandedTimeGranularity.from_time_granularity(TimeGranularity.MONTH),
    )

    render_and_check(
        request=request,
        mf_test_configuration=mf_test_configuration,
        dataflow_plan_builder=dataflow_plan_builder,
        dataflow_to_sql_converter=dataflow_to_sql_converter,
        sql_client=sql_client,
        query_spec=MetricFlowQuerySpec(
            metric_specs=(MetricSpec("listings"),),
            time_dimension_specs=(metric_time_with_custom_grain, listing_ds_by_month),
        ),
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
-- Rendered SQL returning trailing_2_months_revenue per metric_time__martian_day.
-- Every step is emitted as its own nested subquery (subq_0 through subq_9); read from the
-- innermost subquery outward to follow the order of operations.
-- Re-aggregate Metric via Group By
SELECT
subq_9.metric_time__martian_day
, subq_9.trailing_2_months_revenue
FROM (
-- Window Function for Metric Re-aggregation
SELECT
subq_8.metric_time__martian_day
-- Averages the per-day values within each martian_day partition. A window function keeps one
-- row per input row, so the outermost GROUP BY is what collapses the repeated rows.
, AVG(subq_8.trailing_2_months_revenue) OVER (PARTITION BY subq_8.metric_time__martian_day) AS trailing_2_months_revenue
FROM (
-- Compute Metrics via Expressions
SELECT
subq_7.metric_time__martian_day
, subq_7.metric_time__day
, subq_7.txn_revenue AS trailing_2_months_revenue
FROM (
-- Aggregate Measures
SELECT
subq_6.metric_time__martian_day
, subq_6.metric_time__day
, SUM(subq_6.txn_revenue) AS txn_revenue
FROM (
-- Pass Only Elements: ['txn_revenue', 'metric_time__day', 'metric_time__day']
-- Join to Custom Granularity Dataset
SELECT
subq_4.metric_time__day AS metric_time__day
, subq_4.txn_revenue AS txn_revenue
, subq_5.martian_day AS metric_time__martian_day
FROM (
-- Join Self Over Time Range
SELECT
subq_2.metric_time__day AS metric_time__day
, subq_1.ds__day AS ds__day
, subq_1.ds__week AS ds__week
, subq_1.ds__month AS ds__month
, subq_1.ds__quarter AS ds__quarter
, subq_1.ds__year AS ds__year
, subq_1.ds__extract_year AS ds__extract_year
, subq_1.ds__extract_quarter AS ds__extract_quarter
, subq_1.ds__extract_month AS ds__extract_month
, subq_1.ds__extract_day AS ds__extract_day
, subq_1.ds__extract_dow AS ds__extract_dow
, subq_1.ds__extract_doy AS ds__extract_doy
, subq_1.revenue_instance__ds__day AS revenue_instance__ds__day
, subq_1.revenue_instance__ds__week AS revenue_instance__ds__week
, subq_1.revenue_instance__ds__month AS revenue_instance__ds__month
, subq_1.revenue_instance__ds__quarter AS revenue_instance__ds__quarter
, subq_1.revenue_instance__ds__year AS revenue_instance__ds__year
, subq_1.revenue_instance__ds__extract_year AS revenue_instance__ds__extract_year
, subq_1.revenue_instance__ds__extract_quarter AS revenue_instance__ds__extract_quarter
, subq_1.revenue_instance__ds__extract_month AS revenue_instance__ds__extract_month
, subq_1.revenue_instance__ds__extract_day AS revenue_instance__ds__extract_day
, subq_1.revenue_instance__ds__extract_dow AS revenue_instance__ds__extract_dow
, subq_1.revenue_instance__ds__extract_doy AS revenue_instance__ds__extract_doy
, subq_1.metric_time__week AS metric_time__week
, subq_1.metric_time__month AS metric_time__month
, subq_1.metric_time__quarter AS metric_time__quarter
, subq_1.metric_time__year AS metric_time__year
, subq_1.metric_time__extract_year AS metric_time__extract_year
, subq_1.metric_time__extract_quarter AS metric_time__extract_quarter
, subq_1.metric_time__extract_month AS metric_time__extract_month
, subq_1.metric_time__extract_day AS metric_time__extract_day
, subq_1.metric_time__extract_dow AS metric_time__extract_dow
, subq_1.metric_time__extract_doy AS metric_time__extract_doy
, subq_1.user AS user
, subq_1.revenue_instance__user AS revenue_instance__user
, subq_1.txn_revenue AS txn_revenue
FROM (
-- Time Spine
SELECT
subq_3.ds AS metric_time__day
FROM ***************************.mf_time_spine subq_3
) subq_2
INNER JOIN (
-- Metric Time Dimension 'ds'
SELECT
subq_0.ds__day
, subq_0.ds__week
, subq_0.ds__month
, subq_0.ds__quarter
, subq_0.ds__year
, subq_0.ds__extract_year
, subq_0.ds__extract_quarter
, subq_0.ds__extract_month
, subq_0.ds__extract_day
, subq_0.ds__extract_dow
, subq_0.ds__extract_doy
, subq_0.revenue_instance__ds__day
, subq_0.revenue_instance__ds__week
, subq_0.revenue_instance__ds__month
, subq_0.revenue_instance__ds__quarter
, subq_0.revenue_instance__ds__year
, subq_0.revenue_instance__ds__extract_year
, subq_0.revenue_instance__ds__extract_quarter
, subq_0.revenue_instance__ds__extract_month
, subq_0.revenue_instance__ds__extract_day
, subq_0.revenue_instance__ds__extract_dow
, subq_0.revenue_instance__ds__extract_doy
, subq_0.ds__day AS metric_time__day
, subq_0.ds__week AS metric_time__week
, subq_0.ds__month AS metric_time__month
, subq_0.ds__quarter AS metric_time__quarter
, subq_0.ds__year AS metric_time__year
, subq_0.ds__extract_year AS metric_time__extract_year
, subq_0.ds__extract_quarter AS metric_time__extract_quarter
, subq_0.ds__extract_month AS metric_time__extract_month
, subq_0.ds__extract_day AS metric_time__extract_day
, subq_0.ds__extract_dow AS metric_time__extract_dow
, subq_0.ds__extract_doy AS metric_time__extract_doy
, subq_0.user
, subq_0.revenue_instance__user
, subq_0.txn_revenue
FROM (
-- Read Elements From Semantic Model 'revenue'
SELECT
revenue_src_28000.revenue AS txn_revenue
, DATETIME_TRUNC(revenue_src_28000.created_at, day) AS ds__day
, DATETIME_TRUNC(revenue_src_28000.created_at, isoweek) AS ds__week
, DATETIME_TRUNC(revenue_src_28000.created_at, month) AS ds__month
, DATETIME_TRUNC(revenue_src_28000.created_at, quarter) AS ds__quarter
, DATETIME_TRUNC(revenue_src_28000.created_at, year) AS ds__year
, EXTRACT(year FROM revenue_src_28000.created_at) AS ds__extract_year
, EXTRACT(quarter FROM revenue_src_28000.created_at) AS ds__extract_quarter
, EXTRACT(month FROM revenue_src_28000.created_at) AS ds__extract_month
, EXTRACT(day FROM revenue_src_28000.created_at) AS ds__extract_day
-- NOTE(review): maps a dayofweek value of 1 to 7 and shifts every other value down by one;
-- presumably converts Sunday-first numbering to Monday-first numbering - confirm against the engine docs.
, IF(EXTRACT(dayofweek FROM revenue_src_28000.created_at) = 1, 7, EXTRACT(dayofweek FROM revenue_src_28000.created_at) - 1) AS ds__extract_dow
, EXTRACT(dayofyear FROM revenue_src_28000.created_at) AS ds__extract_doy
, DATETIME_TRUNC(revenue_src_28000.created_at, day) AS revenue_instance__ds__day
, DATETIME_TRUNC(revenue_src_28000.created_at, isoweek) AS revenue_instance__ds__week
, DATETIME_TRUNC(revenue_src_28000.created_at, month) AS revenue_instance__ds__month
, DATETIME_TRUNC(revenue_src_28000.created_at, quarter) AS revenue_instance__ds__quarter
, DATETIME_TRUNC(revenue_src_28000.created_at, year) AS revenue_instance__ds__year
, EXTRACT(year FROM revenue_src_28000.created_at) AS revenue_instance__ds__extract_year
, EXTRACT(quarter FROM revenue_src_28000.created_at) AS revenue_instance__ds__extract_quarter
, EXTRACT(month FROM revenue_src_28000.created_at) AS revenue_instance__ds__extract_month
, EXTRACT(day FROM revenue_src_28000.created_at) AS revenue_instance__ds__extract_day
, IF(EXTRACT(dayofweek FROM revenue_src_28000.created_at) = 1, 7, EXTRACT(dayofweek FROM revenue_src_28000.created_at) - 1) AS revenue_instance__ds__extract_dow
, EXTRACT(dayofyear FROM revenue_src_28000.created_at) AS revenue_instance__ds__extract_doy
, revenue_src_28000.user_id AS user
, revenue_src_28000.user_id AS revenue_instance__user
FROM ***************************.fct_revenue revenue_src_28000
) subq_0
) subq_1
ON
-- Pairs each time spine day with the revenue rows dated on or before that day and
-- strictly after the point 2 months earlier (upper bound inclusive, lower bound exclusive).
(
subq_1.metric_time__day <= subq_2.metric_time__day
) AND (
subq_1.metric_time__day > DATE_SUB(CAST(subq_2.metric_time__day AS DATETIME), INTERVAL 2 month)
)
) subq_4
-- Looks up the martian_day column of the time spine row whose ds equals the day.
LEFT OUTER JOIN
***************************.mf_time_spine subq_5
ON
subq_4.metric_time__day = subq_5.ds
) subq_6
GROUP BY
metric_time__martian_day
, metric_time__day
) subq_7
) subq_8
) subq_9
-- Grouping by both output columns removes the duplicate rows left by the window function.
GROUP BY
metric_time__martian_day
, trailing_2_months_revenue
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
-- Rendered SQL returning trailing_2_months_revenue per metric_time__martian_day.
-- Several steps share a single subquery here; where step comments are stacked above one
-- SELECT, that SELECT implements all of the listed steps.
-- Re-aggregate Metric via Group By
SELECT
metric_time__martian_day
, trailing_2_months_revenue
FROM (
-- Compute Metrics via Expressions
-- Window Function for Metric Re-aggregation
SELECT
metric_time__martian_day
-- Averages the per-day sums within each martian_day partition. A window function keeps one
-- row per input row, so the outermost GROUP BY is what collapses the repeated rows.
, AVG(txn_revenue) OVER (PARTITION BY metric_time__martian_day) AS trailing_2_months_revenue
FROM (
-- Pass Only Elements: ['txn_revenue', 'metric_time__day', 'metric_time__day']
-- Join to Custom Granularity Dataset
-- Aggregate Measures
SELECT
subq_15.martian_day AS metric_time__martian_day
, subq_14.metric_time__day AS metric_time__day
, SUM(subq_14.txn_revenue) AS txn_revenue
FROM (
-- Join Self Over Time Range
SELECT
subq_13.ds AS metric_time__day
, revenue_src_28000.revenue AS txn_revenue
FROM ***************************.mf_time_spine subq_13
INNER JOIN
***************************.fct_revenue revenue_src_28000
ON
-- Pairs each time spine day with the revenue rows created on or before that day and
-- strictly after the point 2 months earlier (upper bound inclusive, lower bound exclusive).
(
DATETIME_TRUNC(revenue_src_28000.created_at, day) <= subq_13.ds
) AND (
DATETIME_TRUNC(revenue_src_28000.created_at, day) > DATE_SUB(CAST(subq_13.ds AS DATETIME), INTERVAL 2 month)
)
) subq_14
-- Looks up the martian_day column of the time spine row whose ds equals the day.
LEFT OUTER JOIN
***************************.mf_time_spine subq_15
ON
subq_14.metric_time__day = subq_15.ds
GROUP BY
metric_time__martian_day
, metric_time__day
) subq_17
) subq_19
-- Grouping by both output columns removes the duplicate rows left by the window function.
GROUP BY
metric_time__martian_day
, trailing_2_months_revenue
Loading

0 comments on commit 727a380

Please sign in to comment.