From 800ed47c2c58714e777f25958095a90f3e74fe42 Mon Sep 17 00:00:00 2001 From: Agnes Kiss Date: Tue, 5 Dec 2023 16:47:28 +0000 Subject: [PATCH] Add optional conversions module --- dbt_project.yml | 9 + docs/markdown/snowplow_unified_macros_docs.md | 2 +- .../.scripts/integration_test.sh | 15 +- integration_tests/dbt_project.yml | 2 + macros/config_check.sql | 6 + macros/field_definitions/conversion_query.sql | 176 ++++++++++++++++++ .../get_conversion_columns.sql | 151 --------------- .../get_app_context_fields.sql | 2 +- .../get_app_error_event_fields.sql | 2 +- .../get_browser_context_fields.sql | 2 +- .../get_cmp_visible_event_fields.sql | 2 +- .../get_consent_event_fields.sql | 2 +- .../get_conversion_fields.sql | 56 ++++++ macros/field_extractions/get_cwv_fields.sql | 2 +- .../get_deep_link_context_fields.sql | 2 +- .../get_geo_context_fields.sql | 2 +- .../get_iab_context_fields.sql | 2 +- .../get_mobile_context_fields.sql | 2 +- .../get_screen_context_fields.sql | 2 +- .../get_screen_view_event_fields.sql | 2 +- .../get_session_context_fields.sql | 2 +- .../get_ua_context_fields.sql | 2 +- .../get_web_page_context_fields.sql | 2 +- .../get_yauaa_context_fields.sql | 2 +- macros/macros.yml | 4 +- macros/unify_fields_query.sql | 32 ++-- .../conversions/conversions.yml | 83 +++++++++ .../snowplow_unified_conversions_this_run.sql | 27 +++ .../snowplow_unified_conversions.sql | 41 ++++ .../snowplow_unified_sessions_this_run.sql | 24 +-- packages.yml | 2 +- 31 files changed, 453 insertions(+), 209 deletions(-) create mode 100644 macros/field_definitions/conversion_query.sql delete mode 100644 macros/field_definitions/get_conversion_columns.sql create mode 100644 macros/field_extractions/get_conversion_fields.sql create mode 100644 models/optional_modules/conversions/conversions.yml create mode 100644 models/optional_modules/conversions/scratch/snowplow_unified_conversions_this_run.sql create mode 100644 models/optional_modules/conversions/snowplow_unified_conversions.sql diff --git a/dbt_project.yml b/dbt_project.yml index 65ea4f6e..d41fc968 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -58,6 +58,7 @@ vars: snowplow__session_lookback_days: 730 snowplow__session_stitching: true snowplow__view_stitching: false + snowplow__conversion_stitching: true snowplow__session_timestamp: collector_tstamp snowplow__start_date: '2020-01-01' # snowplow__total_all_conversions: false @@ -91,10 +92,12 @@ vars: snowplow__session_passthroughs: [] snowplow__user_first_passthroughs: [] snowplow__user_last_passthroughs: [] + snowplow__conversion_passthroughs: [] # enable custom modules: snowplow__enable_consent: false snowplow__enable_cwv: false snowplow__enable_app_errors: false + snowplow__enable_conversions: false # WAREHOUSE SPECIFIC @@ -169,6 +172,12 @@ models: scratch: +schema: "scratch" +tags: "scratch" + conversions: + +schema: "derived" + +tags: ["snowplow_unified_incremental", "derived", "conversions"] + scratch: + +schema: "scratch" + +tags: "scratch" core_web_vitals: +schema: "derived" +tags: ["snowplow_unified_incremental", "derived", "core_web_vitals"] diff --git a/docs/markdown/snowplow_unified_macros_docs.md b/docs/markdown/snowplow_unified_macros_docs.md index 46556b7a..859463db 100644 --- a/docs/markdown/snowplow_unified_macros_docs.md +++ b/docs/markdown/snowplow_unified_macros_docs.md @@ -231,7 +231,7 @@ The specific sql to be used for the relevant warehouse to calculate the count of {% endraw %} {% enddocs %} -{% docs macro_get_conversion_columns %} +{% docs macro_conversion_query %} {% raw %} A macro to keep the different ways of calculating conversion fields per warehouse abstracted away for the sessions table. diff --git a/integration_tests/.scripts/integration_test.sh b/integration_tests/.scripts/integration_test.sh index 9fa1185e..41c3e979 100755 --- a/integration_tests/.scripts/integration_test.sh +++ b/integration_tests/.scripts/integration_test.sh @@ -24,29 +24,26 @@ fi for db in ${DATABASES[@]}; do echo "Snowplow unified integration tests: Seeding data" - eval "dbt seed --full-refresh --target $db" || exit 1; - echo "Snowplow unified integration tests: App errors module" + echo "Snowplow unified integration tests: Conversions" + eval "dbt run --full-refresh --select +snowplow_unified_conversions snowplow_unified_integration_tests.source --vars '{snowplow__allow_refresh: true, snowplow__backfill_limit_days: 220, snowplow__enable_cwv: false, snowplow__enable_conversions: true}' --target $db" || exit 1; + echo "Snowplow unified integration tests: App errors module" eval "dbt run --full-refresh --select +snowplow_unified_app_errors snowplow_unified_integration_tests.source --vars '{snowplow__allow_refresh: true, snowplow__backfill_limit_days: 220, snowplow__enable_cwv: false, snowplow__enable_app_errors: true}' --target $db" || exit 1; echo "Snowplow unified integration tests: Late enabled contexts" - eval "dbt run --full-refresh --select +test_late_enabled_contexts snowplow_unified_integration_tests.source --vars '{snowplow__allow_refresh: true, snowplow__backfill_limit_days: 220, snowplow__enable_cwv: false, snowplow__enable_mobile_context: false, snowplow__enable_geolocation_context: false, snowplow__enable_application_context: false, snowplow__enable_screen_context: false, snowplow__enable_app_errors: false, snowplow__enable_deep_link_context: false, snowplow__enable_cwv: false, snowplow__enable_iab: false, snowplow__enable_ua: false, snowplow__enable_browser_context: false, snowplow__enable_consent: false}' --target $db" || exit 1; eval "dbt run --select +test_late_enabled_contexts run --vars '{snowplow__allow_refresh: true, snowplow__backfill_limit_days: 250, snowplow__enable_cwv: false}' --target $db" - echo "Snowplow unified integration tests: Late enabled contexts test passed" echo "Snowplow unified integration tests: Execute models (all contexts except for cwv) - run 1/4" - eval "dbt run --full-refresh --vars '{snowplow__allow_refresh: true, snowplow__backfill_limit_days: 243, snowplow__enable_cwv: false}' --target $db" || exit 1; for i in {2..4} do echo "Snowplow unified integration tests: Execute models (all contexts except for cwv) - run $i/4" - eval "dbt run --vars '{snowplow__enable_cwv: false}' --target $db" || exit 1; done @@ -55,19 +52,15 @@ for db in ${DATABASES[@]}; do eval "dbt test --exclude snowplow_unified_web_vital_measurements snowplow_unified_web_vital_measurements_actual snowplow_unified_web_vital_events_this_run snowplow_unified_views_mobile_screen_engagement_actual test_name:not_null --store-failures --target $db" || exit 1; echo "Snowplow unified integration tests: All non-CWV tests passed" - + echo "Snowplow unified integration tests - Core Web Vitals: Execute models" - eval "dbt run --select +snowplow_unified_web_vital_measurements_actual snowplow_unified_web_vital_measurements_expected_stg source --full-refresh --vars '{snowplow__allow_refresh: true, snowplow__start_date: '2023-03-01', snowplow__backfill_limit_days: 50, snowplow__cwv_days_to_measure: 999, snowplow__enable_mobile: false, snowplow__enable_mobile_context: false, snowplow__enable_geolocation_context: false, snowplow__enable_application_context: false, snowplow__enable_screen_context: false, snowplow__enable_app_errors: false, snowplow__enable_deep_link_context: false, snowplow__enable_ua: false, snowplow__enable_browser_context: false, snowplow__enable_consent: false}' --target $db" || exit 1; - eval "dbt test --select snowplow_unified_web_vital_measurements_actual --store-failures --target $db" || exit 1; echo "Snowplow unified integration tests: Execute web (all web contexts except for cwv)" - eval "dbt run --full-refresh --vars '{snowplow__allow_refresh: true, snowplow__backfill_limit_days: 9999, snowplow__enable_mobile: false, snowplow__enable_mobile_context: false, snowplow__enable_geolocation_context: false, snowplow__enable_application_context: false, snowplow__enable_screen_context: false, snowplow__enable_app_errors: false, snowplow__enable_deep_link_context: false, snowplow__enable_cwv: false}' --select +snowplow_unified_users snowplow_unified_events_stg --target $db" || exit 1; echo "Snowplow unified integration tests: Execute mobile (all mobile contexts)" - eval "dbt run --full-refresh --vars '{snowplow__allow_refresh: true, snowplow__backfill_limit_days: 9999, snowplow__enable_web: false, snowplow__enable_iab: false, snowplow__enable_ua: false, snowplow__enable_browser_context: false, snowplow__enable_consent: false, snowplow__enable_cwv: false}' --select +snowplow_unified_users snowplow_unified_events_stg --target $db" || exit 1; echo "Snowplow unified integration tests: All CWV tests passed" diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index 45e09729..88746172 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -47,6 +47,7 @@ vars: snowplow__enable_web: true snowplow__enable_mobile: true snowplow__yauaa_context: snowplow_unified_yauaa_context_stg + snowplow__conversion_stitching: false # web only contexts snowplow__cwv_events: snowplow_unified_cwv_stg snowplow__page_view_context: snowplow_unified_page_view_context_stg @@ -96,6 +97,7 @@ vars: snowplow__total_all_conversions: true snowplow__list_event_counts: true + seeds: quote_columns: false snowplow_unified_integration_tests: diff --git a/macros/config_check.sql b/macros/config_check.sql index 30b65924..ce2ef410 100644 --- a/macros/config_check.sql +++ b/macros/config_check.sql @@ -72,5 +72,11 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 ) }} {% endif %} {% endif %} + + {% if var('snowplow__enable_conversions') and not var('snowplow__conversion_events') %} + {{ exceptions.raise_compiler_error( + "Snowplow Error: var('snowplow__conversion_events') is not configured but the conversions optional module is enabled. Please configure this variable before proceeding." + ) }} + {% endif %} {% endmacro %} diff --git a/macros/field_definitions/conversion_query.sql b/macros/field_definitions/conversion_query.sql new file mode 100644 index 00000000..d08f8a0d --- /dev/null +++ b/macros/field_definitions/conversion_query.sql @@ -0,0 +1,176 @@ +{# +Copyright (c) 2023-present Snowplow Analytics Ltd. All rights reserved. +This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, +and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. +You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ +#} + +{% macro conversion_query(conv_object = {}, names_only = false) %} +{{ return(adapter.dispatch('conversion_query', 'snowplow_unified')(conv_object, names_only)) }} +{% endmacro %} + +{% macro default__conversion_query(conv_object, names_only = false) %} +{% if execute %} +{% do exceptions.raise_compiler_error('Macro get_field only supports Bigquery, Snowflake, Spark, Databricks, Postgres, and Redshift, it is not supported for ' ~ target.type) %} +{% endif %} +{% endmacro %} + +{% macro snowflake__conversion_query(conv_object, names_only = false) %} + +{% set when_condition = "cv_type = '"~ conv_object['name'] ~"'" if var('snowplow__enable_conversions', false) else conv_object['condition'] %} +{% set then_condition = "cv_value" if var('snowplow__enable_conversions', false) else conv_object['value'] %} +{% set tstamp_field = "cv_tstamp" if var('snowplow__enable_conversions', false) else "derived_tstamp" %} + +{%- if not names_only %} +,COUNT(CASE WHEN {{ when_condition }} THEN 1 ELSE null END) AS cv_{{ conv_object['name'] }}_volume +{%- if conv_object.get('list_events', false) %} +,ARRAYAGG(CASE WHEN {{ when_condition }} THEN event_id ELSE null END) WITHIN GROUP (ORDER BY {{ tstamp_field }}, dvce_created_tstamp, event_id) AS cv_{{ conv_object['name'] }}_events +{%- endif -%} +{%- if conv_object.get('value', none) %} +,ARRAYAGG(CASE WHEN {{ when_condition }} THEN coalesce({{ then_condition }},{{ conv_object.get('default_value', 0) }}) ELSE null END) WITHIN GROUP (ORDER BY {{ tstamp_field }}, dvce_created_tstamp, event_id) AS cv_{{ conv_object['name'] }}_values +,SUM(CASE WHEN {{ when_condition }} THEN coalesce({{ then_condition }}, {{ conv_object.get('default_value', 0) }}) ELSE 0 END) AS cv_{{ conv_object['name'] }}_total +{%- endif %} +,MIN(CASE WHEN {{ when_condition }} THEN {{ tstamp_field }} ELSE null END) AS cv_{{ conv_object['name'] }}_first_conversion +,CAST(MAX(CASE WHEN {{ when_condition }} THEN 1 ELSE 0 END) AS {{ type_boolean() }}) AS cv_{{ conv_object['name'] }}_converted +{%- else -%} +,coalesce(cv_{{ conv_object['name'] }}_volume, 0) as cv_{{ conv_object['name'] }}_volume +{%- if conv_object.get('list_events', false) %} +,coalesce(cv_{{ conv_object['name'] }}_events, []) as cv_{{ conv_object['name'] }}_events +{%- endif %} +{%- if conv_object.get('value', none) %} +,coalesce(cv_{{ conv_object['name'] }}_values, []) as cv_{{ conv_object['name'] }}_values +,coalesce(cv_{{ conv_object['name'] }}_total, 0) as cv_{{ conv_object['name'] }}_total +{%- endif %} +,cv_{{ conv_object['name'] }}_first_conversion +,coalesce(cv_{{ conv_object['name'] }}_converted, false) as cv_{{ conv_object['name'] }}_converted +{%- endif %} +{% endmacro %} + + +{% macro bigquery__conversion_query(conv_object, names_only = false) %} + +{% set when_condition = "cv_type = '"~ conv_object['name'] ~"'" if var('snowplow__enable_conversions', false) else conv_object['condition'] %} +{% set then_condition = "cv_value" if var('snowplow__enable_conversions', false) else conv_object['value'] %} +{% set tstamp_field = "cv_tstamp" if var('snowplow__enable_conversions', false) else "derived_tstamp" %} + +{%- if not names_only %} +,COUNT(CASE WHEN {{ when_condition }} THEN 1 ELSE null END) AS cv_{{ conv_object['name'] }}_volume +{%- if conv_object.get('list_events', false) %} +,ARRAY_AGG(CASE WHEN {{ when_condition }} THEN event_id ELSE null END IGNORE NULLS ORDER BY {{ tstamp_field }}, dvce_created_tstamp, event_id) AS cv_{{ conv_object['name'] }}_events +{%- endif -%} +{%- if conv_object.get('value', none) %} +,ARRAY_AGG(CASE WHEN {{ when_condition }} THEN coalesce({{ then_condition }},{{ conv_object.get('default_value', 0) }}) ELSE null END IGNORE NULLS ORDER BY {{ tstamp_field }}, dvce_created_tstamp, event_id) AS cv_{{ conv_object['name'] }}_values +,SUM(CASE WHEN {{ when_condition }} THEN coalesce({{ then_condition }}, {{ conv_object.get('default_value', 0) }}) ELSE 0 END) AS cv_{{ conv_object['name'] }}_total +{%- endif -%} +,MIN(CASE WHEN {{ when_condition }} THEN {{ tstamp_field }} ELSE null END) AS cv_{{ conv_object['name'] }}_first_conversion +,CAST(MAX(CASE WHEN {{ when_condition }} THEN 1 ELSE 0 END) AS {{ type_boolean() }}) AS cv_{{ conv_object['name'] }}_converted +{%- else -%} +,coalesce(cv_{{ conv_object['name'] }}_volume, 0) as cv_{{ conv_object['name'] }}_volume +{%- if conv_object.get('list_events', false) %} +,coalesce(cv_{{ conv_object['name'] }}_events, []) as cv_{{ conv_object['name'] }}_events +{%- endif %} +{%- if conv_object.get('value', none) %} +,coalesce(cv_{{ conv_object['name'] }}_values, []) as cv_{{ conv_object['name'] }}_values +,coalesce(cv_{{ conv_object['name'] }}_total, 0) as cv_{{ conv_object['name'] }}_total +{%- endif %} +,cv_{{ conv_object['name'] }}_first_conversion +,coalesce(cv_{{ conv_object['name'] }}_converted, false) as cv_{{ conv_object['name'] }}_converted +{%- endif %} +{% endmacro %} + + +{% macro spark__conversion_query(conv_object, names_only = false) %} + +{% set when_condition = "cv_type = '"~ conv_object['name'] ~"'" if var('snowplow__enable_conversions', false) else conv_object['condition'] %} +{% set then_condition = "cv_value" if var('snowplow__enable_conversions', false) else conv_object['value'] %} +{% set tstamp_field = "cv_tstamp" if var('snowplow__enable_conversions', false) else "derived_tstamp" %} + +{%- if not names_only %} +,COUNT(CASE WHEN {{ when_condition }} THEN 1 ELSE null END) AS cv_{{ conv_object['name'] }}_volume +{%- if conv_object.get('list_events', false) %} +{# make an struct of the thing we want to put in an array, then the things we want to order by, collect THOSE into an array, filter out where the thing we want is null, sort those based on the other columns, then select just the thing we care about #} +,transform(array_sort(FILTER(collect_list(struct(CASE WHEN {{ when_condition }} THEN event_id ELSE null END, {{ tstamp_field }}, dvce_created_tstamp, event_id)), x -> x['col1'] is not null), (left, right) -> CASE WHEN left['{{ tstamp_field }}'] < right['{{ tstamp_field }}'] THEN -1 WHEN left['{{ tstamp_field }}'] > right['{{ tstamp_field }}'] THEN 1 WHEN left['dvce_created_tstamp'] < right['dvce_created_tstamp'] THEN -1 WHEN left['dvce_created_tstamp'] > right['dvce_created_tstamp'] THEN 1 WHEN left['event_id'] < right['event_id'] THEN -1 WHEN left['event_id'] > right['event_id'] THEN 1 ELSE 0 END), x -> x['col1']) AS cv_{{ conv_object['name'] }}_events +{%- endif -%} +{%- if conv_object.get('value', none) %} +{# make an struct of the thing we want to put in an array, then the things we want to order by, collect THOSE into an array, filter out where the thing we want is null, sort those based on the other columns, then select just the thing we care about #} +,transform(array_sort(FILTER(collect_list(struct(CASE WHEN {{ when_condition }} THEN coalesce({{ then_condition }},{{ conv_object.get('default_value', 0) }}) ELSE null END, {{ tstamp_field }}, dvce_created_tstamp, event_id)), x -> x['col1'] is not null), (left, right) -> CASE WHEN left['{{ tstamp_field }}'] < right['{{ tstamp_field }}'] THEN -1 WHEN left['{{ tstamp_field }}'] > right['{{ tstamp_field }}'] THEN 1 WHEN left['dvce_created_tstamp'] < right['dvce_created_tstamp'] THEN -1 WHEN left['dvce_created_tstamp'] > right['dvce_created_tstamp'] THEN 1 WHEN left['event_id'] < right['event_id'] THEN -1 WHEN left['event_id'] > right['event_id'] THEN 1 ELSE 0 END), x -> x['col1']) AS cv_{{ conv_object['name'] }}_values +,SUM(CASE WHEN {{ when_condition }} THEN coalesce({{ then_condition }}, {{ conv_object.get('default_value', 0) }}) ELSE 0 END) AS cv_{{ conv_object['name'] }}_total +{%- endif -%} +,MIN(CASE WHEN {{ when_condition }} THEN {{ tstamp_field }} ELSE null END) AS cv_{{ conv_object['name'] }}_first_conversion +,CAST(MAX(CASE WHEN {{ when_condition }} THEN 1 ELSE 0 END) AS {{ type_boolean() }}) AS cv_{{ conv_object['name'] }}_converted +{%- else -%} +,coalesce(cv_{{ conv_object['name'] }}_volume, 0) as cv_{{ conv_object['name'] }}_volume +{%- if conv_object.get('list_events', false) %} +,coalesce(cv_{{ conv_object['name'] }}_events, from_json("[]", "array")) as cv_{{ conv_object['name'] }}_events +{%- endif %} +{%- if conv_object.get('value', none) %} +,coalesce(cv_{{ conv_object['name'] }}_values, from_json("[]", "array")) as cv_{{ conv_object['name'] }}_values +,coalesce(cv_{{ conv_object['name'] }}_total, 0) as cv_{{ conv_object['name'] }}_total +{%- endif %} +,cv_{{ conv_object['name'] }}_first_conversion +,coalesce(cv_{{ conv_object['name'] }}_converted, false) as cv_{{ conv_object['name'] }}_converted +{%- endif %} +{% endmacro %} + +{% macro postgres__conversion_query(conv_object = {}, names_only = false) %} + +{% set when_condition = "cv_type = '"~ conv_object['name'] ~"'" if var('snowplow__enable_conversions', false) else conv_object['condition'] %} +{% set then_condition = "cv_value" if var('snowplow__enable_conversions', false) else conv_object['value'] %} +{% set tstamp_field = "cv_tstamp" if var('snowplow__enable_conversions', false) else "derived_tstamp" %} + +{%- if not names_only %} +,COUNT(CASE WHEN {{ when_condition }} THEN 1 ELSE null END) AS cv_{{ conv_object['name'] }}_volume +{%- if conv_object.get('list_events', false) %} +,ARRAY_REMOVE(ARRAY_AGG(CASE WHEN {{ when_condition }} THEN event_id ELSE null END ORDER BY {{ tstamp_field }}, dvce_created_tstamp, event_id), null) AS cv_{{ conv_object['name'] }}_events +{%- endif -%} +{%- if conv_object.get('value', none) %} +,ARRAY_REMOVE(ARRAY_AGG(CASE WHEN {{ when_condition }} THEN coalesce({{ then_condition }},{{ conv_object.get('default_value', 0) }}) ELSE null END ORDER BY {{ tstamp_field }}, dvce_created_tstamp, event_id), null) AS cv_{{ conv_object['name'] }}_values +,SUM(CASE WHEN {{ when_condition }} THEN coalesce({{ then_condition }}, {{ conv_object.get('default_value', 0) }}) ELSE 0 END) AS cv_{{ conv_object['name'] }}_total +{%- endif -%} +,MIN(CASE WHEN {{ when_condition }} THEN {{ tstamp_field }} ELSE null END) AS cv_{{ conv_object['name'] }}_first_conversion +,CAST(MAX(CASE WHEN {{ when_condition }} THEN 1 ELSE 0 END) AS {{ type_boolean() }}) AS cv_{{ conv_object['name'] }}_converted +{%- else -%} +,coalesce(cv_{{ conv_object['name'] }}_volume, 0) as cv_{{ conv_object['name'] }}_volume +{%- if conv_object.get('list_events', false) %} +,coalesce(cv_{{ conv_object['name'] }}_events, string_to_array(regexp_replace('[]', '[\[\]\"]', '', 'g'),',')) as cv_{{ conv_object['name'] }}_events +{%- endif %} +{%- if conv_object.get('value', none) %} +,coalesce(cv_{{ conv_object['name'] }}_values, string_to_array(regexp_replace('[]', '[\[\]\"]', '', 'g'),',')::numeric[]) as cv_{{ conv_object['name'] }}_values +,coalesce(cv_{{ conv_object['name'] }}_total, 0) as cv_{{ conv_object['name'] }}_total +{%- endif %} +,cv_{{ conv_object['name'] }}_first_conversion +,coalesce(cv_{{ conv_object['name'] }}_converted, false) as cv_{{ conv_object['name'] }}_converted +{%- endif %} +{% endmacro %} + +{% macro redshift__conversion_query(conv_object, names_only = false) %} + +{% set when_condition = "cv_type = '"~ conv_object['name'] ~"'" if var('snowplow__enable_conversions', false) else conv_object['condition'] %} +{% set then_condition = "cv_value" if var('snowplow__enable_conversions', false) else conv_object['value'] %} +{% set tstamp_field = "cv_tstamp" if var('snowplow__enable_conversions', false) else "derived_tstamp" %} + +{%- if not names_only %} +,COUNT(CASE WHEN {{ when_condition }} THEN 1 ELSE null END) AS cv_{{ conv_object['name'] }}_volume +{%- if conv_object.get('list_events', false) %} +,SPLIT_TO_ARRAY(LISTAGG(CASE WHEN {{ when_condition }} THEN event_id ELSE null END, ',') WITHIN GROUP (ORDER BY {{ tstamp_field }}, dvce_created_tstamp, event_id), ',') AS cv_{{ conv_object['name'] }}_events +{%- endif -%} +{%- if conv_object.get('value', none) %} +{# Want to try and use a symbol that is unlikely to be in the values due to redshift not having a single array_agg function, hence ~ not , #} +,SPLIT_TO_ARRAY(LISTAGG(CASE WHEN {{ when_condition }} THEN coalesce({{ then_condition }} ,{{ conv_object.get('default_value', 0) }}) ELSE null END, '~') WITHIN GROUP (ORDER BY {{ tstamp_field }}, dvce_created_tstamp, event_id), '~') AS cv_{{ conv_object['name'] }}_values +,SUM(CASE WHEN {{ when_condition }} THEN coalesce({{ then_condition }}, {{ conv_object.get('default_value', 0) }}) ELSE 0 END) AS cv_{{ conv_object['name'] }}_total +{%- endif -%} +,MIN(CASE WHEN {{ when_condition }} THEN {{ tstamp_field }} ELSE null END) AS cv_{{ conv_object['name'] }}_first_conversion +,CAST(MAX(CASE WHEN {{ when_condition }} THEN 1 ELSE 0 END) AS {{ type_boolean() }}) AS cv_{{ conv_object['name'] }}_converted +{%- else -%} +,coalesce(cv_{{ conv_object['name'] }}_volume, 0) as cv_{{ conv_object['name'] }}_volume +{%- if conv_object.get('list_events', false) %} +,coalesce(cv_{{ conv_object['name'] }}_events, cast(null as super)) as cv_{{ conv_object['name'] }}_events +{%- endif %} +{%- if conv_object.get('value', none) %} +,coalesce(cv_{{ conv_object['name'] }}_values, cast(null as super)) as cv_{{ conv_object['name'] }}_values +,coalesce(cv_{{ conv_object['name'] }}_total, 0) as cv_{{ conv_object['name'] }}_total +{%- endif %} +,cv_{{ conv_object['name'] }}_first_conversion +,coalesce(cv_{{ conv_object['name'] }}_converted, false) as cv_{{ conv_object['name'] }}_converted +{%- endif %} +{% endmacro %} diff --git a/macros/field_definitions/get_conversion_columns.sql b/macros/field_definitions/get_conversion_columns.sql deleted file mode 100644 index b84afad7..00000000 --- a/macros/field_definitions/get_conversion_columns.sql +++ /dev/null @@ -1,151 +0,0 @@ -{# -Copyright (c) 2023-present Snowplow Analytics Ltd. All rights reserved. -This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, -and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. -You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ -#} - -{% macro get_conversion_columns(conv_object = {}, names_only = false) %} - {{ return(adapter.dispatch('get_conversion_columns', 'snowplow_unified')(conv_object, names_only)) }} -{% endmacro %} - -{% macro default__get_conversion_columns(conv_object, names_only = false) %} -{% if execute %} - {% do exceptions.raise_compiler_error('Macro get_field only supports Bigquery, Snowflake, Spark, Databricks, Postgres, and Redshift, it is not supported for ' ~ target.type) %} -{% endif %} -{% endmacro %} - -{% macro snowflake__get_conversion_columns(conv_object, names_only = false) %} - {%- if not names_only %} - ,COUNT(CASE WHEN {{ conv_object['condition'] }} THEN 1 ELSE null END) AS cv_{{ conv_object['name'] }}_volume - {%- if conv_object.get('list_events', false) %} - ,ARRAYAGG(CASE WHEN {{ conv_object['condition'] }} THEN event_id ELSE null END) WITHIN GROUP (ORDER BY derived_tstamp, dvce_created_tstamp, event_id) AS cv_{{ conv_object['name'] }}_events - {%- endif -%} - {%- if conv_object.get('value', none) %} - ,ARRAYAGG(CASE WHEN {{ conv_object['condition'] }} THEN coalesce({{ conv_object['value'] }},{{ conv_object.get('default_value', 0) }}) ELSE null END) WITHIN GROUP (ORDER BY derived_tstamp, dvce_created_tstamp, event_id) AS cv_{{ conv_object['name'] }}_values - ,SUM(CASE WHEN {{ conv_object['condition'] }} THEN coalesce({{ conv_object['value'] }}, {{ conv_object.get('default_value', 0) }}) ELSE 0 END) AS cv_{{ conv_object['name'] }}_total - {%- endif %} - ,MIN(CASE WHEN {{ conv_object['condition'] }} THEN derived_tstamp ELSE null END) AS cv_{{ conv_object['name'] }}_first_conversion - ,CAST(MAX(CASE WHEN {{ conv_object['condition'] }} THEN 1 ELSE 0 END) AS {{ type_boolean() }}) AS cv_{{ conv_object['name'] }}_converted - {%- else -%} - ,cv_{{ conv_object['name'] }}_volume - {%- if conv_object.get('list_events', false) %} - ,cv_{{ conv_object['name'] }}_events - {%- endif %} - {%- if conv_object.get('value', none) %} - ,cv_{{ conv_object['name'] }}_values - ,cv_{{ conv_object['name'] }}_total - {%- endif %} - ,cv_{{ conv_object['name'] }}_first_conversion - ,cv_{{ conv_object['name'] }}_converted - {%- endif %} -{% endmacro %} - - -{% macro bigquery__get_conversion_columns(conv_object, names_only = false) %} - {%- if not names_only %} - ,COUNT(CASE WHEN {{ conv_object['condition'] }} THEN 1 ELSE null END) AS cv_{{ conv_object['name'] }}_volume - {%- if conv_object.get('list_events', false) %} - ,ARRAY_AGG(CASE WHEN {{ conv_object['condition'] }} THEN event_id ELSE null END IGNORE NULLS ORDER BY derived_tstamp, dvce_created_tstamp, event_id) AS cv_{{ conv_object['name'] }}_events - {%- endif -%} - {%- if conv_object.get('value', none) %} - ,ARRAY_AGG(CASE WHEN {{ conv_object['condition'] }} THEN coalesce({{ conv_object['value'] }},{{ conv_object.get('default_value', 0) }}) ELSE null END IGNORE NULLS ORDER BY derived_tstamp, dvce_created_tstamp, event_id) AS cv_{{ conv_object['name'] }}_values - ,SUM(CASE WHEN {{ conv_object['condition'] }} THEN coalesce({{ conv_object['value'] }}, {{ conv_object.get('default_value', 0) }}) ELSE 0 END) AS cv_{{ conv_object['name'] }}_total - {%- endif -%} - ,MIN(CASE WHEN {{ conv_object['condition'] }} THEN derived_tstamp ELSE null END) AS cv_{{ conv_object['name'] }}_first_conversion - ,CAST(MAX(CASE WHEN {{ conv_object['condition'] }} THEN 1 ELSE 0 END) AS {{ type_boolean() }}) AS cv_{{ conv_object['name'] }}_converted - {%- else -%} - ,cv_{{ conv_object['name'] }}_volume - {%- if conv_object.get('list_events', false) %} - ,cv_{{ conv_object['name'] }}_events - {%- endif %} - {%- if conv_object.get('value', none) %} - ,cv_{{ conv_object['name'] }}_values - ,cv_{{ conv_object['name'] }}_total - {%- endif %} - ,cv_{{ conv_object['name'] }}_first_conversion - ,cv_{{ conv_object['name'] }}_converted - {%- endif %} -{% endmacro %} - - -{% macro spark__get_conversion_columns(conv_object, names_only = false) %} - {%- if not names_only %} - ,COUNT(CASE WHEN {{ conv_object['condition'] }} THEN 1 ELSE null END) AS cv_{{ conv_object['name'] }}_volume - {%- if conv_object.get('list_events', false) %} - {# make an struct of the thing we want to put in an array, then the things we want to order by, collect THOSE into an array, filter out where the thing we want is null, sort those based on the other columns, then select just the thing we care about #} - ,transform(array_sort(FILTER(collect_list(struct(CASE WHEN {{ conv_object['condition'] }} THEN event_id ELSE null END, derived_tstamp, dvce_created_tstamp, event_id)), x -> x['col1'] is not null), (left, right) -> CASE WHEN left['derived_tstamp'] < right['derived_tstamp'] THEN -1 WHEN left['derived_tstamp'] > right['derived_tstamp'] THEN 1 WHEN left['dvce_created_tstamp'] < right['dvce_created_tstamp'] THEN -1 WHEN left['dvce_created_tstamp'] > right['dvce_created_tstamp'] THEN 1 WHEN left['event_id'] < right['event_id'] THEN -1 WHEN left['event_id'] > right['event_id'] THEN 1 ELSE 0 END), x -> x['col1']) AS cv_{{ conv_object['name'] }}_events - {%- endif -%} - {%- if conv_object.get('value', none) %} - {# make an struct of the thing we want to put in an array, then the things we want to order by, collect THOSE into an array, filter out where the thing we want is null, sort those based on the other columns, then select just the thing we care about #} - ,transform(array_sort(FILTER(collect_list(struct(CASE WHEN {{ conv_object['condition'] }} THEN coalesce({{ conv_object['value'] }},{{ conv_object.get('default_value', 0) }}) ELSE null END, derived_tstamp, dvce_created_tstamp, event_id)), x -> x['col1'] is not null), (left, right) -> CASE WHEN left['derived_tstamp'] < right['derived_tstamp'] THEN -1 WHEN left['derived_tstamp'] > right['derived_tstamp'] THEN 1 WHEN left['dvce_created_tstamp'] < right['dvce_created_tstamp'] THEN -1 WHEN left['dvce_created_tstamp'] > right['dvce_created_tstamp'] THEN 1 WHEN left['event_id'] < right['event_id'] THEN -1 WHEN left['event_id'] > right['event_id'] THEN 1 ELSE 0 END), x -> x['col1']) AS cv_{{ conv_object['name'] }}_values - ,SUM(CASE WHEN {{ conv_object['condition'] }} THEN coalesce({{ conv_object['value'] }}, {{ conv_object.get('default_value', 0) }}) ELSE 0 END) AS cv_{{ conv_object['name'] }}_total - {%- endif -%} - ,MIN(CASE WHEN {{ conv_object['condition'] }} THEN derived_tstamp ELSE null END) AS cv_{{ conv_object['name'] }}_first_conversion - ,CAST(MAX(CASE WHEN {{ conv_object['condition'] }} THEN 1 ELSE 0 END) AS {{ type_boolean() }}) AS cv_{{ conv_object['name'] }}_converted - {%- else -%} - ,cv_{{ conv_object['name'] }}_volume - {%- if conv_object.get('list_events', false) %} - ,cv_{{ conv_object['name'] }}_events - {%- endif %} - {%- if conv_object.get('value', none) %} - ,cv_{{ conv_object['name'] }}_values - ,cv_{{ conv_object['name'] }}_total - {%- endif %} - ,cv_{{ conv_object['name'] }}_first_conversion - ,cv_{{ conv_object['name'] }}_converted - {%- endif %} -{% endmacro %} - -{% macro postgres__get_conversion_columns(conv_object = {}, names_only = false) %} - {%- if not names_only %} - ,COUNT(CASE WHEN {{ conv_object['condition'] }} THEN 1 ELSE null END) AS cv_{{ conv_object['name'] }}_volume - {%- if conv_object.get('list_events', false) %} - ,ARRAY_REMOVE(ARRAY_AGG(CASE WHEN {{ conv_object['condition'] }} THEN event_id ELSE null END ORDER BY derived_tstamp, dvce_created_tstamp, event_id), null) AS cv_{{ conv_object['name'] }}_events - {%- endif -%} - {%- if conv_object.get('value', none) %} - ,ARRAY_REMOVE(ARRAY_AGG(CASE WHEN {{ conv_object['condition'] }} THEN coalesce({{ conv_object['value'] }},{{ conv_object.get('default_value', 0) }}) ELSE null END ORDER BY derived_tstamp, dvce_created_tstamp, event_id), null) AS cv_{{ conv_object['name'] }}_values - ,SUM(CASE WHEN {{ conv_object['condition'] }} THEN coalesce({{ conv_object['value'] }}, {{ conv_object.get('default_value', 0) }}) ELSE 0 END) AS cv_{{ conv_object['name'] }}_total - {%- endif -%} - ,MIN(CASE WHEN {{ conv_object['condition'] }} THEN derived_tstamp ELSE null END) AS cv_{{ conv_object['name'] }}_first_conversion - ,CAST(MAX(CASE WHEN {{ conv_object['condition'] }} THEN 1 ELSE 0 END) AS {{ type_boolean() }}) AS cv_{{ conv_object['name'] }}_converted - {%- else -%} - ,cv_{{ conv_object['name'] }}_volume - {%- if conv_object.get('list_events', false) %} - ,cv_{{ conv_object['name'] }}_events - {%- endif %} - {%- if conv_object.get('value', none) %} - ,cv_{{ conv_object['name'] }}_values - ,cv_{{ conv_object['name'] }}_total - {%- endif %} - ,cv_{{ conv_object['name'] }}_first_conversion - ,cv_{{ conv_object['name'] }}_converted - {%- endif %} -{% endmacro %} - -{% macro redshift__get_conversion_columns(conv_object, names_only = false) %} - {%- if not names_only %} - ,COUNT(CASE WHEN {{ conv_object['condition'] }} THEN 1 ELSE null END) AS cv_{{ conv_object['name'] }}_volume - {%- if conv_object.get('list_events', false) %} - ,SPLIT_TO_ARRAY(LISTAGG(CASE WHEN {{ conv_object['condition'] }} THEN event_id ELSE null END, ',') WITHIN GROUP (ORDER BY derived_tstamp, dvce_created_tstamp, event_id), ',') AS cv_{{ conv_object['name'] }}_events - {%- endif -%} - {%- if conv_object.get('value', none) %} - {# Want to try and use a symbol that is unlikely to be in the values due to redshift not having a single array_agg function, hence ~ not , #} - ,SPLIT_TO_ARRAY(LISTAGG(CASE WHEN {{ conv_object['condition'] }} THEN coalesce({{ conv_object['value'] }},{{ conv_object.get('default_value', 0) }}) ELSE null END, '~') WITHIN GROUP (ORDER BY derived_tstamp, dvce_created_tstamp, event_id), '~') AS cv_{{ conv_object['name'] }}_values - ,SUM(CASE WHEN {{ conv_object['condition'] }} THEN coalesce({{ conv_object['value'] }}, {{ conv_object.get('default_value', 0) }}) ELSE 0 END) AS cv_{{ conv_object['name'] }}_total - {%- endif -%} - ,MIN(CASE WHEN {{ conv_object['condition'] }} THEN derived_tstamp ELSE null END) AS cv_{{ conv_object['name'] }}_first_conversion - ,CAST(MAX(CASE WHEN {{ conv_object['condition'] }} THEN 1 ELSE 0 END) AS {{ type_boolean() }}) AS cv_{{ conv_object['name'] }}_converted - {%- else -%} - ,cv_{{ conv_object['name'] }}_volume - {%- if conv_object.get('list_events', false) %} - ,cv_{{ conv_object['name'] }}_events - {%- endif %} - {%- if conv_object.get('value', none) %} - ,cv_{{ conv_object['name'] }}_values - ,cv_{{ conv_object['name'] }}_total - {%- endif %} - ,cv_{{ conv_object['name'] }}_first_conversion - ,cv_{{ conv_object['name'] }}_converted - {%- endif %} -{% endmacro %} diff --git a/macros/field_extractions/get_app_context_fields.sql b/macros/field_extractions/get_app_context_fields.sql index c9c3e827..2f39e690 100644 --- a/macros/field_extractions/get_app_context_fields.sql +++ b/macros/field_extractions/get_app_context_fields.sql @@ -29,7 +29,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 enabled=var('snowplow__enable_application_context', false), col_prefix='contexts_com_snowplowanalytics_mobile_application_1', fields=bq_app_context_fields, - relation=source('atomic', 'events') if project_name != 'snowplow_unified_integration_tests' else ref('snowplow_unified_events_stg'), + relation=source('atomic', 'events') if 'integration_tests' not in project_name and 'snowplow' not in project_name else ref('snowplow_unified_events_stg'), relation_alias=none) }} {% else %} , cast(null as {{ type_string() }}) as app__build diff --git a/macros/field_extractions/get_app_error_event_fields.sql b/macros/field_extractions/get_app_error_event_fields.sql index 2ee06b75..7ffbf81e 100644 --- a/macros/field_extractions/get_app_error_event_fields.sql +++ b/macros/field_extractions/get_app_error_event_fields.sql @@ -43,7 +43,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 enabled=var('snowplow__enable_app_errors', false), col_prefix='unstruct_event_com_snowplowanalytics_snowplow_application_error_1', fields=bq_app_error_event_fields, - relation=source('atomic', 'events') if project_name != 'snowplow_unified_integration_tests' else ref('snowplow_unified_events_stg'), + relation=source('atomic', 'events') if 'integration_tests' not in project_name and 'snowplow' not in project_name else ref('snowplow_unified_events_stg'), relation_alias=none) }} {% else %} , cast(null as {{ type_string() }}) as app_error__message diff --git a/macros/field_extractions/get_browser_context_fields.sql b/macros/field_extractions/get_browser_context_fields.sql index cb63846a..a404d626 100644 --- a/macros/field_extractions/get_browser_context_fields.sql +++ b/macros/field_extractions/get_browser_context_fields.sql @@ -51,7 +51,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 enabled=var('snowplow__enable_browser_context', false), col_prefix='contexts_com_snowplowanalytics_snowplow_browser_context_1', fields=bq_browser_context_fields, - relation=source('atomic', 'events') if project_name != 'snowplow_unified_integration_tests' else ref('snowplow_unified_events_stg'), + relation=source('atomic', 'events') if 'integration_tests' not in project_name and 'snowplow' not in project_name else ref('snowplow_unified_events_stg'), relation_alias=none) }} {% else %} , cast(null as {{ type_string() }}) as browser__viewport diff --git a/macros/field_extractions/get_cmp_visible_event_fields.sql b/macros/field_extractions/get_cmp_visible_event_fields.sql index 1a3f79bf..9c7de234 100644 --- a/macros/field_extractions/get_cmp_visible_event_fields.sql +++ b/macros/field_extractions/get_cmp_visible_event_fields.sql @@ -27,7 +27,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 enabled=var('snowplow__enable_consent', false), col_prefix='unstruct_event_com_snowplowanalytics_snowplow_cmp_visible_1', fields=bq_cmp_visible_fields, - relation=source('atomic', 'events') if project_name != 'snowplow_unified_integration_tests' else ref('snowplow_unified_events_stg'), + relation=source('atomic', 'events') if 'integration_tests' not in project_name and 'snowplow' not in project_name else ref('snowplow_unified_events_stg'), relation_alias=none) }} {% else %} , cast(null as {{ type_float() }}) as cmp__elapsed_time diff --git a/macros/field_extractions/get_consent_event_fields.sql b/macros/field_extractions/get_consent_event_fields.sql index 34434431..ae3bf601 100644 --- a/macros/field_extractions/get_consent_event_fields.sql +++ b/macros/field_extractions/get_consent_event_fields.sql @@ -39,7 +39,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 enabled=var('snowplow__enable_consent', false), col_prefix='unstruct_event_com_snowplowanalytics_snowplow_consent_preferences_1', fields=bq_consent_fields, - relation=source('atomic', 'events') if project_name != 'snowplow_unified_integration_tests' else ref('snowplow_unified_events_stg'), + relation=source('atomic', 'events') if 'integration_tests' not in project_name and 'snowplow' not in project_name else ref('snowplow_unified_events_stg'), relation_alias=none) }} {% else %} , cast(null as {{ type_string() }}) as consent__event_type diff --git a/macros/field_extractions/get_conversion_fields.sql b/macros/field_extractions/get_conversion_fields.sql new file mode 100644 index 00000000..ff7e9af9 --- /dev/null +++ b/macros/field_extractions/get_conversion_fields.sql @@ -0,0 +1,56 @@ +{# +Copyright (c) 2023-present Snowplow Analytics Ltd. All rights reserved. +This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, +and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. +You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ +#} + +{% macro get_conversion_fields(conv_def = []) %} + {{ return(adapter.dispatch('get_conversion_fields', 'snowplow_unified')(conv_def)) }} +{% endmacro %} + +{% macro default__get_conversion_fields(conv_def) %} + + select + + event_id, + session_identifier, + user_identifier, + user_id, + + {% if var('snowplow__conversion_stitching') %} + -- updated with mapping as part of post hook on derived conversions table + cast(user_identifier as {{ snowplow_utils.type_max_string() }}) as stitched_user_id, + {% endif %} + + {%- if conv_def.get('value', none) %} + coalesce({{ conv_def['value'] }},{{ conv_def.get('default_value', 0) }}) as cv_value, + {% else %} + 0 as cv_value, + {% endif %} + + derived_tstamp as cv_tstamp, + dvce_created_tstamp, + '{{ conv_def['name'] }}' as cv_type + + {%- if var('snowplow__conversion_passthroughs', []) -%} + {%- for identifier in var('snowplow__conversion_passthroughs', []) %} + {# Check if it is a simple column or a sql+alias #} + {%- if identifier is mapping -%} + ,{{identifier['sql']}} as {{identifier['alias']}} + {%- else -%} + ,ev.{{identifier}} + {%- endif -%} + {% endfor -%} + {%- endif %} + + from {{ ref('snowplow_unified_events_this_run') }} as ev + + where {{ conv_def['condition'] }} + + {% if var("snowplow__ua_bot_filter", true) %} + {{ filter_bots() }} + {% endif %} + + +{% endmacro %} diff --git a/macros/field_extractions/get_cwv_fields.sql b/macros/field_extractions/get_cwv_fields.sql index 6fcee564..7196e7df 100644 --- a/macros/field_extractions/get_cwv_fields.sql +++ b/macros/field_extractions/get_cwv_fields.sql @@ -39,7 +39,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 enabled=var('snowplow__enable_cwv', false), col_prefix='unstruct_event_com_snowplowanalytics_snowplow_web_vitals_1', fields=bq_cwv_fields, - relation=source('atomic', 'events') if project_name != 'snowplow_unified_integration_tests' else ref('snowplow_unified_events_stg'), + relation=source('atomic', 'events') if 'integration_tests' not in project_name and 'snowplow' not in project_name else ref('snowplow_unified_events_stg'), relation_alias=none) }} {% else %} , cast(null as decimal) as cwv__lcp, diff --git a/macros/field_extractions/get_deep_link_context_fields.sql b/macros/field_extractions/get_deep_link_context_fields.sql index 8a013b09..513f524e 100644 --- a/macros/field_extractions/get_deep_link_context_fields.sql +++ b/macros/field_extractions/get_deep_link_context_fields.sql @@ -29,7 +29,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 enabled=var('snowplow__enable_deep_link_context', false), col_prefix='contexts_com_snowplowanalytics_mobile_deep_link_1', fields=bq_deep_link_context_fields, - relation=source('atomic', 'events') if project_name != 'snowplow_unified_integration_tests' else ref('snowplow_unified_events_stg'), + relation=source('atomic', 'events') if 'integration_tests' not in project_name and 'snowplow' not in project_name else ref('snowplow_unified_events_stg'), relation_alias=none) }} {% else %} , cast(null as {{ type_string() }}) as deep_link__url diff --git a/macros/field_extractions/get_geo_context_fields.sql b/macros/field_extractions/get_geo_context_fields.sql index 28fcd896..e8d960bf 100644 --- a/macros/field_extractions/get_geo_context_fields.sql +++ b/macros/field_extractions/get_geo_context_fields.sql @@ -39,7 +39,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 enabled=var('snowplow__enable_geolocation_context', false), col_prefix='contexts_com_snowplowanalytics_snowplow_geolocation_context_1', fields=bq_geo_context_fields, - relation=source('atomic', 'events') if project_name != 'snowplow_unified_integration_tests' else ref('snowplow_unified_events_stg'), + relation=source('atomic', 'events') if 'integration_tests' not in project_name and 'snowplow' not in project_name else ref('snowplow_unified_events_stg'), relation_alias=none) }} {% else %} , cast(null as {{ type_float() }}) as geo__latitude diff --git a/macros/field_extractions/get_iab_context_fields.sql b/macros/field_extractions/get_iab_context_fields.sql index 7227b8b3..88a6d005 100644 --- a/macros/field_extractions/get_iab_context_fields.sql +++ b/macros/field_extractions/get_iab_context_fields.sql @@ -33,7 +33,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 enabled=var('snowplow__enable_iab', false), fields=bq_iab_fields, col_prefix='contexts_com_iab_snowplow_spiders_and_robots_1', - relation=source('atomic', 'events') if project_name != 'snowplow_unified_integration_tests' else ref('snowplow_unified_events_stg'), + relation=source('atomic', 'events') if 'integration_tests' not in project_name and 'snowplow' not in project_name else ref('snowplow_unified_events_stg'), relation_alias=none) }} {%- else -%} , cast(null as {{ type_string() }}) as iab__category diff --git a/macros/field_extractions/get_mobile_context_fields.sql b/macros/field_extractions/get_mobile_context_fields.sql index c09b85b4..60e514c2 100644 --- a/macros/field_extractions/get_mobile_context_fields.sql +++ b/macros/field_extractions/get_mobile_context_fields.sql @@ -75,7 +75,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 enabled=var('snowplow__enable_mobile_context', false), col_prefix='contexts_com_snowplowanalytics_snowplow_mobile_context_1', fields=bq_mobile_context_fields, - relation=source('atomic', 'events') if project_name != 'snowplow_unified_integration_tests' else ref('snowplow_unified_events_stg'), + relation=source('atomic', 'events') if 'integration_tests' not in project_name and 'snowplow' not in project_name else ref('snowplow_unified_events_stg'), relation_alias=none) }} {% else %} , cast(null as {{ type_string() }}) as mobile__device_manufacturer diff --git a/macros/field_extractions/get_screen_context_fields.sql b/macros/field_extractions/get_screen_context_fields.sql index 6c8b1803..12070966 100644 --- a/macros/field_extractions/get_screen_context_fields.sql +++ b/macros/field_extractions/get_screen_context_fields.sql @@ -39,7 +39,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 enabled=var('snowplow__enable_screen_context', false), col_prefix='contexts_com_snowplowanalytics_mobile_screen_1', fields=bq_screen_context_fields, - relation=source('atomic', 'events') if project_name != 'snowplow_unified_integration_tests' else ref('snowplow_unified_events_stg'), + relation=source('atomic', 'events') if 'integration_tests' not in project_name and 'snowplow' not in project_name else ref('snowplow_unified_events_stg'), relation_alias=none) }} {% else %} , cast(null as {{ type_string() }}) as screen__id diff --git a/macros/field_extractions/get_screen_view_event_fields.sql b/macros/field_extractions/get_screen_view_event_fields.sql index 0da0bde0..e01be92b 100644 --- a/macros/field_extractions/get_screen_view_event_fields.sql +++ b/macros/field_extractions/get_screen_view_event_fields.sql @@ -39,7 +39,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 enabled=true, col_prefix='unstruct_event_com_snowplowanalytics_mobile_screen_view_1', fields=bq_screen_view_event_fields, - relation=source('atomic', 'events') if project_name != 'snowplow_unified_integration_tests' else ref('snowplow_unified_events_stg'), + relation=source('atomic', 'events') if 'integration_tests' not in project_name and 'snowplow' not in project_name else ref('snowplow_unified_events_stg'), relation_alias=none) }} {% else %} , cast(null as {{ type_string() }}) as screen_view__id diff --git a/macros/field_extractions/get_session_context_fields.sql b/macros/field_extractions/get_session_context_fields.sql index 08a7c316..63d6886d 100644 --- a/macros/field_extractions/get_session_context_fields.sql +++ b/macros/field_extractions/get_session_context_fields.sql @@ -38,7 +38,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 enabled=true, col_prefix='contexts_com_snowplowanalytics_snowplow_client_session_1', fields=bq_session_context_fields, - relation=source('atomic', 'events') if project_name != 'snowplow_unified_integration_tests' else ref('snowplow_unified_events_stg'), + relation=source('atomic', 'events') if 'integration_tests' not in project_name and 'snowplow' not in project_name else ref('snowplow_unified_events_stg'), relation_alias=none) }} {% else %} , cast(null as {{ type_string() }}) as session__session_id diff --git a/macros/field_extractions/get_ua_context_fields.sql b/macros/field_extractions/get_ua_context_fields.sql index a4fbf6f8..006ad774 100644 --- a/macros/field_extractions/get_ua_context_fields.sql +++ b/macros/field_extractions/get_ua_context_fields.sql @@ -50,7 +50,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 enabled=var('snowplow__enable_ua', false), fields=bq_ua_fields, col_prefix='contexts_com_snowplowanalytics_snowplow_ua_parser_context_1', - relation=source('atomic', 'events') if project_name != 'snowplow_unified_integration_tests' else ref('snowplow_unified_events_stg'), + relation=source('atomic', 'events') if 'integration_tests' not in project_name and 'snowplow' not in project_name else ref('snowplow_unified_events_stg'), relation_alias=none) }} {%- else -%} , cast(null as {{ type_string() }}) as ua__useragent_family diff --git a/macros/field_extractions/get_web_page_context_fields.sql b/macros/field_extractions/get_web_page_context_fields.sql index 60ebf546..0894aeac 100644 --- a/macros/field_extractions/get_web_page_context_fields.sql +++ b/macros/field_extractions/get_web_page_context_fields.sql @@ -27,7 +27,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 enabled=true, fields=bq_web_page_fields, col_prefix='contexts_com_snowplowanalytics_snowplow_web_page_1', - relation=source('atomic', 'events') if project_name != 'snowplow_unified_integration_tests' else ref('snowplow_unified_events_stg'), + relation=source('atomic', 'events') if 'integration_tests' not in project_name and 'snowplow' not in project_name else ref('snowplow_unified_events_stg'), relation_alias=none) }} {% else %} , cast(null as {{ type_string() }}) as page_view__id diff --git a/macros/field_extractions/get_yauaa_context_fields.sql b/macros/field_extractions/get_yauaa_context_fields.sql index bc722a4e..a04dab49 100644 --- a/macros/field_extractions/get_yauaa_context_fields.sql +++ b/macros/field_extractions/get_yauaa_context_fields.sql @@ -65,7 +65,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 enabled=var('snowplow__enable_yauaa', false), fields=bq_yauaa_fields, col_prefix='contexts_nl_basjes_yauaa_context_1', - relation=source('atomic', 'events') if project_name != 'snowplow_unified_integration_tests' else ref('snowplow_unified_events_stg'), + relation=source('atomic', 'events') if 'integration_tests' not in project_name and 'snowplow' not in project_name else ref('snowplow_unified_events_stg'), relation_alias=none) }} {%- else -%} , cast(null as {{ type_string() }}) as yauaa__device_class diff --git a/macros/macros.yml b/macros/macros.yml index 5a719aa1..aa6cb646 100644 --- a/macros/macros.yml +++ b/macros/macros.yml @@ -97,6 +97,6 @@ macros: description: '{{ doc("macro_event_counts_query") }}' - name: event_counts_string_query description: '{{ doc("macro_event_counts_string_query") }}' - - name: get_conversion_columns - description: '{{ doc("macro_get_conversion_columns") }}' + - name: conversion_query + description: '{{ doc("macro_conversion_query") }}' diff --git a/macros/unify_fields_query.sql b/macros/unify_fields_query.sql index b6498654..9e152375 100644 --- a/macros/unify_fields_query.sql +++ b/macros/unify_fields_query.sql @@ -16,7 +16,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 select *, - coalesce( + cast(coalesce( {% if var('snowplow__enable_web') %} ev.page_view__id, {% endif %} @@ -26,42 +26,42 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 ev.screen__id, {% endif %} {% endif %} - null, null) as view_id, + null, null) as {{ dbt.type_string()}} ) as view_id, - coalesce( + cast(coalesce( {% if var('snowplow__enable_mobile') %} ev.session__session_index, {% endif %} {% if var('snowplow__enable_web') %} ev.domain_sessionidx, {% endif %} - null, null) as device_session_index, + null, null) as {{ dbt.type_int()}} ) as device_session_index, - coalesce( + cast(coalesce( {% if var('snowplow__enable_deep_link_context') %} ev.deep_link__referrer, {% else %} ev.page_referrer, {% endif %} - null, null) as referrer, + null, null) as {{ dbt.type_string()}} ) as referrer, - coalesce( + cast(coalesce( {% if var('snowplow__enable_deep_link_context') %} ev.deep_link__url, {% else %} ev.page_url, {% endif %} - null, null) as url, + null, null) as {{ dbt.type_string()}} ) as url, - coalesce( + cast(coalesce( {% if var('snowplow__enable_mobile_context') %} ev.mobile__resolution, {% else %} ev.dvce_screenwidth || 'x' || ev.dvce_screenheight, {% endif %} - null, null) as screen_resolution, + null, null) as {{ dbt.type_string()}} ) as screen_resolution, - coalesce( + cast(coalesce( {% if var('snowplow__enable_mobile_context') %} ev.mobile__os_type, {% endif %} @@ -71,9 +71,9 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {% if var('snowplow__enable_ua') %} ev.ua__os_family, {% endif %} - null, null) as os_type, + null, null) as {{ dbt.type_string()}} ) as os_type, - coalesce( + cast(coalesce( {% if var('snowplow__enable_yauaa') %} ev.yauaa__operating_system_version, {% endif %} @@ -83,16 +83,16 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {% if var('snowplow__enable_ua') %} ev.ua__os_version, {% endif %} - null, null) as os_version, + null, null) as {{ dbt.type_string()}} ) as os_version, - coalesce( + cast(coalesce( {% if var('snowplow__enable_web') %} ev.domain_userid, {% endif %} {% if var('snowplow__enable_mobile') %} ev.session__user_id, {% endif %} - null, null) as device_identifier, + null, null) as {{ dbt.type_string()}} ) as device_identifier, case when platform = 'web' then 'Web' --includes mobile web when platform = 'mob' then 'Mobile/Tablet' diff --git a/models/optional_modules/conversions/conversions.yml b/models/optional_modules/conversions/conversions.yml new file mode 100644 index 00000000..a7bad683 --- /dev/null +++ b/models/optional_modules/conversions/conversions.yml @@ -0,0 +1,83 @@ +version: 2 + +models: + - name: snowplow_unified_conversions_this_run + description: A scratch table used as a base for creating the incremental conversions derived table. + columns: + - name: event_id + description: '{{ doc("col_event_id") }}' + tags: + - primary-key + tests: + - not_null: + config: + enabled: '{{var("snowplow__enable_conversions", false)}}' + - name: user_id + description: '{{ doc("col_user_id") }}' + tests: + - not_null: + config: + enabled: '{{var("snowplow__enable_conversions", false)}}' + - name: session_identifier + description: '{{ doc("col_session_identifier") }}' + tests: + - not_null: + config: + enabled: '{{var("snowplow__enable_conversions", false)}}' + - name: user_identifier + description: '{{ doc("col_user_identifier") }}' + tests: + - not_null: + config: + enabled: '{{var("snowplow__enable_conversions", false)}}' + - name: stitched_user_id + description: '{{ doc("col_stitched_user_id") }}' + - name: cv_value + description: The conversion value. + - name: cv_tstamp + description: The derived_tstamp of the conversion event + - name: dvce_created_tstamp, + description: '{{ doc("col_dvce_created_tstamp") }}' + - name: as cv_type + description: The type of conversion, taken from the 'name' value of the user defined conversion definition + + - name: snowplow_unified_conversions + description: An incremental table which contains all relevant fields for unique conversion events. + columns: + - name: event_id + description: '{{ doc("col_event_id") }}' + tags: + - primary-key + tests: + - not_null: + config: + enabled: '{{var("snowplow__enable_conversions", false)}}' + - name: user_id + description: '{{ doc("col_user_id") }}' + tests: + - not_null: + config: + enabled: '{{var("snowplow__enable_conversions", false)}}' + - name: session_identifier + description: '{{ doc("col_session_identifier") }}' + tests: + - not_null: + config: + enabled: '{{var("snowplow__enable_conversions", false)}}' + - name: user_identifier + description: '{{ doc("col_user_identifier") }}' + tests: + - not_null: + config: + enabled: '{{var("snowplow__enable_conversions", false)}}' + - name: stitched_user_id + description: '{{ doc("col_stitched_user_id") }}' + - name: cv_value + description: The conversion value. + - name: cv_tstamp + description: The derived_tstamp of the conversion event + - name: dvce_created_tstamp, + description: '{{ doc("col_dvce_created_tstamp") }}' + - name: as cv_type + description: The type of conversion, taken from the 'name' value of the user defined conversion definition + \ No newline at end of file diff --git a/models/optional_modules/conversions/scratch/snowplow_unified_conversions_this_run.sql b/models/optional_modules/conversions/scratch/snowplow_unified_conversions_this_run.sql new file mode 100644 index 00000000..c1a243bf --- /dev/null +++ b/models/optional_modules/conversions/scratch/snowplow_unified_conversions_this_run.sql @@ -0,0 +1,27 @@ +{# +Copyright (c) 2023-present Snowplow Analytics Ltd. All rights reserved. +This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, +and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. +You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ +#} + +{{ + config( + tags=["this_run"], + enabled=var("snowplow__enable_conversions", false), + sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) + ) +}} + +with prep as ( + + {%- for conv_def in var('snowplow__conversion_events', []) %} + + {{ snowplow_unified.get_conversion_fields(conv_def)}} + {% if not loop.last %}union all{% endif %} + + {%- endfor %} + +) + +select * from prep diff --git a/models/optional_modules/conversions/snowplow_unified_conversions.sql b/models/optional_modules/conversions/snowplow_unified_conversions.sql new file mode 100644 index 00000000..e1260518 --- /dev/null +++ b/models/optional_modules/conversions/snowplow_unified_conversions.sql @@ -0,0 +1,41 @@ +{# +Copyright (c) 2023-present Snowplow Analytics Ltd. All rights reserved. +This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, +and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. +You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ +#} + +{{ + config( + materialized='incremental', + enabled=var("snowplow__enable_conversions", false), + on_schema_change='append_new_columns', + unique_key='event_id', + upsert_date_key='cv_tstamp', + sort='cv_tstamp', + dist='event_id', + partition_by = snowplow_utils.get_value_by_target_type(bigquery_val = { + "field": "cv_tstamp", + "data_type": "timestamp" + }, databricks_val='cv_tstamp_date'), + cluster_by=snowplow_utils.get_value_by_target_type(bigquery_val=["user_identifier","session_identifier"], snowflake_val=["to_date(cv_tstamp)"]), + tags=["derived"], + post_hook="{{ snowplow_unified.stitch_user_identifiers( + enabled=var('snowplow__conversion_stitching') + ) }}", + sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')), + tblproperties={ + 'delta.autoOptimize.optimizeWrite' : 'true', + 'delta.autoOptimize.autoCompact' : 'true' + }, + snowplow_optimize = true + ) +}} + + +select * + {% if target.type in ['databricks', 'spark'] -%} + , DATE(cv_tstamp) as cv_tstamp_date + {%- endif %} +from {{ ref('snowplow_unified_conversions_this_run') }} +where {{ snowplow_utils.is_run_with_new_events('snowplow_unified') }} --returns false if run doesn't contain new events. diff --git a/models/sessions/scratch/snowplow_unified_sessions_this_run.sql b/models/sessions/scratch/snowplow_unified_sessions_this_run.sql index 7f111c60..725e064e 100644 --- a/models/sessions/scratch/snowplow_unified_sessions_this_run.sql +++ b/models/sessions/scratch/snowplow_unified_sessions_this_run.sql @@ -231,16 +231,18 @@ with session_firsts as ( select session_identifier {%- for conv_def in var('snowplow__conversion_events') %} - {{ snowplow_unified.get_conversion_columns(conv_def)}} + {{ snowplow_unified.conversion_query(conv_def)}} {%- endfor %} - from {{ ref('snowplow_unified_events_this_run') }} - where - 1 = 1 - {% if var("snowplow__ua_bot_filter", true) %} + {% if var('snowplow__enable_conversions', false) %} + from {{ ref('snowplow_unified_conversions_this_run') }} + {% else %} + from {{ ref('snowplow_unified_events_this_run') }} + where 1 = 1 + {% if var("snowplow__ua_bot_filter", true) %} {{ filter_bots() }} + {% endif %} {% endif %} - group by - session_identifier + group by session_identifier ) {%- endif %} @@ -319,7 +321,7 @@ select , {{ event_counts_query() }} as event_counts {%- endif %} , a.total_events - , {{ engaged_session() }} as is_engaged + , coalesce({{ engaged_session() }}, false) as is_engaged -- when the session starts with a ping we need to add the min visit length to get when the session actually started {% if var('snowplow__enable_web') or var('snowplow__enable_screen_summary_context', false) %} @@ -461,12 +463,12 @@ select -- conversion fields {%- if var('snowplow__conversion_events', none) %} {%- for conv_def in var('snowplow__conversion_events') %} - {{ snowplow_unified.get_conversion_columns(conv_def, names_only = true)}} + {{ snowplow_unified.conversion_query(conv_def, names_only = true)}} {%- endfor %} {% if var('snowplow__total_all_conversions', false) %} - ,{%- for conv_def in var('snowplow__conversion_events') %}{{'cv_' ~ conv_def['name'] ~ '_volume'}}{%- if not loop.last %} + {% endif -%}{%- endfor %} as cv__all_volume + ,{%- for conv_def in var('snowplow__conversion_events') %} coalesce({{'cv_' ~ conv_def['name'] ~ '_volume'}},0) {%- if not loop.last %} + {% endif -%}{%- endfor %} as cv__all_volume {# Use 0 in case of no conversions having a value field #} - ,0 {%- for conv_def in var('snowplow__conversion_events') %}{%- if conv_def.get('value') %} + {{'cv_' ~ conv_def['name'] ~ '_total'}}{% endif -%}{%- endfor %} as cv__all_total + ,0 {%- for conv_def in var('snowplow__conversion_events') %}{%- if conv_def.get('value') %} + coalesce({{'cv_' ~ conv_def['name'] ~ '_total'}}, 0){% endif -%}{%- endfor %} as cv__all_total {% endif %} {%- endif %} diff --git a/packages.yml b/packages.yml index 61af1890..f7e4826e 100644 --- a/packages.yml +++ b/packages.yml @@ -1,3 +1,3 @@ packages: - package: snowplow/snowplow_utils - version: [">=0.15.0", "<0.16.0"] + version: [">=0.16.0", "<0.17.0"]