diff --git a/CHANGELOG b/CHANGELOG index 7b14084a..8302b26f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,11 +3,15 @@ snowplow-unified 0.2.1 (2024-02-XX) ## Summary XXX +## 🚨 Breaking Changes 🚨 +We have changed the behavior of the `allow_refresh` macro: when `snowplow__allow_refresh` is set to `true`, the manifest models are now only refreshed if the `--full-refresh` flag is also set. If you require the old behavior, where the manifest models were refreshed on an incremental run whenever `snowplow__allow_refresh` was set to `true`, please override this macro. See the [Overriding Macros](https://docs.snowplow.io/docs/modeling-your-data/modeling-your-data-with-dbt/dbt-operation/macros-and-keys/#overriding-macros) guide for more details. + ## Features - Add new passthrough aggregations to the views, sessions, and users table, enabled using `snowplow__view/session/user_aggregations` - Reorder and add some additional context fields to derived tables (non-breaking change) - Add `snowplow__custom_sql` to allow adding custom sql to the `snowplow_unified_base_events_this_run` and `snowplow_unified_events_this_run` models - Add macro to define cluster-by for tables to allow users to overwrite this if required +- Add check for `--full-refresh` flag before allowing refresh of manifest models when `snowplow__allow_refresh` is set to `true` ## Fixes - Fix a bug where if you ran the package in a period with no data, and had list all events enabled, the package would error rather than complete diff --git a/docs/markdown/snowplow_unified_macros_docs.md b/docs/markdown/snowplow_unified_macros_docs.md index 5c234eb5..47405991 100644 --- a/docs/markdown/snowplow_unified_macros_docs.md +++ b/docs/markdown/snowplow_unified_macros_docs.md @@ -103,7 +103,7 @@ The sql to extract the columns from the yauaa context, or these columns as nulls This macro is used to determine if a full-refresh is allowed (depending on the environment), using the `snowplow__allow_refresh` variable. 
#### Returns -`snowplow__allow_refresh` if environment is not `dev`, `none` otherwise. +`snowplow__allow_refresh` if the `--full-refresh` flag is set and the environment is not `dev`; `none` otherwise (including whenever the `--full-refresh` flag is not present). {% endraw %} {% enddocs %} @@ -243,7 +243,7 @@ The sql needed to make the warehosue specific transformations to retrieve the co {% endraw %} {% enddocs %} -{% docs macro_cluster_by_values %} +{% docs macro_get_cluster_by_values %} {% raw %} A macro to manage the cluster by fields for various models in the package. diff --git a/macros/allow_refresh.sql b/macros/allow_refresh.sql index 09c712b7..5d221ccf 100644 --- a/macros/allow_refresh.sql +++ b/macros/allow_refresh.sql @@ -13,11 +13,15 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {% macro default__allow_refresh() %} - {% set allow_refresh = snowplow_utils.get_value_by_target( - dev_value=none, - default_value=var('snowplow__allow_refresh'), - dev_target_name=var('snowplow__dev_target_name') - ) %} + {% if flags.FULL_REFRESH == True %} + {% set allow_refresh = snowplow_utils.get_value_by_target( + dev_value=none, + default_value=var('snowplow__allow_refresh'), + dev_target_name=var('snowplow__dev_target_name') + ) %} + {% else %} + {% set allow_refresh = none %} + {% endif %} {{ return(allow_refresh) }} diff --git a/macros/field_definitions/cluster_by_values.sql b/macros/field_definitions/get_cluster_by_values.sql similarity index 90% rename from macros/field_definitions/cluster_by_values.sql rename to macros/field_definitions/get_cluster_by_values.sql index abf0a230..51112dc4 100644 --- a/macros/field_definitions/cluster_by_values.sql +++ b/macros/field_definitions/get_cluster_by_values.sql @@ -1,9 +1,9 @@ -{% macro unified_cluser_by(model) %} - {{ return(adapter.dispatch('unified_cluser_by', 'snowplow_unified')(model)) }} +{% macro get_cluster_by_values(model) %} + {{ return(adapter.dispatch('get_cluster_by_values', 'snowplow_unified')(model)) }} {% endmacro 
%} -{% macro default__unified_cluser_by(model) %} +{% macro default__get_cluster_by_values(model) %} {% if model == 'lifecycle_manifest' %} {{ return(snowplow_utils.get_value_by_target_type(bigquery_val=["session_identifier"], snowflake_val=["to_date(start_tstamp)"])) }} {% elif model == 'app_errors' %} diff --git a/macros/macros.yml b/macros/macros.yml index ac52b70f..f0cb2265 100644 --- a/macros/macros.yml +++ b/macros/macros.yml @@ -99,8 +99,8 @@ macros: description: '{{ doc("macro_event_counts_string_query") }}' - name: conversion_query description: '{{ doc("macro_conversion_query") }}' - - name: cluster_by_values - description: '{{ doc("macro_cluster_by_values") }}' + - name: get_cluster_by_values + description: '{{ doc("macro_get_cluster_by_values") }}' arguments: - name: model type: string diff --git a/models/base/manifest/snowplow_unified_base_sessions_lifecycle_manifest.sql b/models/base/manifest/snowplow_unified_base_sessions_lifecycle_manifest.sql index 599c3f9d..767f2944 100644 --- a/models/base/manifest/snowplow_unified_base_sessions_lifecycle_manifest.sql +++ b/models/base/manifest/snowplow_unified_base_sessions_lifecycle_manifest.sql @@ -16,7 +16,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 "field": "start_tstamp", "data_type": "timestamp" }, databricks_val='start_tstamp_date'), - cluster_by=snowplow_unified.unified_cluser_by('lifecycle_manifest'), + cluster_by=snowplow_unified.get_cluster_by_values('lifecycle_manifest'), full_refresh=snowplow_unified.allow_refresh(), tags=["manifest"], sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')), diff --git a/models/optional_modules/app_errors/snowplow_unified_app_errors.sql b/models/optional_modules/app_errors/snowplow_unified_app_errors.sql index 4a5def3e..75d6a7d2 100644 --- a/models/optional_modules/app_errors/snowplow_unified_app_errors.sql +++ b/models/optional_modules/app_errors/snowplow_unified_app_errors.sql @@ -16,7 +16,7 @@ 
You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 "field": "derived_tstamp", "data_type": "timestamp" }, databricks_val='derived_tstamp_date'), - cluster_by=snowplow_unified.unified_cluser_by('app_errors'), + cluster_by=snowplow_unified.get_cluster_by_values('app_errors'), tags=["derived"], enabled=var("snowplow__enable_app_errors", false), sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')), diff --git a/models/optional_modules/consent/snowplow_unified_consent_log.sql b/models/optional_modules/consent/snowplow_unified_consent_log.sql index a297f46e..2570b4cf 100644 --- a/models/optional_modules/consent/snowplow_unified_consent_log.sql +++ b/models/optional_modules/consent/snowplow_unified_consent_log.sql @@ -18,7 +18,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 "field": "derived_tstamp", "data_type": "timestamp" }, databricks_val = 'derived_tstamp_date'), - cluster_by=snowplow_unified.unified_cluser_by('consent_log'), + cluster_by=snowplow_unified.get_cluster_by_values('consent_log'), sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')), tblproperties={ 'delta.autoOptimize.optimizeWrite' : 'true', diff --git a/models/optional_modules/conversions/snowplow_unified_conversions.sql b/models/optional_modules/conversions/snowplow_unified_conversions.sql index 6c04c370..f785b30a 100644 --- a/models/optional_modules/conversions/snowplow_unified_conversions.sql +++ b/models/optional_modules/conversions/snowplow_unified_conversions.sql @@ -18,7 +18,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 "field": "cv_tstamp", "data_type": "timestamp" }, databricks_val='cv_tstamp_date'), - cluster_by=snowplow_unified.unified_cluser_by('conversions'), + cluster_by=snowplow_unified.get_cluster_by_values('conversions'), tags=["derived"], post_hook="{{ snowplow_unified.stitch_user_identifiers( 
enabled=var('snowplow__conversion_stitching') diff --git a/models/optional_modules/core_web_vitals/snowplow_unified_web_vitals.sql b/models/optional_modules/core_web_vitals/snowplow_unified_web_vitals.sql index eac42351..6c565aa4 100644 --- a/models/optional_modules/core_web_vitals/snowplow_unified_web_vitals.sql +++ b/models/optional_modules/core_web_vitals/snowplow_unified_web_vitals.sql @@ -18,7 +18,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 "field": "derived_tstamp", "data_type": "timestamp" }, databricks_val = 'derived_tstamp_date'), - cluster_by=snowplow_unified.unified_cluser_by('web_vitals'), + cluster_by=snowplow_unified.get_cluster_by_values('web_vitals'), sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')), tblproperties={ 'delta.autoOptimize.optimizeWrite' : 'true', diff --git a/models/sessions/snowplow_unified_sessions.sql b/models/sessions/snowplow_unified_sessions.sql index 78738c09..8438fecc 100644 --- a/models/sessions/snowplow_unified_sessions.sql +++ b/models/sessions/snowplow_unified_sessions.sql @@ -17,7 +17,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 "field": "start_tstamp", "data_type": "timestamp" }, databricks_val='start_tstamp_date'), - cluster_by=snowplow_unified.unified_cluser_by('sessions'), + cluster_by=snowplow_unified.get_cluster_by_values('sessions'), tags=["derived"], post_hook="{{ snowplow_unified.stitch_user_identifiers( enabled=var('snowplow__session_stitching') diff --git a/models/users/scratch/snowplow_unified_users_aggs.sql b/models/users/scratch/snowplow_unified_users_aggs.sql index 48897afc..95a37da2 100644 --- a/models/users/scratch/snowplow_unified_users_aggs.sql +++ b/models/users/scratch/snowplow_unified_users_aggs.sql @@ -11,7 +11,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 "field": "start_tstamp", "data_type": "timestamp" }), - 
cluster_by=snowplow_unified.unified_cluser_by('users_aggs'), + cluster_by=snowplow_unified.get_cluster_by_values('users_aggs'), sort='user_identifier', dist='user_identifier', sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) diff --git a/models/users/snowplow_unified_users.sql b/models/users/snowplow_unified_users.sql index d191b421..70946bd2 100644 --- a/models/users/snowplow_unified_users.sql +++ b/models/users/snowplow_unified_users.sql @@ -21,7 +21,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 post_hook="{{ snowplow_unified.stitch_user_identifiers( enabled=var('snowplow__session_stitching') ) }}", - cluster_by=snowplow_unified.unified_cluser_by('users'), + cluster_by=snowplow_unified.get_cluster_by_values('users'), tags=["derived"], sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')), tblproperties={ diff --git a/models/views/snowplow_unified_views.sql b/models/views/snowplow_unified_views.sql index ba9d2792..e015b09a 100644 --- a/models/views/snowplow_unified_views.sql +++ b/models/views/snowplow_unified_views.sql @@ -17,7 +17,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 "field": "start_tstamp", "data_type": "timestamp" }, databricks_val='start_tstamp_date'), - cluster_by=snowplow_unified.unified_cluser_by('views'), + cluster_by=snowplow_unified.get_cluster_by_values('views'), tags=["derived"], post_hook="{{ snowplow_unified.stitch_user_identifiers( enabled=var('snowplow__view_stitching')