From 8379d10af3f80f6f500f90d4a2aef2e806fa1235 Mon Sep 17 00:00:00 2001 From: Alex Higgs Date: Tue, 13 Sep 2022 09:54:06 +0000 Subject: [PATCH] Release v0.9.0 --- dbt_project.yml | 5 +- macros/internal/helpers/is_checks.sql | 11 +- macros/internal/helpers/logging.sql | 21 ++ .../helpers/stage_processing_macros.sql | 27 ++ .../metadata_processing/as_constant.sql | 10 +- .../metadata_processing/concat_ws.sql | 10 +- .../escape_column_names.sql | 75 +++-- .../expand_column_list.sql | 6 +- .../process_payload_column_excludes.sql | 38 +++ macros/materialisations/drop_temporary.sql | 14 + macros/materialisations/mat_is_checks.sql | 8 +- .../get_period_boundaries.sql | 100 ++++-- .../get_period_filter_sql.sql | 16 +- .../period_mat_helpers/get_period_of_load.sql | 32 +- ...replace_placeholder_with_period_filter.sql | 14 + .../replace_placeholder_with_rank_filter.sql | 6 +- ...vault_insert_by_period_materialization.sql | 49 ++- .../vault_insert_by_rank_materialization.sql | 29 +- macros/staging/derive_columns.sql | 57 +++- macros/staging/null_columns.sql | 89 +++++ macros/staging/stage.sql | 46 ++- macros/supporting/as_of_date_window.sql | 123 +++++++ macros/supporting/bridge_shared.sql | 58 ++++ macros/supporting/cast_binary.sql | 23 ++ macros/supporting/cast_date.sql | 80 +++++ macros/supporting/current_timestamp.sql | 2 +- macros/supporting/datatypes.sql | 20 +- .../supporting/get_query_results_as_dict.sql | 29 +- macros/supporting/hash.sql | 222 ++++++++++-- macros/supporting/max_datetime.sql | 18 +- macros/supporting/prefix.sql | 2 +- macros/tables/bigquery/bridge.sql | 295 +--------------- macros/tables/bigquery/eff_sat.sql | 38 +-- macros/tables/bigquery/hub.sql | 27 +- macros/tables/bigquery/link.sql | 24 +- macros/tables/bigquery/ma_sat.sql | 80 ++--- macros/tables/bigquery/pit.sql | 118 +------ macros/tables/bigquery/sat.sql | 79 +---- macros/tables/bigquery/t_link.sql | 5 +- macros/tables/bigquery/xts.sql | 5 +- macros/tables/databricks/hub.sql | 10 + 
macros/tables/databricks/link.sql | 10 + macros/tables/databricks/sat.sql | 12 + macros/tables/postgres/hub.sql | 89 +++++ macros/tables/postgres/link.sql | 100 ++++++ macros/tables/postgres/sat.sql | 63 ++++ macros/tables/snowflake/bridge.sql | 318 +++++------------- macros/tables/snowflake/eff_sat.sql | 83 +++-- macros/tables/snowflake/hub.sql | 50 +-- macros/tables/snowflake/link.sql | 52 +-- macros/tables/snowflake/ma_sat.sql | 91 ++--- macros/tables/snowflake/pit.sql | 218 +++++------- macros/tables/snowflake/sat.sql | 68 ++-- macros/tables/snowflake/t_link.sql | 38 ++- macros/tables/snowflake/xts.sql | 103 ++++-- macros/tables/sqlserver/bridge.sql | 284 +--------------- macros/tables/sqlserver/eff_sat.sql | 38 +-- macros/tables/sqlserver/hub.sql | 35 +- macros/tables/sqlserver/link.sql | 27 +- macros/tables/sqlserver/ma_sat.sql | 43 +-- macros/tables/sqlserver/pit.sql | 12 +- macros/tables/sqlserver/sat.sql | 78 +---- macros/tables/sqlserver/t_link.sql | 5 +- 63 files changed, 1947 insertions(+), 1791 deletions(-) create mode 100644 macros/internal/helpers/logging.sql create mode 100644 macros/internal/metadata_processing/process_payload_column_excludes.sql create mode 100644 macros/materialisations/drop_temporary.sql create mode 100644 macros/staging/null_columns.sql create mode 100644 macros/supporting/as_of_date_window.sql create mode 100644 macros/supporting/bridge_shared.sql create mode 100644 macros/supporting/cast_binary.sql create mode 100644 macros/supporting/cast_date.sql create mode 100644 macros/tables/databricks/hub.sql create mode 100644 macros/tables/databricks/link.sql create mode 100644 macros/tables/databricks/sat.sql create mode 100644 macros/tables/postgres/hub.sql create mode 100644 macros/tables/postgres/link.sql create mode 100644 macros/tables/postgres/sat.sql diff --git a/dbt_project.yml b/dbt_project.yml index 9b01b57d9..bbfbcd077 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,5 +1,5 @@ name: dbtvault -version: 0.8.3 
+version: 0.9.0 require-dbt-version: [">=1.0.0", "<2.0.0"] config-version: 2 @@ -17,4 +17,5 @@ clean-targets: vars: hash: MD5 - max_datetime: '{{ dbtvault.max_datetime() }}' \ No newline at end of file + null_key_required: '-1' # Default -1, allows user to configure + null_key_optional: '-2' # Default -2, allows user to configure diff --git a/macros/internal/helpers/is_checks.sql b/macros/internal/helpers/is_checks.sql index f657a2d2b..d90ac86e1 100644 --- a/macros/internal/helpers/is_checks.sql +++ b/macros/internal/helpers/is_checks.sql @@ -16,7 +16,7 @@ {%- macro is_nothing(obj) -%} - {%- if obj is none or obj is undefined or not obj -%} + {%- if obj is none or obj is undefined or not obj or dbtvault.is_list(obj, empty_is_false=true) -%} {%- do return(true) -%} {%- else -%} {%- do return(false) -%} @@ -29,7 +29,12 @@ {%- macro is_something(obj) -%} {%- if obj is not none and obj is defined and obj -%} - {%- do return(true) -%} + {#- if an empty list, do not consider the object something -#} + {% if dbtvault.is_list(empty_is_false=true) %} + {%- do return(true) -%} + {%- else -%} + {%- do return(false) -%} + {%- endif -%} {%- else -%} {%- do return(false) -%} {%- endif -%} @@ -50,4 +55,4 @@ {%- do return(false) -%} {%- endif -%} -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/internal/helpers/logging.sql b/macros/internal/helpers/logging.sql new file mode 100644 index 000000000..20449467e --- /dev/null +++ b/macros/internal/helpers/logging.sql @@ -0,0 +1,21 @@ +{% macro log_relation_sources(relation, source_count) %} + {{ return(adapter.dispatch('log_relation_sources', 'dbtvault')(relation=relation, source_count=source_count)) }} +{%- endmacro -%} + +{% macro default__log_relation_sources(relation, source_count) %} + + {%- if execute -%} + + {%- do dbt_utils.log_info('Loading {} from {} source(s)'.format("{}.{}.{}".format(relation.database, relation.schema, relation.identifier), + source_count)) -%} + {%- endif -%} +{% endmacro 
%} + +{% macro databricks__log_relation_sources(relation, source_count) %} + + {%- if execute -%} + + {%- do dbt_utils.log_info('Loading {} from {} source(s)'.format("{}.{}".format(relation.schema, relation.identifier), + source_count)) -%} + {%- endif -%} +{% endmacro %} \ No newline at end of file diff --git a/macros/internal/helpers/stage_processing_macros.sql b/macros/internal/helpers/stage_processing_macros.sql index 3b36274cb..e3db7b4bd 100644 --- a/macros/internal/helpers/stage_processing_macros.sql +++ b/macros/internal/helpers/stage_processing_macros.sql @@ -90,3 +90,30 @@ {%- endfor -%} {%- endmacro -%} + + +{%- macro extract_null_column_names(columns_dict=none) -%} + + {%- set extracted_column_names = [] -%} + + {%- if columns_dict is mapping -%} + {%- for key, value in columns_dict.items() -%} + {%- if dbtvault.is_something(value) -%} + {% if dbtvault.is_list(value) %} + {% for col_name in value %} + {%- do extracted_column_names.append(col_name) -%} + {%- do extracted_column_names.append(col_name ~ "_ORIGINAL") -%} + {% endfor %} + {% else %} + {%- do extracted_column_names.append(value) -%} + {%- do extracted_column_names.append(value ~ "_ORIGINAL") -%} + {% endif %} + {%- endif -%} + {%- endfor -%} + + {%- do return(extracted_column_names) -%} + {%- else -%} + {%- do return([]) -%} + {%- endif -%} + +{%- endmacro -%} diff --git a/macros/internal/metadata_processing/as_constant.sql b/macros/internal/metadata_processing/as_constant.sql index 868b1b918..328be8b3c 100644 --- a/macros/internal/metadata_processing/as_constant.sql +++ b/macros/internal/metadata_processing/as_constant.sql @@ -14,15 +14,7 @@ {%- else -%} - {%- if dbtvault.is_expression(column_str) -%} - - {{- return(column_str) -}} - - {%- else -%} - - {{- return(dbtvault.escape_column_names(column_str)) -}} - - {%- endif -%} + {{- return(column_str) -}} {%- endif -%} {%- else -%} diff --git a/macros/internal/metadata_processing/concat_ws.sql 
b/macros/internal/metadata_processing/concat_ws.sql index f8f1aed35..a6b7c6416 100644 --- a/macros/internal/metadata_processing/concat_ws.sql +++ b/macros/internal/metadata_processing/concat_ws.sql @@ -6,7 +6,7 @@ {%- macro default__concat_ws(string_list, separator="||") -%} - {{ "CONCAT_WS('" ~ separator ~ "', " ~ string_list | join(", ") ~ ")" }} + CONCAT_WS('{{ separator }}', {{ string_list | join(", ") }}) {%- endmacro -%} @@ -19,4 +19,10 @@ {%- endfor -%} {{- '\n)' -}} -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} + +{%- macro sqlserver__concat_ws(string_list, separator="||") -%} + +{{ dbtvault.default__concat_ws(string_list=string_list, separator=separator) }} + +{%- endmacro -%} diff --git a/macros/internal/metadata_processing/escape_column_names.sql b/macros/internal/metadata_processing/escape_column_names.sql index 4c8d3063d..6e5b130ee 100644 --- a/macros/internal/metadata_processing/escape_column_names.sql +++ b/macros/internal/metadata_processing/escape_column_names.sql @@ -1,6 +1,8 @@ {%- macro escape_column_names(columns=none) -%} -{# Different platforms use different escape characters, the default below is for Snowflake which uses double quotes #} + {%- if dbtvault.is_list(columns) -%} + {%- set columns = dbtvault.expand_column_list(columns) -%} + {%- endif -%} {%- if dbtvault.is_something(columns) -%} @@ -64,31 +66,31 @@ {%- endif -%} -{%- if columns is none -%} + {%- if columns is none -%} - {%- do return(none) -%} + {%- do return(none) -%} -{%- elif columns == [] -%} + {%- elif columns == [] -%} - {%- do return([]) -%} + {%- do return([]) -%} -{%- elif columns == {} -%} + {%- elif columns == {} -%} - {%- do return({}) -%} + {%- do return({}) -%} -{%- elif columns is string -%} + {%- elif columns is string -%} - {%- do return(col_string) -%} + {%- do return(col_string) -%} -{%- elif dbtvault.is_list(columns) -%} + {%- elif dbtvault.is_list(columns) -%} - {%- do return(col_list) -%} + {%- do return(col_list) -%} -{%- elif columns is 
mapping -%} + {%- elif columns is mapping -%} - {%- do return(col_mapping) -%} + {%- do return(col_mapping) -%} -{%- endif -%} + {%- endif -%} {%- endmacro -%} @@ -101,33 +103,44 @@ {%- macro default__escape_column_name(column) -%} - {%- set escape_char_left = var('escape_char_left', '"') -%} - {%- set escape_char_right = var('escape_char_right', '"') -%} + {# Do not escape a constant (single quoted) value #} + {%- if column | first == "'" and column | last == "'" -%} + {%- set escaped_column_name = column -%} + {%- else -%} + {%- set escape_char_default_left, escape_char_default_right = dbtvault.get_escape_characters() -%} + {%- set escape_char_left = var('escape_char_left', escape_char_default_left) -%} + {%- set escape_char_right = var('escape_char_right', escape_char_default_right) -%} - {%- set escaped_column_name = escape_char_left ~ column | replace(escape_char_left, '') | replace(escape_char_right, '') | trim ~ escape_char_right -%} + {%- set escaped_column_name = escape_char_left ~ column | replace(escape_char_left, '') | replace(escape_char_right, '') | trim ~ escape_char_right -%} + {%- endif -%} {%- do return(escaped_column_name) -%} {%- endmacro -%} -{%- macro sqlserver__escape_column_name(column) -%} - - {%- set escape_char_left = var('escape_char_left', '"') -%} - {%- set escape_char_right = var('escape_char_right', '"') -%} +{% macro get_escape_characters() -%} - {%- set escaped_column_name = escape_char_left ~ column | replace(escape_char_left, '') | replace(escape_char_right, '') | trim ~ escape_char_right -%} + {% do return(adapter.dispatch('get_escape_characters', 'dbtvault')()) -%} - {%- do return(escaped_column_name) -%} - -{%- endmacro -%} +{%- endmacro %} -{%- macro bigquery__escape_column_name(column) -%} +{%- macro snowflake__get_escape_characters() %} + {%- do return (('"', '"')) -%} +{%- endmacro %} - {%- set escape_char_left = var('escape_char_left', '`') -%} - {%- set escape_char_right = var('escape_char_right', '`') -%} +{%- macro 
bigquery__get_escape_characters() %} + {%- do return (('`', '`')) -%} +{%- endmacro %} - {%- set escaped_column_name = escape_char_left ~ column | replace(escape_char_left, '') | replace(escape_char_right, '') | trim ~ escape_char_right -%} +{%- macro sqlserver__get_escape_characters() %} + {%- do return (('"', '"')) -%} +{%- endmacro %} - {%- do return(escaped_column_name) -%} +{%- macro databricks__get_escape_characters() %} + {%- do return (('`', '`')) -%} +{%- endmacro %} -{%- endmacro -%} \ No newline at end of file +{%- macro postgres__get_escape_characters() %} + {#- DO NOT QUOTE FOR NOW. Postgres has a "feature" which forces explicit casing and breaks the SQL-92 standard -#} + {%- do return (('', '')) -%} +{%- endmacro %} diff --git a/macros/internal/metadata_processing/expand_column_list.sql b/macros/internal/metadata_processing/expand_column_list.sql index 8c524d3ec..1338254ed 100644 --- a/macros/internal/metadata_processing/expand_column_list.sql +++ b/macros/internal/metadata_processing/expand_column_list.sql @@ -1,9 +1,7 @@ {%- macro expand_column_list(columns=none) -%} {%- if not columns -%} - {%- if execute -%} - {{- exceptions.raise_compiler_error("Expected a list of columns, got: " ~ columns) -}} - {%- endif -%} + {%- do return([]) -%} {%- endif -%} {%- set col_list = [] -%} @@ -49,4 +47,4 @@ {%- do return(col_list) -%} -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/internal/metadata_processing/process_payload_column_excludes.sql b/macros/internal/metadata_processing/process_payload_column_excludes.sql new file mode 100644 index 000000000..80bfc2b9a --- /dev/null +++ b/macros/internal/metadata_processing/process_payload_column_excludes.sql @@ -0,0 +1,38 @@ +{%- macro process_payload_column_excludes(src_pk, src_hashdiff, src_payload, src_extra_columns, + src_eff, src_ldts, src_source, source_model) -%} + {#- Returns src_payload unchanged unless it is a mapping. For a mapping with a truthy exclude_columns, returns the source model columns that are not in the other metadata and not listed under columns. -#} + {%- if src_payload is not mapping -%} + {%- do return(src_payload) -%} + {%- endif -%} + + {%- set
source_model_cols = adapter.get_columns_in_relation(ref(source_model)) -%} + {%- set columns_in_metadata = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, + src_payload, src_extra_columns, + src_eff, src_ldts, src_source]) | map('lower') | list -%} + + {%- set payload_cols = [] -%} + {%- for col in source_model_cols -%} + {%- if col.column | lower not in columns_in_metadata -%} + {%- do payload_cols.append(col.column) -%} + {%- endif -%} + {%- endfor -%} + + {%- if 'exclude_columns' in src_payload.keys() -%} + {%- set table_excludes_columns = src_payload.exclude_columns -%} + + {%- if table_excludes_columns -%} + + {%- set excluded_payload = [] -%} + {%- set exclude_columns_list = src_payload.columns | map('lower') | list -%} + + {%- for col in payload_cols -%} + {%- if col | lower not in exclude_columns_list -%} + {%- do excluded_payload.append(col) -%} + {%- endif -%} + {%- endfor -%} + {%- endif -%} + {%- endif -%} + + {#- excluded_payload is only set when exclusion was requested; otherwise fall back to the listed columns instead of returning an undefined value. NOTE(review): confirm this is the intended payload when exclude_columns is absent or false. -#} {%- do return(excluded_payload if excluded_payload is defined else src_payload.columns) -%} + +{%- endmacro -%} diff --git a/macros/materialisations/drop_temporary.sql b/macros/materialisations/drop_temporary.sql new file mode 100644 index 000000000..eef3c1fc2 --- /dev/null +++ b/macros/materialisations/drop_temporary.sql @@ -0,0 +1,14 @@ +{% macro drop_temporary_special(tmp_relation) %} + {# In databricks and sqlserver a temporary view/table can only be dropped by #} + {# the connection or session that created it so drop it now before the commit below closes this session #} + {# The statement name is a fixed label: no loop index is passed into this macro #} + {%- set drop_query_name = 'DROP_QUERY' -%} + {% call statement(drop_query_name, fetch_result=True) -%} + {% if target.type == 'databricks' %} + DROP VIEW {{ tmp_relation }}; + {% elif target.type == 'sqlserver' %} + DROP TABLE {{ tmp_relation }}; + {% endif %} + {%- endcall %} + +{% endmacro %} \ No newline at end of file diff --git a/macros/materialisations/mat_is_checks.sql b/macros/materialisations/mat_is_checks.sql index 95778ffc3..d37521045 100644 --- a/macros/materialisations/mat_is_checks.sql +++
b/macros/materialisations/mat_is_checks.sql @@ -17,7 +17,7 @@ {{ return(relation is not none and relation.type == 'table' and model.config.materialized == 'vault_insert_by_period' - and not flags.FULL_REFRESH) }} + and not should_full_refresh()) }} {% endif %} {% endmacro %} @@ -33,7 +33,7 @@ {{ return(relation is not none and relation.type == 'table' and model.config.materialized == 'vault_insert_by_rank' - and not flags.FULL_REFRESH) }} + and not should_full_refresh()) }} {% endif %} {% endmacro %} @@ -49,7 +49,7 @@ {{ return(relation is not none and relation.type == 'table' and model.config.materialized == 'bridge_incremental' - and not flags.FULL_REFRESH) }} + and not should_full_refresh()) }} {% endif %} {% endmacro %} @@ -65,6 +65,6 @@ {{ return(relation is not none and relation.type == 'table' and model.config.materialized == 'pit_incremental' - and not flags.FULL_REFRESH) }} + and not should_full_refresh()) }} {% endif %} {% endmacro %} \ No newline at end of file diff --git a/macros/materialisations/period_mat_helpers/get_period_boundaries.sql b/macros/materialisations/period_mat_helpers/get_period_boundaries.sql index b90ce6875..c17465df0 100644 --- a/macros/materialisations/period_mat_helpers/get_period_boundaries.sql +++ b/macros/materialisations/period_mat_helpers/get_period_boundaries.sql @@ -1,8 +1,7 @@ -{%- macro get_period_boundaries(target_schema, target_table, timestamp_field, start_date, stop_date, period) -%} +{%- macro get_period_boundaries(target_relation, timestamp_field, start_date, stop_date, period) -%} {% set macro = adapter.dispatch('get_period_boundaries', - 'dbtvault')(target_schema=target_schema, - target_table=target_table, + 'dbtvault')(target_relation=target_relation, timestamp_field=timestamp_field, start_date=start_date, stop_date=stop_date, @@ -13,15 +12,16 @@ -{% macro default__get_period_boundaries(target_schema, target_table, timestamp_field, start_date, stop_date, period) -%} +{% macro 
default__get_period_boundaries(target_relation, timestamp_field, start_date, stop_date, period) -%} + {%- set from_date_or_timestamp = "NULLIF('{}','none')::TIMESTAMP".format(stop_date | lower) -%} {% set period_boundary_sql -%} WITH period_data AS ( SELECT COALESCE(MAX({{ timestamp_field }}), '{{ start_date }}')::TIMESTAMP AS start_timestamp, - COALESCE({{ dbt_utils.dateadd('millisecond', 86399999, "NULLIF('" ~ stop_date | lower ~ "','none')::TIMESTAMP") }}, + COALESCE({{ dbt_utils.dateadd('millisecond', 86399999, from_date_or_timestamp) }}, {{ dbtvault.current_timestamp() }} ) AS stop_timestamp - FROM {{ target_schema }}.{{ target_table }} + FROM {{ target_relation }} ) SELECT start_timestamp, @@ -44,22 +44,22 @@ -{% macro bigquery__get_period_boundaries(target_schema, target_table, timestamp_field, start_date, stop_date, period) -%} +{% macro bigquery__get_period_boundaries(target_relation, timestamp_field, start_date, stop_date, period) -%} + + {%- set from_date_or_timestamp = "NULLIF('{}','none')".format(stop_date | lower) -%} {% set period_boundary_sql -%} with data as ( select - coalesce(CAST(max({{ timestamp_field }}) AS DATETIME), CAST('{{ start_date }}' AS DATETIME)) as START_TIMESTAMP, - coalesce({{ dbt_utils.dateadd('millisecond', 86399999, "nullif('" ~ stop_date | lower ~ "','none')") }}, - CAST(CURRENT_TIMESTAMP() AS DATETIME) ) as STOP_TIMESTAMP - from {{ target_schema }}.{{ target_table }} + COALESCE(CAST(MAX({{ timestamp_field }}) AS DATETIME), CAST('{{ start_date }}' AS DATETIME)) as START_TIMESTAMP, + COALESCE({{ dbt_utils.dateadd('millisecond', 86399999, from_date_or_timestamp) }}, + CAST(CURRENT_TIMESTAMP() AS DATETIME)) as STOP_TIMESTAMP + from {{ target_relation }} ) select START_TIMESTAMP, STOP_TIMESTAMP, - {{ dbt_utils.datediff('start_timestamp', - 'stop_timestamp', - period) }} + 1 as NUM_PERIODS + {{ dbt_utils.datediff('start_timestamp', 'stop_timestamp', period) }} + 1 as NUM_PERIODS from data {%- endset %} @@ -76,19 +76,79 @@ -{% macro 
sqlserver__get_period_boundaries(target_schema, target_table, timestamp_field, start_date, stop_date, period) -%} +{% macro sqlserver__get_period_boundaries(target_relation, timestamp_field, start_date, stop_date, period) -%} {# MSSQL cannot CAST datetime2 strings with more than 7 decimal places #} - {% set start_date_mssql = start_date[0:27] %} - {% set stop_date_mssql = stop_date[0:27] %} + {% set start_date = start_date[0:27] %} + {% set stop_date = stop_date[0:27] %} + {%- set from_date_or_timestamp = "CAST(NULLIF('{}','none') AS DATETIME2)".format(stop_date | lower) %} + + {% set period_boundary_sql -%} + WITH period_data AS ( + SELECT + CAST(COALESCE(MAX({{ timestamp_field }}), CAST('{{ start_date }}' AS DATETIME2)) AS DATETIME2) AS start_timestamp, + COALESCE({{ dbt_utils.dateadd('millisecond', 86399999, from_date_or_timestamp) }}, + {{ dbtvault.current_timestamp() }} ) AS stop_timestamp + FROM {{ target_relation }} + ) + SELECT + start_timestamp, + stop_timestamp, + {{ dbt_utils.datediff('start_timestamp', 'stop_timestamp', period) }} + 1 AS num_periods + FROM period_data + {%- endset %} + + {% set period_boundaries_dict = dbtvault.get_query_results_as_dict(period_boundary_sql) %} + + {% set period_boundaries = {'start_timestamp': period_boundaries_dict['START_TIMESTAMP'][0] | string, + 'stop_timestamp': period_boundaries_dict['STOP_TIMESTAMP'][0] | string, + 'num_periods': period_boundaries_dict['NUM_PERIODS'][0] | int} %} + + {% do return(period_boundaries) %} +{%- endmacro %} + + +{% macro databricks__get_period_boundaries(target_relation, timestamp_field, start_date, stop_date, period) -%} + + {%- set from_date_or_timestamp = "NULLIF('{}','none')::TIMESTAMP".format(stop_date | lower) -%} + + {% set period_boundary_sql -%} + + WITH period_data AS ( + SELECT + COALESCE(MAX({{ timestamp_field }}), CAST('{{ start_date }}' AS TIMESTAMP)) AS start_timestamp, + COALESCE({{ dbt_utils.dateadd('millisecond', 86399999, from_date_or_timestamp) }}, + {{ 
dbtvault.current_timestamp() }}) AS stop_timestamp + FROM {{ target_relation }} + ) + SELECT + IF(stop_timestamp < start_timestamp, stop_timestamp, start_timestamp) AS start_timestamp, + stop_timestamp, + {{ dbt_utils.datediff('start_timestamp', 'stop_timestamp', period) }} + 1 AS num_periods + + FROM period_data + {%- endset %} + + {% set period_boundaries_dict = dbtvault.get_query_results_as_dict(period_boundary_sql) %} + + {% set period_boundaries = {'start_timestamp': period_boundaries_dict['START_TIMESTAMP'][0] | string, + 'stop_timestamp': period_boundaries_dict['STOP_TIMESTAMP'][0] | string, + 'num_periods': period_boundaries_dict['NUM_PERIODS'][0] | int} %} + + {% do return(period_boundaries) %} +{%- endmacro %} + + + +{% macro postgres__get_period_boundaries(target_relation, timestamp_field, start_date, stop_date, period) -%} {% set period_boundary_sql -%} WITH period_data AS ( SELECT - CAST(COALESCE(MAX({{ timestamp_field }}), CAST('{{ start_date_mssql }}' AS DATETIME2)) AS DATETIME2) AS start_timestamp, - COALESCE({{ dbt_utils.dateadd('millisecond', 86399999, "CAST(NULLIF('" ~ stop_date_mssql | lower ~ "','none') AS DATETIME2)") }}, + COALESCE(MAX({{ timestamp_field }}), '{{ start_date }}')::TIMESTAMP AS start_timestamp, + COALESCE({{ dbt_utils.dateadd('millisecond', 86399999, "NULLIF('" ~ stop_date | lower ~ "','none')::TIMESTAMP") }}, {{ dbtvault.current_timestamp() }} ) AS stop_timestamp - FROM {{ target_schema }}.{{ target_table }} + FROM {{ target_relation }} ) SELECT start_timestamp, diff --git a/macros/materialisations/period_mat_helpers/get_period_filter_sql.sql b/macros/materialisations/period_mat_helpers/get_period_filter_sql.sql index 245eedef6..07bc57c2c 100644 --- a/macros/materialisations/period_mat_helpers/get_period_filter_sql.sql +++ b/macros/materialisations/period_mat_helpers/get_period_filter_sql.sql @@ -40,4 +40,18 @@ offset, period)}) -%} {# MSSQL does not allow CTEs in a subquery #} {{ filtered_sql.sql }} -{%- endmacro %} \ No 
newline at end of file +{%- endmacro %} + + + +{% macro postgres__get_period_filter_sql(target_cols_csv, base_sql, timestamp_field, period, start_timestamp, stop_timestamp, offset) -%} + {#- Substitutes the period filter into base_sql, then selects target_cols_csv from the result as an aliased derived table (PostgreSQL before 16 rejects an unaliased subquery in FROM). -#} + {%- set filtered_sql = {'sql': base_sql} -%} + + {%- do filtered_sql.update({'sql': dbtvault.replace_placeholder_with_period_filter(filtered_sql.sql, + timestamp_field, + start_timestamp, + stop_timestamp, + offset, period)}) -%} + select {{ target_cols_csv }} from ({{ filtered_sql.sql }}) AS period_filtered +{%- endmacro %} diff --git a/macros/materialisations/period_mat_helpers/get_period_of_load.sql b/macros/materialisations/period_mat_helpers/get_period_of_load.sql index fecd80745..3241ea79d 100644 --- a/macros/materialisations/period_mat_helpers/get_period_of_load.sql +++ b/macros/materialisations/period_mat_helpers/get_period_of_load.sql @@ -9,8 +9,6 @@ {%- endmacro %} - - {%- macro default__get_period_of_load(period, offset, start_timestamp) -%} {% set period_of_load_sql -%} @@ -25,12 +23,10 @@ {%- endmacro -%} - - {%- macro bigquery__get_period_of_load(period, offset, start_timestamp) -%} {% set period_of_load_sql -%} - SELECT DATE_TRUNC(DATE_ADD( DATE('{{start_timestamp}}'), INTERVAL {{ offset }} {{ period }}), {{ period }} ) AS PERIOD_OF_LOAD + SELECT DATE_TRUNC(DATE_ADD( DATE('{{ start_timestamp }}'), INTERVAL {{ offset }} {{ period }}), {{ period }} ) AS PERIOD_OF_LOAD {%- endset %} {% set period_of_load_dict = dbtvault.get_query_results_as_dict(period_of_load_sql) %} @@ -41,8 +37,6 @@ {%- endmacro -%} - - {%- macro sqlserver__get_period_of_load(period, offset, start_timestamp) -%} {# MSSQL cannot CAST datetime2 strings with more than 7 decimal places #} {% set start_timestamp_mssql = start_timestamp[0:23] %} @@ -56,4 +50,26 @@ {% set period_of_load = period_of_load_dict['PERIOD_OF_LOAD'][0] | string %} {% do return(period_of_load) %} -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} + + +{%- macro databricks__get_period_of_load(period, offset, start_timestamp) -%} + {% do
return(dbtvault.default__get_period_of_load(period=period, offset=offset, start_timestamp=start_timestamp)) %} +{%- endmacro -%} + + + +{%- macro postgres__get_period_of_load(period, offset, start_timestamp) -%} + {# Postgres uses different DateTime arithmetic #} + {% set period_of_load_sql -%} + SELECT DATE_TRUNC('{{ period }}', + TO_TIMESTAMP('{{ start_timestamp }}', 'YYYY-MM-DD HH24:MI:SS') + interval '{{ offset }} {{ period }}' + ) AS period_of_load + {%- endset %} + + {% set period_of_load_dict = dbtvault.get_query_results_as_dict(period_of_load_sql) %} + + {% set period_of_load = period_of_load_dict['PERIOD_OF_LOAD'][0] | string %} + + {% do return(period_of_load) %} +{%- endmacro -%} diff --git a/macros/materialisations/period_mat_helpers/replace_placeholder_with_period_filter.sql b/macros/materialisations/period_mat_helpers/replace_placeholder_with_period_filter.sql index 216bc106a..00baf71d8 100644 --- a/macros/materialisations/period_mat_helpers/replace_placeholder_with_period_filter.sql +++ b/macros/materialisations/period_mat_helpers/replace_placeholder_with_period_filter.sql @@ -60,3 +60,17 @@ {% do return(filtered_sql) %} {% endmacro %} + + + +{% macro postgres__replace_placeholder_with_period_filter(core_sql, timestamp_field, start_timestamp, stop_timestamp, offset, period) %} + + {%- set period_filter -%} + {{ timestamp_field }}::DATE >= DATE_TRUNC('{{ period }}', TO_DATE('{{ start_timestamp }}', 'YYYY-MM-DD') + INTERVAL '{{ offset }} {{ period }}') + AND {{ timestamp_field }}::DATE < DATE_TRUNC('{{ period }}', TO_DATE('{{ start_timestamp }}','YYYY-MM-DD') + INTERVAL '{{ offset }} {{ period }}' + INTERVAL '1 {{ period }}') + AND {{ timestamp_field }}::DATE >= TO_DATE('{{ start_timestamp }}','YYYY-MM-DD') + {%- endset -%} + {%- set filtered_sql = core_sql | replace("__PERIOD_FILTER__", period_filter) -%} + + {% do return(filtered_sql) %} +{% endmacro %} diff --git a/macros/materialisations/rank_mat_helpers/replace_placeholder_with_rank_filter.sql 
b/macros/materialisations/rank_mat_helpers/replace_placeholder_with_rank_filter.sql index 05e4ffbbc..d0178b42e 100644 --- a/macros/materialisations/rank_mat_helpers/replace_placeholder_with_rank_filter.sql +++ b/macros/materialisations/rank_mat_helpers/replace_placeholder_with_rank_filter.sql @@ -5,9 +5,11 @@ rank_column=rank_column, rank_iteration=rank_iteration) %} {% do return(macro) %} - {%- endmacro %} +{%- endmacro %} - {% macro default__replace_placeholder_with_rank_filter(core_sql, rank_column, rank_iteration) %} + + +{% macro default__replace_placeholder_with_rank_filter(core_sql, rank_column, rank_iteration) %} {%- set rank_filter -%} {{ rank_column }}:: INTEGER = {{ rank_iteration }}::INTEGER diff --git a/macros/materialisations/vault_insert_by_period_materialization.sql b/macros/materialisations/vault_insert_by_period_materialization.sql index 3e4c0bf98..7eb9b8ee9 100644 --- a/macros/materialisations/vault_insert_by_period_materialization.sql +++ b/macros/materialisations/vault_insert_by_period_materialization.sql @@ -1,5 +1,9 @@ {% materialization vault_insert_by_period, default -%} + {% if target.type == "postgres" and execute %} + {{ exceptions.raise_compiler_error("The vault_insert_by_period materialisation is currently unavailable on Postgres.") }} + {% endif %} + {%- set full_refresh_mode = (should_full_refresh()) -%} {% if target.type == "sqlserver" %} @@ -53,9 +57,7 @@ 0, period) %} {% set build_sql = create_table_as(False, target_relation, filtered_sql) %} {% else %} - - {% set period_boundaries = dbtvault.get_period_boundaries(schema, - target_relation.name, + {% set period_boundaries = dbtvault.get_period_boundaries(target_relation, timestamp_field, start_stop_dates.start_date, start_stop_dates.stop_date, @@ -68,6 +70,7 @@ {% for i in range(period_boundaries.num_periods) -%} {%- set iteration_number = i + 1 -%} + {%- set period_of_load = dbtvault.get_period_of_load(period, i, period_boundaries.start_timestamp) -%} {{ 
dbt_utils.log_info("Running for {} {} of {} ({}) [{}]".format(period, iteration_number, period_boundaries.num_periods, period_of_load, model.unique_id)) }} @@ -78,9 +81,13 @@ period_boundaries.start_timestamp, period_boundaries.stop_timestamp, i) %} + + {# This call statement drops and then creates a temporary table #} {# but MSSQL will fail to drop any temporary table created by a previous loop iteration #} {# See MSSQL note and drop code below #} + + {# [ ] TODO check dbt postgres implementation for a possible fix #} {% call statement() -%} {{ create_table_as(True, tmp_relation, tmp_table_sql) }} {%- endcall %} @@ -100,7 +107,15 @@ {% set result = load_result(insert_query_name) %} {% if 'response' in result.keys() %} {# added in v0.19.0 #} - {% set rows_inserted = result['response']['rows_affected'] %} + {%- if not result['response']['rows_affected'] %} + {% if target.type == "databricks" and result['data'] | length > 0 %} + {% set rows_inserted = result['data'][0][1] | int %} + {% else %} + {% set rows_inserted = 0 %} + {% endif %} + {%- else %} + {% set rows_inserted = result['response']['rows_affected'] %} + {%- endif %} {% else %} {# older versions #} {% set rows_inserted = result['status'].split(" ")[2] | int %} {% endif %} @@ -113,16 +128,14 @@ period_of_load, rows_inserted, model.unique_id)) }} - {% if target.type == "sqlserver" %} - {# In MSSQL a temporary table can only be dropped by the connection or session that created it #} - {# so drop it now before the commit below closes this session #} - {%- set drop_query_name = 'DROP_QUERY-' ~ i -%} - {% call statement(drop_query_name, fetch_result=True) -%} - DROP TABLE {{ tmp_relation }}; - {%- endcall %} - {% endif %} + {# In databricks and sqlserver a temporary view/table can only be dropped by #} + {# the connection or session that created it so drop it now before the commit below closes this session #} model.unique_id)) }} + {% if target.type in ['databricks', 'sqlserver'] %} + {{ 
dbtvault.drop_temporary_special(tmp_relation) }} + {% else %} + {% do to_drop.append(tmp_relation) %} + {% endif %} - {% do to_drop.append(tmp_relation) %} {% do adapter.commit() %} {% endfor %} @@ -141,7 +154,15 @@ {% set result = load_result('main') %} {% if 'response' in result.keys() %} {# added in v0.19.0 #} - {% set rows_inserted = result['response']['rows_affected'] %} + {%- if not result['response']['rows_affected'] %} + {% if target.type == "databricks" and result['data'] | length > 0 %} + {% set rows_inserted = result['data'][0][1] | int %} + {% else %} + {% set rows_inserted = 0 %} + {% endif %} + {%- else %} + {% set rows_inserted = result['response']['rows_affected'] %} + {%- endif %} {% else %} {# older versions #} {% set rows_inserted = result['status'].split(" ")[2] | int %} {% endif %} diff --git a/macros/materialisations/vault_insert_by_rank_materialization.sql b/macros/materialisations/vault_insert_by_rank_materialization.sql index 5440171c4..2cbe70d36 100644 --- a/macros/materialisations/vault_insert_by_rank_materialization.sql +++ b/macros/materialisations/vault_insert_by_rank_materialization.sql @@ -1,5 +1,9 @@ {% materialization vault_insert_by_rank, default -%} + {% if target.type == "postgres" and execute %} + {{ exceptions.raise_compiler_error("The vault_insert_by_rank materialisation is currently unavailable on Postgres.") }} + {% endif %} + {%- set full_refresh_mode = (should_full_refresh()) -%} {% if target.type == "sqlserver" %} @@ -78,9 +82,14 @@ {%- endcall %} {% set result = load_result(insert_query_name) %} - {% if 'response' in result.keys() %} {# added in v0.19.0 #} - {% set rows_inserted = result['response']['rows_affected'] %} + {# Investigate for Databricks #} + {%- if result['response']['rows_affected'] == None %} + {% set rows_inserted = 0 %} + {%- else %} + {% set rows_inserted = result['response']['rows_affected'] %} + {%- endif %} + {% else %} {# older versions #} {% set rows_inserted = result['status'].split(" ")[2] | 
int %} {% endif %} @@ -93,16 +102,14 @@ rows_inserted, model.unique_id)) }} - {% if target.type == "sqlserver" %} - {# In MSSQL a temporary table can only be dropped by the connection or session that created it #} - {# so drop it now before the commit below closes this session #} - {%- set drop_query_name = 'DROP_QUERY-' ~ i -%} - {% call statement(drop_query_name, fetch_result=True) -%} - DROP TABLE {{ tmp_relation }}; - {%- endcall %} - {% endif %} + {# In databricks and sqlserver a temporary view/table can only be dropped by #} + {# the connection or session that created it so drop it now before the commit below closes this session #} model.unique_id)) }} + {% if target.type in ['databricks', 'sqlserver'] %} + {{ dbtvault.drop_temporary_special(tmp_relation) }} + {% else %} + {% do to_drop.append(tmp_relation) %} + {% endif %} - {% do to_drop.append(tmp_relation) %} {% do adapter.commit() %} {% endfor %} diff --git a/macros/staging/derive_columns.sql b/macros/staging/derive_columns.sql index 941ce5315..dc7f06500 100644 --- a/macros/staging/derive_columns.sql +++ b/macros/staging/derive_columns.sql @@ -16,25 +16,55 @@ {%- if columns is mapping and columns is not none -%} {#- Add aliases of derived columns to excludes and full SQL to includes -#} - {%- for col in columns -%} + {%- for derived_column in columns -%} + {% set column_config = columns[derived_column] %} - {%- if dbtvault.is_list(columns[col]) -%} + {%- if dbtvault.is_list(column_config) -%} {%- set column_list = [] -%} - {%- for concat_component in columns[col] -%} + {%- for concat_component in column_config -%} {%- set column_str = dbtvault.as_constant(concat_component) -%} {%- do column_list.append(column_str) -%} {%- endfor -%} + {%- set concat = dbtvault.concat_ws(column_list, "||") -%} - {%- set concat_string = concat ~ " AS " ~ dbtvault.escape_column_names(col) -%} + {%- set concat_string = concat ~ " AS " ~ dbtvault.escape_column_names(derived_column) -%} {%- do der_columns.append(concat_string) 
-%} - {%- set exclude_columns = exclude_columns + columns[col] -%} - {% else %} - {%- set column_str = dbtvault.as_constant(columns[col]) -%} - {%- do der_columns.append(column_str ~ " AS " ~ dbtvault.escape_column_names(col)) -%} - {%- do exclude_columns.append(col) -%} - {% endif %} + {%- else -%} + {%- if column_config is mapping and column_config -%} + {%- set column_escape = column_config['escape'] -%} + + {%- if dbtvault.is_list(column_config['source_column']) -%} + {%- set column_list = [] -%} + + {%- for concat_component in column_config['source_column'] -%} + {%- set column_str = dbtvault.as_constant(concat_component) -%} + {%- if column_escape is true %} + {%- set column_str = dbtvault.escape_column_names(column_str) -%} + {% endif %} + {%- do column_list.append(column_str) -%} + {%- endfor -%} + + {%- set concat = dbtvault.concat_ws(column_list, "||") -%} + {%- set concat_string = concat ~ " AS " ~ dbtvault.escape_column_names(derived_column) -%} + + {%- do der_columns.append(concat_string) -%} + {%- else -%} + {%- set column_str = dbtvault.as_constant(column_config['source_column']) -%} + {%- if column_escape is true -%} + {%- do der_columns.append(dbtvault.escape_column_names(column_str) ~ " AS " ~ dbtvault.escape_column_names(derived_column)) -%} + {%- else -%} + {%- do der_columns.append(column_str ~ " AS " ~ dbtvault.escape_column_names(derived_column)) -%} + {%- endif -%} + {%- endif -%} + {%- else -%} + {%- set column_str = dbtvault.as_constant(column_config) -%} + {%- do der_columns.append(column_str ~ " AS " ~ dbtvault.escape_column_names(derived_column)) -%} + {%- endif -%} + {%- endif -%} + + {%- do exclude_columns.append(derived_column) -%} {%- endfor -%} @@ -42,7 +72,7 @@ {%- if source_relation is defined and source_relation is not none -%} {%- for col in source_cols -%} - {%- if col not in exclude_columns -%} + {%- if col | lower not in exclude_columns | map('lower') | list -%} {%- do src_columns.append(dbtvault.escape_column_names(col)) 
-%} {%- endif -%} {%- endfor -%} @@ -61,10 +91,11 @@ {%- if execute -%} {{ exceptions.raise_compiler_error("Invalid column configuration: -expected format: {'source_relation': Relation, 'columns': {column_name: column_value}} +expected format, either: {'source_relation': Relation, 'columns': {column_name: column_value}} +or: {'source_relation': Relation, 'columns': {column_name: {'source_column': column_value, 'escape': true / false}}} got: {'source_relation': " ~ source_relation ~ ", 'columns': " ~ columns ~ "}") }} {%- endif %} {%- endif %} -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/staging/null_columns.sql b/macros/staging/null_columns.sql new file mode 100644 index 000000000..6676527bf --- /dev/null +++ b/macros/staging/null_columns.sql @@ -0,0 +1,89 @@ +{%- macro null_columns(source_relation=none, columns=none) -%} + + {{- adapter.dispatch('null_columns', 'dbtvault')(source_relation=source_relation, columns=columns) -}} + +{%- endmacro %} + +{%- macro default__null_columns(source_relation=none, columns=none) -%} + +{%- if columns is mapping and columns is not none -%} + + {%- set ns = namespace() -%} + + {%- for col in columns -%} + {%- if col.lower() == 'required' -%} + {% if dbtvault.is_something(columns[col]) %} + {%- if columns[col] is string -%} + {%- set ns.required = [columns[col]] -%} + {%- elif dbtvault.is_list(columns[col]) -%} + {%- set ns.required = columns[col] -%} + {%- endif -%} + {%- endif -%} + {%- endif -%} + {%- if col.lower() == 'optional' -%} + {% if dbtvault.is_something(columns[col]) %} + {%- if columns[col] is string -%} + {%- set ns.optional = [columns[col]] -%} + {%- elif dbtvault.is_list(columns[col]) -%} + {%- set ns.optional = columns[col] -%} + {%- endif -%} + {%- endif -%} + {%- endif -%} + {%- endfor -%} + + {%- set required_value = var('null_key_required', '-1') -%} + {%- set optional_value = var('null_key_optional', '-2') -%} + + {%- if dbtvault.is_something(ns.required) -%} + {%- 
filter indent(width=0) -%} + {%- for col_name in ns.required -%} + {{ dbtvault.null_column_sql(col_name, required_value) }}{{ ",\n" if not loop.last }}{{ ",\n" if loop.last and dbtvault.is_something(ns.optional) else "" }} + {%- endfor -%} + {%- endfilter -%} + {%- endif -%} + + {%- if dbtvault.is_something(ns.optional) -%} + {%- filter indent(width=0) -%} + {%- for col_name in ns.optional -%} + {{ dbtvault.null_column_sql(col_name, optional_value) }}{{ ",\n" if not loop.last else "\n" }} + {%- endfor -%} + {%- endfilter -%} + {%- endif -%} + +{%- endif -%} + +{%- endmacro -%} + + +{%- macro null_column_sql(col_name, default_value) -%} + + {{- adapter.dispatch('null_column_sql', 'dbtvault')(col_name=col_name, default_value=default_value) -}} + +{%- endmacro -%} + +{%- macro default__null_column_sql(col_name, default_value) -%} + + {%- set col_name_esc = dbtvault.escape_column_names(col_name) -%} + {%- set col_name_orig_esc = dbtvault.escape_column_names(col_name ~ "_ORIGINAL") -%} + {{ col_name_esc }} AS {{ col_name_orig_esc }}, + IFNULL({{ col_name_esc }}, '{{ default_value }}') AS {{ col_name_esc }} + +{%- endmacro -%} + +{%- macro sqlserver__null_column_sql(col_name, default_value) -%} + + {%- set col_name_esc = dbtvault.escape_column_names(col_name) -%} + {%- set col_name_orig_esc = dbtvault.escape_column_names(col_name ~ "_ORIGINAL") -%} + {{ col_name_esc }} AS {{ col_name_orig_esc }}, + ISNULL({{ col_name_esc }}, '{{ default_value }}') AS {{ col_name_esc }} + +{%- endmacro -%} + +{%- macro postgres__null_column_sql(col_name, default_value) -%} + + {%- set col_name_esc = dbtvault.escape_column_names(col_name) -%} + {%- set col_name_orig_esc = dbtvault.escape_column_names(col_name ~ "_ORIGINAL") -%} + {{ col_name_esc }} AS {{ col_name_orig_esc }}, + COALESCE({{ col_name_esc }}, '{{ default_value }}') AS {{ col_name_esc }} + +{%- endmacro -%} \ No newline at end of file diff --git a/macros/staging/stage.sql b/macros/staging/stage.sql index 4c0cccd4b..9eafd3f6b 
100644 --- a/macros/staging/stage.sql +++ b/macros/staging/stage.sql @@ -1,19 +1,21 @@ -{%- macro stage(include_source_columns=none, source_model=none, hashed_columns=none, derived_columns=none, ranked_columns=none) -%} +{%- macro stage(include_source_columns=none, source_model=none, hashed_columns=none, derived_columns=none, null_columns=none, ranked_columns=none) -%} {%- if include_source_columns is none -%} {%- set include_source_columns = true -%} {%- endif -%} - {{- adapter.dispatch('stage', 'dbtvault')(include_source_columns=include_source_columns, + {{- dbtvault.prepend_generated_by() }} + + {{ adapter.dispatch('stage', 'dbtvault')(include_source_columns=include_source_columns, source_model=source_model, hashed_columns=hashed_columns, derived_columns=derived_columns, - ranked_columns=ranked_columns) -}} + null_columns=null_columns, + ranked_columns=ranked_columns + ) -}} {%- endmacro -%} -{%- macro default__stage(include_source_columns, source_model, hashed_columns, derived_columns, ranked_columns) -%} - -{{ dbtvault.prepend_generated_by() }} +{%- macro default__stage(include_source_columns, source_model, hashed_columns, derived_columns, null_columns, ranked_columns) -%} {% if (source_model is none) and execute %} @@ -31,7 +33,8 @@ {{- exceptions.raise_compiler_error(error_message) -}} {%- endif -%} -{#- Check for source format or ref format and create relation object from source_model -#} +{#- Check for source format or ref format and create +relation object from source_model -#} {% if source_model is mapping and source_model is not none -%} {%- set source_name = source_model | first -%} @@ -49,18 +52,21 @@ {%- endif -%} {%- set derived_column_names = dbtvault.extract_column_names(derived_columns) | map('upper') | list -%} +{%- set null_column_names = dbtvault.extract_null_column_names(null_columns) | map('upper') | list -%} {%- set hashed_column_names = dbtvault.extract_column_names(hashed_columns) | map('upper') | list -%} {%- set ranked_column_names = 
dbtvault.extract_column_names(ranked_columns) | map('upper') | list -%} -{%- set exclude_column_names = derived_column_names + hashed_column_names | map('upper') | list -%} +{%- set exclude_column_names = derived_column_names + null_column_names + hashed_column_names | map('upper') | list -%} {%- set source_and_derived_column_names = (all_source_columns + derived_column_names) | map('upper') | unique | list -%} {%- set source_columns_to_select = dbtvault.process_columns_to_select(all_source_columns, exclude_column_names) -%} -{%- set derived_columns_to_select = dbtvault.process_columns_to_select(source_and_derived_column_names, hashed_column_names) | unique | list -%} +{%- set derived_columns_to_select = dbtvault.process_columns_to_select(source_and_derived_column_names, null_column_names + hashed_column_names) | unique | list -%} +{%- set derived_and_null_columns_to_select = dbtvault.process_columns_to_select(source_and_derived_column_names + null_column_names, hashed_column_names) | unique | list -%} {%- set final_columns_to_select = [] -%} {#- Include source columns in final column selection if true -#} {%- if include_source_columns -%} {%- if dbtvault.is_nothing(derived_columns) + and dbtvault.is_nothing(null_columns) and dbtvault.is_nothing(hashed_columns) and dbtvault.is_nothing(ranked_columns) -%} {%- set final_columns_to_select = final_columns_to_select + all_source_columns -%} @@ -94,13 +100,30 @@ derived_columns AS ( ) {%- endif -%} +{% if dbtvault.is_something(null_columns) -%}, + +null_columns AS ( + + SELECT + + {{ dbtvault.print_list(dbtvault.escape_column_names(derived_columns_to_select)) }}{{"," if dbtvault.is_something(derived_columns_to_select) else ""}} + + {{ dbtvault.null_columns(source_relation=none, columns=null_columns) | indent(4) }} + + FROM {{ last_cte }} + {%- set last_cte = "null_columns" -%} + {%- set final_columns_to_select = final_columns_to_select + null_column_names %} +) +{%- endif -%} + + {% if 
dbtvault.is_something(hashed_columns) -%}, hashed_columns AS ( SELECT - {{ dbtvault.print_list(dbtvault.escape_column_names(derived_columns_to_select)) }}, + {{ dbtvault.print_list(dbtvault.escape_column_names(derived_and_null_columns_to_select)) }}, {% set processed_hash_columns = dbtvault.process_hash_column_excludes(hashed_columns, all_source_columns) -%} {{- dbtvault.hash_columns(columns=processed_hash_columns) | indent(4) }} @@ -137,4 +160,5 @@ columns_to_select AS ( ) SELECT * FROM columns_to_select -{%- endmacro -%} \ No newline at end of file + +{%- endmacro -%} diff --git a/macros/supporting/as_of_date_window.sql b/macros/supporting/as_of_date_window.sql new file mode 100644 index 000000000..64f8a4a52 --- /dev/null +++ b/macros/supporting/as_of_date_window.sql @@ -0,0 +1,123 @@ +{%- macro as_of_date_window(src_pk, src_ldts, stage_tables_ldts, source_model) -%} + +last_safe_load_datetime AS ( + SELECT MIN(LOAD_DATETIME) AS LAST_SAFE_LOAD_DATETIME + FROM ( + + {% for stg in stage_tables_ldts -%} + {%- set stage_ldts = stage_tables_ldts[stg] -%} + SELECT MIN({{ stage_ldts }}) AS LOAD_DATETIME FROM {{ ref(stg) }} + {% if not loop.last %} UNION ALL {% endif %} + {% endfor -%} + + ) AS l +), + +as_of_grain_old_entries AS ( + SELECT DISTINCT AS_OF_DATE + FROM {{ this }} +), + +as_of_grain_lost_entries AS ( + SELECT a.AS_OF_DATE + FROM as_of_grain_old_entries AS a + LEFT OUTER JOIN as_of_dates AS b + ON a.AS_OF_DATE = b.AS_OF_DATE + WHERE b.AS_OF_DATE IS NULL +), + +as_of_grain_new_entries AS ( + SELECT a.AS_OF_DATE + FROM as_of_dates AS a + LEFT OUTER JOIN as_of_grain_old_entries AS b + ON a.AS_OF_DATE = b.AS_OF_DATE + WHERE b.AS_OF_DATE IS NULL +), + +min_date AS ( + SELECT MIN(AS_OF_DATE) AS MIN_DATE + FROM as_of_dates +), + +backfill_as_of AS ( + SELECT AS_OF_DATE + FROM as_of_dates AS a + + {% if target.type == "bigquery" -%} + INNER JOIN last_safe_load_datetime as l + ON a.AS_OF_DATE < l.LAST_SAFE_LOAD_DATETIME + {% else %} + WHERE a.AS_OF_DATE < (SELECT 
LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + {%- endif %} +), + +new_rows_pks AS ( + SELECT {{ dbtvault.prefix([src_pk], 'h') }} + FROM {{ source_model }} AS h + + {% if target.type == "bigquery" -%} + INNER JOIN last_safe_load_datetime as l + ON h.{{ src_ldts }} >= l.LAST_SAFE_LOAD_DATETIME + {% else %} + WHERE h.{{ src_ldts }} >= (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + {%- endif %} +), + +new_rows_as_of AS ( + SELECT AS_OF_DATE + FROM as_of_dates AS a + {% if target.type == "bigquery" -%} + INNER JOIN last_safe_load_datetime as l + ON a.AS_OF_DATE >= l.LAST_SAFE_LOAD_DATETIME + UNION DISTINCT + {% else %} + WHERE a.AS_OF_DATE >= (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + UNION + {%- endif %} + SELECT as_of_date + FROM as_of_grain_new_entries +), + +overlap_pks AS ( + SELECT a.* + FROM {{ this }} AS a + INNER JOIN {{ source_model }} as b + ON {{ dbtvault.multikey(src_pk, prefix=['a','b'], condition='=') }} + {% if target.type == "bigquery" -%} + INNER JOIN min_date + ON 1 = 1 + INNER JOIN last_safe_load_datetime + ON 1 = 1 + LEFT OUTER JOIN as_of_grain_lost_entries + ON a.AS_OF_DATE = as_of_grain_lost_entries.AS_OF_DATE + WHERE a.AS_OF_DATE >= min_date.MIN_DATE + AND a.AS_OF_DATE < last_safe_load_datetime.LAST_SAFE_LOAD_DATETIME + AND as_of_grain_lost_entries.AS_OF_DATE IS NULL + {% else %} + WHERE a.AS_OF_DATE >= (SELECT MIN_DATE FROM min_date) + AND a.AS_OF_DATE < (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + AND a.AS_OF_DATE NOT IN (SELECT AS_OF_DATE FROM as_of_grain_lost_entries) + {%- endif %} +), + +overlap_as_of AS ( + SELECT p.AS_OF_DATE + FROM as_of_dates AS p + {% if target.type == "bigquery" -%} + INNER JOIN min_date + ON 1 = 1 + INNER JOIN last_safe_load_datetime + ON 1 = 1 + LEFT OUTER JOIN as_of_grain_lost_entries + ON p.AS_OF_DATE = as_of_grain_lost_entries.AS_OF_DATE + WHERE p.AS_OF_DATE >= min_date.MIN_DATE + AND p.AS_OF_DATE < last_safe_load_datetime.LAST_SAFE_LOAD_DATETIME 
+ AND as_of_grain_lost_entries.AS_OF_DATE IS NULL + {% else %} + WHERE p.AS_OF_DATE >= (SELECT MIN_DATE FROM min_date) + AND p.AS_OF_DATE < (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + AND p.AS_OF_DATE NOT IN (SELECT AS_OF_DATE FROM as_of_grain_lost_entries) + {% endif %} +) + +{%- endmacro -%} diff --git a/macros/supporting/bridge_shared.sql b/macros/supporting/bridge_shared.sql new file mode 100644 index 000000000..656bbe1e3 --- /dev/null +++ b/macros/supporting/bridge_shared.sql @@ -0,0 +1,58 @@ +{%- macro bridge_overlap_and_new_rows(src_pk, bridge_walk, source_model, new_as_of_dates_cte) -%} + +SELECT + {{ dbtvault.prefix([src_pk], 'a') }}, + b.AS_OF_DATE, + {%- for bridge_step in bridge_walk.keys() -%} + {%- set link_table = bridge_walk[bridge_step]['link_table'] -%} + {%- set eff_sat_table = bridge_walk[bridge_step]['eff_sat_table'] -%} + + {%- set link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_pk']) -%} + + {%- set bridge_link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_link_pk']) -%} + {%- set bridge_end_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_end_date']) -%} + {%- set bridge_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_load_date']) -%} + + {%- set eff_sat_end_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_end_date']) -%} + {%- set eff_sat_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_load_date']) %} + + {{- '\n ' }} {{ link_table | lower }}.{{ link_pk }} AS {{ bridge_link_pk }}, + {{- '\n ' }} {{ eff_sat_table | lower }}.{{ eff_sat_end_date }} AS {{ bridge_end_date }}, + {{- '\n ' }} {{ eff_sat_table | lower }}.{{ eff_sat_load_date }} AS {{ bridge_load_date }} + + {%- if not loop.last %}, {%- endif -%} + + {% endfor -%} + + FROM {{ source_model }} AS a + INNER JOIN {{ new_as_of_dates_cte }} AS b + ON (1=1) + {#- loop_vars carries the previous link's alias and its second foreign key from one join-loop iteration to the next. -#} + {%- set loop_vars = namespace(last_link = '', last_link_fk2 = '') %} + {%- 
for bridge_step in bridge_walk.keys() -%} + + {%- set current_link = bridge_walk[bridge_step]['link_table'] -%} + {%- set current_eff_sat = bridge_walk[bridge_step]['eff_sat_table'] -%} + + {%- set link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_pk']) -%} + {%- set link_fk1 = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_fk1']) -%} + {%- set link_fk2 = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_fk2']) -%} + + {%- set eff_sat_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_pk']) -%} + {%- set eff_sat_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_load_date']) -%} + + {%- if loop.first %} + LEFT JOIN {{ ref(current_link) }} AS {{ current_link | lower }} + ON {{ dbtvault.multikey(src_pk, prefix=['a', current_link | lower], condition='=') }} + {%- else %} + LEFT JOIN {{ ref(current_link) }} AS {{ current_link | lower }} + ON {{ loop_vars.last_link }}.{{ loop_vars.last_link_fk2 }} = {{ current_link | lower }}.{{ link_fk1 }} + {%- endif %} + INNER JOIN {{ ref(current_eff_sat) }} AS {{ current_eff_sat | lower }} + ON {{ current_eff_sat | lower }}.{{ eff_sat_pk }} = {{ current_link | lower }}.{{ link_pk }} + AND {{ current_eff_sat | lower }}.{{ eff_sat_load_date }} <= b.AS_OF_DATE + {%- set loop_vars.last_link = current_link | lower -%} + {%- set loop_vars.last_link_fk2 = link_fk2 -%} + {% endfor %} + +{%- endmacro -%} diff --git a/macros/supporting/cast_binary.sql b/macros/supporting/cast_binary.sql new file mode 100644 index 000000000..91afaa03d --- /dev/null +++ b/macros/supporting/cast_binary.sql @@ -0,0 +1,23 @@ +{%- macro cast_binary(column_str, alias=none) -%} + {{ return(adapter.dispatch('cast_binary', 'dbtvault')(column_str=column_str, alias=alias)) }} +{%- endmacro -%} + + +{%- macro default__cast_binary(column_str, alias=none) -%} + + CAST('{{ column_str }}' AS {{ dbtvault.type_binary() }}) {% if alias %} AS {{ alias }} {%- endif %} + +{%- endmacro -%} + + +{%- 
macro sqlserver__cast_binary(column_str, alias=none) -%} + + CONVERT({{ dbtvault.type_binary() }}, '{{ column_str }}', 2) {% if alias %} AS {{ alias }} {%- endif %} + +{%- endmacro -%} + +{%- macro bigquery__cast_binary(column_str, alias=none) -%} + + {{ dbtvault.default__cast_binary(column_str=column_str, alias=alias) }} + +{%- endmacro -%} diff --git a/macros/supporting/cast_date.sql b/macros/supporting/cast_date.sql new file mode 100644 index 000000000..06122c53c --- /dev/null +++ b/macros/supporting/cast_date.sql @@ -0,0 +1,80 @@ +{%- macro cast_date(column_str, as_string=false, datetime=false, alias=none) -%} + {{ return(adapter.dispatch('cast_date', 'dbtvault')(column_str=column_str, as_string=as_string, datetime=datetime, alias=alias)) }} +{%- endmacro -%} + +{%- macro snowflake__cast_date(column_str, as_string=false, datetime=false, alias=none) -%} + + {%- if datetime -%} + {%- if not as_string -%} + TO_DATETIME({{ column_str }}) + {%- else -%} + TO_DATETIME('{{ column_str }}') + {%- endif -%} + {%- else -%} + {%- if not as_string -%} + TO_DATE({{ column_str }}) + {%- else -%} + TO_DATE('{{ column_str }}') + {%- endif -%} + {%- endif -%} + + {%- if alias %} AS {{ alias }} {%- endif %} + +{%- endmacro -%} + + +{%- macro sqlserver__cast_date(column_str, as_string=false, datetime=false, alias=none) -%} + + {%- if datetime -%} + {%- if not as_string -%} + CONVERT(DATETIME2, {{ column_str }}) + {%- else -%} + CONVERT(DATETIME2, '{{ column_str }}') + {%- endif -%} + {%- else -%} + {%- if not as_string -%} + CONVERT(DATE, {{ column_str }}) + {%- else -%} + CONVERT(DATE, '{{ column_str }}') + {%- endif -%} + {%- endif -%} + + {%- if alias %} AS {{ alias }} {%- endif %} + + +{%- endmacro -%} + + +{%- macro bigquery__cast_date(column_str, as_string=false, datetime=false, alias=none) -%} + {#- PARSE_DATETIME already yields a DATETIME; a CAST with no AS type clause is a syntax error, so no CAST wrapper is emitted. -#} + {%- if datetime -%} + {%- if not as_string -%} + PARSE_DATETIME('%F %H:%M:%E6S', {{ column_str }}) + {%- else -%} + PARSE_DATETIME('%F %H:%M:%E6S', '{{ column_str }}') 
+ {%- endif -%} + {%- else -%} + {%- if not as_string -%} + DATE({{ column_str }}) + {%- else -%} + DATE('{{ column_str }}') + {%- endif -%} + {%- endif -%} + + {%- if alias %} AS {{ alias }} {%- endif %} + +{%- endmacro -%} + + +{%- macro databricks__cast_date(column_str, as_string=false, datetime=false, alias=none) -%} + + {{ dbtvault.snowflake__cast_date(column_str=column_str, as_string=as_string, datetime=datetime, alias=alias)}} + +{%- endmacro -%} + + +{%- macro postgres__cast_date(column_str, as_string=false, datetime=false, alias=none) -%} + {#- Postgres has no TO_DATETIME and its TO_DATE requires a format argument, so emit a standard CAST rather than reusing the Snowflake SQL. -#} + CAST({{ "'" ~ column_str ~ "'" if as_string else column_str }} AS {{ 'TIMESTAMP' if datetime else 'DATE' }}){% if alias %} AS {{ alias }}{% endif %} + +{%- endmacro -%} \ No newline at end of file diff --git a/macros/supporting/current_timestamp.sql b/macros/supporting/current_timestamp.sql index 4e89806d9..ca8c37273 100644 --- a/macros/supporting/current_timestamp.sql +++ b/macros/supporting/current_timestamp.sql @@ -15,7 +15,7 @@ {%- endmacro %} {% macro default__current_timestamp_in_utc() %} - {{dbt_utils.current_timestamp_in_utc()}} + {{ dbt_utils.current_timestamp_in_utc() }} {% endmacro %} {% macro sqlserver__current_timestamp_in_utc() %} diff --git a/macros/supporting/datatypes.sql b/macros/supporting/datatypes.sql index ec4c458b6..5825ea077 100644 --- a/macros/supporting/datatypes.sql +++ b/macros/supporting/datatypes.sql @@ -1,11 +1,25 @@ {%- macro type_timestamp() -%} - {{ return(adapter.dispatch('type_timestamp', 'dbtvault')()) }} + {{- return(adapter.dispatch('type_timestamp', 'dbtvault')()) -}} {%- endmacro -%} {%- macro default__type_timestamp() -%} - {{ dbt_utils.type_timestamp() }} + TIMESTAMP_NTZ {%- endmacro -%} {%- macro sqlserver__type_timestamp() -%} - datetime2 + DATETIME2 +{%- endmacro -%} + + + +{%- macro type_binary() -%} + {{- return(adapter.dispatch('type_binary', 'dbtvault')()) -}} +{%- endmacro -%} + +{%- macro default__type_binary() -%} {#- An unset or unrecognised 'hash' var falls back to the MD5 width, mirroring the MD5 fallback used by the hash macros. -#} + {%- if var('hash', 'MD5') != 'SHA' -%} + BINARY(16) + {%- else -%} + 
BINARY(32) + {%- endif -%} {%- endmacro -%} diff --git a/macros/supporting/get_query_results_as_dict.sql b/macros/supporting/get_query_results_as_dict.sql index bf9eb2d64..ffe7af142 100644 --- a/macros/supporting/get_query_results_as_dict.sql +++ b/macros/supporting/get_query_results_as_dict.sql @@ -3,27 +3,12 @@ {% endmacro %} {% macro default__get_query_results_as_dict(query) %} - {{ return(dbt_utils.get_query_results_as_dict(query)) }} -{% endmacro %} - -{% macro sqlserver__get_query_results_as_dict(query) %} - - {%- call statement('get_query_results', fetch_result=True,auto_begin=false) -%} - - {{ query }} + {%- set query_results = dbt_utils.get_query_results_as_dict(query) -%} + {%- set query_results_processed = {} -%} - {%- endcall -%} + {% for k, v in query_results.items() %} + {% do query_results_processed.update({k.upper(): v}) %} + {% endfor %} - {% set sql_results={} %} - - {%- if execute -%} - {% set sql_results_table = load_result('get_query_results').table.columns %} - {% for column_name, column in sql_results_table.items() %} - {# Column names in upper case for consistency #} - {% do sql_results.update({column_name.upper(): column.values()}) %} - {% endfor %} - {%- endif -%} - - {{ return(sql_results) }} - -{% endmacro %} \ No newline at end of file + {{ return(query_results_processed) }} +{% endmacro %} diff --git a/macros/supporting/hash.sql b/macros/supporting/hash.sql index 3c9caeb8a..b1bcf6266 100644 --- a/macros/supporting/hash.sql +++ b/macros/supporting/hash.sql @@ -36,11 +36,7 @@ {#- If single column to hash -#} {%- if columns is string -%} {%- set column_str = dbtvault.as_constant(columns) -%} - {%- if dbtvault.is_expression(column_str) -%} - {%- set escaped_column_str = column_str -%} - {%- else -%} - {%- set escaped_column_str = dbtvault.escape_column_names(column_str) -%} - {%- endif -%} + {%- set escaped_column_str = dbtvault.escape_column_names(column_str) -%} {{- "CAST(({}({})) AS BINARY({})) AS {}".format(hash_alg, standardise | 
replace('[EXPRESSION]', escaped_column_str), hash_size, dbtvault.escape_column_names(alias)) | indent(4) -}} {#- Else a list of columns to hash -#} @@ -58,11 +54,7 @@ {%- do all_null.append(null_placeholder_string) -%} {%- set column_str = dbtvault.as_constant(column) -%} - {%- if dbtvault.is_expression(column_str) -%} - {%- set escaped_column_str = column_str -%} - {%- else -%} - {%- set escaped_column_str = dbtvault.escape_column_names(column_str) -%} - {%- endif -%} + {%- set escaped_column_str = dbtvault.escape_column_names(column_str) -%} {{- "\nIFNULL({}, '{}')".format(standardise | replace('[EXPRESSION]', escaped_column_str), null_placeholder_string) | indent(4) -}} {{- "," if not loop.last -}} @@ -109,11 +101,7 @@ {#- If single column to hash -#} {%- if columns is string -%} {%- set column_str = dbtvault.as_constant(columns) -%} - {%- if dbtvault.is_expression(column_str) -%} - {%- set escaped_column_str = column_str -%} - {%- else -%} - {%- set escaped_column_str = dbtvault.escape_column_names(column_str) -%} - {%- endif -%} + {%- set escaped_column_str = dbtvault.escape_column_names(column_str) -%} {{- "CAST(UPPER(TO_HEX({}({}))) AS STRING) AS {}".format(hash_alg, standardise | replace('[EXPRESSION]', escaped_column_str), dbtvault.escape_column_names(alias)) | indent(4) -}} {#- Else a list of columns to hash -#} @@ -131,11 +119,7 @@ {%- do all_null.append(null_placeholder_string) -%} {%- set column_str = dbtvault.as_constant(column) -%} - {%- if dbtvault.is_expression(column_str) -%} - {%- set escaped_column_str = column_str -%} - {%- else -%} - {%- set escaped_column_str = dbtvault.escape_column_names(column_str) -%} - {%- endif -%} + {%- set escaped_column_str = dbtvault.escape_column_names(column_str) -%} {{- "\nIFNULL({}, '{}')".format(standardise | replace('[EXPRESSION]', escaped_column_str), null_placeholder_string) | indent(4) -}} {{- ",'{}',".format(concat_string) if not loop.last -}} {%- if loop.last -%} @@ -162,6 +146,11 @@ {%- set concat_string 
= var('concat_string', '||') -%} {%- set null_placeholder_string = var('null_placeholder_string', '^^') -%} +{% if dbtvault.is_list(columns) and columns | length == 1 %} + {%- set columns = columns[0] -%} +{% endif %} + + {#- Select hashing algorithm -#} {%- if hash == 'MD5' -%} {%- set hash_alg = 'MD5' -%} @@ -181,6 +170,86 @@ {%- set columns = columns|sort -%} {%- endif -%} +{#- If single column to hash -#} +{%- if columns is string -%} + {%- set column_str = dbtvault.as_constant(columns) -%} + {%- set escaped_column_str = dbtvault.escape_column_names(column_str) -%} + {{- "CAST(HASHBYTES('{}', {}) AS BINARY({})) AS {}".format(hash_alg, standardise | replace('[EXPRESSION]', escaped_column_str), hash_size, dbtvault.escape_column_names(alias)) | indent(4) -}} + +{#- Else a list of columns to hash -#} +{%- else -%} + {%- set all_null = [] -%} + + {%- if is_hashdiff -%} + {{- "CAST(HASHBYTES('{}', (CONCAT_WS('{}',".format(hash_alg, concat_string) | indent(4) -}} + {%- else -%} + {{- "CAST(HASHBYTES('{}', (NULLIF(CONCAT_WS('{}',".format(hash_alg, concat_string) | indent(4) -}} + {%- endif -%} + + {%- for column in columns -%} + + {%- do all_null.append(null_placeholder_string) -%} + + {%- set column_str = dbtvault.as_constant(column) -%} + {%- set escaped_column_str = dbtvault.escape_column_names(column_str) -%} + {{- "\nISNULL({}, '{}')".format(standardise | replace('[EXPRESSION]', escaped_column_str), null_placeholder_string) | indent(4) -}} + {{- "," if not loop.last -}} + + {%- if loop.last -%} + + {% if is_hashdiff %} + {{- "\n))) AS BINARY({})) AS {}".format(hash_size, dbtvault.escape_column_names(alias)) -}} + {%- else -%} + {{- "\n), '{}'))) AS BINARY({})) AS {}".format(all_null | join(""), hash_size, dbtvault.escape_column_names(alias)) -}} + {%- endif -%} + {%- else -%} + + {%- do all_null.append(concat_string) -%} + + {%- endif -%} + + {%- endfor -%} + +{%- endif -%} + +{%- endmacro -%} + + +{%- macro postgres__hash(columns, alias, is_hashdiff) -%} + +{%- 
set hash = var('hash', 'MD5') -%} +{%- set concat_string = var('concat_string', '||') -%} +{%- set null_placeholder_string = var('null_placeholder_string', '^^') -%} + +{#- Select hashing algorithm -#} +{%- if hash == 'MD5' -%} + {%- set hash_alg = 'MD5' -%} +{%- elif hash == 'SHA' -%} + {%- set hash_alg = 'SHA256' -%} +{%- else -%} + {%- set hash_alg = 'MD5' -%} +{%- endif -%} + +{#- Select hashing expression (left and right sides) -#} +{#- * MD5 is simple function call to md5(val) -#} +{#- * SHA256 needs input cast to BYTEA and then its BYTEA result encoded as hex text output -#} +{#- e.g. ENCODE(SHA256(CAST(val AS BYTEA)), 'hex') -#} +{#- Ref: https://www.postgresql.org/docs/11/functions-binarystring.html -#} +{%- if hash_alg == 'MD5' -%} + {%- set hash_expr_left = 'MD5(' -%} + {%- set hash_expr_right = ')' -%} +{%- elif hash_alg == 'SHA256' -%} + {%- set hash_expr_left = 'ENCODE(SHA256(CAST(' -%} + {%- set hash_expr_right = " AS BYTEA)), 'hex')" -%} +{%- endif -%} + +{%- set standardise = "NULLIF(UPPER(TRIM(CAST([EXPRESSION] AS VARCHAR))), '')" -%} + +{#- Alpha sort columns before hashing if a hashdiff -#} +{%- if is_hashdiff and dbtvault.is_list(columns) -%} + {%- set columns = columns|sort -%} +{%- endif -%} + {#- If single column to hash -#} {%- if columns is string -%} {%- set column_str = dbtvault.as_constant(columns) -%} @@ -189,16 +258,17 @@ {%- else -%} {%- set escaped_column_str = dbtvault.escape_column_names(column_str) -%} {%- endif -%} - {{- "CAST(HASHBYTES('{}', {}) AS BINARY({})) AS {}".format(hash_alg, standardise | replace('[EXPRESSION]', escaped_column_str), hash_size, dbtvault.escape_column_names(alias)) | indent(4) -}} + + {{- "CAST(UPPER({}{}{}) AS BYTEA) AS {}".format(hash_expr_left, standardise | replace('[EXPRESSION]', escaped_column_str), hash_expr_right, dbtvault.escape_column_names(alias)) | indent(4) -}} {#- Else a list of columns to hash -#} {%- else -%} {%- set all_null = [] -%} {%- if is_hashdiff -%} - {{- "CAST(HASHBYTES('{}', 
(CONCAT_WS('{}',".format(hash_alg, concat_string) | indent(4) -}} + {{- "CAST(UPPER({}CONCAT_WS('{}',".format(hash_expr_left, concat_string) | indent(4) -}} {%- else -%} - {{- "CAST(HASHBYTES('{}', (NULLIF(CONCAT_WS('{}',".format(hash_alg, concat_string) | indent(4) -}} + {{- "CAST(UPPER({}NULLIF(CONCAT_WS('{}',".format(hash_expr_left, concat_string) | indent(4) -}} {%- endif -%} {%- for column in columns -%} @@ -211,15 +281,113 @@ {%- else -%} {%- set escaped_column_str = dbtvault.escape_column_names(column_str) -%} {%- endif -%} - {{- "\nISNULL({}, '{}')".format(standardise | replace('[EXPRESSION]', escaped_column_str), null_placeholder_string) | indent(4) -}} + + {{- "\nCOALESCE({}, '{}')".format(standardise | replace('[EXPRESSION]', escaped_column_str), null_placeholder_string) | indent(4) -}} {{- "," if not loop.last -}} {%- if loop.last -%} {% if is_hashdiff %} - {{- "\n))) AS BINARY({})) AS {}".format(hash_size, dbtvault.escape_column_names(alias)) -}} + {{- "\n){}) AS BYTEA) AS {}".format(hash_expr_right, dbtvault.escape_column_names(alias)) -}} {%- else -%} - {{- "\n), '{}'))) AS BINARY({})) AS {}".format(all_null | join(""), hash_size, dbtvault.escape_column_names(alias)) -}} + {{- "\n), '{}'){}) AS BYTEA) AS {}".format(all_null | join(""), hash_expr_right, dbtvault.escape_column_names(alias)) -}} + {%- endif -%} + {%- else -%} + + {%- do all_null.append(concat_string) -%} + + {%- endif -%} + {%- endfor -%} + +{%- endif -%} + +{%- endmacro -%} + +{%- macro databricks__hash(columns, alias, is_hashdiff) -%} + +{%- set hash = var('hash', 'MD5') -%} +{%- set concat_string = var('concat_string', '||') -%} +{%- set null_placeholder_string = var('null_placeholder_string', '^^') -%} + +{#- Select hashing algorithm -#} +{%- if hash == 'MD5' -%} + {%- set hash_alg = 'MD5' -%} +{%- elif hash == 'SHA' -%} + {%- set hash_alg = 'SHA2' -%} + {%- set bit_length = 256 -%} +{%- else -%} + {%- set hash_alg = 'MD5' -%} +{%- endif -%} + +{%- set standardise = 
"NULLIF(UPPER(TRIM(CAST([EXPRESSION] AS STRING))), '')" %} + +{#- Alpha sort columns before hashing if a hashdiff -#} +{%- if is_hashdiff and dbtvault.is_list(columns) -%} + {%- set columns = columns|sort -%} +{%- endif -%} + +{#- If single column to hash -#} +{%- if columns is string -%} + {%- set column_str = dbtvault.as_constant(columns) -%} + {%- if dbtvault.is_expression(column_str) -%} + {%- set escaped_column_str = column_str -%} + {%- else -%} + {%- set escaped_column_str = dbtvault.escape_column_names(column_str) -%} + {%- endif -%} + + {%- if hash_alg == "MD5" %} + {{- "UPPER(MD5({})) AS {}".format(standardise | replace('[EXPRESSION]', escaped_column_str), dbtvault.escape_column_names(alias)) | indent(4) -}} + {%- else %} + {{- "UPPER(SHA2({}, {})) AS {}".format(standardise | replace('[EXPRESSION]', escaped_column_str), bit_length, dbtvault.escape_column_names(alias)) | indent(4) -}} + {%- endif %} + +{#- Else a list of columns to hash -#} +{%- else -%} + {%- set all_null = [] -%} + + {%- if is_hashdiff -%} + {%- if hash_alg == "MD5" %} + {{- "UPPER(MD5(CONCAT_WS('{}',".format(concat_string) | indent(4) -}} + {%- else %} + {{- "UPPER(SHA2(CONCAT_WS('{}',".format(concat_string) | indent(4) -}} + {%- endif %} + + {%- else -%} + {%- if hash_alg == "MD5" %} + {{- "UPPER(MD5(NULLIF(CONCAT_WS('{}',".format(concat_string) | indent(4) -}} + {%- else %} + {{- "UPPER(SHA2(NULLIF(CONCAT_WS('{}',".format(concat_string) | indent(4) -}} + {%- endif %} + {%- endif -%} + + {%- for column in columns -%} + + {%- do all_null.append(null_placeholder_string) -%} + + {%- set column_str = dbtvault.as_constant(column) -%} + {%- if dbtvault.is_expression(column_str) -%} + {%- set escaped_column_str = column_str -%} + {%- else -%} + {%- set escaped_column_str = dbtvault.escape_column_names(column_str) -%} + {%- endif -%} + {{- "\nIFNULL({}, '{}')".format(standardise | replace('[EXPRESSION]', escaped_column_str), null_placeholder_string) | indent(4) -}} + {{- "," if not loop.last 
-}} + + {%- if loop.last -%} + + {% if is_hashdiff %} + {%- if hash_alg == "MD5" %} + {{- "\n))) AS {}".format(dbtvault.escape_column_names(alias)) -}} + {%- else %} + {{- "\n), {})) AS {}".format(bit_length, dbtvault.escape_column_names(alias)) -}} + {%- endif %} + {%- else -%} + {%- if hash_alg == "MD5" %} + {{- "\n), '{}'))) AS {}".format(all_null | join(""), dbtvault.escape_column_names(alias)) -}} + {%- else %} + {{- "\n), '{}'), {})) AS {}".format(all_null | join(""), bit_length, dbtvault.escape_column_names(alias)) -}} + {%- endif %} + {%- endif -%} {%- else -%} @@ -231,4 +399,4 @@ {%- endif -%} -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/supporting/max_datetime.sql b/macros/supporting/max_datetime.sql index a193a47f8..0343fc09f 100644 --- a/macros/supporting/max_datetime.sql +++ b/macros/supporting/max_datetime.sql @@ -1,23 +1,23 @@ {%- macro max_datetime() -%} - {{- return(adapter.dispatch('max_datetime', 'dbtvault')()) -}} + {%- do return(adapter.dispatch('max_datetime', 'dbtvault')()) -%} {%- endmacro %} {%- macro default__max_datetime() %} - {% do return('9999-12-31 23:59:59.999999') %} + {%- do return(var('max_datetime', '9999-12-31 23:59:59.999999')) -%} -{% endmacro -%} +{%- endmacro -%} -{%- macro sqlserver__max_datetime() %} +{%- macro sqlserver__max_datetime() -%} - {% do return('9999-12-31 23:59:59.9999999') %} + {%- do return(var('max_datetime', '9999-12-31 23:59:59.9999999')) -%} -{% endmacro -%} +{%- endmacro -%} -{%- macro bigquery__max_datetime() %} +{%- macro bigquery__max_datetime() -%} - {% do return('9999-12-31 23:59:59.999999') %} + {%- do return(var('max_datetime', '9999-12-31 23:59:59.999999')) -%} -{% endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/supporting/prefix.sql b/macros/supporting/prefix.sql index 9fa006fb8..290a8fa5b 100644 --- a/macros/supporting/prefix.sql +++ b/macros/supporting/prefix.sql @@ -59,4 +59,4 @@ {%- endif -%} {%- endif -%} -{%- endmacro 
-%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/bigquery/bridge.sql b/macros/tables/bigquery/bridge.sql index 26978d715..5c162706d 100644 --- a/macros/tables/bigquery/bridge.sql +++ b/macros/tables/bigquery/bridge.sql @@ -1,288 +1,11 @@ -{%- macro bigquery__bridge(src_pk, as_of_dates_table, bridge_walk, stage_tables_ldts, src_ldts, source_model) -%} +{%- macro bigquery__bridge(src_pk, as_of_dates_table, bridge_walk, stage_tables_ldts, src_extra_columns, src_ldts, source_model) -%} -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} +{{ dbtvault.default__bridge(src_pk=src_pk, + as_of_dates_table=as_of_dates_table, + bridge_walk=bridge_walk, + stage_tables_ldts=stage_tables_ldts, + src_extra_columns=src_extra_columns, + src_ldts=src_ldts, + source_model=source_model) }} -{{ dbtvault.prepend_generated_by() }} - -{%- if (as_of_dates_table is none) and execute -%} - {%- set error_message -%} - "Bridge error: Missing as_of_dates table configuration. A as_of_dates_table must be provided." 
- {%- endset -%} - {{- exceptions.raise_compiler_error(error_message) -}} -{%- endif -%} - -{#- Acquiring the source relation for the AS_OF table -#} -{%- if as_of_dates_table is mapping and as_of_dates_table is not none -%} - {%- set source_name = as_of_dates_table | first -%} - {%- set source_table_name = as_of_dates_table[source_name] -%} - {%- set source_relation = source(source_name, source_table_name) -%} -{%- elif as_of_dates_table is not mapping and as_of_dates_table is not none -%} - {%- set source_relation = ref(as_of_dates_table) -%} -{%- endif -%} - -{%- set max_datetime = var('max_datetime', '9999-12-31 23:59:59.999999') -%} - -{#- Stating the dependencies on the stage tables outside of the If STATEMENT -#} -{% for stg in stage_tables_ldts -%} - {{- "-- depends_on: " ~ ref(stg) -}} -{%- endfor %} - -{#- Setting the new AS_OF dates CTE name -#} -{%- if dbtvault.is_any_incremental() -%} - {%- set new_as_of_dates_cte = 'NEW_ROWS_AS_OF' -%} -{%- else -%} - {%- set new_as_of_dates_cte = 'AS_OF' -%} -{%- endif %} - -WITH as_of AS ( - SELECT a.AS_OF_DATE - FROM {{ source_relation }} AS a - WHERE a.AS_OF_DATE <= CURRENT_DATE() -), - -{%- if dbtvault.is_any_incremental() %} - -last_safe_load_datetime AS ( - SELECT MIN(LOAD_DATETIME) AS LAST_SAFE_LOAD_DATETIME - FROM ( - {%- filter indent(width=8) -%} - {%- for stg in stage_tables_ldts -%} - {%- set stage_ldts =(stage_tables_ldts[stg]) -%} - {{ "SELECT MIN(" ~ stage_ldts ~ ") AS LOAD_DATETIME FROM " ~ ref(stg) }} - {{ "UNION ALL" if not loop.last }} - {% endfor -%} - {%- endfilter -%} - ) AS l -), - -as_of_grain_old_entries AS ( - SELECT DISTINCT AS_OF_DATE - FROM {{ this }} -), - -as_of_grain_lost_entries AS ( - SELECT a.AS_OF_DATE - FROM as_of_grain_old_entries AS a - LEFT OUTER JOIN as_of AS b - ON a.AS_OF_DATE = b.AS_OF_DATE - WHERE b.AS_OF_DATE IS NULL -), - -as_of_grain_new_entries AS ( - SELECT a.AS_OF_DATE - FROM as_of AS a - LEFT OUTER JOIN as_of_grain_old_entries AS b - ON a.AS_OF_DATE = b.AS_OF_DATE - 
WHERE b.AS_OF_DATE IS NULL -), - -min_date AS ( - SELECT min(AS_OF_DATE) AS MIN_DATE - FROM as_of -), - -new_rows_pks AS ( - SELECT {{ dbtvault.prefix([src_pk], 'h') }} - FROM {{ ref(source_model) }} AS h - INNER JOIN last_safe_load_datetime - ON 1 = 1 - WHERE h.{{ src_ldts }} >= last_safe_load_datetime.LAST_SAFE_LOAD_DATETIME -), - -new_rows_as_of AS ( - (SELECT AS_OF_DATE - FROM as_of - INNER JOIN last_safe_load_datetime - ON 1 = 1 - WHERE as_of.AS_OF_DATE >= last_safe_load_datetime.LAST_SAFE_LOAD_DATETIME) - UNION DISTINCT - (SELECT as_of_date - FROM as_of_grain_new_entries) -), - -overlap_pks AS ( - SELECT {{ dbtvault.prefix([src_pk], 'p') }} - FROM {{ this }} AS p - INNER JOIN {{ ref(source_model) }} as h - ON {{ dbtvault.multikey(src_pk, prefix=['p','h'], condition='=') }} - INNER JOIN min_date - ON 1 = 1 - INNER JOIN last_safe_load_datetime - ON 1 = 1 - LEFT OUTER JOIN as_of_grain_lost_entries - ON p.AS_OF_DATE = as_of_grain_lost_entries.AS_OF_DATE - WHERE p.AS_OF_DATE >= min_date.MIN_DATE - AND p.AS_OF_DATE < last_safe_load_datetime.LAST_SAFE_LOAD_DATETIME - AND as_of_grain_lost_entries.AS_OF_DATE IS NULL -), - -overlap_as_of AS ( - SELECT p.AS_OF_DATE - FROM as_of AS p - INNER JOIN min_date - ON 1 = 1 - INNER JOIN last_safe_load_datetime - ON 1 = 1 - LEFT OUTER JOIN as_of_grain_lost_entries - ON p.AS_OF_DATE = as_of_grain_lost_entries.AS_OF_DATE - WHERE p.AS_OF_DATE >= min_date.MIN_DATE - AND p.AS_OF_DATE < last_safe_load_datetime.LAST_SAFE_LOAD_DATETIME - AND as_of_grain_lost_entries.AS_OF_DATE IS NULL -), - -overlap AS ( - SELECT - {{ dbtvault.prefix([src_pk], 'a') }}, - b.AS_OF_DATE - {%- for bridge_step in bridge_walk.keys() -%} - {%- set link_table = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_table']) -%} - {%- set link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_pk']) -%} - {%- set bridge_link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_link_pk']) -%} - {%- set eff_sat_table = 
dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_table']) -%} - {%- set bridge_end_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_end_date']) -%} - {%- set bridge_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_load_date']) -%} - {%- set eff_sat_end_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_end_date']) -%} - {%- set eff_sat_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_load_date']) -%} - {%- filter indent(width=8) %} - {{ ',' ~ link_table ~ '.' ~ link_pk ~ ' AS ' ~ bridge_link_pk }} - {{ ',' ~ eff_sat_table ~ '.' ~ eff_sat_end_date ~ ' AS ' ~ bridge_end_date }} - {{ ',' ~ eff_sat_table ~ '.' ~ eff_sat_load_date ~' AS ' ~ bridge_load_date }} - {%- endfilter -%} - {% endfor %} - FROM overlap_pks AS a - INNER JOIN overlap_as_of AS b - ON (1=1) - {%- set loop_vars = namespace(lastlink = '', last_link_fk = '') -%} - {%- for bridge_step in bridge_walk.keys() -%} - {%- set current_link = bridge_walk[bridge_step]['link_table'] -%} - {%- set current_eff_sat = bridge_walk[bridge_step]['eff_sat_table'] -%} - {%- set link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_pk']) -%} - {%- set link_fk1 = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_fk1']) -%} - {%- set link_fk2 = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_fk2']) -%} - {%- set eff_sat_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_pk']) -%} - {%- set eff_sat_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_load_date']) -%} - {%- if loop.first %} - LEFT JOIN {{ ref(current_link) }} AS {{ dbtvault.escape_column_names(current_link) }} - ON a.{{ src_pk }} = {{ dbtvault.escape_column_names(current_link) }}.{{ link_fk1 }} - {%- else %} - LEFT JOIN {{ ref(current_link) }} AS {{ dbtvault.escape_column_names(current_link) }} - ON {{ loop_vars.last_link }}.{{ loop_vars.last_link_fk2 }} = {{ 
dbtvault.escape_column_names(current_link) }}.{{ link_fk1 }} - {%- endif %} - INNER JOIN {{ ref(current_eff_sat) }} AS {{ dbtvault.escape_column_names(current_eff_sat) }} - ON {{ dbtvault.escape_column_names(current_eff_sat) }}.{{ eff_sat_pk }} = {{ dbtvault.escape_column_names(current_link) }}.{{ link_pk }} - AND {{ dbtvault.escape_column_names(current_eff_sat) }}.{{ eff_sat_load_date }} <= b.AS_OF_DATE - {%- set loop_vars.last_link = current_link -%} - {%- set loop_vars.last_link_fk2 = link_fk2 -%} - {% endfor %} -), -{%- endif %} - -new_rows AS ( - SELECT - {{ dbtvault.prefix([src_pk], 'a') }}, - b.AS_OF_DATE - {%- for bridge_step in bridge_walk.keys() -%} - {%- set link_table = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_table']) -%} - {%- set link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_pk']) -%} - {%- set bridge_link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_link_pk']) -%} - {%- set eff_sat_table = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_table']) -%} - {%- set bridge_end_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_end_date']) -%} - {%- set bridge_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_load_date']) -%} - {%- set eff_sat_end_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_end_date']) -%} - {%- set eff_sat_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_load_date']) -%} - {%- filter indent(width=8) -%} - {{ ',' ~ link_table ~'.'~ link_pk ~' AS '~ bridge_link_pk }} - {{ ',' ~ eff_sat_table ~ '.' ~ eff_sat_end_date ~ ' AS ' ~ bridge_end_date }} - {{ ',' ~ eff_sat_table ~ '.' 
~ eff_sat_load_date ~ ' AS ' ~ bridge_load_date }} - {%- endfilter -%} - {% endfor %} - FROM {{ ref(source_model) }} AS a - INNER JOIN {{ new_as_of_dates_cte }} AS b - ON (1=1) - {%- set loop_vars = namespace(lastlink = '', last_link_fk = '') %} - {%- for bridge_step in bridge_walk.keys() -%} - {%- set current_link = bridge_walk[bridge_step]['link_table'] -%} - {%- set current_eff_sat = bridge_walk[bridge_step]['eff_sat_table'] -%} - {%- set link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_pk']) -%} - {%- set link_fk1 = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_fk1']) -%} - {%- set link_fk2 = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_fk2']) -%} - {%- set eff_sat_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_pk']) -%} - {%- set eff_sat_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_load_date']) -%} - {%- if loop.first %} - LEFT JOIN {{ ref(current_link) }} AS {{ dbtvault.escape_column_names(current_link) }} - ON a.{{ src_pk }} = {{ dbtvault.escape_column_names(current_link) }}.{{ link_fk1 }} - {%- else %} - LEFT JOIN {{ ref(current_link) }} AS {{ dbtvault.escape_column_names(current_link) }} - ON {{ loop_vars.last_link }}.{{ loop_vars.last_link_fk2 }} = {{ dbtvault.escape_column_names(current_link) }}.{{ link_fk1 }} - {%- endif %} - INNER JOIN {{ ref(current_eff_sat) }} AS {{ dbtvault.escape_column_names(current_eff_sat) }} - ON {{ dbtvault.escape_column_names(current_eff_sat) }}.{{ eff_sat_pk }} = {{ dbtvault.escape_column_names(current_link) }}.{{ link_pk }} - AND {{ dbtvault.escape_column_names(current_eff_sat) }}.{{ eff_sat_load_date }} <= b.AS_OF_DATE - {%- set loop_vars.last_link = dbtvault.escape_column_names(current_link) -%} - {%- set loop_vars.last_link_fk2 = link_fk2 -%} - {% endfor %} -), - -{# Full data from bridge walk(s) -#} -all_rows AS ( - SELECT * FROM new_rows - {%- if dbtvault.is_any_incremental() %} - UNION ALL - SELECT * FROM overlap - {%- 
endif %} -), - -{# Select most recent set of relationship key(s) for each as of date -#} - -candidate_rows AS ( - SELECT * - FROM ( - SELECT *, - ROW_NUMBER() OVER ( - PARTITION BY AS_OF_DATE, - {%- for bridge_step in bridge_walk.keys() -%} - {% set bridge_link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_link_pk']) -%} - {%- if loop.first %} - {{ bridge_link_pk }} - {%- else %} - {{ ','~ bridge_link_pk }} - {%- endif -%} - {%- endfor %} - ORDER BY - {%- for bridge_step in bridge_walk.keys() -%} - {% set bridge_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_load_date']) %} - {%- if loop.first %} - {{ bridge_load_date ~' DESC' }} - {%- else %} - {{ ','~ bridge_load_date ~' DESC' }} - {%- endif -%} - {%- endfor %} - ) AS row_num - FROM all_rows - ) AS a - WHERE a.row_num = 1 -), - -bridge AS ( - SELECT - {{ dbtvault.prefix([src_pk], 'c') }}, - c.AS_OF_DATE - {%- for bridge_step in bridge_walk.keys() -%} - {% set bridge_link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_link_pk']) -%} - {{ ',c.' ~ bridge_link_pk }} - {%- endfor %} - FROM candidate_rows AS c - {%- for bridge_step in bridge_walk.keys() -%} - {%- set bridge_end_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_end_date']) -%} - {%- if loop.first %} - WHERE DATE({{ 'c.' ~ bridge_end_date }}) = CAST(PARSE_DATETIME('%F %H:%M:%E6S', '{{ max_datetime }}') AS DATE) - {%- else %} - AND DATE({{ 'c.' 
~ bridge_end_date }}) = CAST(PARSE_DATETIME('%F %H:%M:%E6S', '{{ max_datetime }}') AS DATE) - {%- endif -%} - {%- endfor %} -) - -SELECT * FROM bridge - -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/bigquery/eff_sat.sql b/macros/tables/bigquery/eff_sat.sql index 3655a6e41..9b79a004a 100644 --- a/macros/tables/bigquery/eff_sat.sql +++ b/macros/tables/bigquery/eff_sat.sql @@ -1,27 +1,11 @@ -{%- macro bigquery__eff_sat(src_pk, src_dfk, src_sfk, src_start_date, src_end_date, src_eff, src_ldts, src_source, source_model) -%} - -{{- dbtvault.check_required_parameters(src_pk=src_pk, src_dfk=src_dfk, src_sfk=src_sfk, - src_start_date=src_start_date, src_end_date=src_end_date, - src_eff=src_eff, src_ldts=src_ldts, src_source=src_source, - source_model=source_model) -}} - -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_dfk = dbtvault.escape_column_names(src_dfk) -%} -{%- set src_sfk = dbtvault.escape_column_names(src_sfk) -%} -{%- set src_start_date = dbtvault.escape_column_names(src_start_date) -%} -{%- set src_end_date = dbtvault.escape_column_names(src_end_date) -%} -{%- set src_eff = dbtvault.escape_column_names(src_eff) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} -{%- set src_source = dbtvault.escape_column_names(src_source) -%} - -{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_dfk, src_sfk, src_start_date, src_end_date, src_eff, src_ldts, src_source]) -%} +{%- macro bigquery__eff_sat(src_pk, src_dfk, src_sfk, src_extra_columns, src_start_date, src_end_date, src_eff, src_ldts, src_source, source_model) -%} + +{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_dfk, src_sfk, src_extra_columns, src_start_date, src_end_date, src_eff, src_ldts, src_source]) -%} {%- set fk_cols = dbtvault.expand_column_list(columns=[src_dfk, src_sfk]) -%} {%- set dfk_cols = dbtvault.expand_column_list(columns=[src_dfk]) -%} {%- set is_auto_end_dating = 
config.get('is_auto_end_dating', default=false) %} -{%- set max_datetime = var('max_datetime', '9999-12-31 23:59:59.999999') %} - -{{- dbtvault.prepend_generated_by() }} +{%- set max_datetime = dbtvault.max_datetime() %} WITH source_data AS ( SELECT {{ dbtvault.prefix(source_cols, 'a', alias_target='source') }} @@ -72,6 +56,9 @@ new_open_records AS ( SELECT DISTINCT {{ dbtvault.prefix([src_pk], 'f') }}, {{ dbtvault.alias_all(fk_cols, 'f') }}, + {% if dbtvault.is_something(src_extra_columns) %} + {{ dbtvault.prefix([src_extra_columns], 'f') }}, + {% endif -%} {%- if is_auto_end_dating %} f.{{ src_eff }} AS {{ src_start_date }}, {% else %} @@ -92,6 +79,9 @@ new_reopened_records AS ( SELECT DISTINCT {{ dbtvault.prefix([src_pk], 'lc') }}, {{ dbtvault.alias_all(fk_cols, 'lc') }}, + {% if dbtvault.is_something(src_extra_columns) %} + {{ dbtvault.prefix([src_extra_columns], 'g') }}, + {% endif -%} {%- if is_auto_end_dating %} g.{{ src_eff }} AS {{ src_start_date }}, {% else %} @@ -115,6 +105,9 @@ new_closed_records AS ( SELECT DISTINCT {{ dbtvault.prefix([src_pk], 'lo') }}, {{ dbtvault.alias_all(fk_cols, 'lo') }}, + {% if dbtvault.is_something(src_extra_columns) %} + {{ dbtvault.prefix([src_extra_columns], 'h') }}, + {% endif -%} lo.{{ src_start_date }} AS {{ src_start_date }}, h.{{ src_eff }} AS {{ src_end_date }}, h.{{ src_eff }} AS {{ src_eff }}, @@ -133,6 +126,9 @@ new_closed_records AS ( SELECT DISTINCT {{ dbtvault.prefix([src_pk], 'lo') }}, {{ dbtvault.alias_all(fk_cols, 'lo') }}, + {% if dbtvault.is_something(src_extra_columns) %} + {{ dbtvault.prefix([src_extra_columns], 'h') }}, + {% endif -%} h.{{ src_start_date }} AS {{ src_start_date }}, h.{{ src_end_date }} AS {{ src_end_date }}, h.{{ src_eff }} AS {{ src_eff }}, @@ -173,4 +169,4 @@ records_to_insert AS ( SELECT * FROM records_to_insert -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/bigquery/hub.sql b/macros/tables/bigquery/hub.sql index 9eb823a70..1a5588cf9 100644 
--- a/macros/tables/bigquery/hub.sql +++ b/macros/tables/bigquery/hub.sql @@ -1,27 +1,14 @@ -{%- macro bigquery__hub(src_pk, src_nk, src_ldts, src_source, source_model) -%} +{%- macro bigquery__hub(src_pk, src_nk, src_extra_columns, src_ldts, src_source, source_model) -%} -{{- dbtvault.check_required_parameters(src_pk=src_pk, src_nk=src_nk, - src_ldts=src_ldts, src_source=src_source, - source_model=source_model) -}} - -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_nk = dbtvault.escape_column_names(src_nk) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} -{%- set src_source = dbtvault.escape_column_names(src_source) -%} - -{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_nk, src_ldts, src_source]) -%} +{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_nk, src_extra_columns, src_ldts, src_source]) -%} {%- if model.config.materialized == 'vault_insert_by_rank' %} {%- set source_cols_with_rank = source_cols + dbtvault.escape_column_names([config.get('rank_column')]) -%} -{%- endif -%} - -{{ dbtvault.prepend_generated_by() }} +{%- endif %} {{ 'WITH ' -}} -{%- if not (source_model is iterable and source_model is not string) -%} - {%- set source_model = [source_model] -%} -{%- endif -%} +{%- set stage_count = source_model | length -%} {%- set ns = namespace(last_cte= "") -%} @@ -46,7 +33,7 @@ ), {% endfor -%} -{% if source_model | length > 1 %} +{% if stage_count > 1 %} stage_union AS ( {%- for src in source_model %} SELECT * FROM row_rank_{{ loop.index | string }} @@ -72,7 +59,7 @@ stage_mat_filter AS ( {%- set ns.last_cte = "stage_mat_filter" %} ), {%- endif -%} -{%- if source_model | length > 1 %} +{%- if stage_count > 1 %} row_rank_union AS ( SELECT ru.*, @@ -99,4 +86,4 @@ records_to_insert AS ( SELECT * FROM records_to_insert -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/bigquery/link.sql b/macros/tables/bigquery/link.sql index 
486637c35..982adc1bc 100644 --- a/macros/tables/bigquery/link.sql +++ b/macros/tables/bigquery/link.sql @@ -1,23 +1,15 @@ -{%- macro bigquery__link(src_pk, src_fk, src_ldts, src_source, source_model) -%} +{%- macro bigquery__link(src_pk, src_fk, src_extra_columns, src_ldts, src_source, source_model) -%} -{{- dbtvault.check_required_parameters(src_pk=src_pk, src_fk=src_fk, - src_ldts=src_ldts, src_source=src_source, - source_model=source_model) -}} - -{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_fk, src_ldts, src_source]) -%} +{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_fk, src_extra_columns, src_ldts, src_source]) -%} {%- set fk_cols = dbtvault.expand_column_list([src_fk]) -%} {%- if model.config.materialized == 'vault_insert_by_rank' %} {%- set source_cols_with_rank = source_cols + [config.get('rank_column')] -%} -{%- endif -%} - -{{ dbtvault.prepend_generated_by() }} +{%- endif %} {{ 'WITH ' -}} -{%- if not (source_model is iterable and source_model is not string) -%} - {%- set source_model = [source_model] -%} -{%- endif -%} +{%- set stage_count = source_model | length -%} {%- set ns = namespace(last_cte= "") -%} @@ -36,7 +28,7 @@ row_rank_{{ source_number }} AS ( ORDER BY {{ dbtvault.prefix([src_ldts], 'rr') }} ) AS row_number FROM {{ ref (src) }} AS rr - {%- if source_model | length == 1 %} + {%- if stage_count == 1 %} WHERE {{ dbtvault.multikey(src_pk, prefix='rr', condition ='IS NOT NULL') }} AND {{ dbtvault.multikey(fk_cols, prefix='rr', condition ='IS NOT NULL') }} QUALIFY row_number = 1 @@ -46,7 +38,7 @@ row_rank_{{ source_number }} AS ( {% endfor -%} -{% if source_model | length > 1 %} +{% if stage_count > 1 %} stage_union AS ( {%- for src in source_model %} SELECT * FROM row_rank_{{ loop.index | string }} @@ -72,7 +64,7 @@ stage_mat_filter AS ( {%- set ns.last_cte = "stage_mat_filter" %} ), {% endif %} -{%- if source_model | length > 1 %} +{%- if stage_count > 1 %} row_rank_union AS ( SELECT ru.*, @@ 
-99,4 +91,4 @@ records_to_insert AS ( SELECT * FROM records_to_insert -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/bigquery/ma_sat.sql b/macros/tables/bigquery/ma_sat.sql index 38553323e..3f3e0eeb5 100644 --- a/macros/tables/bigquery/ma_sat.sql +++ b/macros/tables/bigquery/ma_sat.sql @@ -1,27 +1,14 @@ -{%- macro bigquery__ma_sat(src_pk, src_cdk, src_hashdiff, src_payload, src_eff, src_ldts, src_source, source_model) -%} +{%- macro bigquery__ma_sat(src_pk, src_cdk, src_hashdiff, src_payload, src_extra_columns, src_eff, src_ldts, src_source, source_model) %} - -{{- dbtvault.check_required_parameters(src_pk=src_pk, src_cdk=src_cdk, src_hashdiff=src_hashdiff, - src_payload=src_payload, src_ldts=src_ldts, src_source=src_source, - source_model=source_model) -}} - -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_cdk = dbtvault.escape_column_names(src_cdk) -%} -{%- set src_hashdiff = dbtvault.escape_column_names(src_hashdiff) -%} -{%- set src_payload = dbtvault.escape_column_names(src_payload) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} -{%- set src_source = dbtvault.escape_column_names(src_source) -%} - -{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, src_cdk, src_payload, src_eff, src_ldts, src_source]) -%} +{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_cdk, src_payload, src_extra_columns, src_hashdiff, src_eff, src_ldts, src_source]) -%} {%- set rank_cols = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, src_ldts]) -%} {%- set cdk_cols = dbtvault.expand_column_list(columns=[src_cdk]) -%} +{%- set cols_for_latest = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, src_cdk, src_ldts]) %} {%- if model.config.materialized == 'vault_insert_by_rank' -%} {%- set source_cols_with_rank = source_cols + [config.get('rank_column')] -%} {%- endif -%} -{{ dbtvault.prepend_generated_by() }} - {# Select unique source 
records -#} WITH source_data AS ( {%- if model.config.materialized == 'vault_insert_by_rank' %} @@ -45,13 +32,12 @@ WITH source_data AS ( {% if dbtvault.is_any_incremental() %} source_data_with_count AS ( - SELECT a.* - ,b.source_count + SELECT a.*, + b.source_count FROM source_data a - INNER JOIN - ( - SELECT {{ dbtvault.prefix([src_pk], 't') }} - ,COUNT(*) AS source_count + INNER JOIN ( + SELECT {{ dbtvault.prefix([src_pk], 't') }}, + COUNT(*) AS source_count FROM (SELECT DISTINCT {{ dbtvault.prefix([src_pk], 's') }}, {{ dbtvault.prefix([src_hashdiff], 's', alias_target='source') }}, {{ dbtvault.prefix(cdk_cols, 's') }} FROM source_data AS s) AS t GROUP BY {{ dbtvault.prefix([src_pk], 't') }} ) AS b @@ -60,21 +46,17 @@ source_data_with_count AS ( {# Select latest records from satellite, restricted to PKs in source data -#} latest_records AS ( - SELECT {{ dbtvault.prefix([src_pk], 'mas', alias_target='target') }} - ,{{ dbtvault.prefix([src_hashdiff], 'mas', alias_target='target') }} - ,{{ dbtvault.prefix([src_cdk], 'mas', alias_target='target') }} - ,{{ dbtvault.prefix([src_ldts], 'mas', alias_target='target') }} - ,mas.latest_rank - ,DENSE_RANK() OVER (PARTITION BY {{ dbtvault.prefix([src_pk], 'mas') }} - ORDER BY {{ dbtvault.prefix([src_hashdiff], 'mas', alias_target='target') }}, {{ dbtvault.prefix([src_cdk], 'mas') }} ASC) AS check_rank - FROM - ( - SELECT {{ dbtvault.prefix([src_pk], 'inner_mas', alias_target='target') }} - ,{{ dbtvault.prefix([src_hashdiff], 'inner_mas', alias_target='target') }} - ,{{ dbtvault.prefix([src_cdk], 'inner_mas', alias_target='target') }} - ,{{ dbtvault.prefix([src_ldts], 'inner_mas', alias_target='target') }} - ,RANK() OVER (PARTITION BY {{ dbtvault.prefix([src_pk], 'inner_mas') }} - ORDER BY {{ dbtvault.prefix([src_ldts], 'inner_mas') }} DESC) AS latest_rank + SELECT {{ dbtvault.prefix(cols_for_latest, 'mas', alias_target='target') }}, + mas.latest_rank, + DENSE_RANK() OVER (PARTITION BY {{ dbtvault.prefix([src_pk], 'mas') }} + 
ORDER BY {{ dbtvault.prefix([src_hashdiff], 'mas', alias_target='target') }}, + {{ dbtvault.prefix([src_cdk], 'mas') }} ASC + ) AS check_rank + FROM ( + SELECT {{ dbtvault.prefix(cols_for_latest, 'inner_mas', alias_target='target') }}, + RANK() OVER (PARTITION BY {{ dbtvault.prefix([src_pk], 'inner_mas') }} + ORDER BY {{ dbtvault.prefix([src_ldts], 'inner_mas') }} DESC + ) AS latest_rank FROM {{ this }} AS inner_mas INNER JOIN (SELECT DISTINCT {{ dbtvault.prefix([src_pk], 's') }} FROM source_data as s ) AS spk ON {{ dbtvault.multikey([src_pk], prefix=['inner_mas', 'spk'], condition='=') }} @@ -84,9 +66,9 @@ latest_records AS ( {# Select summary details for each group of latest records -#} latest_group_details AS ( - SELECT {{ dbtvault.prefix([src_pk], 'lr') }} - ,{{ dbtvault.prefix([src_ldts], 'lr') }} - ,MAX(lr.check_rank) AS latest_count + SELECT {{ dbtvault.prefix([src_pk], 'lr') }}, + {{ dbtvault.prefix([src_ldts], 'lr') }}, + MAX(lr.check_rank) AS latest_count FROM latest_records AS lr GROUP BY {{ dbtvault.prefix([src_pk], 'lr') }}, {{ dbtvault.prefix([src_ldts], 'lr') }} ), @@ -105,20 +87,14 @@ records_to_insert AS ( {% if dbtvault.is_any_incremental() %} SELECT {{ dbtvault.alias_all(source_cols, 'source_data_with_count') }} FROM source_data_with_count - WHERE EXISTS - ( + WHERE EXISTS ( SELECT 1 FROM source_data_with_count AS stage - WHERE NOT EXISTS - ( + WHERE NOT EXISTS ( SELECT 1 - FROM - ( - SELECT {{ dbtvault.prefix([src_pk], 'lr', alias_target='target') }} - ,{{ dbtvault.prefix([src_hashdiff], 'lr', alias_target='target') }} - ,{{ dbtvault.prefix([src_cdk], 'lr', alias_target='target') }} - ,{{ dbtvault.prefix([src_ldts], 'lr', alias_target='target') }} - ,lg.latest_count + FROM ( + SELECT {{ dbtvault.prefix(cols_for_latest, 'lr', alias_target='target') }}, + lg.latest_count FROM latest_records AS lr INNER JOIN latest_group_details AS lg ON {{ dbtvault.multikey([src_pk], prefix=['lr', 'lg'], condition='=') }} @@ -139,4 +115,4 @@ records_to_insert AS ( 
SELECT * FROM records_to_insert -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/bigquery/pit.sql b/macros/tables/bigquery/pit.sql index 0b21a6919..ce7206740 100644 --- a/macros/tables/bigquery/pit.sql +++ b/macros/tables/bigquery/pit.sql @@ -1,21 +1,4 @@ -{%- macro bigquery__pit(src_pk, as_of_dates_table, satellites, stage_tables, src_ldts, source_model) -%} - -{{- dbtvault.check_required_parameters(source_model=source_model, src_pk=src_pk, - satellites=satellites, - stage_tables=stage_tables, - src_ldts=src_ldts) -}} - -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} - -{{ dbtvault.prepend_generated_by() }} - -{%- if (as_of_dates_table is none) and execute -%} - {%- set error_message -%} - "pit error: Missing as_of_dates table configuration. A as_of_dates_table must be provided." - {%- endset -%} - {{- exceptions.raise_compiler_error(error_message) -}} -{%- endif -%} +{%- macro bigquery__pit(src_pk, src_extra_columns, as_of_dates_table, satellites, stage_tables_ldts, src_ldts, source_model) %} {#- Acquiring the source relation for the AS_OF table -#} {%- if as_of_dates_table is mapping and as_of_dates_table is not none -%} @@ -30,101 +13,19 @@ {%- set ghost_pk = '0x0000000000000000' -%} {%- set ghost_date = '1900-01-01 00:00:00.000000' %} -{# Stating the dependancies on the stage tables outside of the If STATEMENT #} -{% for stg in stage_tables -%} - {{ "-- depends_on: " ~ ref(stg) }} -{% endfor %} - -{#- Setting the new AS_OF dates CTE name -#} {%- if dbtvault.is_any_incremental() -%} -{%- set new_as_of_dates_cte = 'new_rows_as_of' -%} + {%- set new_as_of_dates_cte = 'new_rows_as_of' -%} {%- else -%} -{%- set new_as_of_dates_cte = 'as_of_dates' -%} + {%- set new_as_of_dates_cte = 'as_of_dates' -%} {%- endif %} - WITH as_of_dates AS ( SELECT * FROM {{ as_of_table_relation }} ), {%- if dbtvault.is_any_incremental() %} -last_safe_load_datetime AS ( - 
SELECT MIN(LOAD_DATETIME) AS LAST_SAFE_LOAD_DATETIME FROM ( - {%- for stg in stage_tables -%} - {%- set stage_ldts = stage_tables[stg] %} - SELECT MIN({{ stage_ldts }}) AS LOAD_DATETIME FROM {{ ref(stg) }} - {{ "UNION ALL" if not loop.last }} - {%- endfor %} - ) -), - -as_of_grain_old_entries AS ( - SELECT DISTINCT AS_OF_DATE FROM {{ this }} -), - -as_of_grain_lost_entries AS ( - SELECT a.AS_OF_DATE - FROM as_of_grain_old_entries AS a - LEFT OUTER JOIN as_of_dates AS b - ON a.AS_OF_DATE = b.AS_OF_DATE - WHERE b.AS_OF_DATE IS NULL -), - -as_of_grain_new_entries AS ( - SELECT a.AS_OF_DATE - FROM as_of_dates AS a - LEFT OUTER JOIN as_of_grain_old_entries AS b - ON a.AS_OF_DATE = b.AS_OF_DATE - WHERE b.AS_OF_DATE IS NULL -), - -min_date AS ( - SELECT min(AS_OF_DATE) AS MIN_DATE - FROM as_of_dates -), - -backfill_as_of AS ( - SELECT AS_OF_DATE - FROM as_of_dates AS a - INNER JOIN last_safe_load_datetime as l - ON a.AS_OF_DATE < l.LAST_SAFE_LOAD_DATETIME -), - -new_rows_pks AS ( - SELECT {{ dbtvault.prefix([src_pk], 'a') }} - FROM {{ ref(source_model) }} AS a - INNER JOIN last_safe_load_datetime as l - ON a.{{ src_ldts }} >= l.LAST_SAFE_LOAD_DATETIME -), - -new_rows_as_of AS ( - SELECT AS_OF_DATE - FROM as_of_dates AS a - INNER JOIN last_safe_load_datetime as l - ON a.AS_OF_DATE >= l.LAST_SAFE_LOAD_DATETIME - UNION DISTINCT - SELECT AS_OF_DATE - FROM as_of_grain_new_entries -), - -overlap AS ( - SELECT a.* - FROM {{ this }} AS a - INNER JOIN {{ ref(source_model) }} as b - ON {{ dbtvault.multikey(src_pk, prefix=['a','b'], condition='=') }} - INNER JOIN min_date - ON 1 = 1 - INNER JOIN last_safe_load_datetime - ON 1 = 1 - LEFT OUTER JOIN as_of_grain_lost_entries - ON a.AS_OF_DATE = as_of_grain_lost_entries.AS_OF_DATE - WHERE a.AS_OF_DATE >= min_date.MIN_DATE - AND a.AS_OF_DATE < last_safe_load_datetime.LAST_SAFE_LOAD_DATETIME - AND as_of_grain_lost_entries.AS_OF_DATE IS NULL -), - -{#- Back-fill any newly arrived hubs, set all historical pit dates to ghost records -#} +{{ 
dbtvault.as_of_date_window(src_pk, src_ldts, stage_tables_ldts, ref(source_model)) }}, backfill_rows_as_of_dates AS ( SELECT @@ -183,8 +84,10 @@ new_rows AS ( {%- set sat_ldts_name = (satellites[sat_name]['ldts'].keys() | list )[0] -%} {%- set sat_pk = dbtvault.escape_column_names(satellites[sat_name]['pk'][sat_pk_name]) -%} {%- set sat_ldts = dbtvault.escape_column_names(satellites[sat_name]['ldts'][sat_ldts_name]) %} - COALESCE(MAX({{ dbtvault.escape_column_names( sat_name | lower ~ '_src' ) }}.{{ sat_pk }}), '{{ ghost_pk }}') AS {{ sat_name | upper }}_{{ sat_pk_name | upper }}, - COALESCE(MAX({{ dbtvault.escape_column_names( sat_name | lower ~ '_src' ) }}.{{ sat_ldts }}), PARSE_DATETIME('%F %H:%M:%E6S', '{{ ghost_date }}')) AS {{ sat_name | upper }}_{{ sat_ldts_name | upper }} + COALESCE(MAX({{ dbtvault.escape_column_names( sat_name | lower ~ '_src' ) }}.{{ sat_pk }}), + '{{ ghost_pk }}') AS {{ sat_name | upper }}_{{ sat_pk_name | upper }}, + COALESCE(MAX({{ dbtvault.escape_column_names( sat_name | lower ~ '_src' ) }}.{{ sat_ldts }}), + PARSE_DATETIME('%F %H:%M:%E6S', '{{ ghost_date }}')) AS {{ sat_name | upper }}_{{ sat_ldts_name | upper }} {{- "," if not loop.last }} {%- endfor %} FROM new_rows_as_of_dates AS a @@ -209,13 +112,12 @@ pit AS ( SELECT * FROM new_rows {%- if dbtvault.is_any_incremental() %} UNION ALL - SELECT * FROM overlap + SELECT * FROM overlap_pks UNION ALL SELECT * FROM backfill - {%- endif %} ) SELECT DISTINCT * FROM pit -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/bigquery/sat.sql b/macros/tables/bigquery/sat.sql index b0a1f4589..69af3ecf7 100644 --- a/macros/tables/bigquery/sat.sql +++ b/macros/tables/bigquery/sat.sql @@ -1,74 +1,9 @@ -{%- macro bigquery__sat(src_pk, src_hashdiff, src_payload, src_eff, src_ldts, src_source, source_model) -%} +{%- macro bigquery__sat(src_pk, src_hashdiff, src_payload, src_extra_columns, src_eff, src_ldts, src_source, source_model) -%} -{{- 
dbtvault.check_required_parameters(src_pk=src_pk, src_hashdiff=src_hashdiff, src_payload=src_payload, - src_ldts=src_ldts, src_source=src_source, - source_model=source_model) -}} + {{ dbtvault.default__sat(src_pk=src_pk, src_hashdiff=src_hashdiff, + src_payload=src_payload, + src_extra_columns=src_extra_columns, + src_eff=src_eff, src_ldts=src_ldts, + src_source=src_source, source_model=source_model) }} -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_hashdiff = dbtvault.escape_column_names(src_hashdiff) -%} -{%- set src_payload = dbtvault.escape_column_names(src_payload) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} -{%- set src_source = dbtvault.escape_column_names(src_source) -%} - -{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, src_payload, src_eff, src_ldts, src_source]) -%} -{%- set rank_cols = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, src_ldts]) -%} -{%- set pk_cols = dbtvault.expand_column_list(columns=[src_pk]) -%} - -{%- if model.config.materialized == 'vault_insert_by_rank' %} - {%- set source_cols_with_rank = source_cols + dbtvault.escape_column_names([config.get('rank_column')]) -%} -{%- endif -%} - -{{ dbtvault.prepend_generated_by() }} - -WITH source_data AS ( - {%- if model.config.materialized == 'vault_insert_by_rank' %} - SELECT {{ dbtvault.prefix(source_cols_with_rank, 'a', alias_target='source') }} - {%- else %} - SELECT {{ dbtvault.prefix(source_cols, 'a', alias_target='source') }} - {%- endif %} - FROM {{ ref(source_model) }} AS a - WHERE {{ dbtvault.multikey(src_pk, prefix='a', condition='IS NOT NULL') }} - {%- if model.config.materialized == 'vault_insert_by_period' %} - AND __PERIOD_FILTER__ - {% elif model.config.materialized == 'vault_insert_by_rank' %} - AND __RANK_FILTER__ - {% endif %} -), - -{% if dbtvault.is_any_incremental() %} - -latest_records AS ( - SELECT {{ dbtvault.prefix(rank_cols, 'a', alias_target='target') }} - FROM - ( - SELECT 
{{ dbtvault.prefix(rank_cols, 'current_records', alias_target='target') }}, - RANK() OVER ( - PARTITION BY {{ dbtvault.prefix([src_pk], 'current_records') }} - ORDER BY {{ dbtvault.prefix([src_ldts], 'current_records') }} DESC - ) AS rank - FROM {{ this }} AS current_records - JOIN ( - SELECT DISTINCT {{ dbtvault.prefix([src_pk], 'source_data') }} - FROM source_data - ) AS source_records - ON {{ dbtvault.multikey(src_pk, prefix=['current_records','source_records'], condition='=') }} - ) AS a - WHERE a.rank = 1 -), - -{%- endif %} - -records_to_insert AS ( - SELECT DISTINCT {{ dbtvault.alias_all(source_cols, 'stage') }} - FROM source_data AS stage - {%- if dbtvault.is_any_incremental() %} - LEFT JOIN latest_records - ON {{ dbtvault.multikey(src_pk, prefix=['latest_records','stage'], condition='=') }} - WHERE {{ dbtvault.prefix([src_hashdiff], 'latest_records', alias_target='target') }} != {{ dbtvault.prefix([src_hashdiff], 'stage') }} - OR {{ dbtvault.prefix([src_hashdiff], 'latest_records', alias_target='target') }} IS NULL - {%- endif %} -) - -SELECT * FROM records_to_insert - -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/bigquery/t_link.sql b/macros/tables/bigquery/t_link.sql index f0efb2e4e..1b2f46da7 100644 --- a/macros/tables/bigquery/t_link.sql +++ b/macros/tables/bigquery/t_link.sql @@ -1,7 +1,8 @@ -{%- macro bigquery__t_link(src_pk, src_fk, src_payload, src_eff, src_ldts, src_source, source_model) -%} +{%- macro bigquery__t_link(src_pk, src_fk, src_payload, src_extra_columns, src_eff, src_ldts, src_source, source_model) -%} {{ dbtvault.default__t_link(src_pk=src_pk, src_fk=src_fk, src_payload=src_payload, + src_extra_columns=src_extra_columns, src_eff=src_eff, src_ldts=src_ldts, src_source=src_source, source_model=source_model) }} -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/bigquery/xts.sql b/macros/tables/bigquery/xts.sql index 789ee0ad3..0a4515677 100644 --- 
a/macros/tables/bigquery/xts.sql +++ b/macros/tables/bigquery/xts.sql @@ -1,9 +1,10 @@ -{%- macro bigquery__xts(src_pk, src_satellite, src_ldts, src_source, source_model) -%} +{%- macro bigquery__xts(src_pk, src_satellite, src_extra_columns, src_ldts, src_source, source_model) -%} {{ dbtvault.default__xts(src_pk=src_pk, src_satellite=src_satellite, + src_extra_columns=src_extra_columns, src_ldts=src_ldts, src_source=src_source, source_model=source_model) }} -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/databricks/hub.sql b/macros/tables/databricks/hub.sql new file mode 100644 index 000000000..2ec1b1b5b --- /dev/null +++ b/macros/tables/databricks/hub.sql @@ -0,0 +1,10 @@ +{%- macro databricks__hub(src_pk, src_nk, src_extra_columns, src_ldts, src_source, source_model) -%} + +{{ dbtvault.default__hub(src_pk=src_pk, + src_nk=src_nk, + src_extra_columns=src_extra_columns, + src_ldts=src_ldts, + src_source=src_source, + source_model=source_model) }} + +{%- endmacro -%} \ No newline at end of file diff --git a/macros/tables/databricks/link.sql b/macros/tables/databricks/link.sql new file mode 100644 index 000000000..f6007b02c --- /dev/null +++ b/macros/tables/databricks/link.sql @@ -0,0 +1,10 @@ +{%- macro databricks__link(src_pk, src_fk, src_extra_columns, src_ldts, src_source, source_model) -%} + +{{ dbtvault.default__link(src_pk=src_pk, + src_fk=src_fk, + src_extra_columns=src_extra_columns, + src_ldts=src_ldts, + src_source=src_source, + source_model=source_model) }} + +{%- endmacro -%} diff --git a/macros/tables/databricks/sat.sql b/macros/tables/databricks/sat.sql new file mode 100644 index 000000000..47bd6baf2 --- /dev/null +++ b/macros/tables/databricks/sat.sql @@ -0,0 +1,12 @@ +{%- macro databricks__sat(src_pk, src_hashdiff, src_payload, src_extra_columns, src_eff, src_ldts, src_source, source_model) -%} + +{{ dbtvault.default__sat(src_pk=src_pk, + src_hashdiff=src_hashdiff, + src_payload=src_payload, + 
src_extra_columns=src_extra_columns, + src_eff=src_eff, + src_ldts=src_ldts, + src_source=src_source, + source_model=source_model) }} + +{%- endmacro -%} \ No newline at end of file diff --git a/macros/tables/postgres/hub.sql b/macros/tables/postgres/hub.sql new file mode 100644 index 000000000..c47a6387c --- /dev/null +++ b/macros/tables/postgres/hub.sql @@ -0,0 +1,89 @@ +{%- macro postgres__hub(src_pk, src_nk, src_extra_columns, src_ldts, src_source, source_model) -%} + +{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_nk, src_extra_columns, src_ldts, src_source]) -%} + +{%- if model.config.materialized == 'vault_insert_by_rank' %} + {%- set source_cols_with_rank = source_cols + dbtvault.escape_column_names([config.get('rank_column')]) -%} +{%- endif -%} + +{{ dbtvault.prepend_generated_by() }} + +{{ 'WITH ' -}} + +{%- if not (source_model is iterable and source_model is not string) -%} + {%- set source_model = [source_model] -%} +{%- endif -%} + +{%- set ns = namespace(last_cte= "") -%} + +{%- for src in source_model -%} + +{%- set source_number = loop.index | string -%} + +row_rank_{{ source_number }} AS ( +{#- PostgreSQL has DISTINCT ON which should be more performant than the + strategy used by Snowflake ROW_NUMBER() OVER( PARTITION BY ... 
+-#} + {%- if model.config.materialized == 'vault_insert_by_rank' %} + SELECT DISTINCT ON ({{ dbtvault.prefix([src_pk], 'rr') }}) {{ dbtvault.prefix(source_cols_with_rank, 'rr') }} + {%- else %} + SELECT DISTINCT ON ({{ dbtvault.prefix([src_pk], 'rr') }}) {{ dbtvault.prefix(source_cols, 'rr') }} + {%- endif %} + FROM {{ ref(src) }} AS rr + WHERE {{ dbtvault.multikey(src_pk, prefix='rr', condition='IS NOT NULL') }} + ORDER BY {{ dbtvault.prefix([src_pk], 'rr') }}, {{ dbtvault.prefix([src_ldts], 'rr') }} + {%- set ns.last_cte = "row_rank_{}".format(source_number) %} +),{{ "\n" if not loop.last }} +{% endfor -%} +{% if source_model | length > 1 %} +stage_union AS ( + {%- for src in source_model %} + SELECT * FROM row_rank_{{ loop.index | string }} + {%- if not loop.last %} + UNION ALL + {%- endif %} + {%- endfor %} + {%- set ns.last_cte = "stage_union" %} +), +{%- endif -%} +{%- if model.config.materialized == 'vault_insert_by_period' %} +stage_mat_filter AS ( + SELECT * + FROM {{ ns.last_cte }} + WHERE __PERIOD_FILTER__ + {%- set ns.last_cte = "stage_mat_filter" %} +), +{%- elif model.config.materialized == 'vault_insert_by_rank' %} +stage_mat_filter AS ( + SELECT * + FROM {{ ns.last_cte }} + WHERE __RANK_FILTER__ + {%- set ns.last_cte = "stage_mat_filter" %} +), +{%- endif -%} +{%- if source_model | length > 1 %} + +row_rank_union AS ( +{#- PostgreSQL has DISTINCT ON which should be more performant than the + strategy used by Snowflake ROW_NUMBER() OVER( PARTITION BY ... 
+-#} + SELECT DISTINCT ON ({{ dbtvault.prefix([src_pk], 'ru') }}) ru.* + FROM {{ ns.last_cte }} AS ru + WHERE {{ dbtvault.multikey(src_pk, prefix='ru', condition='IS NOT NULL') }} + ORDER BY {{ dbtvault.prefix([src_pk], 'ru') }}, {{ dbtvault.prefix([src_ldts], 'ru') }}, {{ dbtvault.prefix([src_source], 'ru') }} ASC + {%- set ns.last_cte = "row_rank_union" %} +), +{% endif %} +records_to_insert AS ( + SELECT {{ dbtvault.prefix(source_cols, 'a', alias_target='target') }} + FROM {{ ns.last_cte }} AS a + {%- if dbtvault.is_any_incremental() %} + LEFT JOIN {{ this }} AS d + ON {{ dbtvault.multikey(src_pk, prefix=['a','d'], condition='=') }} + WHERE {{ dbtvault.multikey(src_pk, prefix='d', condition='IS NULL') }} + {%- endif %} +) + +SELECT * FROM records_to_insert + +{%- endmacro -%} \ No newline at end of file diff --git a/macros/tables/postgres/link.sql b/macros/tables/postgres/link.sql new file mode 100644 index 000000000..1043ec3ce --- /dev/null +++ b/macros/tables/postgres/link.sql @@ -0,0 +1,100 @@ +{%- macro postgres__link(src_pk, src_fk, src_extra_columns, src_ldts, src_source, source_model) -%} + +{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_fk, src_extra_columns, src_ldts, src_source]) -%} +{%- set fk_cols = dbtvault.expand_column_list([src_fk]) -%} + +{%- if model.config.materialized == 'vault_insert_by_rank' %} + {%- set source_cols_with_rank = source_cols + dbtvault.escape_column_names([config.get('rank_column')]) -%} +{%- endif -%} + +{{ dbtvault.prepend_generated_by() }} + +{{ 'WITH ' -}} + +{%- if not (source_model is iterable and source_model is not string) -%} + {%- set source_model = [source_model] -%} +{%- endif -%} + +{%- set ns = namespace(last_cte= "") -%} + +{%- for src in source_model -%} + +{%- set source_number = loop.index | string -%} + +row_rank_{{ source_number }} AS ( + SELECT * FROM ( + {%- if model.config.materialized == 'vault_insert_by_rank' %} + SELECT {{ dbtvault.prefix(source_cols_with_rank, 'rr') }}, + 
{%- else %} + SELECT {{ dbtvault.prefix(source_cols, 'rr') }}, + {%- endif %} + ROW_NUMBER() OVER( + PARTITION BY {{ dbtvault.prefix([src_pk], 'rr') }} + ORDER BY {{ dbtvault.prefix([src_ldts], 'rr') }} + ) AS row_number + FROM {{ ref(src) }} AS rr + {%- if source_model | length == 1 %} + WHERE {{ dbtvault.multikey(src_pk, prefix='rr', condition='IS NOT NULL') }} + AND {{ dbtvault.multikey(fk_cols, prefix='rr', condition='IS NOT NULL') }} + {%- endif %} + ) as l + WHERE row_number = 1 + {%- set ns.last_cte = "row_rank_{}".format(source_number) %} +),{{ "\n" if not loop.last }} +{% endfor -%} +{% if source_model | length > 1 %} +stage_union AS ( + {%- for src in source_model %} + SELECT * FROM row_rank_{{ loop.index | string }} + {%- if not loop.last %} + UNION ALL + {%- endif %} + {%- endfor %} + {%- set ns.last_cte = "stage_union" %} +), +{%- endif -%} +{%- if model.config.materialized == 'vault_insert_by_period' %} +stage_mat_filter AS ( + SELECT * + FROM {{ ns.last_cte }} + WHERE __PERIOD_FILTER__ + {%- set ns.last_cte = "stage_mat_filter" %} +), +{%- elif model.config.materialized == 'vault_insert_by_rank' %} +stage_mat_filter AS ( + SELECT * + FROM {{ ns.last_cte }} + WHERE __RANK_FILTER__ + {%- set ns.last_cte = "stage_mat_filter" %} +), +{% endif %} +{%- if source_model | length > 1 %} + +row_rank_union AS ( + SELECT * FROM ( + SELECT ru.*, + ROW_NUMBER() OVER( + PARTITION BY {{ dbtvault.prefix([src_pk], 'ru') }} + ORDER BY {{ dbtvault.prefix([src_ldts], 'ru') }}, {{ dbtvault.prefix([src_source], 'ru') }} ASC + ) AS row_rank_number + FROM {{ ns.last_cte }} AS ru + WHERE {{ dbtvault.multikey(src_pk, prefix='ru', condition='IS NOT NULL') }} + AND {{ dbtvault.multikey(fk_cols, prefix='ru', condition='IS NOT NULL') }} + ) AS a + WHERE row_rank_number = 1 + {%- set ns.last_cte = "row_rank_union" %} +), +{% endif %} +records_to_insert AS ( + SELECT {{ dbtvault.prefix(source_cols, 'a', alias_target='target') }} + FROM {{ ns.last_cte }} AS a + {%- if 
dbtvault.is_any_incremental() %} + LEFT JOIN {{ this }} AS d + ON {{ dbtvault.multikey(src_pk, prefix=['a','d'], condition='=') }} + WHERE {{ dbtvault.multikey(src_pk, prefix='d', condition='IS NULL') }} + {%- endif %} +) + +SELECT * FROM records_to_insert + +{%- endmacro -%} \ No newline at end of file diff --git a/macros/tables/postgres/sat.sql b/macros/tables/postgres/sat.sql new file mode 100644 index 000000000..9175dd05c --- /dev/null +++ b/macros/tables/postgres/sat.sql @@ -0,0 +1,63 @@ +{%- macro postgres__sat(src_pk, src_hashdiff, src_payload, src_extra_columns, src_eff, src_ldts, src_source, source_model) -%} + +{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, src_payload, src_extra_columns, src_eff, src_ldts, src_source]) -%} +{%- set rank_cols = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, src_ldts]) -%} +{%- set pk_cols = dbtvault.expand_column_list(columns=[src_pk]) -%} + +{%- if model.config.materialized == 'vault_insert_by_rank' %} + {%- set source_cols_with_rank = source_cols + dbtvault.escape_column_names([config.get('rank_column')]) -%} +{%- endif -%} + +{{ dbtvault.prepend_generated_by() }} + +WITH source_data AS ( + {%- if model.config.materialized == 'vault_insert_by_rank' %} + SELECT {{ dbtvault.prefix(source_cols_with_rank, 'a', alias_target='source') }} + {%- else %} + SELECT {{ dbtvault.prefix(source_cols, 'a', alias_target='source') }} + {%- endif %} + FROM {{ ref(source_model) }} AS a + WHERE {{ dbtvault.multikey(src_pk, prefix='a', condition='IS NOT NULL') }} + {%- if model.config.materialized == 'vault_insert_by_period' %} + AND __PERIOD_FILTER__ + {% elif model.config.materialized == 'vault_insert_by_rank' %} + AND __RANK_FILTER__ + {% endif %} +), + +{%- if dbtvault.is_any_incremental() %} + +latest_records AS ( + SELECT {{ dbtvault.prefix(rank_cols, 'a', alias_target='target') }} + FROM ( + SELECT {{ dbtvault.prefix(rank_cols, 'current_records', alias_target='target') }}, + RANK() OVER ( 
+ PARTITION BY {{ dbtvault.prefix([src_pk], 'current_records') }} + ORDER BY {{ dbtvault.prefix([src_ldts], 'current_records') }} DESC + ) AS rank + FROM {{ this }} AS current_records + JOIN ( + SELECT DISTINCT {{ dbtvault.prefix([src_pk], 'source_data') }} + FROM source_data + ) AS source_records + ON {{ dbtvault.multikey(src_pk, prefix=['current_records','source_records'], condition='=') }} + ) AS a + WHERE a.rank = 1 +), + +{%- endif %} + +records_to_insert AS ( + SELECT DISTINCT {{ dbtvault.alias_all(source_cols, 'stage') }} + FROM source_data AS stage + {%- if dbtvault.is_any_incremental() %} + LEFT JOIN latest_records + ON {{ dbtvault.multikey(src_pk, prefix=['latest_records','stage'], condition='=') }} + WHERE {{ dbtvault.prefix([src_hashdiff], 'latest_records', alias_target='target') }} != {{ dbtvault.prefix([src_hashdiff], 'stage') }} + OR {{ dbtvault.prefix([src_hashdiff], 'latest_records', alias_target='target') }} IS NULL + {%- endif %} +) + +SELECT * FROM records_to_insert + +{%- endmacro -%} diff --git a/macros/tables/snowflake/bridge.sql b/macros/tables/snowflake/bridge.sql index 720dba157..259bec124 100644 --- a/macros/tables/snowflake/bridge.sql +++ b/macros/tables/snowflake/bridge.sql @@ -1,225 +1,70 @@ -{%- macro bridge(src_pk, as_of_dates_table, bridge_walk, stage_tables_ldts, src_ldts, source_model) -%} +{%- macro bridge(src_pk, src_extra_columns, as_of_dates_table, bridge_walk, stage_tables_ldts, src_ldts, source_model) -%} - {{- adapter.dispatch('bridge', 'dbtvault')(source_model=source_model, src_pk=src_pk, - bridge_walk=bridge_walk, - as_of_dates_table=as_of_dates_table, - stage_tables_ldts=stage_tables_ldts, - src_ldts=src_ldts) -}} -{%- endmacro -%} + {%- if dbtvault.is_something(src_extra_columns) and execute -%} + {%- do exceptions.warn("WARNING: src_extra_columns not yet available for PITs or Bridges. 
This parameter will be ignored.") -%} + {%- endif -%} -{%- macro default__bridge(src_pk, as_of_dates_table, bridge_walk, stage_tables_ldts, src_ldts, source_model) -%} + {{- dbtvault.check_required_parameters(src_pk=src_pk, + as_of_dates_table=as_of_dates_table, + bridge_walk=bridge_walk, + stage_tables_ldts=stage_tables_ldts, + src_ldts=src_ldts, + source_model=source_model) -}} -{{- dbtvault.check_required_parameters(source_model=source_model, src_pk=src_pk, - bridge_walk=bridge_walk, - stage_tables_ldts=stage_tables_ldts, - src_ldts=src_ldts) -}} + {%- set src_pk = dbtvault.escape_column_names(src_pk) -%} + {%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} + {{- dbtvault.prepend_generated_by() }} -{{ dbtvault.prepend_generated_by() }} + {% for stg in stage_tables_ldts %} + {{- "-- depends_on: " ~ ref(stg) }} + {% endfor %} -{%- if (as_of_dates_table is none) and execute -%} - {%- set error_message -%} - "Bridge error: Missing as_of_dates table configuration. A as_of_dates_table must be provided." 
- {%- endset -%} - {{- exceptions.raise_compiler_error(error_message) -}} -{%- endif -%} + {#- Acquiring the source relation for the AS_OF table -#} + {%- if as_of_dates_table is mapping and as_of_dates_table is not none -%} + {%- set source_name = as_of_dates_table | first -%} + {%- set source_table_name = as_of_dates_table[source_name] -%} + {%- set as_of_dates_table = source(source_name, source_table_name) -%} + {%- elif as_of_dates_table is not mapping and as_of_dates_table is not none -%} + {%- set as_of_dates_table = ref(as_of_dates_table) -%} + {%- endif %} -{#- Acquiring the source relation for the AS_OF table -#} -{%- if as_of_dates_table is mapping and as_of_dates_table is not none -%} - {%- set source_name = as_of_dates_table | first -%} - {%- set source_table_name = as_of_dates_table[source_name] -%} - {%- set source_relation = source(source_name, source_table_name) -%} -{%- elif as_of_dates_table is not mapping and as_of_dates_table is not none -%} - {%- set source_relation = ref(as_of_dates_table) -%} -{%- endif -%} + {{ adapter.dispatch('bridge', 'dbtvault')(src_pk=src_pk, + src_extra_columns=src_extra_columns, + src_ldts=src_ldts, + as_of_dates_table=as_of_dates_table, + bridge_walk=bridge_walk, + stage_tables_ldts=stage_tables_ldts, + source_model=source_model) -}} +{%- endmacro -%} -{%- set max_datetime = var('max_datetime', '9999-12-31 23:59:59.999999') -%} +{%- macro default__bridge(src_pk, src_extra_columns, src_ldts, as_of_dates_table, bridge_walk, stage_tables_ldts, source_model) -%} -{#- Stating the dependencies on the stage tables outside of the If STATEMENT -#} -{% for stg in stage_tables_ldts -%} - {{- "-- depends_on: " ~ ref(stg) -}} -{%- endfor %} +{%- set max_datetime = dbtvault.max_datetime() -%} {#- Setting the new AS_OF dates CTE name -#} {%- if dbtvault.is_any_incremental() -%} - {%- set new_as_of_dates_cte = 'NEW_ROWS_AS_OF' -%} + {%- set new_as_of_dates_cte = 'new_rows_as_of' -%} {%- else -%} - {%- set new_as_of_dates_cte = 
'AS_OF' -%} + {%- set new_as_of_dates_cte = 'as_of_dates' -%} {%- endif %} -WITH as_of AS ( - SELECT a.AS_OF_DATE - FROM {{ source_relation }} AS a - WHERE a.AS_OF_DATE <= CURRENT_DATE() +WITH as_of_dates AS ( + SELECT * + FROM {{ as_of_dates_table }} ), {%- if dbtvault.is_any_incremental() %} -last_safe_load_datetime AS ( - SELECT MIN(LOAD_DATETIME) AS LAST_SAFE_LOAD_DATETIME - FROM ( - {%- filter indent(width=8) -%} - {%- for stg in stage_tables_ldts -%} - {%- set stage_ldts =(stage_tables_ldts[stg]) -%} - {{ "SELECT MIN(" ~ stage_ldts ~ ") AS LOAD_DATETIME FROM " ~ ref(stg) }} - {{ "UNION ALL" if not loop.last }} - {% endfor -%} - {%- endfilter -%} - ) AS l -), - -as_of_grain_old_entries AS ( - SELECT DISTINCT AS_OF_DATE - FROM {{ this }} -), - -as_of_grain_lost_entries AS ( - SELECT a.AS_OF_DATE - FROM as_of_grain_old_entries AS a - LEFT OUTER JOIN as_of AS b - ON a.AS_OF_DATE = b.AS_OF_DATE - WHERE b.AS_OF_DATE IS NULL -), - -as_of_grain_new_entries AS ( - SELECT a.AS_OF_DATE - FROM as_of AS a - LEFT OUTER JOIN as_of_grain_old_entries AS b - ON a.AS_OF_DATE = b.AS_OF_DATE - WHERE b.AS_OF_DATE IS NULL -), - -min_date AS ( - SELECT min(AS_OF_DATE) AS MIN_DATE - FROM as_of -), - -new_rows_pks AS ( - SELECT {{ dbtvault.prefix([src_pk], 'h') }} - FROM {{ ref(source_model) }} AS h - WHERE h.{{ src_ldts }} >= (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) -), - -new_rows_as_of AS ( - SELECT AS_OF_DATE - FROM as_of - WHERE as_of.AS_OF_DATE >= (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) - UNION - SELECT as_of_date - FROM as_of_grain_new_entries -), - -overlap_pks AS ( - SELECT {{ dbtvault.prefix([src_pk], 'p') }} - FROM {{ this }} AS p - INNER JOIN {{ ref(source_model) }} as h - ON {{ dbtvault.multikey(src_pk, prefix=['p','h'], condition='=') }} - WHERE p.AS_OF_DATE >= (SELECT MIN_DATE FROM min_date) - AND p.AS_OF_DATE < (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) - AND p.AS_OF_DATE NOT IN (SELECT AS_OF_DATE FROM 
as_of_grain_lost_entries) -), - -overlap_as_of AS ( - SELECT AS_OF_DATE - FROM as_of AS p - WHERE p.AS_OF_DATE >= (SELECT MIN_DATE FROM min_date) - AND p.AS_OF_DATE < (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) - AND p.AS_OF_DATE NOT IN (SELECT AS_OF_DATE FROM as_of_grain_lost_entries) -), +{{ dbtvault.as_of_date_window(src_pk, src_ldts, stage_tables_ldts, ref(source_model)) }}, overlap AS ( - SELECT - {{ dbtvault.prefix([src_pk], 'a') }}, - b.AS_OF_DATE - {%- for bridge_step in bridge_walk.keys() -%} - {%- set link_table = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_table']) -%} - {%- set link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_pk']) -%} - {%- set bridge_link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_link_pk']) -%} - {%- set eff_sat_table = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_table']) -%} - {%- set bridge_end_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_end_date']) -%} - {%- set bridge_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_load_date']) -%} - {%- set eff_sat_end_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_end_date']) -%} - {%- set eff_sat_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_load_date']) -%} - {%- filter indent(width=8) %} - {{ ',' ~ link_table ~ '.' ~ link_pk ~ ' AS ' ~ bridge_link_pk }} - {{ ',' ~ eff_sat_table ~ '.' ~ eff_sat_end_date ~ ' AS ' ~ bridge_end_date }} - {{ ',' ~ eff_sat_table ~ '.' 
~ eff_sat_load_date ~' AS ' ~ bridge_load_date }} - {%- endfilter -%} - {% endfor %} - FROM overlap_pks AS a - INNER JOIN overlap_as_of AS b - ON (1=1) - {%- set loop_vars = namespace(lastlink = '', last_link_fk = '') -%} - {%- for bridge_step in bridge_walk.keys() -%} - {%- set current_link = bridge_walk[bridge_step]['link_table'] -%} - {%- set current_eff_sat = bridge_walk[bridge_step]['eff_sat_table'] -%} - {%- set link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_pk']) -%} - {%- set link_fk1 = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_fk1']) -%} - {%- set link_fk2 = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_fk2']) -%} - {%- set eff_sat_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_pk']) -%} - {%- set eff_sat_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_load_date']) -%} - {%- if loop.first %} - LEFT JOIN {{ ref(current_link) }} AS {{ dbtvault.escape_column_names(current_link) }} - ON a.{{ src_pk }} = {{ dbtvault.escape_column_names(current_link) }}.{{ link_fk1 }} - {%- else %} - LEFT JOIN {{ ref(current_link) }} AS {{ dbtvault.escape_column_names(current_link) }} - ON {{ loop_vars.last_link }}.{{ loop_vars.last_link_fk2 }} = {{ dbtvault.escape_column_names(current_link) }}.{{ link_fk1 }} - {%- endif %} - INNER JOIN {{ ref(current_eff_sat) }} AS {{ dbtvault.escape_column_names(current_eff_sat) }} - ON {{ dbtvault.escape_column_names(current_eff_sat) }}.{{ eff_sat_pk }} = {{ dbtvault.escape_column_names(current_link) }}.{{ link_pk }} - AND {{ dbtvault.escape_column_names(current_eff_sat) }}.{{ eff_sat_load_date }} <= b.AS_OF_DATE - {%- set loop_vars.last_link = current_link -%} - {%- set loop_vars.last_link_fk2 = link_fk2 -%} - {% endfor %} + {{ dbtvault.bridge_overlap_and_new_rows(src_pk, bridge_walk, 'overlap_pks', 'overlap_as_of') }} ), {%- endif %} new_rows AS ( - SELECT - {{ dbtvault.prefix([src_pk], 'a') }}, - b.AS_OF_DATE - {%- for bridge_step in 
bridge_walk.keys() -%} - {%- set link_table = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_table']) -%} - {%- set link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_pk']) -%} - {%- set bridge_link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_link_pk']) -%} - {%- set eff_sat_table = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_table']) -%} - {%- set bridge_end_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_end_date']) -%} - {%- set bridge_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_load_date']) -%} - {%- set eff_sat_end_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_end_date']) -%} - {%- set eff_sat_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_load_date']) -%} - {%- filter indent(width=8) -%} - {{ ',' ~ link_table ~'.'~ link_pk ~' AS '~ bridge_link_pk }} - {{ ',' ~ eff_sat_table ~ '.' ~ eff_sat_end_date ~ ' AS ' ~ bridge_end_date }} - {{ ',' ~ eff_sat_table ~ '.' 
~ eff_sat_load_date ~ ' AS ' ~ bridge_load_date }} - {%- endfilter -%} - {% endfor %} - FROM {{ ref(source_model) }} AS a - INNER JOIN {{ new_as_of_dates_cte }} AS b - ON (1=1) - {%- set loop_vars = namespace(lastlink = '', last_link_fk = '') %} - {%- for bridge_step in bridge_walk.keys() -%} - {%- set current_link = bridge_walk[bridge_step]['link_table'] -%} - {%- set current_eff_sat = bridge_walk[bridge_step]['eff_sat_table'] -%} - {%- set link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_pk']) -%} - {%- set link_fk1 = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_fk1']) -%} - {%- set link_fk2 = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_fk2']) -%} - {%- set eff_sat_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_pk']) -%} - {%- set eff_sat_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_load_date']) -%} - {%- if loop.first %} - LEFT JOIN {{ ref(current_link) }} AS {{ dbtvault.escape_column_names(current_link) }} - ON a.{{ src_pk }} = {{ dbtvault.escape_column_names(current_link) }}.{{ link_fk1 }} - {%- else %} - LEFT JOIN {{ ref(current_link) }} AS {{ dbtvault.escape_column_names(current_link) }} - ON {{ loop_vars.last_link }}.{{ loop_vars.last_link_fk2 }} = {{ dbtvault.escape_column_names(current_link) }}.{{ link_fk1 }} - {%- endif %} - INNER JOIN {{ ref(current_eff_sat) }} AS {{ dbtvault.escape_column_names(current_eff_sat) }} - ON {{ dbtvault.escape_column_names(current_eff_sat) }}.{{ eff_sat_pk }} = {{ dbtvault.escape_column_names(current_link) }}.{{ link_pk }} - AND {{ dbtvault.escape_column_names(current_eff_sat) }}.{{ eff_sat_load_date }} <= b.AS_OF_DATE - {%- set loop_vars.last_link = dbtvault.escape_column_names(current_link) -%} - {%- set loop_vars.last_link_fk2 = link_fk2 -%} - {% endfor %} + {{ dbtvault.bridge_overlap_and_new_rows(src_pk, bridge_walk, ref(source_model), new_as_of_dates_cte) }} ), {# Full data from bridge walk(s) -#} @@ -233,50 +78,55 @@ 
all_rows AS ( {# Select most recent set of relationship key(s) for each as of date -#} candidate_rows AS ( - SELECT *, - ROW_NUMBER() OVER ( - PARTITION BY AS_OF_DATE, - {%- for bridge_step in bridge_walk.keys() -%} - {% set bridge_link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_link_pk']) -%} - {%- if loop.first %} - {{ bridge_link_pk }} - {%- else %} - {{ ','~ bridge_link_pk }} - {%- endif -%} - {%- endfor %} - ORDER BY - {%- for bridge_step in bridge_walk.keys() -%} - {% set bridge_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_load_date']) %} - {%- if loop.first %} - {{ bridge_load_date ~' DESC' }} - {%- else %} - {{ ','~ bridge_load_date ~' DESC' }} - {%- endif -%} - {%- endfor %} - ) AS row_num - FROM all_rows - QUALIFY row_num = 1 + SELECT * + FROM ( + SELECT *, + ROW_NUMBER() OVER ( + PARTITION BY + AS_OF_DATE, + {% for bridge_step in bridge_walk.keys() -%} + + {%- set bridge_link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_link_pk']) -%} + + {{ bridge_link_pk }} {%- if not loop.last %}, {% endif -%} + + {%- endfor %} + ORDER BY + {% for bridge_step in bridge_walk.keys() -%} + + {%- set bridge_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_load_date']) -%} + + {{ bridge_load_date }} DESC {%- if not loop.last %}, {% endif -%} + + {%- endfor %} + ) AS ROW_NUM + FROM all_rows + ) AS a + WHERE a.ROW_NUM = 1 ), bridge AS ( SELECT {{ dbtvault.prefix([src_pk], 'c') }}, - c.AS_OF_DATE - {%- for bridge_step in bridge_walk.keys() -%} - {% set bridge_link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_link_pk']) -%} - {{ ',c.' 
~ bridge_link_pk }} - {%- endfor %} + c.AS_OF_DATE, + + {% for bridge_step in bridge_walk.keys() %} + + {% set bridge_link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_link_pk']) %} + c.{{ bridge_link_pk }} + {%- if not loop.last %}, {%- endif -%} + {%- endfor -%} + FROM candidate_rows AS c - {%- for bridge_step in bridge_walk.keys() -%} - {%- set bridge_end_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_end_date']) -%} - {%- if loop.first %} - WHERE TO_DATE({{ 'c.' ~ bridge_end_date }}) = TO_DATE('{{ max_datetime }}') - {%- else %} - AND TO_DATE({{ 'c.' ~ bridge_end_date }}) = TO_DATE('{{ max_datetime }}') - {%- endif -%} - {%- endfor %} + +{%- for bridge_step in bridge_walk.keys() -%} + {%- set bridge_end_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_end_date']) %} + + {% if loop.first -%} WHERE {%- else -%} AND {%- endif %} {{ dbtvault.cast_date(dbtvault.prefix([bridge_end_date], 'c')) }} = {{ dbtvault.cast_date(max_datetime, true, false) }} + +{% endfor -%} ) SELECT * FROM bridge -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/snowflake/eff_sat.sql b/macros/tables/snowflake/eff_sat.sql index f5dd735b4..a08e6d48d 100644 --- a/macros/tables/snowflake/eff_sat.sql +++ b/macros/tables/snowflake/eff_sat.sql @@ -1,35 +1,40 @@ -{%- macro eff_sat(src_pk, src_dfk, src_sfk, src_start_date, src_end_date, src_eff, src_ldts, src_source, source_model) -%} +{%- macro eff_sat(src_pk, src_dfk, src_sfk, src_extra_columns, src_start_date, src_end_date, src_eff, src_ldts, src_source, source_model) -%} - {{- adapter.dispatch('eff_sat', 'dbtvault')(src_pk=src_pk, src_dfk=src_dfk, src_sfk=src_sfk, - src_start_date=src_start_date, src_end_date=src_end_date, - src_eff=src_eff, src_ldts=src_ldts, src_source=src_source, - source_model=source_model) -}} -{%- endmacro -%} + {{- dbtvault.check_required_parameters(src_pk=src_pk, src_dfk=src_dfk, src_sfk=src_sfk, + 
src_start_date=src_start_date, src_end_date=src_end_date, + src_eff=src_eff, src_ldts=src_ldts, src_source=src_source, + source_model=source_model) -}} + + {%- set src_pk = dbtvault.escape_column_names(src_pk) -%} + {%- set src_dfk = dbtvault.escape_column_names(src_dfk) -%} + {%- set src_sfk = dbtvault.escape_column_names(src_sfk) -%} + + {%- set src_extra_columns = dbtvault.escape_column_names(src_extra_columns) -%} -{%- macro default__eff_sat(src_pk, src_dfk, src_sfk, src_start_date, src_end_date, src_eff, src_ldts, src_source, source_model) -%} + {%- set src_start_date = dbtvault.escape_column_names(src_start_date) -%} + {%- set src_end_date = dbtvault.escape_column_names(src_end_date) -%} + {%- set src_eff = dbtvault.escape_column_names(src_eff) -%} -{{- dbtvault.check_required_parameters(src_pk=src_pk, src_dfk=src_dfk, src_sfk=src_sfk, - src_start_date=src_start_date, src_end_date=src_end_date, - src_eff=src_eff, src_ldts=src_ldts, src_source=src_source, - source_model=source_model) -}} + {%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} + {%- set src_source = dbtvault.escape_column_names(src_source) -%} -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_dfk = dbtvault.escape_column_names(src_dfk) -%} -{%- set src_sfk = dbtvault.escape_column_names(src_sfk) -%} -{%- set src_start_date = dbtvault.escape_column_names(src_start_date) -%} -{%- set src_end_date = dbtvault.escape_column_names(src_end_date) -%} -{%- set src_eff = dbtvault.escape_column_names(src_eff) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} -{%- set src_source = dbtvault.escape_column_names(src_source) -%} + {{ dbtvault.prepend_generated_by() }} + + {{ adapter.dispatch('eff_sat', 'dbtvault')(src_pk=src_pk, src_dfk=src_dfk, src_sfk=src_sfk, + src_extra_columns=src_extra_columns, + src_start_date=src_start_date, src_end_date=src_end_date, + src_eff=src_eff, src_ldts=src_ldts, src_source=src_source, + source_model=source_model) -}} +{%- 
endmacro -%} -{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_dfk, src_sfk, src_start_date, src_end_date, src_eff, src_ldts, src_source]) -%} +{%- macro default__eff_sat(src_pk, src_dfk, src_sfk, src_extra_columns, src_start_date, src_end_date, src_eff, src_ldts, src_source, source_model) -%} + +{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_dfk, src_sfk, src_extra_columns, src_start_date, src_end_date, src_eff, src_ldts, src_source]) -%} {%- set fk_cols = dbtvault.expand_column_list(columns=[src_dfk, src_sfk]) -%} {%- set dfk_cols = dbtvault.expand_column_list(columns=[src_dfk]) -%} {%- set is_auto_end_dating = config.get('is_auto_end_dating', default=false) %} -{{- dbtvault.prepend_generated_by() }} - -{%- set max_datetime = var('max_datetime', '9999-12-31 23:59:59.999999') %} +{%- set max_datetime = dbtvault.max_datetime() %} WITH source_data AS ( SELECT {{ dbtvault.prefix(source_cols, 'a', alias_target='source') }} @@ -47,13 +52,15 @@ WITH source_data AS ( {# Selecting the most recent records for each link hashkey -#} latest_records AS ( - SELECT {{ dbtvault.alias_all(source_cols, 'b') }}, - ROW_NUMBER() OVER ( - PARTITION BY {{ dbtvault.prefix([src_pk], 'b') }} - ORDER BY b.{{ src_ldts }} DESC - ) AS row_num - FROM {{ this }} AS b - QUALIFY row_num = 1 + SELECT * FROM ( + SELECT {{ dbtvault.alias_all(source_cols, 'b') }}, + ROW_NUMBER() OVER ( + PARTITION BY {{ dbtvault.prefix([src_pk], 'b') }} + ORDER BY b.{{ src_ldts }} DESC + ) AS row_num + FROM {{ this }} AS b + ) AS inner + WHERE row_num = 1 ), {# Selecting the open records of the most recent records for each link hashkey -#} @@ -75,6 +82,9 @@ new_open_records AS ( SELECT DISTINCT {{ dbtvault.prefix([src_pk], 'f') }}, {{ dbtvault.alias_all(fk_cols, 'f') }}, + {% if dbtvault.is_something(src_extra_columns) %} + {{ dbtvault.prefix([src_extra_columns], 'f') }}, + {% endif -%} {%- if is_auto_end_dating %} f.{{ src_eff }} AS {{ src_start_date }}, {% else %} @@ 
-95,6 +105,9 @@ new_reopened_records AS ( SELECT DISTINCT {{ dbtvault.prefix([src_pk], 'lc') }}, {{ dbtvault.alias_all(fk_cols, 'lc') }}, + {% if dbtvault.is_something(src_extra_columns) %} + {{ dbtvault.prefix([src_extra_columns], 'g') }}, + {% endif -%} {%- if is_auto_end_dating %} g.{{ src_eff }} AS {{ src_start_date }}, {% else %} @@ -118,6 +131,9 @@ new_closed_records AS ( SELECT DISTINCT {{ dbtvault.prefix([src_pk], 'lo') }}, {{ dbtvault.alias_all(fk_cols, 'lo') }}, + {% if dbtvault.is_something(src_extra_columns) %} + {{ dbtvault.prefix([src_extra_columns], 'h') }}, + {% endif -%} lo.{{ src_start_date }} AS {{ src_start_date }}, h.{{ src_eff }} AS {{ src_end_date }}, h.{{ src_eff }} AS {{ src_eff }}, @@ -135,7 +151,10 @@ new_closed_records AS ( new_closed_records AS ( SELECT DISTINCT {{ dbtvault.prefix([src_pk], 'lo') }}, - {{ dbtvault.alias_all(fk_cols, 'lo') }}, + {{ dbtvault.alias_all(fk_cols, 'h') }}, + {% if dbtvault.is_something(src_extra_columns) %} + {{ dbtvault.prefix([src_extra_columns], 'h') }}, + {% endif -%} h.{{ src_start_date }} AS {{ src_start_date }}, h.{{ src_end_date }} AS {{ src_end_date }}, h.{{ src_eff }} AS {{ src_eff }}, @@ -174,4 +193,4 @@ records_to_insert AS ( {%- endif %} SELECT * FROM records_to_insert -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/snowflake/hub.sql b/macros/tables/snowflake/hub.sql index 8121a36ef..4410c89c8 100644 --- a/macros/tables/snowflake/hub.sql +++ b/macros/tables/snowflake/hub.sql @@ -1,35 +1,41 @@ -{%- macro hub(src_pk, src_nk, src_ldts, src_source, source_model) -%} +{%- macro hub(src_pk, src_nk, src_extra_columns, src_ldts, src_source, source_model) -%} + + {{- dbtvault.check_required_parameters(src_pk=src_pk, src_nk=src_nk, + src_ldts=src_ldts, src_source=src_source, + source_model=source_model) -}} + + {%- set src_pk = dbtvault.escape_column_names(src_pk) -%} + {%- set src_nk = dbtvault.escape_column_names(src_nk) -%} + {%- set src_extra_columns = 
dbtvault.escape_column_names(src_extra_columns) -%} + {%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} + {%- set src_source = dbtvault.escape_column_names(src_source) -%} + + {%- if not dbtvault.is_list(source_model) -%} + {%- set source_model = [source_model] -%} + {%- endif -%} + + {{ dbtvault.log_relation_sources(this, source_model | length) }} + + {{- dbtvault.prepend_generated_by() -}} {{- adapter.dispatch('hub', 'dbtvault')(src_pk=src_pk, src_nk=src_nk, + src_extra_columns=src_extra_columns, src_ldts=src_ldts, src_source=src_source, source_model=source_model) -}} {%- endmacro -%} -{%- macro default__hub(src_pk, src_nk, src_ldts, src_source, source_model) -%} - -{{- dbtvault.check_required_parameters(src_pk=src_pk, src_nk=src_nk, - src_ldts=src_ldts, src_source=src_source, - source_model=source_model) -}} - -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_nk = dbtvault.escape_column_names(src_nk) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} -{%- set src_source = dbtvault.escape_column_names(src_source) -%} +{%- macro default__hub(src_pk, src_nk, src_extra_columns, src_ldts, src_source, source_model) -%} -{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_nk, src_ldts, src_source]) -%} +{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_nk, src_extra_columns, src_ldts, src_source]) -%} {%- if model.config.materialized == 'vault_insert_by_rank' %} {%- set source_cols_with_rank = source_cols + dbtvault.escape_column_names([config.get('rank_column')]) -%} -{%- endif -%} - -{{ dbtvault.prepend_generated_by() }} +{%- endif %} {{ 'WITH ' -}} -{%- if not (source_model is iterable and source_model is not string) -%} - {%- set source_model = [source_model] -%} -{%- endif -%} +{%- set stage_count = source_model | length -%} {%- set ns = namespace(last_cte= "") -%} @@ -53,7 +59,7 @@ row_rank_{{ source_number }} AS ( {%- set ns.last_cte = "row_rank_{}".format(source_number) %} 
),{{ "\n" if not loop.last }} {% endfor -%} -{% if source_model | length > 1 %} +{% if stage_count > 1 %} stage_union AS ( {%- for src in source_model %} SELECT * FROM row_rank_{{ loop.index | string }} @@ -64,6 +70,7 @@ stage_union AS ( {%- set ns.last_cte = "stage_union" %} ), {%- endif -%} + {%- if model.config.materialized == 'vault_insert_by_period' %} stage_mat_filter AS ( SELECT * @@ -79,7 +86,8 @@ stage_mat_filter AS ( {%- set ns.last_cte = "stage_mat_filter" %} ), {%- endif -%} -{%- if source_model | length > 1 %} + +{%- if stage_count > 1 %} row_rank_union AS ( SELECT ru.*, @@ -105,4 +113,4 @@ records_to_insert AS ( SELECT * FROM records_to_insert -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/snowflake/link.sql b/macros/tables/snowflake/link.sql index 0e4960ea2..3866514d2 100644 --- a/macros/tables/snowflake/link.sql +++ b/macros/tables/snowflake/link.sql @@ -1,36 +1,42 @@ -{%- macro link(src_pk, src_fk, src_ldts, src_source, source_model) -%} +{%- macro link(src_pk, src_fk, src_extra_columns, src_ldts, src_source, source_model) -%} - {{- adapter.dispatch('link', 'dbtvault')(src_pk=src_pk, src_fk=src_fk, + {{- dbtvault.check_required_parameters(src_pk=src_pk, src_fk=src_fk, + src_ldts=src_ldts, src_source=src_source, + source_model=source_model) -}} + + {%- set src_pk = dbtvault.escape_column_names(src_pk) -%} + {%- set src_fk = dbtvault.escape_column_names(src_fk) -%} + {%- set src_extra_columns = dbtvault.escape_column_names(src_extra_columns) -%} + {%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} + {%- set src_source = dbtvault.escape_column_names(src_source) -%} + + {%- if not dbtvault.is_list(source_model) -%} + {%- set source_model = [source_model] -%} + {%- endif -%} + + {{ dbtvault.log_relation_sources(this, source_model | length) }} + + {{ dbtvault.prepend_generated_by() }} + + {{ adapter.dispatch('link', 'dbtvault')(src_pk=src_pk, src_fk=src_fk, + src_extra_columns=src_extra_columns, 
src_ldts=src_ldts, src_source=src_source, source_model=source_model) -}} {%- endmacro -%} -{%- macro default__link(src_pk, src_fk, src_ldts, src_source, source_model) -%} - -{{- dbtvault.check_required_parameters(src_pk=src_pk, src_fk=src_fk, - src_ldts=src_ldts, src_source=src_source, - source_model=source_model) -}} +{%- macro default__link(src_pk, src_fk, src_extra_columns, src_ldts, src_source, source_model) -%} -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_fk = dbtvault.escape_column_names(src_fk) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} -{%- set src_source = dbtvault.escape_column_names(src_source) -%} - -{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_fk, src_ldts, src_source]) -%} +{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_fk, src_extra_columns, src_ldts, src_source]) -%} {%- set fk_cols = dbtvault.expand_column_list([src_fk]) -%} {%- if model.config.materialized == 'vault_insert_by_rank' %} {%- set source_cols_with_rank = source_cols + dbtvault.escape_column_names([config.get('rank_column')]) -%} -{%- endif -%} - -{{ dbtvault.prepend_generated_by() }} +{%- endif %} {{ 'WITH ' -}} -{%- if not (source_model is iterable and source_model is not string) -%} - {%- set source_model = [source_model] -%} -{%- endif -%} +{%- set stage_count = source_model | length -%} {%- set ns = namespace(last_cte= "") -%} @@ -49,7 +55,7 @@ row_rank_{{ source_number }} AS ( ORDER BY {{ dbtvault.prefix([src_ldts], 'rr') }} ) AS row_number FROM {{ ref(src) }} AS rr - {%- if source_model | length == 1 %} + {%- if stage_count == 1 %} WHERE {{ dbtvault.multikey(src_pk, prefix='rr', condition='IS NOT NULL') }} AND {{ dbtvault.multikey(fk_cols, prefix='rr', condition='IS NOT NULL') }} {%- endif %} @@ -57,7 +63,7 @@ row_rank_{{ source_number }} AS ( {%- set ns.last_cte = "row_rank_{}".format(source_number) %} ),{{ "\n" if not loop.last }} {% endfor -%} -{% if source_model | length > 1 
%} +{% if stage_count > 1 %} stage_union AS ( {%- for src in source_model %} SELECT * FROM row_rank_{{ loop.index | string }} @@ -83,7 +89,7 @@ stage_mat_filter AS ( {%- set ns.last_cte = "stage_mat_filter" %} ), {% endif %} -{%- if source_model | length > 1 %} +{%- if stage_count > 1 %} row_rank_union AS ( SELECT ru.*, @@ -110,4 +116,4 @@ records_to_insert AS ( SELECT * FROM records_to_insert -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/snowflake/ma_sat.sql b/macros/tables/snowflake/ma_sat.sql index b387a927b..588d993ad 100644 --- a/macros/tables/snowflake/ma_sat.sql +++ b/macros/tables/snowflake/ma_sat.sql @@ -1,35 +1,39 @@ -{%- macro ma_sat(src_pk, src_cdk, src_hashdiff, src_payload, src_eff, src_ldts, src_source, source_model) -%} +{%- macro ma_sat(src_pk, src_cdk, src_hashdiff, src_payload, src_extra_columns, src_eff, src_ldts, src_source, source_model) -%} - {{- adapter.dispatch('ma_sat', 'dbtvault')(src_pk=src_pk, src_cdk=src_cdk, src_hashdiff=src_hashdiff, - src_payload=src_payload, src_eff=src_eff, src_ldts=src_ldts, - src_source=src_source, source_model=source_model) -}} + {{- dbtvault.check_required_parameters(src_pk=src_pk, src_cdk=src_cdk, src_hashdiff=src_hashdiff, + src_payload=src_payload, src_ldts=src_ldts, src_source=src_source, + source_model=source_model) -}} -{%- endmacro %} + {%- set src_pk = dbtvault.escape_column_names(src_pk) -%} + {%- set src_cdk = dbtvault.escape_column_names(src_cdk) -%} + + {%- set src_hashdiff = dbtvault.escape_column_names(src_hashdiff) -%} + {%- set src_payload = dbtvault.escape_column_names(src_payload) -%} + {%- set src_extra_columns = dbtvault.escape_column_names(src_extra_columns) -%} + + {%- set src_eff = dbtvault.escape_column_names(src_eff) -%} + {%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} + {%- set src_source = dbtvault.escape_column_names(src_source) -%} -{%- macro default__ma_sat(src_pk, src_cdk, src_hashdiff, src_payload, src_eff, src_ldts, 
src_source, source_model) -%} + {{ dbtvault.prepend_generated_by() }} + {{ adapter.dispatch('ma_sat', 'dbtvault')(src_pk=src_pk, src_cdk=src_cdk, src_hashdiff=src_hashdiff, + src_payload=src_payload, src_extra_columns=src_extra_columns, + src_eff=src_eff, src_ldts=src_ldts, + src_source=src_source, source_model=source_model) -}} -{{- dbtvault.check_required_parameters(src_pk=src_pk, src_cdk=src_cdk, src_hashdiff=src_hashdiff, - src_payload=src_payload, src_ldts=src_ldts, src_source=src_source, - source_model=source_model) -}} +{%- endmacro %} -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_cdk = dbtvault.escape_column_names(src_cdk) -%} -{%- set src_hashdiff = dbtvault.escape_column_names(src_hashdiff) -%} -{%- set src_payload = dbtvault.escape_column_names(src_payload) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} -{%- set src_source = dbtvault.escape_column_names(src_source) -%} +{%- macro default__ma_sat(src_pk, src_cdk, src_hashdiff, src_payload, src_extra_columns, src_eff, src_ldts, src_source, source_model) -%} -{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, src_cdk, src_payload, src_eff, src_ldts, src_source]) -%} +{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_cdk, src_payload, src_extra_columns, src_hashdiff, src_eff, src_ldts, src_source]) -%} {%- set rank_cols = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, src_ldts]) -%} {%- set cdk_cols = dbtvault.expand_column_list(columns=[src_cdk]) -%} {%- set cols_for_latest = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, src_cdk, src_ldts]) %} {%- if model.config.materialized == 'vault_insert_by_rank' -%} {%- set source_cols_with_rank = source_cols + [config.get('rank_column')] -%} -{%- endif -%} - -{{ dbtvault.prepend_generated_by() }} +{%- endif %} {# Select unique source records #} WITH source_data AS ( @@ -59,27 +63,31 @@ WITH source_data AS ( {# Select latest records from satellite, 
restricted to PKs in source data -#} latest_records AS ( - SELECT {{ dbtvault.prefix(cols_for_latest, 'mas', alias_target='target') }} - ,mas.latest_rank - ,DENSE_RANK() OVER (PARTITION BY {{ dbtvault.prefix([src_pk], 'mas') }} - ORDER BY {{ dbtvault.prefix([src_hashdiff], 'mas', alias_target='target') }}, {{ dbtvault.prefix(cdk_cols, 'mas') }} ASC) AS check_rank - FROM - ( - SELECT {{ dbtvault.prefix(cols_for_latest, 'inner_mas', alias_target='target') }} - ,RANK() OVER (PARTITION BY {{ dbtvault.prefix([src_pk], 'inner_mas') }} - ORDER BY {{ dbtvault.prefix([src_ldts], 'inner_mas') }} DESC) AS latest_rank + SELECT {{ dbtvault.prefix(cols_for_latest, 'mas', alias_target='target') }}, + mas.latest_rank, + DENSE_RANK() OVER (PARTITION BY {{ dbtvault.prefix([src_pk], 'mas') }} + ORDER BY {{ dbtvault.prefix([src_hashdiff], 'mas', alias_target='target') }}, {{ dbtvault.prefix(cdk_cols, 'mas') }} ASC + ) AS check_rank + FROM ( + SELECT {{ dbtvault.prefix(cols_for_latest, 'inner_mas', alias_target='target') }}, + RANK() OVER (PARTITION BY {{ dbtvault.prefix([src_pk], 'inner_mas') }} + ORDER BY {{ dbtvault.prefix([src_ldts], 'inner_mas') }} DESC + ) AS latest_rank FROM {{ this }} AS inner_mas - INNER JOIN (SELECT DISTINCT {{ dbtvault.prefix([src_pk], 's') }} FROM source_data as s ) AS spk - ON {{ dbtvault.multikey([src_pk], prefix=['inner_mas', 'spk'], condition='=') }} - QUALIFY latest_rank = 1 + INNER JOIN ( + SELECT DISTINCT {{ dbtvault.prefix([src_pk], 's') }} + FROM source_data as s + ) AS spk + ON {{ dbtvault.multikey([src_pk], prefix=['inner_mas', 'spk'], condition='=') }} + QUALIFY latest_rank = 1 ) AS mas ), {# Select summary details for each group of latest records -#} latest_group_details AS ( - SELECT {{ dbtvault.prefix([src_pk], 'lr') }} - ,{{ dbtvault.prefix([src_ldts], 'lr') }} - ,MAX(lr.check_rank) AS latest_count + SELECT {{ dbtvault.prefix([src_pk], 'lr') }}, + {{ dbtvault.prefix([src_ldts], 'lr') }}, + MAX(lr.check_rank) AS latest_count FROM 
latest_records AS lr GROUP BY {{ dbtvault.prefix([src_pk], 'lr') }}, {{ dbtvault.prefix([src_ldts], 'lr') }} ), @@ -94,17 +102,14 @@ records_to_insert AS ( {# if any_incremental -#} {% if dbtvault.is_any_incremental() %} - WHERE EXISTS - ( + WHERE EXISTS ( SELECT 1 FROM source_data AS stage - WHERE NOT EXISTS - ( + WHERE NOT EXISTS ( SELECT 1 - FROM - ( - SELECT {{ dbtvault.prefix(cols_for_latest, 'lr', alias_target='target') }} - ,lg.latest_count + FROM ( + SELECT {{ dbtvault.prefix(cols_for_latest, 'lr', alias_target='target') }}, + lg.latest_count FROM latest_records AS lr INNER JOIN latest_group_details AS lg ON {{ dbtvault.multikey([src_pk], prefix=['lr', 'lg'], condition='=') }} @@ -123,4 +128,4 @@ records_to_insert AS ( SELECT * FROM records_to_insert -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/snowflake/pit.sql b/macros/tables/snowflake/pit.sql index 2901a315b..8bfaa222c 100644 --- a/macros/tables/snowflake/pit.sql +++ b/macros/tables/snowflake/pit.sql @@ -1,32 +1,35 @@ -{%- macro pit(src_pk, as_of_dates_table, satellites, stage_tables, src_ldts, source_model ) -%} - -{# TODO Should the length of the ghost_pk zero hash be determined by the hashing option being used, i.e. MD5 = 16, SHA = 32 ? #} - - {{- adapter.dispatch('pit', 'dbtvault')(source_model=source_model, src_pk=src_pk, - as_of_dates_table=as_of_dates_table, - satellites=satellites, - stage_tables=stage_tables, - src_ldts=src_ldts) -}} +{%- macro pit(src_pk, src_extra_columns, as_of_dates_table, satellites, stage_tables_ldts, src_ldts, source_model) -%} + + {%- if dbtvault.is_something(src_extra_columns) and execute -%} + {%- do exceptions.warn("WARNING: src_extra_columns not yet available for PITs or Bridges. 
This parameter will be ignored.") -%} + {%- endif -%} + + {{- dbtvault.check_required_parameters(src_pk=src_pk, + as_of_dates_table=as_of_dates_table, + satellites=satellites, + stage_tables_ldts=stage_tables_ldts, + src_ldts=src_ldts, + source_model=source_model) -}} + + {%- set src_pk = dbtvault.escape_column_names(src_pk) -%} + {%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} + + {{- dbtvault.prepend_generated_by() }} + + {%- for stg in stage_tables_ldts %} + {{ "-- depends_on: " ~ ref(stg) -}} + {%- endfor -%} + + {{ adapter.dispatch('pit', 'dbtvault')(src_pk=src_pk, + src_extra_columns=src_extra_columns, + as_of_dates_table=as_of_dates_table, + satellites=satellites, + stage_tables_ldts=stage_tables_ldts, + src_ldts=src_ldts, + source_model=source_model) -}} {%- endmacro -%} -{%- macro default__pit(src_pk, as_of_dates_table, satellites, stage_tables, src_ldts, source_model) -%} - -{{- dbtvault.check_required_parameters(source_model=source_model, src_pk=src_pk, - satellites=satellites, - stage_tables=stage_tables, - src_ldts=src_ldts) -}} - -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} - -{{ dbtvault.prepend_generated_by() }} - -{%- if (as_of_dates_table is none) and execute -%} - {%- set error_message -%} - "PIT error: Missing as_of_dates table configuration. A as_of_dates_table must be provided." 
- {%- endset -%} - {{- exceptions.raise_compiler_error(error_message) -}} -{%- endif -%} +{%- macro default__pit(src_pk, src_extra_columns, as_of_dates_table, satellites, stage_tables_ldts, src_ldts, source_model) -%} {#- Acquiring the source relation for the AS_OF table -#} {%- if as_of_dates_table is mapping and as_of_dates_table is not none -%} @@ -41,16 +44,10 @@ {%- set ghost_pk = '0000000000000000' -%} {%- set ghost_date = '1900-01-01 00:00:00.000' %} -{# Stating the dependancies on the stage tables outside of the If STATEMENT #} -{% for stg in stage_tables -%} - {{ "-- depends_on: " ~ ref(stg) }} -{% endfor %} - -{#- Setting the new AS_OF dates CTE name -#} {%- if dbtvault.is_any_incremental() -%} -{%- set new_as_of_dates_cte = 'new_rows_as_of' -%} + {%- set new_as_of_dates_cte = 'new_rows_as_of' -%} {%- else -%} -{%- set new_as_of_dates_cte = 'as_of_dates' -%} + {%- set new_as_of_dates_cte = 'as_of_dates' -%} {%- endif %} WITH as_of_dates AS ( @@ -59,73 +56,7 @@ WITH as_of_dates AS ( {%- if dbtvault.is_any_incremental() %} -last_safe_load_datetime AS ( - SELECT MIN(LOAD_DATETIME) AS LAST_SAFE_LOAD_DATETIME FROM ( - {%- for stg in stage_tables -%} - {%- set stage_ldts = stage_tables[stg] %} - SELECT MIN({{ stage_ldts }}) AS LOAD_DATETIME FROM {{ (ref(stg)) }} - {{ "UNION ALL" if not loop.last }} - {%- endfor %} - ) a -), - -as_of_grain_old_entries AS ( - SELECT DISTINCT AS_OF_DATE FROM {{ this }} -), - -as_of_grain_lost_entries AS ( - SELECT a.AS_OF_DATE - FROM as_of_grain_old_entries AS a - LEFT OUTER JOIN as_of_dates AS b - ON a.AS_OF_DATE = b.AS_OF_DATE - WHERE b.AS_OF_DATE IS NULL -), - -as_of_grain_new_entries AS ( - SELECT a.AS_OF_DATE - FROM as_of_dates AS a - LEFT OUTER JOIN as_of_grain_old_entries AS b - ON a.AS_OF_DATE = b.AS_OF_DATE - WHERE b.AS_OF_DATE IS NULL -), - -min_date AS ( - SELECT min(AS_OF_DATE) AS MIN_DATE - FROM as_of_dates -), - -backfill_as_of AS ( - SELECT AS_OF_DATE - FROM as_of_dates AS a - WHERE a.AS_OF_DATE < (SELECT 
LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) -), - -new_rows_pks AS ( - SELECT {{ dbtvault.prefix([src_pk], 'a') }} - FROM {{ ref(source_model) }} AS a - WHERE a.{{ src_ldts }} >= (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) -), - -new_rows_as_of AS ( - SELECT AS_OF_DATE - FROM as_of_dates AS a - WHERE a.AS_OF_DATE >= (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) - UNION - SELECT AS_OF_DATE - FROM as_of_grain_new_entries -), - -overlap AS ( - SELECT a.* - FROM {{ this }} AS a - INNER JOIN {{ ref(source_model) }} as b - ON {{ dbtvault.multikey(src_pk, prefix=['a','b'], condition='=') }} - WHERE a.AS_OF_DATE >= (SELECT MIN_DATE FROM min_date) - AND a.AS_OF_DATE < (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) - AND a.AS_OF_DATE NOT IN (SELECT AS_OF_DATE FROM as_of_grain_lost_entries) -), - -{#- Back-fill any newly arrived hubs, set all historical pit dates to ghost records -#} +{{ dbtvault.as_of_date_window(src_pk, src_ldts, stage_tables_ldts, ref(source_model)) }}, backfill_rows_as_of_dates AS ( SELECT @@ -133,24 +64,30 @@ backfill_rows_as_of_dates AS ( b.AS_OF_DATE FROM new_rows_pks AS a INNER JOIN backfill_as_of AS b - ON (1=1 ) + ON (1=1) ), backfill AS ( SELECT {{ dbtvault.prefix([src_pk], 'a') }}, a.AS_OF_DATE, - {%- for sat_name in satellites -%} - {%- set sat_pk_name = (satellites[sat_name]['pk'].keys() | list )[0] | upper -%} - {%- set sat_ldts_name = (satellites[sat_name]['ldts'].keys() | list )[0] | upper -%} - {%- set sat_name = sat_name | upper %} - {%- if target.type == "sqlserver" -%} - CONVERT(BINARY(16), '{{ ghost_pk }}', 2) AS {{ dbtvault.escape_column_names( sat_name ~ '_' ~ sat_pk_name ) }}, - {%- else -%} - CAST('{{ ghost_pk }}' AS BINARY(16)) AS {{ dbtvault.escape_column_names( sat_name ~ '_' ~ sat_pk_name ) }}, - {%- endif -%} - CAST('{{ ghost_date }}' AS {{ dbtvault.type_timestamp() }}) AS {{ dbtvault.escape_column_names( sat_name ~ '_' ~ sat_ldts_name ) }} + + {% for sat_name in 
satellites -%} + {%- set sat_pk_name = (satellites[sat_name]['pk'].keys() | list )[0] -%} + {%- set sat_ldts_name = (satellites[sat_name]['ldts'].keys() | list )[0] -%} + {%- set sat_name = sat_name %} + + {% if target.type == "sqlserver" %} + CONVERT({{ dbtvault.type_binary() }}, '{{ ghost_pk }}', 2) AS {{ dbtvault.escape_column_names("{}_{}".format(sat_name, sat_pk_name)) }}, + CAST('{{ ghost_date }}' AS {{ dbtvault.type_timestamp() }}) AS {{ dbtvault.escape_column_names("{}_{}".format(sat_name, sat_ldts_name)) }} + {% else %} + CAST('{{ ghost_pk }}' AS {{ dbtvault.type_binary() }}) AS {{ dbtvault.escape_column_names("{}_{}".format(sat_name, sat_pk_name)) }}, + CAST('{{ ghost_date }}' AS {{ dbtvault.type_timestamp() }}) AS {{ dbtvault.escape_column_names("{}_{}".format(sat_name, sat_ldts_name)) }} + {% endif -%} + + {{- ',' if not loop.last -}} + {%- endfor %} FROM backfill_rows_as_of_dates AS a @@ -159,9 +96,10 @@ backfill AS ( {%- set sat_ldts_name = (satellites[sat_name]['ldts'].keys() | list )[0] -%} {%- set sat_pk = dbtvault.escape_column_names(satellites[sat_name]['pk'][sat_pk_name]) -%} {%- set sat_ldts = dbtvault.escape_column_names(satellites[sat_name]['ldts'][sat_ldts_name]) -%} - LEFT JOIN {{ ref(sat_name) }} AS {{ dbtvault.escape_column_names( sat_name | lower ~ '_src' ) }} - {{ "ON" | indent(4) }} a.{{ src_pk }} = {{ dbtvault.escape_column_names( sat_name | lower ~ '_src' ) }}.{{ sat_pk }} - {{ "AND" | indent(4) }} {{ dbtvault.escape_column_names( sat_name | lower ~ '_src' ) }}.{{ sat_ldts }} <= a.AS_OF_DATE + + LEFT JOIN {{ ref(sat_name) }} AS {{ sat_name | lower ~ '_src' }} + ON a.{{ src_pk }} = {{ sat_name | lower ~ '_src' }}.{{ sat_pk }} + AND {{ sat_name | lower ~ '_src' }}.{{ sat_ldts }} <= a.AS_OF_DATE {% endfor -%} GROUP BY @@ -182,19 +120,33 @@ new_rows AS ( SELECT {{ dbtvault.prefix([src_pk], 'a') }}, a.AS_OF_DATE, - {%- for sat_name in satellites -%} + {%- for sat_name in satellites %} {%- set sat_pk_name = 
(satellites[sat_name]['pk'].keys() | list )[0] -%} {%- set sat_ldts_name = (satellites[sat_name]['ldts'].keys() | list )[0] -%} {%- set sat_pk = dbtvault.escape_column_names(satellites[sat_name]['pk'][sat_pk_name]) -%} - {%- set sat_ldts = dbtvault.escape_column_names(satellites[sat_name]['ldts'][sat_ldts_name]) %} + {%- set sat_ldts = dbtvault.escape_column_names(satellites[sat_name]['ldts'][sat_ldts_name]) -%} + {%- if target.type == "sqlserver" -%} - COALESCE(MAX({{ dbtvault.escape_column_names( sat_name | lower ~ '_src' ) }}.{{ sat_pk }}), CONVERT(BINARY(16), '{{ ghost_pk }}', 2)) AS {{ dbtvault.escape_column_names( sat_name | upper ~ '_' ~ sat_pk_name | upper ) }}, - {%- else -%} - COALESCE(MAX({{ dbtvault.escape_column_names( sat_name | lower ~ '_src' ) }}.{{ sat_pk }}), CAST('{{ ghost_pk }}' AS BINARY(16))) AS {{ dbtvault.escape_column_names( sat_name | upper ~ '_' ~ sat_pk_name | upper ) }}, - {%- endif -%} - COALESCE(MAX({{ dbtvault.escape_column_names( sat_name | lower ~ '_src' ) }}.{{ sat_ldts }}), CAST('{{ ghost_date }}' AS {{ dbtvault.type_timestamp() }})) AS {{ dbtvault.escape_column_names( sat_name | upper ~ '_' ~ sat_ldts_name | upper ) }} + + COALESCE(MAX({{ sat_name | lower ~ '_src' }}.{{ sat_pk }}), + CONVERT({{ dbtvault.type_binary() }}, '{{ ghost_pk }}', 2)) + AS {{ dbtvault.escape_column_names("{}_{}".format(sat_name, sat_pk_name)) }}, + + {%- else %} + + COALESCE(MAX({{ sat_name | lower ~ '_src' }}.{{ sat_pk }}), + CAST('{{ ghost_pk }}' AS {{ dbtvault.type_binary() }})) + AS {{ dbtvault.escape_column_names("{}_{}".format(sat_name, sat_pk_name)) }}, + + {%- endif %} + + COALESCE(MAX({{ sat_name | lower ~ '_src' }}.{{ sat_ldts }}), + CAST('{{ ghost_date }}' AS {{ dbtvault.type_timestamp() }})) + AS {{ dbtvault.escape_column_names("{}_{}".format(sat_name, sat_ldts_name)) }} + {{- "," if not loop.last }} {%- endfor %} + FROM new_rows_as_of_dates AS a {% for sat_name in satellites -%} @@ -202,26 +154,28 @@ new_rows AS ( {%- set sat_ldts_name = 
(satellites[sat_name]['ldts'].keys() | list )[0] -%} {%- set sat_pk = dbtvault.escape_column_names(satellites[sat_name]['pk'][sat_pk_name]) -%} {%- set sat_ldts = dbtvault.escape_column_names(satellites[sat_name]['ldts'][sat_ldts_name]) -%} - LEFT JOIN {{ ref(sat_name) }} AS {{ dbtvault.escape_column_names( sat_name | lower ~ '_src' ) }} - {{ "ON" | indent(4) }} a.{{ src_pk }} = {{ dbtvault.escape_column_names( sat_name | lower ~ '_src' ) }}.{{ sat_pk }} - {{ "AND" | indent(4) }} {{ dbtvault.escape_column_names( sat_name | lower ~ '_src' ) }}.{{ sat_ldts }} <= a.AS_OF_DATE + + LEFT JOIN {{ ref(sat_name) }} AS {{ sat_name | lower ~ '_src'}} + ON a.{{ src_pk }} = {{ sat_name | lower }}_src.{{ sat_pk }} + AND {{ sat_name | lower ~ '_src'}}.{{ sat_ldts }} <= a.AS_OF_DATE + {% endfor -%} GROUP BY - {{ dbtvault.prefix([src_pk], 'a') }}, a.AS_OF_DATE + {{ dbtvault.prefix([src_pk], 'a') }}, + a.AS_OF_DATE ), pit AS ( SELECT * FROM new_rows -{%- if dbtvault.is_any_incremental() %} + {%- if dbtvault.is_any_incremental() %} UNION ALL - SELECT * FROM overlap + SELECT * FROM overlap_pks UNION ALL SELECT * FROM backfill - -{%- endif %} + {% endif %} ) SELECT DISTINCT * FROM pit -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/snowflake/sat.sql b/macros/tables/snowflake/sat.sql index 029844a4d..ed9fb4390 100644 --- a/macros/tables/snowflake/sat.sql +++ b/macros/tables/snowflake/sat.sql @@ -1,32 +1,42 @@ -{%- macro sat(src_pk, src_hashdiff, src_payload, src_eff, src_ldts, src_source, source_model) -%} +{%- macro sat(src_pk, src_hashdiff, src_payload, src_extra_columns, src_eff, src_ldts, src_source, source_model) -%} - {{- adapter.dispatch('sat', 'dbtvault')(src_pk=src_pk, src_hashdiff=src_hashdiff, - src_payload=src_payload, src_eff=src_eff, src_ldts=src_ldts, - src_source=src_source, source_model=source_model) -}} + {{- dbtvault.check_required_parameters(src_pk=src_pk, src_hashdiff=src_hashdiff, src_payload=src_payload, + 
src_ldts=src_ldts, src_source=src_source, + source_model=source_model) -}} -{%- endmacro %} + {%- set src_payload = dbtvault.process_payload_column_excludes( + src_pk=src_pk, src_hashdiff=src_hashdiff, + src_payload=src_payload, src_extra_columns=src_extra_columns, src_eff=src_eff, + src_ldts=src_ldts, src_source=src_source, source_model=source_model) -%} -{%- macro default__sat(src_pk, src_hashdiff, src_payload, src_eff, src_ldts, src_source, source_model) -%} + {%- set src_pk = dbtvault.escape_column_names(src_pk) -%} + {%- set src_hashdiff = dbtvault.escape_column_names(src_hashdiff) -%} -{{- dbtvault.check_required_parameters(src_pk=src_pk, src_hashdiff=src_hashdiff, src_payload=src_payload, - src_ldts=src_ldts, src_source=src_source, - source_model=source_model) -}} + {%- set src_payload = dbtvault.escape_column_names(src_payload) -%} -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_hashdiff = dbtvault.escape_column_names(src_hashdiff) -%} -{%- set src_payload = dbtvault.escape_column_names(src_payload) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} -{%- set src_source = dbtvault.escape_column_names(src_source) -%} + {%- set src_extra_columns = dbtvault.escape_column_names(src_extra_columns) -%} + {%- set src_eff = dbtvault.escape_column_names(src_eff) -%} + {%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} + {%- set src_source = dbtvault.escape_column_names(src_source) -%} -{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, src_payload, src_eff, src_ldts, src_source]) -%} -{%- set rank_cols = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, src_ldts]) -%} + {{ dbtvault.prepend_generated_by() }} + + {{ adapter.dispatch('sat', 'dbtvault')(src_pk=src_pk, src_hashdiff=src_hashdiff, + src_payload=src_payload, src_extra_columns=src_extra_columns, + src_eff=src_eff, src_ldts=src_ldts, + src_source=src_source, source_model=source_model) -}} + +{%- endmacro -%} + +{%- macro 
default__sat(src_pk, src_hashdiff, src_payload, src_extra_columns, src_eff, src_ldts, src_source, source_model) -%} + +{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, src_payload, src_extra_columns, src_eff, src_ldts, src_source]) -%} +{%- set window_cols = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, src_ldts]) -%} {%- set pk_cols = dbtvault.expand_column_list(columns=[src_pk]) -%} {%- if model.config.materialized == 'vault_insert_by_rank' %} {%- set source_cols_with_rank = source_cols + dbtvault.escape_column_names([config.get('rank_column')]) -%} -{%- endif -%} - -{{ dbtvault.prepend_generated_by() }} +{%- endif %} WITH source_data AS ( {%- if model.config.materialized == 'vault_insert_by_rank' %} @@ -43,15 +53,15 @@ WITH source_data AS ( {% endif %} ), -{%- if dbtvault.is_any_incremental() %} +{% if dbtvault.is_any_incremental() %} latest_records AS ( - SELECT {{ dbtvault.prefix(rank_cols, 'a', alias_target='target') }} + SELECT {{ dbtvault.prefix(window_cols, 'a', alias_target='target') }} FROM ( - SELECT {{ dbtvault.prefix(rank_cols, 'current_records', alias_target='target') }}, + SELECT {{ dbtvault.prefix(window_cols, 'current_records', alias_target='target') }}, RANK() OVER ( - PARTITION BY {{ dbtvault.prefix([src_pk], 'current_records') }} - ORDER BY {{ dbtvault.prefix([src_ldts], 'current_records') }} DESC + PARTITION BY {{ dbtvault.prefix([src_pk], 'current_records') }} + ORDER BY {{ dbtvault.prefix([src_ldts], 'current_records') }} DESC ) AS rank FROM {{ this }} AS current_records JOIN ( @@ -63,16 +73,16 @@ latest_records AS ( WHERE a.rank = 1 ), -{%- endif %} +{%- endif -%} records_to_insert AS ( SELECT DISTINCT {{ dbtvault.alias_all(source_cols, 'stage') }} FROM source_data AS stage {%- if dbtvault.is_any_incremental() %} - LEFT JOIN latest_records - ON {{ dbtvault.multikey(src_pk, prefix=['latest_records','stage'], condition='=') }} - WHERE {{ dbtvault.prefix([src_hashdiff], 'latest_records', 
alias_target='target') }} != {{ dbtvault.prefix([src_hashdiff], 'stage') }} - OR {{ dbtvault.prefix([src_hashdiff], 'latest_records', alias_target='target') }} IS NULL + LEFT JOIN latest_records + ON {{ dbtvault.multikey(src_pk, prefix=['latest_records','stage'], condition='=') }} + AND {{ dbtvault.prefix([src_hashdiff], 'latest_records', alias_target='target') }} = {{ dbtvault.prefix([src_hashdiff], 'stage') }} + WHERE {{ dbtvault.prefix([src_hashdiff], 'latest_records', alias_target='target') }} IS NULL {%- endif %} ) diff --git a/macros/tables/snowflake/t_link.sql b/macros/tables/snowflake/t_link.sql index dbe4f0253..3a6bd756c 100644 --- a/macros/tables/snowflake/t_link.sql +++ b/macros/tables/snowflake/t_link.sql @@ -1,28 +1,30 @@ -{%- macro t_link(src_pk, src_fk, src_payload, src_eff, src_ldts, src_source, source_model) -%} +{%- macro t_link(src_pk, src_fk, src_payload, src_extra_columns, src_eff, src_ldts, src_source, source_model) -%} - {{- adapter.dispatch('t_link', 'dbtvault')(src_pk=src_pk, src_fk=src_fk, src_payload=src_payload, - src_eff=src_eff, src_ldts=src_ldts, src_source=src_source, - source_model=source_model) -}} + {{- dbtvault.check_required_parameters(src_pk=src_pk, src_fk=src_fk, src_eff=src_eff, + src_ldts=src_ldts, src_source=src_source, + source_model=source_model) -}} -{%- endmacro %} + {%- set src_pk = dbtvault.escape_column_names(src_pk) -%} + {%- set src_fk = dbtvault.escape_column_names(src_fk) -%} + {%- set src_payload = dbtvault.escape_column_names(src_payload) -%} + {%- set src_extra_columns = dbtvault.escape_column_names(src_extra_columns) -%} + {%- set src_eff = dbtvault.escape_column_names(src_eff) -%} + {%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} + {%- set src_source = dbtvault.escape_column_names(src_source) -%} -{%- macro default__t_link(src_pk, src_fk, src_payload, src_eff, src_ldts, src_source, source_model) -%} + {{ dbtvault.prepend_generated_by() }} -{{- dbtvault.check_required_parameters(src_pk=src_pk, 
src_fk=src_fk, src_eff=src_eff, - src_ldts=src_ldts, src_source=src_source, - source_model=source_model) -}} + {{ adapter.dispatch('t_link', 'dbtvault')(src_pk=src_pk, src_fk=src_fk, src_payload=src_payload, + src_extra_columns=src_extra_columns, + src_eff=src_eff, src_ldts=src_ldts, src_source=src_source, + source_model=source_model) -}} -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_fk = dbtvault.escape_column_names(src_fk) -%} -{%- set src_payload = dbtvault.escape_column_names(src_payload) -%} -{%- set src_eff = dbtvault.escape_column_names(src_eff) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} -{%- set src_source = dbtvault.escape_column_names(src_source) -%} +{%- endmacro %} -{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_fk, src_payload, src_eff, src_ldts, src_source]) -%} -{%- set fk_cols = dbtvault.expand_column_list([src_fk]) -%} +{%- macro default__t_link(src_pk, src_fk, src_payload, src_extra_columns, src_eff, src_ldts, src_source, source_model) -%} -{{ dbtvault.prepend_generated_by() }} +{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_fk, src_payload, src_extra_columns, src_eff, src_ldts, src_source]) -%} +{%- set fk_cols = dbtvault.expand_column_list([src_fk]) %} WITH stage AS ( SELECT {{ source_cols | join(', ') }} diff --git a/macros/tables/snowflake/xts.sql b/macros/tables/snowflake/xts.sql index ea9a75826..57666465b 100644 --- a/macros/tables/snowflake/xts.sql +++ b/macros/tables/snowflake/xts.sql @@ -1,41 +1,74 @@ -{%- macro xts(src_pk, src_satellite, src_ldts, src_source, source_model) -%} - {{- adapter.dispatch('xts', 'dbtvault')(src_pk=src_pk, +{%- macro xts(src_pk, src_satellite, src_extra_columns, src_ldts, src_source, source_model) -%} + + {{- dbtvault.check_required_parameters(src_pk=src_pk, src_satellite=src_satellite, + src_ldts=src_ldts, src_source=src_source, + source_model=source_model) -}} + + {%- set src_pk = 
dbtvault.escape_column_names(src_pk) -%} + {%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} + {%- set src_extra_columns = dbtvault.escape_column_names(src_extra_columns) -%} + {%- set src_source = dbtvault.escape_column_names(src_source) -%} + + {%- if not dbtvault.is_list(source_model) -%} + {%- set source_model = [source_model] -%} + {%- endif -%} + + {{ dbtvault.prepend_generated_by() }} + + {{ adapter.dispatch('xts', 'dbtvault')(src_pk=src_pk, src_satellite=src_satellite, + src_extra_columns=src_extra_columns, src_ldts=src_ldts, src_source=src_source, source_model=source_model) -}} {%- endmacro -%} -{%- macro default__xts(src_pk, src_satellite, src_ldts, src_source, source_model) -%} - -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} -{%- set src_source = dbtvault.escape_column_names(src_source) -%} +{%- macro default__xts(src_pk, src_satellite, src_extra_columns, src_ldts, src_source, source_model) -%} -{{ dbtvault.prepend_generated_by() }} +{%- set hashdiff_escaped = dbtvault.escape_column_names('HASHDIFF') -%} +{%- set satellite_name_escaped = dbtvault.escape_column_names('SATELLITE_NAME') %} +{%- set satellite_count = src_satellite.keys() | list | length %} +{%- set stage_count = source_model | length %} -{%- if not (source_model is iterable and source_model is not string) -%} - {%- set source_model = [source_model] -%} +{%- if execute -%} + {%- do dbt_utils.log_info('Loading {} from {} source(s) and {} satellite(s)'.format("{}.{}.{}".format(this.database, this.schema, this.identifier), + stage_count, satellite_count)) -%} {%- endif %} +{%- set ns = namespace(last_cte= "") %} + {{ 'WITH ' }} -{%- for src in source_model %} +{%- for src in source_model -%} {%- for satellite in src_satellite.items() -%} - {%- set satellite_name = (satellite[1]['sat_name'].values() | list) [0] -%} - {%- set hashdiff = (satellite[1]['hashdiff'].values() | list) [0] %} - - satellite_{{ 
satellite_name }}_from_{{ src }} AS ( - SELECT {{ dbtvault.prefix([src_pk], 's') }}, s.{{ dbtvault.escape_column_names(hashdiff) }} AS HASHDIFF, s.{{ dbtvault.escape_column_names(satellite_name) }} AS SATELLITE_NAME, s.{{ src_ldts }}, s.{{ src_source }} - FROM {{ ref(src) }} AS s - WHERE {{ dbtvault.multikey(src_pk, prefix='s', condition='IS NOT NULL') }} - ), + {%- set satellite_name = (satellite[1]['sat_name'].values() | list)[0] -%} + {%- set hashdiff = (satellite[1]['hashdiff'].values() | list)[0] %} + {%- set cte_name = "satellite_{}_from_{}".format(satellite_name, src) | lower %} + +{{ cte_name }} AS ( + SELECT {{ dbtvault.prefix([src_pk], 's') }}, + s.{{ dbtvault.escape_column_names(hashdiff) }} AS {{ hashdiff_escaped }}, + s.{{ dbtvault.escape_column_names(satellite_name) }} AS {{ satellite_name_escaped }}, + {%- if dbtvault.is_something(src_extra_columns) -%} + {{ dbtvault.prefix([src_extra_columns], 's') }}, + {%- endif %} + s.{{ src_ldts }}, + s.{{ src_source }} + FROM {{ ref(src) }} AS s + WHERE {{ dbtvault.multikey(src_pk, prefix='s', condition='IS NOT NULL') }} +), + + {%- set ns.last_cte = cte_name %} {%- endfor %} {%- endfor %} +{%- if stage_count > 1 or satellite_count > 1 %} + union_satellites AS ( {%- for src in source_model %} {%- for satellite in src_satellite.items() %} - SELECT * FROM satellite_{{ (satellite[1]['sat_name'].values() | list) [0] }}_from_{{ src }} + {%- set satellite_name = (satellite[1]['sat_name'].values() | list)[0] %} + {%- set cte_name = "satellite_{}_from_{}".format(satellite_name, src) | lower %} + SELECT * FROM {{ cte_name }} {%- if not loop.last %} UNION ALL {%- endif %} @@ -45,18 +78,30 @@ union_satellites AS ( {%- endif %} {%- endfor %} ), +{%- set ns.last_cte = "union_satellites" -%} +{%- endif %} records_to_insert AS ( - SELECT DISTINCT union_satellites.* FROM union_satellites - {%- if dbtvault.is_vault_insert_by_period() or is_incremental() %} + SELECT DISTINCT + {{ dbtvault.prefix([src_pk], 'a') }}, + a.{{ 
hashdiff_escaped }}, + a.{{ satellite_name_escaped }} , + {%- if dbtvault.is_something(src_extra_columns) -%} + {{ dbtvault.prefix([src_extra_columns], 'a') }}, + {%- endif %} + a.{{ src_ldts }}, + a.{{ src_source }} + FROM {{ ns.last_cte }} AS a + {%- if dbtvault.is_any_incremental() %} LEFT JOIN {{ this }} AS d - ON (union_satellites.{{ 'HASHDIFF' }} = d.{{ 'HASHDIFF' }} - AND union_satellites.{{ src_ldts }} = d.{{ src_ldts }} - AND union_satellites.{{ 'SATELLITE_NAME' }} = d.{{ 'SATELLITE_NAME' }} - ) - WHERE {{ dbtvault.prefix(['HASHDIFF'], 'd') }} IS NULL - AND {{ dbtvault.prefix([ src_ldts ], 'd') }} IS NULL - AND {{ dbtvault.prefix([ 'SATELLITE_NAME' ], 'd') }} IS NULL + ON ( + a.{{ hashdiff_escaped }} = d.{{ hashdiff_escaped }} + AND a.{{ src_ldts }} = d.{{ src_ldts }} + AND a.{{ satellite_name_escaped }} = d.{{ satellite_name_escaped }} + ) + WHERE d.{{ hashdiff_escaped }} IS NULL + AND d.{{ src_ldts }} IS NULL + AND d.{{ satellite_name_escaped }} IS NULL {%- endif %} ) diff --git a/macros/tables/sqlserver/bridge.sql b/macros/tables/sqlserver/bridge.sql index 08bb48e3a..04bb1d0f2 100644 --- a/macros/tables/sqlserver/bridge.sql +++ b/macros/tables/sqlserver/bridge.sql @@ -1,277 +1,11 @@ -{%- macro sqlserver__bridge(src_pk, as_of_dates_table, bridge_walk, stage_tables_ldts, src_ldts, source_model) -%} +{%- macro sqlserver__bridge(src_pk, as_of_dates_table, bridge_walk, stage_tables_ldts, src_extra_columns, src_ldts, source_model) -%} -{{- dbtvault.check_required_parameters(source_model=source_model, src_pk=src_pk, - bridge_walk=bridge_walk, - stage_tables_ldts=stage_tables_ldts, - src_ldts=src_ldts) -}} +{{ dbtvault.default__bridge(src_pk=src_pk, + as_of_dates_table=as_of_dates_table, + bridge_walk=bridge_walk, + stage_tables_ldts=stage_tables_ldts, + src_extra_columns=src_extra_columns, + src_ldts=src_ldts, + source_model=source_model) }} -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) 
-%} - -{{ dbtvault.prepend_generated_by() }} - -{%- if (as_of_dates_table is none) and execute -%} - {%- set error_message -%} - "Bridge error: Missing as_of_dates table configuration. A as_of_dates_table must be provided." - {%- endset -%} - {{- exceptions.raise_compiler_error(error_message) -}} -{%- endif -%} - -{#- Acquiring the source relation for the AS_OF table -#} -{%- if as_of_dates_table is mapping and as_of_dates_table is not none -%} - {%- set source_name = as_of_dates_table | first -%} - {%- set source_table_name = as_of_dates_table[source_name] -%} - {%- set source_relation = source(source_name, source_table_name) -%} -{%- elif as_of_dates_table is not mapping and as_of_dates_table is not none -%} - {%- set source_relation = ref(as_of_dates_table) -%} -{%- endif -%} - -{%- set max_datetime = var('max_datetime', '9999-12-31 23:59:59.9999999') -%} - -{#- Stating the dependencies on the stage tables outside of the If STATEMENT -#} -{% for stg in stage_tables_ldts -%} - {{- "-- depends_on: " ~ ref(stg) -}} -{%- endfor %} - -{#- Setting the new AS_OF dates CTE name -#} -{%- if dbtvault.is_any_incremental() -%} - {%- set new_as_of_dates_cte = 'NEW_ROWS_AS_OF' -%} -{%- else -%} - {%- set new_as_of_dates_cte = 'AS_OF' -%} -{%- endif %} - -WITH as_of AS ( - SELECT a.AS_OF_DATE - FROM {{ source_relation }} AS a - WHERE a.AS_OF_DATE <= GETDATE() -), - -{%- if dbtvault.is_any_incremental() %} - -last_safe_load_datetime AS ( - SELECT MIN(LOAD_DATETIME) AS LAST_SAFE_LOAD_DATETIME - FROM ( - {%- filter indent(width=8) -%} - {%- for stg in stage_tables_ldts -%} - {%- set stage_ldts =(stage_tables_ldts[stg]) -%} - {{ "SELECT MIN(" ~ stage_ldts ~ ") AS LOAD_DATETIME FROM " ~ ref(stg) }} - {{ "UNION ALL" if not loop.last }} - {% endfor -%} - {%- endfilter -%} - ) AS l -), - -as_of_grain_old_entries AS ( - SELECT DISTINCT AS_OF_DATE - FROM {{ this }} -), - -as_of_grain_lost_entries AS ( - SELECT a.AS_OF_DATE - FROM as_of_grain_old_entries AS a - LEFT OUTER JOIN as_of AS b 
- ON a.AS_OF_DATE = b.AS_OF_DATE - WHERE b.AS_OF_DATE IS NULL -), - -as_of_grain_new_entries AS ( - SELECT a.AS_OF_DATE - FROM as_of AS a - LEFT OUTER JOIN as_of_grain_old_entries AS b - ON a.AS_OF_DATE = b.AS_OF_DATE - WHERE b.AS_OF_DATE IS NULL -), - -min_date AS ( - SELECT min(AS_OF_DATE) AS MIN_DATE - FROM as_of -), - -new_rows_pks AS ( - SELECT {{ dbtvault.prefix([src_pk], 'h') }} - FROM {{ ref(source_model) }} AS h - WHERE h.{{ src_ldts }} >= (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) -), - -new_rows_as_of AS ( - SELECT AS_OF_DATE - FROM as_of - WHERE as_of.AS_OF_DATE >= (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) - UNION - SELECT as_of_date - FROM as_of_grain_new_entries -), - -overlap_pks AS ( - SELECT {{ dbtvault.prefix([src_pk], 'p') }} - FROM {{ this }} AS p - INNER JOIN {{ ref(source_model) }} as h - ON {{ dbtvault.multikey(src_pk, prefix=['p','h'], condition='=') }} - WHERE p.AS_OF_DATE >= (SELECT MIN_DATE FROM min_date) - AND p.AS_OF_DATE < (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) - AND p.AS_OF_DATE NOT IN (SELECT AS_OF_DATE FROM as_of_grain_lost_entries) -), - -overlap_as_of AS ( - SELECT AS_OF_DATE - FROM as_of AS p - WHERE p.AS_OF_DATE >= (SELECT MIN_DATE FROM min_date) - AND p.AS_OF_DATE < (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) - AND p.AS_OF_DATE NOT IN (SELECT AS_OF_DATE FROM as_of_grain_lost_entries) -), - -overlap AS ( - SELECT - {{ dbtvault.prefix([src_pk], 'a') }}, - b.AS_OF_DATE - {%- for bridge_step in bridge_walk.keys() -%} - {%- set link_table = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_table']) -%} - {%- set link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_pk']) -%} - {%- set bridge_link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_link_pk']) -%} - {%- set eff_sat_table = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_table']) -%} - {%- set bridge_end_date = 
dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_end_date']) -%} - {%- set bridge_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_load_date']) -%} - {%- set eff_sat_end_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_end_date']) -%} - {%- set eff_sat_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_load_date']) -%} - {%- filter indent(width=8) %} - {{ ',' ~ link_table ~ '.' ~ link_pk ~ ' AS ' ~ bridge_link_pk }} - {{ ',' ~ eff_sat_table ~ '.' ~ eff_sat_end_date ~ ' AS ' ~ bridge_end_date }} - {{ ',' ~ eff_sat_table ~ '.' ~ eff_sat_load_date ~' AS ' ~ bridge_load_date }} - {%- endfilter -%} - {% endfor %} - FROM overlap_pks AS a - INNER JOIN overlap_as_of AS b - ON (1=1) - {%- set loop_vars = namespace(lastlink = '', last_link_fk = '') -%} - {%- for bridge_step in bridge_walk.keys() -%} - {%- set current_link = bridge_walk[bridge_step]['link_table'] -%} - {%- set current_eff_sat = bridge_walk[bridge_step]['eff_sat_table'] -%} - {%- set link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_pk']) -%} - {%- set link_fk1 = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_fk1']) -%} - {%- set link_fk2 = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_fk2']) -%} - {%- set eff_sat_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_pk']) -%} - {%- set eff_sat_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_load_date']) -%} - {%- if loop.first %} - LEFT JOIN {{ ref(current_link) }} AS {{ dbtvault.escape_column_names(current_link) }} - ON a.{{ src_pk }} = {{ dbtvault.escape_column_names(current_link) }}.{{ link_fk1 }} - {%- else %} - LEFT JOIN {{ ref(current_link) }} AS {{ dbtvault.escape_column_names(current_link) }} - ON {{ loop_vars.last_link }}.{{ loop_vars.last_link_fk2 }} = {{ dbtvault.escape_column_names(current_link) }}.{{ link_fk1 }} - {%- endif %} - INNER JOIN {{ ref(current_eff_sat) }} AS {{ 
dbtvault.escape_column_names(current_eff_sat) }} - ON {{ dbtvault.escape_column_names(current_eff_sat) }}.{{ eff_sat_pk }} = {{ dbtvault.escape_column_names(current_link) }}.{{ link_pk }} - AND {{ dbtvault.escape_column_names(current_eff_sat) }}.{{ eff_sat_load_date }} <= b.AS_OF_DATE - {%- set loop_vars.last_link = current_link -%} - {%- set loop_vars.last_link_fk2 = link_fk2 -%} - {% endfor %} -), -{%- endif %} - -new_rows AS ( - SELECT - {{ dbtvault.prefix([src_pk], 'a') }}, - b.AS_OF_DATE - {%- for bridge_step in bridge_walk.keys() -%} - {%- set link_table = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_table']) -%} - {%- set link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_pk']) -%} - {%- set bridge_link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_link_pk']) -%} - {%- set eff_sat_table = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_table']) -%} - {%- set bridge_end_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_end_date']) -%} - {%- set bridge_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_load_date']) -%} - {%- set eff_sat_end_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_end_date']) -%} - {%- set eff_sat_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_load_date']) -%} - {%- filter indent(width=8) -%} - {{ ',' ~ link_table ~'.'~ link_pk ~' AS '~ bridge_link_pk }} - {{ ',' ~ eff_sat_table ~ '.' ~ eff_sat_end_date ~ ' AS ' ~ bridge_end_date }} - {{ ',' ~ eff_sat_table ~ '.' 
~ eff_sat_load_date ~ ' AS ' ~ bridge_load_date }} - {%- endfilter -%} - {% endfor %} - FROM {{ ref(source_model) }} AS a - INNER JOIN {{ new_as_of_dates_cte }} AS b - ON (1=1) - {%- set loop_vars = namespace(lastlink = '', last_link_fk = '') %} - {%- for bridge_step in bridge_walk.keys() -%} - {%- set current_link = bridge_walk[bridge_step]['link_table'] -%} - {%- set current_eff_sat = bridge_walk[bridge_step]['eff_sat_table'] -%} - {%- set link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_pk']) -%} - {%- set link_fk1 = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_fk1']) -%} - {%- set link_fk2 = dbtvault.escape_column_names(bridge_walk[bridge_step]['link_fk2']) -%} - {%- set eff_sat_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_pk']) -%} - {%- set eff_sat_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['eff_sat_load_date']) -%} - {%- if loop.first %} - LEFT JOIN {{ ref(current_link) }} AS {{ dbtvault.escape_column_names(current_link) }} - ON a.{{ src_pk }} = {{ dbtvault.escape_column_names(current_link) }}.{{ link_fk1 }} - {%- else %} - LEFT JOIN {{ ref(current_link) }} AS {{ dbtvault.escape_column_names(current_link) }} - ON {{ loop_vars.last_link }}.{{ loop_vars.last_link_fk2 }} = {{ dbtvault.escape_column_names(current_link) }}.{{ link_fk1 }} - {%- endif %} - INNER JOIN {{ ref(current_eff_sat) }} AS {{ dbtvault.escape_column_names(current_eff_sat) }} - ON {{ dbtvault.escape_column_names(current_eff_sat) }}.{{ eff_sat_pk }} = {{ dbtvault.escape_column_names(current_link) }}.{{ link_pk }} - AND {{ dbtvault.escape_column_names(current_eff_sat) }}.{{ eff_sat_load_date }} <= b.AS_OF_DATE - {%- set loop_vars.last_link = dbtvault.escape_column_names(current_link) -%} - {%- set loop_vars.last_link_fk2 = link_fk2 -%} - {% endfor %} -), - -{# Full data from bridge walk(s) -#} -all_rows AS ( - SELECT * FROM new_rows - {%- if dbtvault.is_any_incremental() %} - UNION ALL - SELECT * FROM overlap - {%- 
endif %} -), - -{# Select most recent set of relationship key(s) for each as of date -#} -candidate_rows AS ( - SELECT * - FROM - ( - SELECT *, - ROW_NUMBER() OVER ( - PARTITION BY AS_OF_DATE, - {%- for bridge_step in bridge_walk.keys() -%} - {% set bridge_link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_link_pk']) -%} - {%- if loop.first %} - {{ bridge_link_pk }} - {%- else %} - {{ ','~ bridge_link_pk }} - {%- endif -%} - {%- endfor %} - ORDER BY - {%- for bridge_step in bridge_walk.keys() -%} - {% set bridge_load_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_load_date']) %} - {%- if loop.first %} - {{ bridge_load_date ~' DESC' }} - {%- else %} - {{ ','~ bridge_load_date ~' DESC' }} - {%- endif -%} - {%- endfor %} - ) AS row_num - FROM all_rows - ) AS a - WHERE a.row_num = 1 -), - -bridge AS ( - SELECT - {{ dbtvault.prefix([src_pk], 'c') }}, - c.AS_OF_DATE - {%- for bridge_step in bridge_walk.keys() -%} - {% set bridge_link_pk = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_link_pk']) -%} - {{ ',c.' 
~ bridge_link_pk }} - {%- endfor %} - FROM candidate_rows AS c - {%- for bridge_step in bridge_walk.keys() -%} - {%- set bridge_end_date = dbtvault.escape_column_names(bridge_walk[bridge_step]['bridge_end_date']) -%} - {%- if loop.first %} - WHERE CONVERT(DATE, c.{{ bridge_end_date }}) = CONVERT(DATE, '{{ max_datetime }}') - {%- else %} - AND CONVERT(DATE, c.{{ bridge_end_date }}) = CONVERT(DATE, '{{ max_datetime }}') - {%- endif -%} - {%- endfor %} -) - -SELECT * FROM bridge - -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/sqlserver/eff_sat.sql b/macros/tables/sqlserver/eff_sat.sql index 67fc2bc49..5d5e9f775 100644 --- a/macros/tables/sqlserver/eff_sat.sql +++ b/macros/tables/sqlserver/eff_sat.sql @@ -1,27 +1,11 @@ -{%- macro sqlserver__eff_sat(src_pk, src_dfk, src_sfk, src_start_date, src_end_date, src_eff, src_ldts, src_source, source_model) -%} - -{{- dbtvault.check_required_parameters(src_pk=src_pk, src_dfk=src_dfk, src_sfk=src_sfk, - src_start_date=src_start_date, src_end_date=src_end_date, - src_eff=src_eff, src_ldts=src_ldts, src_source=src_source, - source_model=source_model) -}} - -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_dfk = dbtvault.escape_column_names(src_dfk) -%} -{%- set src_sfk = dbtvault.escape_column_names(src_sfk) -%} -{%- set src_start_date = dbtvault.escape_column_names(src_start_date) -%} -{%- set src_end_date = dbtvault.escape_column_names(src_end_date) -%} -{%- set src_eff = dbtvault.escape_column_names(src_eff) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} -{%- set src_source = dbtvault.escape_column_names(src_source) -%} - -{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_dfk, src_sfk, src_start_date, src_end_date, src_eff, src_ldts, src_source]) -%} +{%- macro sqlserver__eff_sat(src_pk, src_dfk, src_sfk, src_extra_columns, src_start_date, src_end_date, src_eff, src_ldts, src_source, source_model) -%} + +{%- set 
source_cols = dbtvault.expand_column_list(columns=[src_pk, src_dfk, src_sfk, src_extra_columns, src_start_date, src_end_date, src_eff, src_ldts, src_source]) -%} {%- set fk_cols = dbtvault.expand_column_list(columns=[src_dfk, src_sfk]) -%} {%- set dfk_cols = dbtvault.expand_column_list(columns=[src_dfk]) -%} {%- set is_auto_end_dating = config.get('is_auto_end_dating', default=false) %} -{{- dbtvault.prepend_generated_by() }} - -{%- set max_datetime = var('max_datetime', '9999-12-31 23:59:59.9999999') %} +{%- set max_datetime = dbtvault.max_datetime() %} WITH source_data AS ( SELECT {{ dbtvault.prefix(source_cols, 'a', alias_target='source') }} @@ -71,6 +55,9 @@ new_open_records AS ( SELECT DISTINCT {{ dbtvault.prefix([src_pk], 'f') }}, {{ dbtvault.alias_all(fk_cols, 'f') }}, + {%- if dbtvault.is_something(src_extra_columns) -%} + {{ dbtvault.prefix([src_extra_columns], 'f') }}, + {%- endif -%} {%- if is_auto_end_dating %} f.{{ src_eff }} AS {{ src_start_date }}, {% else %} @@ -91,6 +78,9 @@ new_reopened_records AS ( SELECT DISTINCT {{ dbtvault.prefix([src_pk], 'lc') }}, {{ dbtvault.alias_all(fk_cols, 'lc') }}, + {%- if dbtvault.is_something(src_extra_columns) -%} + {{ dbtvault.prefix([src_extra_columns], 'g') }}, + {%- endif -%} {%- if is_auto_end_dating %} g.{{ src_eff }} AS {{ src_start_date }}, {% else %} @@ -114,6 +104,9 @@ new_closed_records AS ( SELECT DISTINCT {{ dbtvault.prefix([src_pk], 'lo') }}, {{ dbtvault.alias_all(fk_cols, 'lo') }}, + {%- if dbtvault.is_something(src_extra_columns) -%} + {{ dbtvault.prefix([src_extra_columns], 'h') }}, + {%- endif -%} lo.{{ src_start_date }} AS {{ src_start_date }}, h.{{ src_eff }} AS {{ src_end_date }}, h.{{ src_eff }} AS {{ src_eff }}, @@ -132,6 +125,9 @@ new_closed_records AS ( SELECT DISTINCT {{ dbtvault.prefix([src_pk], 'lo') }}, {{ dbtvault.alias_all(fk_cols, 'lo') }}, + {%- if dbtvault.is_something(src_extra_columns) -%} + {{ dbtvault.prefix([src_extra_columns], 'h') }}, + {%- endif -%} h.{{ src_start_date }} 
AS {{ src_start_date }}, h.{{ src_end_date }} AS {{ src_end_date }}, h.{{ src_eff }} AS {{ src_eff }}, @@ -170,4 +166,4 @@ records_to_insert AS ( {%- endif %} SELECT * FROM records_to_insert -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/sqlserver/hub.sql b/macros/tables/sqlserver/hub.sql index 79c98f43c..002cefab9 100644 --- a/macros/tables/sqlserver/hub.sql +++ b/macros/tables/sqlserver/hub.sql @@ -1,27 +1,14 @@ -{%- macro sqlserver__hub(src_pk, src_nk, src_ldts, src_source, source_model) -%} +{%- macro sqlserver__hub(src_pk, src_nk, src_extra_columns, src_ldts, src_source, source_model) -%} -{{- dbtvault.check_required_parameters(src_pk=src_pk, src_nk=src_nk, - src_ldts=src_ldts, src_source=src_source, - source_model=source_model) -}} - -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_nk = dbtvault.escape_column_names(src_nk) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} -{%- set src_source = dbtvault.escape_column_names(src_source) -%} - -{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_nk, src_ldts, src_source]) -%} +{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_nk, src_extra_columns, src_ldts, src_source]) -%} {%- if model.config.materialized == 'vault_insert_by_rank' %} {%- set source_cols_with_rank = source_cols + dbtvault.escape_column_names([config.get('rank_column')]) -%} -{%- endif -%} - -{{ dbtvault.prepend_generated_by() }} +{%- endif %} {{ 'WITH ' -}} -{%- if not (source_model is iterable and source_model is not string) -%} - {%- set source_model = [source_model] -%} -{%- endif -%} +{%- set stage_count = source_model | length -%} {%- set ns = namespace(last_cte= "") -%} @@ -35,8 +22,7 @@ row_rank_{{ source_number }} AS ( {%- else %} SELECT {{ source_cols | join(', ') }} {%- endif %} - FROM - ( + FROM ( {%- if model.config.materialized == 'vault_insert_by_rank' %} SELECT {{ dbtvault.prefix(source_cols_with_rank, 'rr') 
}}, {%- else %} @@ -53,7 +39,7 @@ row_rank_{{ source_number }} AS ( {%- set ns.last_cte = "row_rank_{}".format(source_number) %} ),{{ "\n" if not loop.last }} {% endfor -%} -{% if source_model | length > 1 %} +{% if stage_count > 1 %} stage_union AS ( {%- for src in source_model %} SELECT * FROM row_rank_{{ loop.index | string }} @@ -64,6 +50,7 @@ stage_union AS ( {%- set ns.last_cte = "stage_union" %} ), {%- endif -%} + {%- if model.config.materialized == 'vault_insert_by_period' %} stage_mat_filter AS ( SELECT * @@ -79,12 +66,12 @@ stage_mat_filter AS ( {%- set ns.last_cte = "stage_mat_filter" %} ), {%- endif -%} -{%- if source_model | length > 1 %} + +{%- if stage_count > 1 %} row_rank_union AS ( SELECT * - FROM - ( + FROM ( SELECT ru.*, ROW_NUMBER() OVER( PARTITION BY {{ dbtvault.prefix([src_pk], 'ru') }} @@ -109,4 +96,4 @@ records_to_insert AS ( SELECT * FROM records_to_insert -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/sqlserver/link.sql b/macros/tables/sqlserver/link.sql index 34caba721..301aea69c 100644 --- a/macros/tables/sqlserver/link.sql +++ b/macros/tables/sqlserver/link.sql @@ -1,28 +1,15 @@ -{%- macro sqlserver__link(src_pk, src_fk, src_ldts, src_source, source_model) -%} +{%- macro sqlserver__link(src_pk, src_fk, src_extra_columns, src_ldts, src_source, source_model) -%} -{{- dbtvault.check_required_parameters(src_pk=src_pk, src_fk=src_fk, - src_ldts=src_ldts, src_source=src_source, - source_model=source_model) -}} - -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_fk = dbtvault.escape_column_names(src_fk) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} -{%- set src_source = dbtvault.escape_column_names(src_source) -%} - -{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_fk, src_ldts, src_source]) -%} +{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_fk, src_extra_columns, src_ldts, src_source]) -%} {%- set fk_cols = 
dbtvault.expand_column_list([src_fk]) -%} {%- if model.config.materialized == 'vault_insert_by_rank' %} {%- set source_cols_with_rank = source_cols + dbtvault.escape_column_names([config.get('rank_column')]) -%} {%- endif -%} -{{ dbtvault.prepend_generated_by() }} - {{ 'WITH ' -}} -{%- if not (source_model is iterable and source_model is not string) -%} - {%- set source_model = [source_model] -%} -{%- endif -%} +{%- set stage_count = source_model | length -%} {%- set ns = namespace(last_cte= "") -%} @@ -44,7 +31,7 @@ row_rank_{{ source_number }} AS ( ORDER BY {{ dbtvault.prefix([src_ldts], 'rr') }} ) AS row_number FROM {{ ref(src) }} AS rr - {%- if source_model | length == 1 %} + {%- if stage_count == 1 %} WHERE {{ dbtvault.multikey(src_pk, prefix='rr', condition='IS NOT NULL') }} AND {{ dbtvault.multikey(fk_cols, prefix='rr', condition='IS NOT NULL') }} {%- endif %} @@ -53,7 +40,7 @@ row_rank_{{ source_number }} AS ( {%- set ns.last_cte = "row_rank_{}".format(source_number) %} ),{{ "\n" if not loop.last }} {% endfor -%} -{% if source_model | length > 1 %} +{% if stage_count > 1 %} stage_union AS ( {%- for src in source_model %} SELECT * FROM row_rank_{{ loop.index | string }} @@ -79,7 +66,7 @@ stage_mat_filter AS ( {%- set ns.last_cte = "stage_mat_filter" %} ), {% endif %} -{%- if source_model | length > 1 %} +{%- if stage_count > 1 %} row_rank_union AS ( SELECT * @@ -110,4 +97,4 @@ records_to_insert AS ( SELECT * FROM records_to_insert -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/sqlserver/ma_sat.sql b/macros/tables/sqlserver/ma_sat.sql index c1384f03e..801799e48 100644 --- a/macros/tables/sqlserver/ma_sat.sql +++ b/macros/tables/sqlserver/ma_sat.sql @@ -1,27 +1,14 @@ -{%- macro sqlserver__ma_sat(src_pk, src_cdk, src_hashdiff, src_payload, src_eff, src_ldts, src_source, source_model) -%} +{%- macro sqlserver__ma_sat(src_pk, src_cdk, src_hashdiff, src_payload, src_extra_columns, src_eff, src_ldts, src_source, 
source_model) -%} - -{{- dbtvault.check_required_parameters(src_pk=src_pk, src_cdk=src_cdk, src_hashdiff=src_hashdiff, - src_payload=src_payload, src_ldts=src_ldts, src_source=src_source, - source_model=source_model) -}} - -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_cdk = dbtvault.escape_column_names(src_cdk) -%} -{%- set src_hashdiff = dbtvault.escape_column_names(src_hashdiff) -%} -{%- set src_payload = dbtvault.escape_column_names(src_payload) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} -{%- set src_source = dbtvault.escape_column_names(src_source) -%} - -{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, src_cdk, src_payload, src_eff, src_ldts, src_source]) -%} +{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, src_cdk, src_payload, src_extra_columns, src_eff, src_ldts, src_source]) -%} {%- set rank_cols = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, src_ldts]) -%} {%- set cdk_cols = dbtvault.expand_column_list(columns=[src_cdk]) -%} +{%- set cols_for_latest = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, src_cdk, src_ldts]) %} {%- if model.config.materialized == 'vault_insert_by_rank' -%} {%- set source_cols_with_rank = source_cols + [config.get('rank_column')] -%} {%- endif -%} -{{ dbtvault.prepend_generated_by() }} - {# Select unique source records -#} WITH source_data AS ( {%- if model.config.materialized == 'vault_insert_by_rank' %} @@ -60,19 +47,13 @@ source_data_with_count AS ( {# Select latest records from satellite, restricted to PKs in source data -#} latest_records AS ( - SELECT {{ dbtvault.prefix([src_pk], 'mas', alias_target='target') }} - ,{{ dbtvault.prefix([src_hashdiff], 'mas', alias_target='target') }} - ,{{ dbtvault.prefix([src_cdk], 'mas', alias_target='target') }} - ,{{ dbtvault.prefix([src_ldts], 'mas', alias_target='target') }} + SELECT {{ dbtvault.prefix(cols_for_latest, 'mas', alias_target='target') 
}} ,mas.latest_rank ,DENSE_RANK() OVER (PARTITION BY {{ dbtvault.prefix([src_pk], 'mas') }} ORDER BY {{ dbtvault.prefix([src_hashdiff], 'mas', alias_target='target') }}, {{ dbtvault.prefix([src_cdk], 'mas') }} ASC) AS check_rank FROM ( - SELECT {{ dbtvault.prefix([src_pk], 'inner_mas', alias_target='target') }} - ,{{ dbtvault.prefix([src_hashdiff], 'inner_mas', alias_target='target') }} - ,{{ dbtvault.prefix([src_cdk], 'inner_mas', alias_target='target') }} - ,{{ dbtvault.prefix([src_ldts], 'inner_mas', alias_target='target') }} + SELECT {{ dbtvault.prefix(cols_for_latest, 'inner_mas', alias_target='target') }} ,RANK() OVER (PARTITION BY {{ dbtvault.prefix([src_pk], 'inner_mas') }} ORDER BY {{ dbtvault.prefix([src_ldts], 'inner_mas') }} DESC) AS latest_rank FROM {{ this }} AS inner_mas @@ -105,19 +86,13 @@ records_to_insert AS ( {% if dbtvault.is_any_incremental() %} SELECT {{ dbtvault.alias_all(source_cols, 'source_data_with_count') }} FROM source_data_with_count - WHERE EXISTS - ( + WHERE EXISTS ( SELECT 1 FROM source_data_with_count AS stage - WHERE NOT EXISTS - ( + WHERE NOT EXISTS ( SELECT 1 - FROM - ( - SELECT {{ dbtvault.prefix([src_pk], 'lr', alias_target='target') }} - ,{{ dbtvault.prefix([src_hashdiff], 'lr', alias_target='target') }} - ,{{ dbtvault.prefix([src_cdk], 'lr', alias_target='target') }} - ,{{ dbtvault.prefix([src_ldts], 'lr', alias_target='target') }} + FROM ( + SELECT {{ dbtvault.prefix(cols_for_latest, 'lr', alias_target='target') }} ,lg.latest_count FROM latest_records AS lr INNER JOIN latest_group_details AS lg diff --git a/macros/tables/sqlserver/pit.sql b/macros/tables/sqlserver/pit.sql index 2e0b63b8b..29303ba0b 100644 --- a/macros/tables/sqlserver/pit.sql +++ b/macros/tables/sqlserver/pit.sql @@ -1,9 +1,11 @@ -{%- macro sqlserver__pit(src_pk, as_of_dates_table, satellites, stage_tables, src_ldts, source_model) -%} +{%- macro sqlserver__pit(src_pk, src_extra_columns, as_of_dates_table, satellites, stage_tables_ldts, src_ldts, 
source_model) -%} -{{ dbtvault.default__pit(source_model=source_model, src_pk=src_pk, +{{ dbtvault.default__pit(src_pk=src_pk, + src_extra_columns=src_extra_columns, as_of_dates_table=as_of_dates_table, satellites=satellites, - stage_tables=stage_tables, - src_ldts=src_ldts) }} + stage_tables_ldts=stage_tables_ldts, + src_ldts=src_ldts, + source_model=source_model) }} -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/sqlserver/sat.sql b/macros/tables/sqlserver/sat.sql index d18a3681d..c8b3a7e5c 100644 --- a/macros/tables/sqlserver/sat.sql +++ b/macros/tables/sqlserver/sat.sql @@ -1,74 +1,8 @@ -{%- macro sqlserver__sat(src_pk, src_hashdiff, src_payload, src_eff, src_ldts, src_source, source_model) -%} +{%- macro sqlserver__sat(src_pk, src_hashdiff, src_payload, src_extra_columns, src_eff, src_ldts, src_source, source_model) -%} -{{- dbtvault.check_required_parameters(src_pk=src_pk, src_hashdiff=src_hashdiff, src_payload=src_payload, - src_ldts=src_ldts, src_source=src_source, - source_model=source_model) -}} + {{ dbtvault.default__sat(src_pk=src_pk, src_hashdiff=src_hashdiff, + src_payload=src_payload, src_extra_columns=src_extra_columns, + src_eff=src_eff, src_ldts=src_ldts, + src_source=src_source, source_model=source_model) }} -{%- set src_pk = dbtvault.escape_column_names(src_pk) -%} -{%- set src_hashdiff = dbtvault.escape_column_names(src_hashdiff) -%} -{%- set src_payload = dbtvault.escape_column_names(src_payload) -%} -{%- set src_ldts = dbtvault.escape_column_names(src_ldts) -%} -{%- set src_source = dbtvault.escape_column_names(src_source) -%} - -{%- set source_cols = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, src_payload, src_eff, src_ldts, src_source]) -%} -{%- set rank_cols = dbtvault.expand_column_list(columns=[src_pk, src_hashdiff, src_ldts]) -%} -{%- set pk_cols = dbtvault.expand_column_list(columns=[src_pk]) -%} - -{%- if model.config.materialized == 'vault_insert_by_rank' %} - {%- set 
source_cols_with_rank = source_cols + dbtvault.escape_column_names([config.get('rank_column')]) -%} -{%- endif -%} - -{{ dbtvault.prepend_generated_by() }} - -WITH source_data AS ( - {%- if model.config.materialized == 'vault_insert_by_rank' %} - SELECT {{ dbtvault.prefix(source_cols_with_rank, 'a', alias_target='source') }} - {%- else %} - SELECT {{ dbtvault.prefix(source_cols, 'a', alias_target='source') }} - {%- endif %} - FROM {{ ref(source_model) }} AS a - WHERE {{ dbtvault.multikey(src_pk, prefix='a', condition='IS NOT NULL') }} - {%- if model.config.materialized == 'vault_insert_by_period' %} - AND __PERIOD_FILTER__ - {% elif model.config.materialized == 'vault_insert_by_rank' %} - AND __RANK_FILTER__ - {% endif %} -), - -{% if dbtvault.is_any_incremental() %} - -latest_records AS ( - SELECT {{ dbtvault.prefix(rank_cols, 'a', alias_target='target') }} - FROM - ( - SELECT {{ dbtvault.prefix(rank_cols, 'current_records', alias_target='target') }}, - RANK() OVER ( - PARTITION BY {{ dbtvault.prefix([src_pk], 'current_records') }} - ORDER BY {{ dbtvault.prefix([src_ldts], 'current_records') }} DESC - ) AS rank - FROM {{ this }} AS current_records - JOIN ( - SELECT DISTINCT {{ dbtvault.prefix([src_pk], 'source_data') }} - FROM source_data - ) AS source_records - ON {{ dbtvault.multikey(src_pk, prefix=['current_records','source_records'], condition='=') }} - ) AS a - WHERE a.rank = 1 -), - -{%- endif %} - -records_to_insert AS ( - SELECT DISTINCT {{ dbtvault.alias_all(source_cols, 'stage') }} - FROM source_data AS stage - {%- if dbtvault.is_any_incremental() %} - LEFT JOIN latest_records - ON {{ dbtvault.multikey(src_pk, prefix=['latest_records','stage'], condition='=') }} - WHERE {{ dbtvault.prefix([src_hashdiff], 'latest_records', alias_target='target') }} != {{ dbtvault.prefix([src_hashdiff], 'stage') }} - OR {{ dbtvault.prefix([src_hashdiff], 'latest_records', alias_target='target') }} IS NULL - {%- endif %} -) - -SELECT * FROM records_to_insert - -{%- endmacro 
-%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/tables/sqlserver/t_link.sql b/macros/tables/sqlserver/t_link.sql index 5f48ac6b1..313ba2869 100644 --- a/macros/tables/sqlserver/t_link.sql +++ b/macros/tables/sqlserver/t_link.sql @@ -1,7 +1,8 @@ -{%- macro sqlserver__t_link(src_pk, src_fk, src_payload, src_eff, src_ldts, src_source, source_model) -%} +{%- macro sqlserver__t_link(src_pk, src_fk, src_payload, src_extra_columns, src_eff, src_ldts, src_source, source_model) -%} {{ dbtvault.default__t_link(src_pk=src_pk, src_fk=src_fk, src_payload=src_payload, + src_extra_columns=src_extra_columns, src_eff=src_eff, src_ldts=src_ldts, src_source=src_source, source_model=source_model) }} -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%}