From 61cf8dd179d71c3b6b845b215f864507f0b42e49 Mon Sep 17 00:00:00 2001
From: luutuankiet
Date: Sun, 24 Nov 2024 06:58:10 +0000
Subject: [PATCH] fix bug : handle cleaning null values

- pre hook calls a macro to clean up NULL rows left by dummy dates.
- added a CI profile with env var for future GHA runs
---
Review notes (this section is ignored by `git am`):
- cleanup_nulls: the table-existence check now uses adapter.get_relation
  instead of a hand-built information_schema query; explanatory text is
  kept in Jinja comments so it is not sent to the database; the
  parse-phase branch (execute == false) no longer logs a DELETE statement.
- fact_todos.yml: `severity: warn` is nested under `config:` to match the
  stg_todos.yml hunks of this same patch.
- Context-line indentation was reconstructed from a whitespace-collapsed
  copy of this patch; if it does not apply cleanly, try
  `git apply --ignore-whitespace`.
- The `index` hashes of cleanup_nulls.sql and fact_todos.yml are carried
  over from the original patch and no longer match the amended content;
  regenerate with `git format-patch` after applying.

 dbt_project/macros/cleanup_nulls.sql         | 22 ++++++++++++++++++++
 dbt_project/models/marts/core/fact_todos.sql |  6 ++++++
 dbt_project/models/marts/core/fact_todos.yml |  4 +++-
 dbt_project/models/staging/stg_todos.sql     |  9 +++++---
 dbt_project/models/staging/stg_todos.yml     |  8 +++++--
 dbt_project/profiles.yml                     | 10 +++++++++
 6 files changed, 53 insertions(+), 6 deletions(-)
 create mode 100644 dbt_project/macros/cleanup_nulls.sql

diff --git a/dbt_project/macros/cleanup_nulls.sql b/dbt_project/macros/cleanup_nulls.sql
new file mode 100644
index 0000000..19f598a
--- /dev/null
+++ b/dbt_project/macros/cleanup_nulls.sql
@@ -0,0 +1,22 @@
+{# Pre-hook helper: delete the rows of the current model ({{ this }}) whose `column_name` is NULL. #}
+{# It is hard-coded as a pre-hook because otherwise the old dummy lookahead dates persist and build up. #}
+
+{% macro cleanup_nulls(column_name) %}
+    {# `execute` is false while dbt parses the project; render nothing in that phase #}
+    {% if execute %}
+        {# get_relation returns none when the model's table has not been created yet #}
+        {% set existing_relation = adapter.get_relation(
+            database=this.database,
+            schema=this.schema,
+            identifier=this.identifier
+        ) %}
+
+        {% if existing_relation is not none %}
+            DELETE FROM {{ this }}
+            WHERE {{ column_name }} IS NULL;
+        {% else %}
+            {{ log("Skipping DELETE because the table does not exist.", info=True) }}
+            select 1
+        {% endif %}
+    {% endif %}
+{% endmacro %}
diff --git a/dbt_project/models/marts/core/fact_todos.sql b/dbt_project/models/marts/core/fact_todos.sql
index 1b58c7a..113d414 100644
--- a/dbt_project/models/marts/core/fact_todos.sql
+++ b/dbt_project/models/marts/core/fact_todos.sql
@@ -1,11 +1,15 @@
+{# the pre-hook is hard-coded here because otherwise the old dummy lookahead dates persist and build up #}
 {{ config(
     materialized='incremental',
     unique_key = ['todo_id', 'date_due_lookahead_key'],
     incremental_strategy = 'merge',
     on_schema_change='append_new_columns',
+    pre_hook = ['{{cleanup_nulls("todo_id")}}']
 ) }}
 WITH source AS (
     SELECT
+        DISTINCT
+
         *
     FROM
         {{ ref('stg_todos') }}
@@ -17,5 +21,7 @@ FROM
 {% if is_incremental() %}
 WHERE
     todo_modifiedtime >= (select coalesce(max(todo_modifiedtime),'1900-01-01 00:00:00') from {{ this }} )
+OR
+    todo_modifiedtime IS NULL

 {% endif %}
diff --git a/dbt_project/models/marts/core/fact_todos.yml b/dbt_project/models/marts/core/fact_todos.yml
index b8c1416..6eda2fa 100644
--- a/dbt_project/models/marts/core/fact_todos.yml
+++ b/dbt_project/models/marts/core/fact_todos.yml
@@ -95,7 +95,9 @@ models:
       - name: todo_key
         description: "surrogate key by joining todo_id and todo_modifiedtime"
         data_tests:
-          - unique
+          - unique:
+              config:
+                severity: warn
           - not_null
         meta:
           dimension:
diff --git a/dbt_project/models/staging/stg_todos.sql b/dbt_project/models/staging/stg_todos.sql
index a5c314e..8fbb3d0 100644
--- a/dbt_project/models/staging/stg_todos.sql
+++ b/dbt_project/models/staging/stg_todos.sql
@@ -3,7 +3,8 @@
     materialized='incremental',
     unique_key = ['todo_id', 'date_due_lookahead_key'],
     incremental_strategy = 'merge',
-    on_schema_change='append_new_columns'
+    on_schema_change='append_new_columns',
+    pre_hook = ['{{cleanup_nulls("todo_id")}}']
 ) }}

 WITH init_todo AS (
@@ -274,8 +275,10 @@ SELECT
     *
 FROM
     joined
-WHERE todo_id IS NOT NULL
 {% if is_incremental() %}
-    AND todo_modifiedtime >= (select coalesce(max(todo_modifiedtime),'1900-01-01 00:00:00') from {{ this }} )
+    WHERE
+        todo_modifiedtime >= (select coalesce(max(todo_modifiedtime),'1900-01-01 00:00:00') from {{ this }} )
+    OR
+        todo_modifiedtime IS NULL

 {% endif %}
\ No newline at end of file
diff --git a/dbt_project/models/staging/stg_todos.yml b/dbt_project/models/staging/stg_todos.yml
index 19be3e3..992347d 100644
--- a/dbt_project/models/staging/stg_todos.yml
+++ b/dbt_project/models/staging/stg_todos.yml
@@ -25,7 +25,9 @@ models:
       - name: todo_key
         description: "surrogate key by joining todo_id and todo_modifiedtime"
         data_tests:
-          - unique
+          - unique:
+              config:
+                severity: warn
           - not_null
         meta:
           dimension:
@@ -48,7 +50,9 @@ models:
       - name: todo_id
         description: ""
         data_tests:
-          - unique
+          - unique:
+              config:
+                severity: warn
         meta:
           dimension:
             type: string
diff --git a/dbt_project/profiles.yml b/dbt_project/profiles.yml
index ce4671a..33efcb5 100644
--- a/dbt_project/profiles.yml
+++ b/dbt_project/profiles.yml
@@ -10,6 +10,16 @@ todo_analytics:
       schema: dev
       sslmode: disable
       threads: 12
+    ci:
+      type: postgres
+      host: "{{env_var('DW_HOST')}}"
+      port: 5433
+      user: "{{env_var('DW_USER')}}"
+      password: "{{env_var('DW_PASSWORD')}}"
+      dbname: "{{env_var('DW_DBNAME')}}"
+      schema: "{{ var('schema_name', 'ci') }}"
+      sslmode: disable
+      threads: 12
     prod:
       type: postgres
       host: "{{env_var('DW_HOST')}}"