Skip to content

Commit

Permalink
fix bug : handle cleaning null values
Browse files Browse the repository at this point in the history
- pre hook calls a macro to remove NULLs left over from dummy dates.
- added a CI profile with env var for future GHA runs
  • Loading branch information
luutuankiet committed Nov 24, 2024
1 parent d253a15 commit 61cf8dd
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 6 deletions.
28 changes: 28 additions & 0 deletions dbt_project/macros/cleanup_nulls.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{# The pre-hook had to be hardcoded; otherwise the old dummy lookahead dates would persist and build up. #}

{#
    cleanup_nulls(column_name)

    Hook helper that renders a DELETE removing every row of the current
    model's relation (`this`) whose `column_name` IS NULL.

    Args:
        column_name: column to test for NULL. It is interpolated into the
            statement as-is, so only pass trusted identifiers.

    Renders:
        - the DELETE statement when the target relation already exists;
        - `select 1` (a no-op) when the relation has not been built yet;
        - no SQL when `execute` is false; the statement is only logged.
#}
{% macro cleanup_nulls(column_name) %}
    {% if execute %}
        {# Let the adapter resolve the relation instead of hand-querying
           information_schema: no extra query text to maintain, and the
           adapter applies its own quoting / case rules to the lookup. #}
        {% set existing_relation = adapter.get_relation(
            database=this.database,
            schema=this.schema,
            identifier=this.identifier
        ) %}

        {% if existing_relation is not none %}
            DELETE FROM {{ this }}
            WHERE {{ column_name }} IS NULL;
        {% else %}
            {{ log("Skipping DELETE because the table does not exist.", info=True) }}
            select 1
        {% endif %}
    {% else %}
        {# `execute` is false here, so no query may be issued: only log the
           statement that would be produced. #}
        {{ log("DELETE FROM " ~ this ~ " WHERE " ~ column_name ~ " IS NULL;", info=True) }}
    {% endif %}
{% endmacro %}
6 changes: 6 additions & 0 deletions dbt_project/models/marts/core/fact_todos.sql
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
{# The pre-hook had to be hardcoded; otherwise the old dummy lookahead dates would persist and build up. #}
{{ config(
materialized='incremental',
unique_key = ['todo_id', 'date_due_lookahead_key'],
incremental_strategy = 'merge',
on_schema_change='append_new_columns',
pre_hook = ['{{cleanup_nulls("todo_id")}}']
) }}
WITH source AS (
SELECT
DISTINCT

*
FROM
{{ ref('stg_todos') }}
Expand All @@ -17,5 +21,7 @@ FROM

{% if is_incremental() %}
WHERE todo_modifiedtime >= (select coalesce(max(todo_modifiedtime),'1900-01-01 00:00:00') from {{ this }} )
OR
todo_modifiedtime IS NULL
{% endif %}

3 changes: 2 additions & 1 deletion dbt_project/models/marts/core/fact_todos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ models:
- name: todo_key
description: "surrogate key by joining todo_id and todo_modifiedtime"
data_tests:
- unique
- unique:
severity: warn
- not_null
meta:
dimension:
Expand Down
9 changes: 6 additions & 3 deletions dbt_project/models/staging/stg_todos.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
materialized='incremental',
unique_key = ['todo_id', 'date_due_lookahead_key'],
incremental_strategy = 'merge',
on_schema_change='append_new_columns'
on_schema_change='append_new_columns',
pre_hook = ['{{cleanup_nulls("todo_id")}}']
) }}

WITH init_todo AS (
Expand Down Expand Up @@ -274,8 +275,10 @@ SELECT
*
FROM
joined
WHERE todo_id IS NOT NULL

{% if is_incremental() %}
AND todo_modifiedtime >= (select coalesce(max(todo_modifiedtime),'1900-01-01 00:00:00') from {{ this }} )
WHERE
todo_modifiedtime >= (select coalesce(max(todo_modifiedtime),'1900-01-01 00:00:00') from {{ this }} )
OR
todo_modifiedtime IS NULL
{% endif %}
8 changes: 6 additions & 2 deletions dbt_project/models/staging/stg_todos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ models:
- name: todo_key
description: "surrogate key by joining todo_id and todo_modifiedtime"
data_tests:
- unique
- unique:
config:
severity: warn
- not_null
meta:
dimension:
Expand All @@ -48,7 +50,9 @@ models:
- name: todo_id
description: ""
data_tests:
- unique
- unique:
config:
severity: warn
meta:
dimension:
type: string
Expand Down
10 changes: 10 additions & 0 deletions dbt_project/profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,16 @@ todo_analytics:
schema: dev
sslmode: disable
threads: 12
ci:
type: postgres
host: "{{env_var('DW_HOST')}}"
port: 5433
user: "{{env_var('DW_USER')}}"
password: "{{env_var('DW_PASSWORD')}}"
dbname: "{{env_var('DW_DBNAME')}}"
schema: "{{ var('schema_name', 'ci') }}"
sslmode: disable
threads: 12
prod:
type: postgres
host: "{{env_var('DW_HOST')}}"
Expand Down

0 comments on commit 61cf8dd

Please sign in to comment.