Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add folders & lists snapshots #18

Merged
merged 14 commits into from
Nov 24, 2024
26 changes: 19 additions & 7 deletions .github/workflows/slim_CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,33 @@ on:
push:
branches-ignore: [ "main", "master"]

env:
TARGET_SCHEMA: dev

jobs:
validate_dbt_models:
runs-on: ubuntu-latest
permissions:
contents: read
pull-requests: write
steps:
- uses: 8BitJonny/[email protected]
id: PR

- name: Set var PR_schema
run: echo "PR_schema=${{ env.schema_var }}" >> $GITHUB_ENV
if: ${{ steps.PR.outputs.pr_found == 'true' }}
env:
schema_var: '{\"schema_name\": \"ci_PR_${{ steps.PR.outputs.number }}\"}'

- name: debug
run: echo "${{ env.PR_schema }}"


- uses: actions/checkout@v3

- uses: ./.github/actions/setup-env
with:
PROD_ENV: ${{ secrets.PROD_ENV }}


- name: Cache Python packages
uses: actions/cache@v3
Expand Down Expand Up @@ -68,14 +80,14 @@ jobs:
run: npm install -g "@lightdash/cli@${{ steps.version.outputs.value }}" || npm install -g @lightdash/cli@latest


- name: validate dbt models
run: dbt build
- name: dbt build models
run: dbt build --target ci --vars '${{env.PR_schema}}'


- name: validate LD
id: validate-lightdash
run: lightdash validate --target ci --vars '${{env.PR_schema}}'
env:
LIGHTDASH_API_KEY: ${{ secrets.LIGHTDASH_API_KEY }}
LIGHTDASH_PROJECT: ${{ secrets.LIGHTDASH_PROJECT }}
LIGHTDASH_URL: ${{ secrets.LIGHTDASH_URL }}
run: lightdash validate
LIGHTDASH_URL: ${{ secrets.LIGHTDASH_URL }}
2 changes: 2 additions & 0 deletions dbt_project/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ profile: todo_analytics
model-paths: ["models"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

models:
todo_analytics:
Expand All @@ -31,6 +32,7 @@ seeds:
+column_types:
goal_ids: "text"
date_seed:
full_refresh: false

vars:
"dbt_date:time_zone": "Asia/Ho_Chi_Minh"
28 changes: 28 additions & 0 deletions dbt_project/macros/cleanup_nulls.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{# The pre-hook had to be hardcoded because otherwise the old dummy lookahead dates would persist and build up. #}

{% macro cleanup_nulls(column_name) %}
    {#
        Pre-hook helper for incremental models: deletes rows from the current
        model's table ({{ this }}) where `column_name` IS NULL, but only when
        the table already exists — on a first run / full refresh the target
        table is not there yet and a bare DELETE would fail.

        Args:
            column_name: name of the column whose NULL rows should be purged.

        Emits `select 1` as a harmless no-op statement when the table is
        missing, and logs the would-be DELETE in parse/dry-run mode.
    #}
    {% if execute %}
        {# adapter.get_relation consults dbt's relation cache / catalog —
           idiomatic and cross-adapter, unlike a hand-rolled
           information_schema query via run_query #}
        {% set target_relation = adapter.get_relation(
            database=this.database,
            schema=this.schema,
            identifier=this.table
        ) %}

        {# Run the DELETE only if the table exists #}
        {% if target_relation is not none %}
            DELETE FROM {{ this }}
            WHERE {{ column_name }} IS NULL;
        {% else %}
            {{ log("Skipping DELETE because the table does not exist.", info=True) }}
            select 1
        {% endif %}
    {% else %}
        -- Parse / dry-run mode: log the SQL instead of emitting it
        {{ log("DELETE FROM " ~ this ~ " WHERE " ~ column_name ~ " IS NULL;", info=True) }}
    {% endif %}
{% endmacro %}
4 changes: 4 additions & 0 deletions dbt_project/macros/clear_schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{% macro clear_schema(schema_name) %}
    {#
        Utility macro: drop `schema_name` together with every object it
        contains (CASCADE). Safe to call when the schema is absent thanks
        to IF EXISTS. Intended for CI cleanup of per-PR schemas.
    #}
    {% do run_query("DROP SCHEMA IF EXISTS " ~ schema_name ~ " CASCADE;") %}
{% endmacro %}
28 changes: 12 additions & 16 deletions dbt_project/models/marts/core/fact_todos.sql
Original file line number Diff line number Diff line change
@@ -1,24 +1,15 @@
{# The pre-hook had to be hardcoded because otherwise the old dummy lookahead dates would persist and build up. #}
{{ config(
materialized='incremental',
unique_key = ['todo_id'],
unique_key = ['todo_id', 'date_due_lookahead_key'],
incremental_strategy = 'merge',
on_schema_change='append_new_columns',
indexes=[
{'columns': ['list_key'], 'type': 'hash'},
{'columns': ['folder_key'], 'type': 'hash'},
{'columns': ['status_key'], 'type': 'hash'},
{'columns': ['date_start_key'], 'type': 'hash'},
{'columns': ['date_due_key'], 'type': 'hash'},
{'columns': ['date_completed_key'], 'type': 'hash'},
{'columns': ['date_created_key'], 'type': 'hash'},
{'columns': ['date_modified_key'], 'type': 'hash'},
{'columns': ['date_due_lookahead_key'], 'type': 'hash'},
{'columns': ['todo_key'], 'unique': True},
],
unlogged=True,
pre_hook="delete from {{this}} where todo_id is null"
pre_hook = ['{{cleanup_nulls("todo_id")}}']
) }}
WITH source AS (
SELECT
DISTINCT

*
FROM
{{ ref('stg_todos') }}
Expand All @@ -28,4 +19,9 @@ SELECT
FROM
source

{# No coalesce_defaults needed at the fact table because NULL FKs are already hashed at the staging fact model. #}
{% if is_incremental() %}
WHERE todo_modifiedtime >= (select coalesce(max(todo_modifiedtime),'1900-01-01 00:00:00') from {{ this }} )
OR
todo_modifiedtime IS NULL
{% endif %}

6 changes: 5 additions & 1 deletion dbt_project/models/marts/core/fact_todos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,11 @@ models:
hidden: true
type: string
- name: todo_key
description: ""
description: "surrogate key by joining todo_id and todo_modifiedtime"
data_tests:
- unique:
severity: warn
- not_null
meta:
dimension:
type: string
Expand Down
37 changes: 27 additions & 10 deletions dbt_project/models/raw_source/src__folders_raw.sql
Original file line number Diff line number Diff line change
@@ -1,21 +1,38 @@
{{ config(
materialized = 'incremental',
unique_key = 'folder_id'
materialized = 'table'
) }}
WITH source AS (
WITH source_active AS (
{# direct pull from tick api. contains current data. #}

SELECT
{{ setup_nulls(
source(
'raw_data',
'folders_raw'
)
) }}
*
FROM
{{ source(
'raw_data',
'folders_raw'
) }}
),
source_snp AS (
{# pulls the deleted portion of the data that is gone from tick api. #}
SELECT
*
FROM
{{ ref(
'snp_folders_raw',
) }}
WHERE dbt_valid_to is not null
),

source as (
select
{{ setup_nulls(source('raw_data', 'folders_raw')) }}
from source_active
UNION ALL
select
{{ setup_nulls(source('raw_data', 'folders_raw')) }}
from source_snp
),

renamed AS (
SELECT
{{ adapter.quote("id") }} :: text AS "folder_id",
Expand All @@ -36,4 +53,4 @@ renamed AS (
SELECT
*
FROM
renamed
renamed
3 changes: 3 additions & 0 deletions dbt_project/models/raw_source/src__folders_raw.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ models:
columns:
- name: folder_id
description: ""
data_tests:
- unique
- not_null
meta:
dimension:
type: string
Expand Down
44 changes: 35 additions & 9 deletions dbt_project/models/raw_source/src__lists_raw.sql
Original file line number Diff line number Diff line change
@@ -1,23 +1,49 @@
{{ config(
materialized = 'incremental',
unique_key = 'list_id'
materialized = 'table'
) }}

WITH source AS (
WITH source_active AS (
{# direct pull from tick api. contains current data. #}

SELECT
{{ setup_nulls(
source(
'raw_data',
'lists_raw'
)
) }}
{{dbt_utils.star(
from=source('raw_data','lists_raw'),
except=['modifiedtime']
)}},
-- handle this column explicitly because the snapshot's data is cast as timestamp
modifiedtime :: text as modifiedtime
FROM
{{ source(
'raw_data',
'lists_raw'
) }}
),
source_snp AS (
{# pulls the deleted portion of the data that is gone from tick api. #}
SELECT
{{dbt_utils.star(
from=source('raw_data','lists_raw'),
except=['modifiedtime']
)}},
-- handle this column explicitly because the snapshot's data is cast as timestamp
modifiedtime :: text as modifiedtime
FROM
{{ ref(
'snp_lists_raw',
) }}
WHERE dbt_valid_to is not null
),

source as (
select
{{ setup_nulls(source('raw_data', 'lists_raw')) }}
from source_active
UNION ALL
select
{{ setup_nulls(source('raw_data', 'lists_raw')) }}
from source_snp
),

renamed AS (
SELECT
{{ adapter.quote("id") }} :: text AS "list_id",
Expand Down
3 changes: 3 additions & 0 deletions dbt_project/models/raw_source/src__lists_raw.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ models:
columns:
- name: list_id
description: ""
data_tests:
- unique
- not_null
meta:
dimension:
type: string
Expand Down
46 changes: 42 additions & 4 deletions dbt_project/models/raw_source/src__tasks_raw.sql
Original file line number Diff line number Diff line change
@@ -1,20 +1,52 @@
{{ config(
materialized = 'incremental',
unique_key = ['todo_id'],
on_schema_change='append_new_columns'
on_schema_change = 'append_new_columns',
) }}

{% set datetime_list = ['todo_createdtime', 'todo_completedtime', 'todo_startdate', 'todo_duedate', 'todo_modifiedtime'] %}
WITH source AS (
WITH source_active AS (
{# direct pull from tick api. contains current data. #}

SELECT
{{ setup_nulls(source('raw_data', 'tasks_raw')) }}
{{dbt_utils.star(
from=source('raw_data','tasks_raw'),
except=['modifiedtime']
)}},
-- handle this column explicitly because the snapshot's data is cast as timestamp
modifiedtime :: text as modifiedtime
FROM
{{ source(
'raw_data',
'tasks_raw'
) }}
),
source_snp AS (
{# pulls the deleted portion of the data that is gone from tick api. #}
SELECT
{{dbt_utils.star(
from=source('raw_data','tasks_raw'),
except=['modifiedtime']
)}},
-- handle this column explicitly because the snapshot's data is cast as timestamp
modifiedtime :: text as modifiedtime
FROM
{{ ref(
'snp_tasks_raw',
) }}
WHERE dbt_valid_to is not null
),

source as (
select
{{ setup_nulls(source('raw_data', 'tasks_raw')) }}
from source_active
UNION ALL
select
{{ setup_nulls(source('raw_data', 'tasks_raw')) }}
from source_snp
),

renamed AS (
SELECT
DISTINCT {{ adapter.quote("id") }} :: text AS "todo_id",
Expand All @@ -26,7 +58,6 @@ renamed AS (
{{ adapter.quote("modifiedtime") }} :: TIMESTAMP + INTERVAL '7 hours' AS "todo_modifiedtime",
{{ adapter.quote("createdtime") }} :: TIMESTAMP + INTERVAL '7 hours' AS "todo_createdtime",
{{ adapter.quote("repeatfirstdate") }} :: TIMESTAMP + INTERVAL '7 hours' AS "todo_repeatfirstdate",

{# #}
{{ adapter.quote("projectid") }} :: text AS "todo_projectid",
{{ adapter.quote("sortorder") }} :: bigint AS "todo_sortorder",
Expand Down Expand Up @@ -105,3 +136,10 @@ SELECT
*
FROM
refine_dates
WHERE

{% if is_incremental() %}
todo_modifiedtime >= coalesce((select max(todo_modifiedtime) from {{ this }}), '1900-01-01 00:00:00')
{% else %}
1=1
{% endif %}
Loading