From 9de063248f9cd209dd0417614d64a52a76cba72e Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Sun, 14 Jul 2024 13:43:32 +0700 Subject: [PATCH 01/29] cleanup dagster and dbt proj structure - specify the int value for env var parse on load dagster dbt - rm unused dbt semantic layer --- app/ETL/constants.py | 2 +- .../marts/metrics/metricflow_time_spine.sql | 17 -- .../marts/metrics/metricflow_time_spine.yml | 9 - .../marts/metrics/semantic_fact_todos.yml | 272 ------------------ 4 files changed, 1 insertion(+), 299 deletions(-) delete mode 100644 dbt_project/models/marts/metrics/metricflow_time_spine.sql delete mode 100644 dbt_project/models/marts/metrics/metricflow_time_spine.yml delete mode 100644 dbt_project/models/marts/metrics/semantic_fact_todos.yml diff --git a/app/ETL/constants.py b/app/ETL/constants.py index 4dc03c8..07a202b 100644 --- a/app/ETL/constants.py +++ b/app/ETL/constants.py @@ -7,7 +7,7 @@ dbt = DbtCliResource(project_dir=DBT_PROFILES_DIR,profiles_dir=DBT_PROFILES_DIR) -if os.getenv("DAGSTER_DBT_PARSE_PROJECT_ON_LOAD"): +if os.getenv("DAGSTER_DBT_PARSE_PROJECT_ON_LOAD") == 1: dbt_manifest_path = ( dbt.cli( ["--quiet", "parse"], diff --git a/dbt_project/models/marts/metrics/metricflow_time_spine.sql b/dbt_project/models/marts/metrics/metricflow_time_spine.sql deleted file mode 100644 index d1113ae..0000000 --- a/dbt_project/models/marts/metrics/metricflow_time_spine.sql +++ /dev/null @@ -1,17 +0,0 @@ --- metricflow_time_spine.sql ---for BQ adapters use "DATE('01/01/2000','mm/dd/yyyy')" -with days as ( - {{dbt_utils.date_spine('day' - , "to_date('01/01/2020','mm/dd/yyyy')" - , "to_date('01/01/2027','mm/dd/yyyy')" - ) - }} -), - -final as ( - select cast(date_day as date) as date_day - from days -) - -select * -from final \ No newline at end of file diff --git a/dbt_project/models/marts/metrics/metricflow_time_spine.yml b/dbt_project/models/marts/metrics/metricflow_time_spine.yml deleted file mode 100644 index 5776be5..0000000 --- a/dbt_project/models/marts/metrics/metricflow_time_spine.yml +++ /dev/null @@ -1,9 +0,0 @@ -version: 2 -models: - - name: metricflow_time_spine - columns: - - name: date_day - description: "" - meta: - dimension: - type: date diff --git a/dbt_project/models/marts/metrics/semantic_fact_todos.yml b/dbt_project/models/marts/metrics/semantic_fact_todos.yml deleted file mode 100644 index 28d257d..0000000 --- a/dbt_project/models/marts/metrics/semantic_fact_todos.yml +++ /dev/null @@ -1,272 +0,0 @@ -semantic_models: - - name: todos - description: "facts for to do items; each row is one task." 
- model: ref('fact_todos') - defaults: - agg_time_dimension: due_date - entities: - - name: todo_id - type: primary - - name: list_key - type: foreign - - name: folder_key - type: foreign - - name: status_key - type: foreign - - - dimensions: - - name: created_date - type: time - expr: date_trunc('day',created_time) - type_params: - time_granularity: day - - name: start_date - type: time - expr: date_trunc('day',start_date) - type_params: - time_granularity: day - - name: due_date - type: time - expr: date_trunc('day',due_date) - type_params: - time_granularity: day - - name: updated_date - type: time - expr: date_trunc('day',modified_date) - type_params: - time_granularity: day - - name: completed_date - type: time - expr: date_trunc('day',completed_time) - type_params: - time_granularity: day - - name: dim_folder - type: categorical - expr: folder_name - - name: dim_list - type: categorical - expr: list_name - - name: dim_tag - description: the tags parsed from tasks. - type: categorical - expr: TRIM(BOTH ' ' FROM REPLACE(REPLACE(REPLACE(tags, '[', ''), ']', ''), '''', '')) - - name: tag_semantic - type: categorical - expr: > - case when tags like '%clarifyme%' then 'inbox' - when tags = 'default' or tags like '%@%' then 'next action' - when tags like '%someday%' then 'someday' - when tags like '%waiting_for%' then 'waiting for' - when tags like '%tickler%' then 'tickler' - else TRIM(BOTH ' ' FROM REPLACE(REPLACE(REPLACE(tags, '[', ''), ']', ''), '''', '')) - end - - name: task_title - type: categorical - expr: title - description: "for debugging purpose; drills down to the name of task" - - # boolean-based dims - - name: is_repeat - type: categorical - expr: case when repeatFlag <> 'default' then true else false end - - name: is_task - type: categorical - expr: case when NOT (kind = 'NOTE' or tags like '%tickler%') then true else false end - - name: is_active - type: categorical - description: | - the item belongs to an active folder - not in ('🚀SOMEDAY lists','🛩Horizon of focus','💤on hold lists') - expr: | - case when folder_name not in ('🚀SOMEDAY lists','🛩Horizon of focus','💤on hold lists') - AND - list_name not in ('🧳SOMEDAY','📍ref - tickler notes for the day') - - then true else false end - - measures: - - name: tasks_count - expr: todo_id - agg: count - - name: tags_count - agg: count - expr: tags - - name: lists_count - expr: list_name - agg: count_distinct - create_metric: true - - name: folders_count - expr: folder_name - agg: count_distinct - create_metric: true - - - - - name: statuses - model: ref('dim_statuses') - entities: - - name: status_key - type: primary - dimensions: - - name: status_name - expr: desc - type: categorical - description: | - semantic description of the todo item's status. available values - - 'undone' : item has no completion time. - - 'wont do' : item has ben marked as wont do. - - 'done' : item has been marked as done with a valid completed_date. - - - - name: lists - model: ref('dim_lists') - defaults: - agg_time_dimension: created_time - entities: - - name: list_key - type: primary - dimensions: - - name: list_name - type: categorical - - name: is_active - description: indicates if a list has been archived or not. - type: categorical - - name: created_time - description: time the list is created. - type: time - type_params: - time_granularity: day - - name: modified_time - description: time the list is modified. 
- type: time - type_params: - time_granularity: day - - - - name: folders - model: ref('dim_folders') - entities: - - name: folder_key - type: primary - dimensions: - - name: folder_name - type: categorical - - - - - -metrics: - - name: task_count_metric - type: simple - label: "Task Count" - type_params: - measure: - name: tasks_count - - name: tags_count_metric - type: simple - label: "tags count" - type_params: - measure: - name: tags_count - - name: overdue_tasks - type: simple - label: "overdue_tasks" - description: | - - use case : see if there are unattended tasks from the day before and take actions to it. mostly reschedule. - - logic : count of task that - - is a task type - - is active - - belongs to active list - - is undone - - do not have default due date - - due date before today - - example usage: "dbt parse && mf query --metrics overdue_tasks --group-by todo_id__is_repeat" - type_params: - measure: - name: tasks_count - # now() is only applicable to postgres. - filter: | - {{Dimension('todo_id__is_task')}} is true AND - {{Dimension('todo_id__is_active')}} is true AND - {{Dimension('list_key__is_active')}} is true AND - {{Dimension('status_key__status_name')}} = 'undone' AND - {{TimeDimension('todo_id__due_date', 'day')}}::text not like '%1900%' AND - {{TimeDimension('todo_id__due_date', 'day')}} < now() - - - -# models: -# - name: fact_todos -# description: "" -# columns: -# - name: list_key -# description: "" -# - name: folder_key -# description: "" -# - name: status_key -# description: "" -# - name: date_start_key -# description: "" -# - name: date_due_key -# description: "" -# - name: date_completed_key -# description: "" -# - name: date_created_key -# description: "" -# - name: todo_id -# description: "" -# - name: folder_name -# description: "" -# - name: list_name -# description: "" -# - name: status_id -# description: "" -# - name: title -# description: "" -# - name: timezone -# description: "" -# - name: reminder -# description: "" -# - name: reminders -# description: "" -# - name: exdate -# description: "" -# - name: items -# description: "" -# - name: progress -# description: "" -# - name: modified_time -# description: "" -# - name: completed_time -# description: "" -# - name: created_time -# description: "" -# - name: etag -# description: "" -# - name: deleted -# description: "" -# - name: kind -# description: "" -# - name: tags -# description: "" -# - name: repeatfrom -# description: "" -# - name: repeattaskid -# description: "" -# - name: repeatflag -# description: "" -# - name: pinned_time -# description: "" -# - name: start_date -# description: "" -# - name: due_date -# description: "" -# - name: deletedtime -# description: "" -# - name: repeatfirstdate -# description: "" -# - name: parentid -# description: "" -# - name: remindtime -# description: "" #} From 116769181106faf044b1f864925e0f7cf7d718aa Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Sun, 14 Jul 2024 13:45:21 +0700 Subject: [PATCH 02/29] add jobs to handle cleaning artifacts - compute logs - materilizationartifacts --- app/ETL/__init__.py | 3 ++- app/ETL/dbt_assets.py | 9 ++++++++- app/ETL/job_rapid_ETL_mode.py | 11 ++++++++--- app/ETL/lvl3_helper.py | 4 ++-- app/ETL/weekly_cleanup.py | 20 ++++++++++++++++---- dagster.yaml | 11 ++++++++--- 6 files changed, 44 insertions(+), 14 deletions(-) diff --git a/app/ETL/__init__.py b/app/ETL/__init__.py index c105fc3..3ffdafe 100644 --- a/app/ETL/__init__.py +++ b/app/ETL/__init__.py @@ -1,6 +1,6 @@ #%% import os,sys; 
sys.path.append(os.path.dirname(__file__)) -from dagster import Definitions,ScheduleDefinition,define_asset_job,load_assets_from_modules,in_process_executor +from dagster import Definitions,ScheduleDefinition,define_asset_job,load_assets_from_modules,in_process_executor,mem_io_manager from dagster_dbt import DbtCliResource from sqlalchemy import true @@ -42,6 +42,7 @@ schedules=[ETL_schedule,rapid_ETL_schedule,helper_schedule,cleanup_schedule], resources={ "dbt": DbtCliResource(project_dir=DBT_PROJECT_DIR), + "io_manager": mem_io_manager, }, ) diff --git a/app/ETL/dbt_assets.py b/app/ETL/dbt_assets.py index 915fffd..31888e4 100644 --- a/app/ETL/dbt_assets.py +++ b/app/ETL/dbt_assets.py @@ -7,6 +7,7 @@ from dagster import AssetExecutionContext, asset from constants import dbt_manifest_path from sqlalchemy import create_engine +import shutil edges = os.listdir(dbt_models_core) + os.listdir(dbt_models_metrics) edges = [edge.replace('.sql','') for edge in edges] @@ -18,5 +19,11 @@ @dbt_assets(manifest=dbt_manifest_path) def ticktick_dbt_assets(context: AssetExecutionContext, dbt: DbtCliResource): - yield from dbt.cli(["run"], context=context).stream() + dbt_invocation = dbt.cli(["run"], context=context) + yield from dbt_invocation.stream() + + #cleanup the dir after done + target_path = dbt_invocation.target_path + if target_path.exists(): + shutil.rmtree(target_path) diff --git a/app/ETL/job_rapid_ETL_mode.py b/app/ETL/job_rapid_ETL_mode.py index 2a42293..8082a5b 100644 --- a/app/ETL/job_rapid_ETL_mode.py +++ b/app/ETL/job_rapid_ETL_mode.py @@ -1,8 +1,9 @@ -from dagster import Definitions,op,job,in_process_executor +from dagster import Definitions,op,job,in_process_executor,mem_io_manager import os,time,sys; sys.path.append(os.path.dirname(os.path.abspath(__file__))) import subprocess import helper.source_env import logging +import shutil exec_time_minutes = int(os.environ.get('TMP_EXEC_TIME_MINUTES',30)) @@ -30,8 +31,12 @@ def schedule_toggle(): -@job + +@job(executor_def=in_process_executor) def rapid_ETL_mode(): schedule_toggle() -defs = Definitions(jobs=[rapid_ETL_mode],executor=in_process_executor) +defs = Definitions(jobs=[rapid_ETL_mode], + resources={ + "io_manager": mem_io_manager + }) diff --git a/app/ETL/lvl3_helper.py b/app/ETL/lvl3_helper.py index 59072b3..4039e3e 100644 --- a/app/ETL/lvl3_helper.py +++ b/app/ETL/lvl3_helper.py @@ -5,7 +5,7 @@ from helper.source_env import dbt_project_dir,service_account_path,dw_path import gspread import csv -from dagster import op,Definitions,job +from dagster import op,Definitions,job,in_process_executor from sqlalchemy import create_engine,text from EL import db_url import pandas as pd @@ -86,7 +86,7 @@ def dump_mapping_to_csv(results=None): -@job +@job(executor_def=in_process_executor) def load_new_lvl3_data(): dump_mapping_to_csv(load_mapping_to_stg()) diff --git a/app/ETL/weekly_cleanup.py b/app/ETL/weekly_cleanup.py index 49c943e..202a1f1 100644 --- a/app/ETL/weekly_cleanup.py +++ b/app/ETL/weekly_cleanup.py @@ -2,11 +2,11 @@ import os,sys; sys.path.append(os.path.dirname(__file__)) from datetime import datetime,timezone - +import shutil from loader import * from loader import _delete_tasks -from dagster import op,job,Definitions +from dagster import op,job,Definitions,in_process_executor from helper.source_env import makefile_path,makefile_dir @@ -45,11 +45,23 @@ def loader_rerun(cleanup): +@op +def cleanup_logs_and_artifacts(loader_rerun): + target_dir = os.getenv('DAGSTER_LOCAL_ARTIFACT_STORAGE_DIR') + if os.path.exists(target_dir): + 
shutil.rmtree(target_dir) + os.makedirs(target_dir) # Recreate the directory to ensure it exists for future runs + print(f"Cleaned up directory: {target_dir}") + else: + print(f"Directory does not exist: {target_dir}") + #%% -@job +@job(executor_def=in_process_executor) def weekly_cleanup(): - loader_rerun(cleanup()) + cleanup_logs_and_artifacts(loader_rerun(cleanup())) + + diff --git a/dagster.yaml b/dagster.yaml index 9cdfbb1..191eb40 100644 --- a/dagster.yaml +++ b/dagster.yaml @@ -25,8 +25,8 @@ retention: sensor: purge_after_days: skipped: 7 - failure: 30 - success: -1 # keep success ticks indefinitely + failure: 7 + success: 7 # keep success ticks indefinitely # there are two possible ways to configure LocalArtifactStorage @@ -39,4 +39,9 @@ local_artifact_storage: base_dir: env: DAGSTER_LOCAL_ARTIFACT_STORAGE_DIR - +compute_logs: + module: dagster.core.storage.local_compute_log_manager + class: LocalComputeLogManager + config: + base_dir: + env: DAGSTER_LOCAL_ARTIFACT_STORAGE_DIR \ No newline at end of file From c1dd74eab1f081d3240cd572830a07c5f1d57529 Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Mon, 15 Jul 2024 05:47:27 +0700 Subject: [PATCH 03/29] handle tz drift for the raw tasks meta --- dbt_project/models/raw_source/src__tasks_raw.sql | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dbt_project/models/raw_source/src__tasks_raw.sql b/dbt_project/models/raw_source/src__tasks_raw.sql index 3ca315b..59f1a4b 100644 --- a/dbt_project/models/raw_source/src__tasks_raw.sql +++ b/dbt_project/models/raw_source/src__tasks_raw.sql @@ -18,11 +18,14 @@ WITH source AS ( renamed AS ( SELECT DISTINCT {{ adapter.quote("id") }} :: text AS "todo_id", - {{ adapter.quote("createdtime") }} :: TIMESTAMP AS "todo_createdtime", {{ adapter.quote("completedtime") }} :: TIMESTAMP AS "todo_completedtime", {# these dates MUST be converted to ETC #} {{ adapter.quote("startdate") }} :: TIMESTAMP + INTERVAL '7 hours' AS "todo_startdate", {{ adapter.quote("duedate") }} :: TIMESTAMP + INTERVAL '7 hours' AS "todo_duedate", + {{ adapter.quote("modifiedtime") }} :: TIMESTAMP + INTERVAL '7 hours' AS "todo_modifiedtime", + {{ adapter.quote("createdtime") }} :: TIMESTAMP + INTERVAL '7 hours' AS "todo_createdtime", + {{ adapter.quote("repeatfirstdate") }} :: TIMESTAMP + INTERVAL '7 hours' AS "todo_repeatfirstdate", + {# #} {{ adapter.quote("projectid") }} :: text AS "todo_projectid", {{ adapter.quote("sortorder") }} :: bigint AS "todo_sortorder", @@ -40,7 +43,6 @@ renamed AS ( {{ adapter.quote("status") }} :: text AS "todo_status", {{ adapter.quote("items") }} :: text AS "todo_items", {{ adapter.quote("progress") }} :: FLOAT AS "todo_progress", - {{ adapter.quote("modifiedtime") }} :: TIMESTAMP AS "todo_modifiedtime", {{ adapter.quote("etag") }} :: text AS "todo_etag", {{ adapter.quote("deleted") }} :: BOOLEAN AS "todo_deleted", {{ adapter.quote("creator") }} :: INT AS "todo_creator", @@ -62,7 +64,6 @@ renamed AS ( -- array {{ adapter.quote("deletedtime") }} :: text AS "todo_deletedtime", -- some weird epoc time ? 
"120295392.0" >>> to_timestamp(1669956236000 / 1000) - {{ adapter.quote("repeatfirstdate") }} :: TIMESTAMP AS "todo_repeatfirstdate", {{ adapter.quote("pomodorosummaries") }} :: text AS "todo_pomodorosummaries", -- array {{ adapter.quote("parentid") }} :: text AS "todo_parentid", From d22e5354a4cacb0aafe8ee790a2640d6aed25204 Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Mon, 15 Jul 2024 05:47:41 +0700 Subject: [PATCH 04/29] fact yml model update : - updated the daily monitor count to count wont do in progress - added new definitions for v2 inbox and v2 next action - init adding today count to monitor : - clarifytoday - open loops - overdue - promoted tag semantic field to a derived field - done code for 3 daily monitor metrics - working code for overdue and clarifytoday --- dbt_project/models/marts/core/fact_todos.yml | 86 +++++++++++++++++++- 1 file changed, 82 insertions(+), 4 deletions(-) diff --git a/dbt_project/models/marts/core/fact_todos.yml b/dbt_project/models/marts/core/fact_todos.yml index 729fbee..9b2df23 100644 --- a/dbt_project/models/marts/core/fact_todos.yml +++ b/dbt_project/models/marts/core/fact_todos.yml @@ -187,7 +187,9 @@ models: description: component metric to compare btwn today's modified next action, and all time inbox filters: - dim_lists.list_isActive: true - - dim_statuses.status_desc: done + - dim_statuses.status_desc: + - done + - wont do - todo_derived__is_task: true - todo_derived__is_active: true - todo_derived__tag_semantic: @@ -204,6 +206,39 @@ models: - todo_derived__is_task: true - todo_derived__is_active: true - todo_derived__tag_semantic: inbox + overdue_count_today: + groups: ['daily monitor'] + type: count + description: count of tasks due today + filters: + - dim_lists.list_isActive: true + - dim_statuses.status_desc: undone + - todo_derived__is_task: true + - todo_derived__is_active: true + - todo_derived__tag_semantic: + - inbox + - next action + - due_date.date_id: "inThePast 1 days" + - todo_derived__overdue: true + clarifytoday_count: + groups: ['daily monitor'] + type: count + description: count of all high priority clarifytoday tasks + filters: + - dim_lists.list_isActive: true + - dim_statuses.status_desc: undone + - todo_derived__is_task: true + - todo_derived__is_active: true + - todo_derived__tag_semantic: inbox + - todo_title: "%clarifytoday%" + open_loops_count: + groups: ['daily monitor'] + type: max + description: count of open loops in the note + sql: | + regexp_count(("fact_todos".todo_content), E'\n[A-Za-z0-9]') + filters: + - todo_id: "9be74928bd36e5772850726c" - name: todo_createdtime description: "" meta: @@ -245,6 +280,7 @@ models: meta: dimension: # hidden: true + label: "(1) title" type: string urls: - label: "open in app" @@ -539,8 +575,10 @@ models: label: 'tag semantic' type: string sql: | - case when ${todo_tags} like '%clarifyme%' then 'inbox' - when (${todo_tags} = 'default' or ${todo_tags} like '%@%') then 'next action' + case when ${todo_tags} like '%clarifyme%' then 'inbox v1' + when ${todo_tags} = 'default' then 'inbox' + -- when (${todo_tags} = 'default' or ${todo_tags} like '%@%') then 'next action v1' + when (${todo_tags} like '%clarified%' or ${todo_tags} like '%@%') then 'next action' when ${todo_tags} like '%someday%' then 'someday' when ${todo_tags} like '%waiting_for%' then 'waiting for' when ${todo_tags} like '%tickler%' then 'tickler' @@ -582,4 +620,44 @@ models: case when ${dim_folders.folder_name} not in ('🚀SOMEDAY lists','🛩Horizon of focus','💤on hold lists') AND ${dim_lists.list_name} not in 
('🧳SOMEDAY','📍ref - tickler notes for the day') - then true else false end \ No newline at end of file + then true else false end + - name: todo_derived__unplanned + description: | + tasks that are due on the same day they are created + meta: + dimension: + # hidden: true + label: 'unplanned flag' + groups: ['derived'] + type: string + sql: | + case when ${created_date.date_id} = ${due_date.date_id} then 'unplanned' else 'planned' end + - name: todo_derived__overdue + description: | + tasks that are due compared to current date + meta: + dimension: + # hidden: true + label: 'overdue?' + groups: ['derived'] + type: boolean + sql: | + date_trunc('DAY',${due_date.date_id_day}) < date_trunc('DAY',current_timestamp) + - name: todo_derived__repeat_semantic + description: | + flag if the task is a repeat. so that i can drill down & not getting error from a custom dimension field + meta: + dimension: + # hidden: true + label: 'repeat_semantic' + groups: ['derived'] + type: string + sql: | + case when ${fact_todos.todo_derived__is_repeat} is true then 'repeat' else '' end + - name: dummy + description: dummy column for drill down purposes + meta: + dimension: + groups: ['boilerplate'] + type: string + sql: "case when 1=1 then '' else null end" \ No newline at end of file From 3660a169f3335b50135880b5fbd317413a74b15a Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Mon, 22 Jul 2024 05:23:06 +0700 Subject: [PATCH 05/29] house keeping - cleanup : update cutoff start date - patch : added job to deploy LD at 0:00 to fix the anchor issue of metrics "done_cnt_today" --- app/ETL/__init__.py | 11 +++++++++-- app/ETL/job_deploy_LD.py | 35 +++++++++++++++++++++++++++++++++++ app/ETL/weekly_cleanup.py | 2 +- 3 files changed, 45 insertions(+), 3 deletions(-) create mode 100644 app/ETL/job_deploy_LD.py diff --git a/app/ETL/__init__.py b/app/ETL/__init__.py index 3ffdafe..251c428 100644 --- a/app/ETL/__init__.py +++ b/app/ETL/__init__.py @@ -9,6 +9,8 @@ from lvl3_helper import load_new_lvl3_data,load_mapping_helper from weekly_cleanup import weekly_cleanup from job_rapid_ETL_mode import rapid_ETL_mode +from job_deploy_LD import job_deploy_LD + #%% all_assets = load_assets_from_modules([EL,dbt_assets]) @@ -35,11 +37,16 @@ cron_schedule="0 0 * * 5",execution_timezone="Asia/Bangkok" ) +deploy_schedule = ScheduleDefinition( + job=job_deploy_LD, + cron_schedule="0 0 * * *",execution_timezone="Asia/Bangkok" +) + defs = Definitions( assets=all_assets, - jobs=[load_new_lvl3_data,weekly_cleanup,rapid_ETL_mode], - schedules=[ETL_schedule,rapid_ETL_schedule,helper_schedule,cleanup_schedule], + jobs=[load_new_lvl3_data,weekly_cleanup,rapid_ETL_mode,job_deploy_LD], + schedules=[ETL_schedule,rapid_ETL_schedule,helper_schedule,cleanup_schedule,deploy_schedule], resources={ "dbt": DbtCliResource(project_dir=DBT_PROJECT_DIR), "io_manager": mem_io_manager, diff --git a/app/ETL/job_deploy_LD.py b/app/ETL/job_deploy_LD.py new file mode 100644 index 0000000..524be17 --- /dev/null +++ b/app/ETL/job_deploy_LD.py @@ -0,0 +1,35 @@ +#%% +from importlib import resources +from dagster import job, mem_io_manager,op, Definitions, mem_io_manager,in_process_executor +import os, subprocess +import helper.source_env + + + +@op +def deploy_LD(): + cmd_venv = "source .venv/bin/activate" + LD_cmd = "lightdash deploy" + cmd = f"{cmd_venv} && {LD_cmd}" + + result = subprocess.run( + ["/bin/bash","-c",cmd], + check=True, + cwd=os.environ.get("DAGSTER_HOME"), + env=os.environ + ) + if result.returncode != 0: + raise Exception("Failed to toggle ETL rapid 
schedule") + + +@job(executor_def=in_process_executor) +def job_deploy_LD(): + deploy_LD() + +defs = Definitions(jobs=[job_deploy_LD], + resources={ + "io_manager": mem_io_manager + } +) + +#%% \ No newline at end of file diff --git a/app/ETL/weekly_cleanup.py b/app/ETL/weekly_cleanup.py index 202a1f1..630b07a 100644 --- a/app/ETL/weekly_cleanup.py +++ b/app/ETL/weekly_cleanup.py @@ -25,7 +25,7 @@ def cleanup(): ) client = TickTickClient(username, password, auth_client) today = datetime.now(timezone.utc) - cutoff_date = today - timedelta(days=7) + cutoff_date = today - timedelta(days=5) # cutoff_date = datetime(2022, 7, 24, tzinfo=timezone.utc) start_date = datetime(2024, 7, 1,tzinfo=timezone.utc) _delete_tasks(end=cutoff_date,client=client,full_load=False,start=start_date) From 99b9e72b88a5102b20595e703e564be2700e0e1c Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Wed, 24 Jul 2024 08:03:12 +0700 Subject: [PATCH 06/29] update : schema fact todo now has a humanized due date field --- app/ETL/EL.py | 6 ++++-- dbt_project/models/marts/core/fact_todos.yml | 17 +++++++++++++++++ .../models/raw_source/src__tasks_raw.sql | 1 + 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/app/ETL/EL.py b/app/ETL/EL.py index 609c4b8..2e2056e 100644 --- a/app/ETL/EL.py +++ b/app/ETL/EL.py @@ -9,6 +9,7 @@ from datetime import datetime import pytz import humanize +import numpy as np #%% engine = create_engine(db_url) @@ -48,6 +49,7 @@ def raw_data(): df.columns = df.columns.str.lower() if name == 'tasks_raw': df['modifiedtime_humanize'] = df['modifiedtime'].apply(humanize_timestamp) + df['duedate_humanize'] = df['duedate'].apply(humanize_timestamp) df.to_sql(name, engine, if_exists='replace', index=False, schema=target_schema+'_raw') yield Output(value=df,output_name=name) @@ -55,8 +57,8 @@ def raw_data(): def humanize_timestamp(ts): - if pd.isnull(ts): - return 'No modified time' + if pd.isnull(ts) or ts == '' or ts == 'nan': + return 'default' # Parse the timestamp dt = datetime.strptime(ts, '%Y-%m-%dT%H:%M:%S.%f%z') diff --git a/dbt_project/models/marts/core/fact_todos.yml b/dbt_project/models/marts/core/fact_todos.yml index 9b2df23..1d42bd8 100644 --- a/dbt_project/models/marts/core/fact_todos.yml +++ b/dbt_project/models/marts/core/fact_todos.yml @@ -166,6 +166,7 @@ models: - completed_date.year_key: "0" - lookahead_flag : true - todo_derived__is_repeat : true + - todo_is_time_sensitive : false - dim_dates_lookahead.date_id: "!1900" # todo: how to discard the count in 1900 only. # - dim_dates_lookahead.date_id: # todo : watch when LD allows this AND interval filter. # - "inThePast 5 days" @@ -263,6 +264,16 @@ models: dimension: hidden: true type: timestamp + additional_dimensions: + todo_is_time_sensitive: + description: "flag if the due date is time sensitive." 
+ type: boolean + sql: | + case when + extract(HOUR from todo_duedate) = 0 and + extract(MINUTE from todo_duedate) = 0 then false else true end + + - name: todo_projectid description: "" meta: @@ -370,6 +381,12 @@ models: dimension: # hidden: true type: string + - name: todo_duedate_humanize + description: "" + meta: + dimension: + # hidden: true + type: string - name: todo_etag description: "" meta: diff --git a/dbt_project/models/raw_source/src__tasks_raw.sql b/dbt_project/models/raw_source/src__tasks_raw.sql index 59f1a4b..3e5fda9 100644 --- a/dbt_project/models/raw_source/src__tasks_raw.sql +++ b/dbt_project/models/raw_source/src__tasks_raw.sql @@ -69,6 +69,7 @@ renamed AS ( {{ adapter.quote("parentid") }} :: text AS "todo_parentid", {{ adapter.quote("annoyingalert") }} :: text AS "todo_annoyingalert", {{ adapter.quote("modifiedtime_humanize") }} :: text AS "todo_modifiedtime_humanize", + {{ adapter.quote("duedate_humanize") }} :: text AS "todo_duedate_humanize", ROW_NUMBER() over( PARTITION BY {{ dbt_utils.star( from = source( From 2c6cfb78df63b5ca63565bafbb60087d7302b203 Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Mon, 29 Jul 2024 07:25:11 +0700 Subject: [PATCH 07/29] clean up boot script - added make cmd `up` for each vm reboot - removed the streamlit from deployment script - env.sh use . ./.env instead of source .env for sh compatibility - updated cron to avoid conflict with nas run job at 00:00 --- .github/workflows/deployment.sh | 25 ++++++++++++------------- Makefile | 5 ++--- app/ETL/__init__.py | 2 +- env.sh | 2 +- 4 files changed, 16 insertions(+), 18 deletions(-) diff --git a/.github/workflows/deployment.sh b/.github/workflows/deployment.sh index 8d88945..a50858f 100755 --- a/.github/workflows/deployment.sh +++ b/.github/workflows/deployment.sh @@ -6,22 +6,21 @@ WORKDIR=$(pwd) . $WORKDIR/env.sh -# pip install --upgrade -q -r requirements.txt -# setup sessions for service -STREAMLIT="streamlit" +# # setup sessions for service +# STREAMLIT="streamlit" -# Kill the existing session if it exists -tmux has-session -t $STREAMLIT 2>/dev/null +# # Kill the existing session if it exists +# tmux has-session -t $STREAMLIT 2>/dev/null -if [ $? != 0 ]; then - # Session doesn't exist, create a new one - tmux new-session -s $STREAMLIT -d -else - # Session exists, kill the old one and create a new one - tmux kill-session -t $STREAMLIT - tmux new-session -s $STREAMLIT -d -fi +# if [ $? != 0 ]; then +# # Session doesn't exist, create a new one +# tmux new-session -s $STREAMLIT -d +# else +# # Session exists, kill the old one and create a new one +# tmux kill-session -t $STREAMLIT +# tmux new-session -s $STREAMLIT -d +# fi diff --git a/Makefile b/Makefile index 90670d3..f524fb2 100644 --- a/Makefile +++ b/Makefile @@ -5,9 +5,6 @@ init_deploy: dagster: tmux send-keys -t dagster.0 ". ./.venv/bin/activate && . ./env.sh && dagster dev -m ETL -h 0.0.0.0 -p 60001" ENTER -streamlit: - tmux send-keys -t streamlit.0 ". ./.venv/bin/activate && . 
./env.sh && cd app/charts && streamlit run main.py" ENTER - sleeper: sleep 10 @@ -42,3 +39,5 @@ loader_helper: loader_rerun: loader_helper loader +# command after each reboot the vm +up: init_deploy dagster loader diff --git a/app/ETL/__init__.py b/app/ETL/__init__.py index 251c428..1663247 100644 --- a/app/ETL/__init__.py +++ b/app/ETL/__init__.py @@ -39,7 +39,7 @@ deploy_schedule = ScheduleDefinition( job=job_deploy_LD, - cron_schedule="0 0 * * *",execution_timezone="Asia/Bangkok" + cron_schedule="0 4 * * *",execution_timezone="Asia/Bangkok" ) diff --git a/env.sh b/env.sh index 24b3ca6..0d347a1 100755 --- a/env.sh +++ b/env.sh @@ -31,7 +31,7 @@ EOENV # Function to source .env file source_env() { set -a # Automatically export all variables - source .env + . ./.env set +a # Stop automatically exporting variables } From 5ab5e03305ef98a279fe462b0dbc65603b82f911 Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Thu, 1 Aug 2024 06:08:44 +0700 Subject: [PATCH 08/29] update fact yml - logic for unplanned flag covers all status - added metric historical counts by upstream action : clarify or deep work --- .gitignore | 4 ++- dbt_project/models/marts/core/fact_todos.yml | 36 ++++++++++++++++++-- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 41432c4..31f7afc 100644 --- a/.gitignore +++ b/.gitignore @@ -34,4 +34,6 @@ package-lock.json package.json deactivate -.devcontainer_prod \ No newline at end of file +.devcontainer_prod + +app.log.* \ No newline at end of file diff --git a/dbt_project/models/marts/core/fact_todos.yml b/dbt_project/models/marts/core/fact_todos.yml index 1d42bd8..8a0909f 100644 --- a/dbt_project/models/marts/core/fact_todos.yml +++ b/dbt_project/models/marts/core/fact_todos.yml @@ -240,6 +240,32 @@ models: regexp_count(("fact_todos".todo_content), E'\n[A-Za-z0-9]') filters: - todo_id: "9be74928bd36e5772850726c" + done_count__semantic_clarification: + groups: ['historical'] + type: count + description: | + count of done tasks with semantically categorized from upstream action : + - from a clarification action - tag semantic is 'inbox' + - from actual deep work action - tag semantic is 'next action' + filters: + - dim_statuses.status_desc: + - done + - wont do + - todo_derived__is_task: true + - todo_derived__tag_semantic: inbox + done_count__semantic_deep_work: + groups: ['historical'] + type: count + description: | + count of done tasks with semantically categorized from upstream action : + - from a clarification action - tag semantic is 'inbox' + - from actual deep work action - tag semantic is 'next action' + filters: + - dim_statuses.status_desc: + - done + - wont do + - todo_derived__is_task: true + - todo_derived__tag_semantic: next action - name: todo_createdtime description: "" meta: @@ -640,7 +666,7 @@ models: then true else false end - name: todo_derived__unplanned description: | - tasks that are due on the same day they are created + tasks that are due on the same day they are created OR done on the same day created meta: dimension: # hidden: true @@ -648,7 +674,13 @@ models: groups: ['derived'] type: string sql: | - case when ${created_date.date_id} = ${due_date.date_id} then 'unplanned' else 'planned' end + case when + (${dim_statuses.status_desc} = 'undone' AND ${created_date.date_id} != ${due_date.date_id} ) + OR + (${dim_statuses.status_desc} in ('done', 'wont do') AND ${created_date.date_id} != ${completed_date.date_id} ) + then 'planned' + when ${dim_statuses.status_desc} not in ('undone', 'done','wont do') then 
'default' + else 'unplanned' end - name: todo_derived__overdue description: | tasks that are due compared to current date From c3a9f72e2d5508013d6817571d711c7471415298 Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Thu, 1 Aug 2024 00:37:26 +0000 Subject: [PATCH 09/29] update model +fact_todos+ - added dereived repeat flag at stg - linting code blocks --- dbt_project/models/marts/core/dim_dates.yml | 6 - .../models/marts/core/dim_dates_lookahead.yml | 8 +- dbt_project/models/marts/core/dim_folders.yml | 1 - dbt_project/models/marts/core/dim_lists.yml | 9 +- .../models/marts/core/dim_statuses.yml | 2 - dbt_project/models/marts/core/fact_todos.yml | 169 +++++++++++------- dbt_project/models/marts/core/obt.yml | 30 ++++ dbt_project/models/staging/stg_todos.sql | 33 +++- 8 files changed, 171 insertions(+), 87 deletions(-) diff --git a/dbt_project/models/marts/core/dim_dates.yml b/dbt_project/models/marts/core/dim_dates.yml index 505589d..ee41949 100644 --- a/dbt_project/models/marts/core/dim_dates.yml +++ b/dbt_project/models/marts/core/dim_dates.yml @@ -12,7 +12,6 @@ models: description: "" meta: dimension: - # hidden: true type: date time_intervals: - DAY @@ -36,7 +35,6 @@ models: description: "" meta: dimension: - # hidden: true type: number - name: day_of_week description: "" @@ -54,7 +52,6 @@ models: description: "" meta: dimension: - # hidden: true type: string - name: first_day_of_week description: "" @@ -120,7 +117,6 @@ models: description: "" meta: dimension: - # hidden: true type: number - name: day_of_quarter description: "" @@ -132,13 +128,11 @@ models: description: "" meta: dimension: - # hidden: true type: string - name: quarter_desc description: "" meta: dimension: - # hidden: true type: string - name: first_day_of_quarter description: "" diff --git a/dbt_project/models/marts/core/dim_dates_lookahead.yml b/dbt_project/models/marts/core/dim_dates_lookahead.yml index e230789..2aeaff8 100644 --- a/dbt_project/models/marts/core/dim_dates_lookahead.yml +++ b/dbt_project/models/marts/core/dim_dates_lookahead.yml @@ -12,14 +12,13 @@ models: description: "" meta: dimension: - # hidden: true type: date time_intervals: - DAY - WEEK - MONTH - YEAR - - QUARTER + - QUARTER - name: day_of_year description: "" meta: @@ -36,7 +35,6 @@ models: description: "" meta: dimension: - # hidden: true type: number - name: day_of_week description: "" @@ -54,7 +52,6 @@ models: description: "" meta: dimension: - # hidden: true type: string - name: first_day_of_week description: "" @@ -120,7 +117,6 @@ models: description: "" meta: dimension: - # hidden: true type: number - name: day_of_quarter description: "" @@ -132,13 +128,11 @@ models: description: "" meta: dimension: - # hidden: true type: string - name: quarter_desc description: "" meta: dimension: - # hidden: true type: string - name: first_day_of_quarter description: "" diff --git a/dbt_project/models/marts/core/dim_folders.yml b/dbt_project/models/marts/core/dim_folders.yml index 3fcb9bd..1114596 100644 --- a/dbt_project/models/marts/core/dim_folders.yml +++ b/dbt_project/models/marts/core/dim_folders.yml @@ -24,7 +24,6 @@ models: description: "" meta: dimension: - # hidden: true type: string - name: folder_showall description: "" diff --git a/dbt_project/models/marts/core/dim_lists.yml b/dbt_project/models/marts/core/dim_lists.yml index ee59b34..5b99129 100644 --- a/dbt_project/models/marts/core/dim_lists.yml +++ b/dbt_project/models/marts/core/dim_lists.yml @@ -12,17 +12,16 @@ models: description: "" meta: dimension: - # hidden: true type: string 
- name: list_name description: "" meta: dimension: - # hidden: true type: string urls: - - label: "open in app" - url: "ticktick://ticktick.com/webapp/#p/${row.dim_lists.list_id.raw}/tasks" + - label: open in app + url: >- + ticktick://ticktick.com/webapp/#p/${row.dim_lists.list_id.raw}/tasks - name: list_isowner description: "" meta: @@ -171,11 +170,9 @@ models: description: "" meta: dimension: - # hidden: true type: timestamp - name: list_isactive description: "" meta: dimension: - # hidden: true type: boolean diff --git a/dbt_project/models/marts/core/dim_statuses.yml b/dbt_project/models/marts/core/dim_statuses.yml index 69345d2..7079caa 100644 --- a/dbt_project/models/marts/core/dim_statuses.yml +++ b/dbt_project/models/marts/core/dim_statuses.yml @@ -18,11 +18,9 @@ models: description: "" meta: dimension: - # hidden: true type: string - name: status_comments description: "" meta: dimension: - # hidden: true type: string diff --git a/dbt_project/models/marts/core/fact_todos.yml b/dbt_project/models/marts/core/fact_todos.yml index 8a0909f..2c5760a 100644 --- a/dbt_project/models/marts/core/fact_todos.yml +++ b/dbt_project/models/marts/core/fact_todos.yml @@ -5,7 +5,6 @@ models: order_fields_by: index required_filters: - due_date.date_id: inTheNext 4 weeks - # - todo_id_lookahead: "!null" # todo: watch when LD allows triggers overriding this filter. for now define it @ metric level joins: - join: dim_lists type: inner @@ -79,6 +78,10 @@ models: - relationships: field: date_key to: ref('dim_dates') + meta: + dimension: + type: string + hidden: true - name: date_modified_key description: "" tests: @@ -93,7 +96,6 @@ models: description: "" meta: dimension: - # hidden: true type: string - name: list_key description: "" @@ -133,7 +135,6 @@ models: - apply is not null condition on most metrics except the actual lookahead metrics. 
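        # a minimal sketch of the join this flag rides on, assuming the staging
        # model builds it roughly the way stg_todos does further below:
        #   select t.*, dl.date_id
        #   from todo t
        #   full outer join dates_lookahead dl
        #     on dl.date_id = t.todo_duedate_derived_date
        # spine-only rows carry a null todo_id, so ordinary metrics add an
        # "is not null" filter on this field, while the lookahead metrics keep
        # the null rows so empty future days still appear in the chart.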
meta: dimension: - # hidden: true type: string metrics: lookahead_norepeat_count: @@ -399,19 +400,16 @@ models: description: "" meta: dimension: - # hidden: true type: timestamp - name: todo_modifiedtime_humanize description: "" meta: dimension: - # hidden: true type: string - name: todo_duedate_humanize description: "" meta: dimension: - # hidden: true type: string - name: todo_etag description: "" @@ -613,100 +611,149 @@ models: - name: todo_derived__tag_semantic meta: dimension: - # hidden: true - groups: ['derived'] - label: 'tag semantic' + groups: + - derived + label: tag semantic type: string - sql: | + sql: > case when ${todo_tags} like '%clarifyme%' then 'inbox v1' + when ${todo_tags} = 'default' then 'inbox' - -- when (${todo_tags} = 'default' or ${todo_tags} like '%@%') then 'next action v1' - when (${todo_tags} like '%clarified%' or ${todo_tags} like '%@%') then 'next action' + + -- when (${todo_tags} = 'default' or ${todo_tags} like '%@%') then + 'next action v1' + + when (${todo_tags} like '%clarified%' or ${todo_tags} like '%@%') + then 'next action' + when ${todo_tags} like '%someday%' then 'someday' + when ${todo_tags} like '%waiting_for%' then 'waiting for' + when ${todo_tags} like '%tickler%' then 'tickler' - else TRIM(BOTH ' ' FROM REPLACE(REPLACE(REPLACE(${todo_tags}, '[', ''), ']', ''), '''', '')) + + else TRIM(BOTH ' ' FROM REPLACE(REPLACE(REPLACE(${todo_tags}, '[', + ''), ']', ''), '''', '')) + end metrics: tags_count: type: count + description: "" - name: todo_derived__is_repeat - meta: + meta: dimension: - # hidden: true - groups: ['derived'] - label: 'is repeat' + groups: + - derived + label: is repeat type: boolean - sql: >- - case when ${todo_repeatflag} <> 'default' then true else false - end - - name : todo_derived__is_task - description: "derived from custom sql logic" + sql: case when ${todo_repeatflag} <> 'default' then true else false end + description: "" + - name: todo_derived__is_task + description: derived from custom sql logic meta: dimension: - # hidden: true - groups: ['derived'] - label: 'is task' + groups: + - derived + label: is task type: boolean - sql: case when NOT (${todo_kind} = 'NOTE' or ${todo_tags} like '%tickler%') then true else false end - - - name : todo_derived__is_active - description: | - the item belongs to an active folder - not in ('🚀SOMEDAY lists','🛩Horizon of focus','💤on hold lists') + sql: >- + case when NOT (${todo_kind} = 'NOTE' or ${todo_tags} like + '%tickler%') then true else false end + - name: todo_derived__is_active + description: > + the item belongs to an active folder - not in ('🚀SOMEDAY + lists','🛩Horizon of focus','💤on hold lists') meta: dimension: - # hidden: true - label: 'is active' - groups: ['derived'] + label: is active + groups: + - derived type: boolean - sql: | - case when ${dim_folders.folder_name} not in ('🚀SOMEDAY lists','🛩Horizon of focus','💤on hold lists') + sql: > + case when ${dim_folders.folder_name} not in ('🚀SOMEDAY + lists','🛩Horizon of focus','💤on hold lists') + AND - ${dim_lists.list_name} not in ('🧳SOMEDAY','📍ref - tickler notes for the day') + + ${dim_lists.list_name} not in ('🧳SOMEDAY','📍ref - tickler notes + for the day') + then true else false end - name: todo_derived__unplanned - description: | - tasks that are due on the same day they are created OR done on the same day created + description: > + tasks that are due on the same day they are created OR done on the + same day created meta: dimension: - # hidden: true - label: 'unplanned flag' - groups: ['derived'] + label: unplanned 
flag + groups: + - derived type: string - sql: | + sql: > case when - (${dim_statuses.status_desc} = 'undone' AND ${created_date.date_id} != ${due_date.date_id} ) + + (${dim_statuses.status_desc} = 'undone' AND + ${created_date.date_id} != ${due_date.date_id} ) + OR - (${dim_statuses.status_desc} in ('done', 'wont do') AND ${created_date.date_id} != ${completed_date.date_id} ) + + (${dim_statuses.status_desc} in ('done', 'wont do') AND + ${created_date.date_id} != ${completed_date.date_id} ) + then 'planned' - when ${dim_statuses.status_desc} not in ('undone', 'done','wont do') then 'default' + + when ${dim_statuses.status_desc} not in ('undone', 'done','wont + do') then 'default' + else 'unplanned' end - name: todo_derived__overdue description: | tasks that are due compared to current date meta: dimension: - # hidden: true - label: 'overdue?' - groups: ['derived'] + label: overdue? + groups: + - derived type: boolean - sql: | - date_trunc('DAY',${due_date.date_id_day}) < date_trunc('DAY',current_timestamp) + sql: > + date_trunc('DAY',${due_date.date_id_day}) < + date_trunc('DAY',current_timestamp) - name: todo_derived__repeat_semantic - description: | - flag if the task is a repeat. so that i can drill down & not getting error from a custom dimension field + description: > + flag if the task is a repeat. so that i can drill down & not getting + error from a custom dimension field meta: dimension: - # hidden: true - label: 'repeat_semantic' - groups: ['derived'] + label: repeat_semantic + groups: + - derived type: string - sql: | - case when ${fact_todos.todo_derived__is_repeat} is true then 'repeat' else '' end + sql: > + case when ${fact_todos.todo_derived__is_repeat} is true then + 'repeat' else '' end - name: dummy description: dummy column for drill down purposes meta: - dimension: - groups: ['boilerplate'] - type: string - sql: "case when 1=1 then '' else null end" \ No newline at end of file + dimension: + groups: + - boilerplate + type: string + sql: case when 1=1 then '' else null end + - name: rn + description: "" + meta: + dimension: + type: number + hidden: true + - name: todo_modifiedtime_derived_date + description: "" + meta: + dimension: + type: date + hidden: true + - name: todo_derived__recurring + description: "flag to identify true recurring tasks" + meta: + dimension: + type: string \ No newline at end of file diff --git a/dbt_project/models/marts/core/obt.yml b/dbt_project/models/marts/core/obt.yml index 212d3bf..e9df0ec 100644 --- a/dbt_project/models/marts/core/obt.yml +++ b/dbt_project/models/marts/core/obt.yml @@ -1015,3 +1015,33 @@ models: meta: dimension: type: boolean + - name: date_modified_key + description: "" + meta: + dimension: + type: string + - name: todo_modifiedtime_humanize + description: "" + meta: + dimension: + type: string + - name: todo_duedate_humanize + description: "" + meta: + dimension: + type: string + - name: rn + description: "" + meta: + dimension: + type: number + - name: todo_modifiedtime_derived_date + description: "" + meta: + dimension: + type: date + - name: todo_derived__recurring + description: "" + meta: + dimension: + type: string diff --git a/dbt_project/models/staging/stg_todos.sql b/dbt_project/models/staging/stg_todos.sql index 042e98f..0476084 100644 --- a/dbt_project/models/staging/stg_todos.sql +++ b/dbt_project/models/staging/stg_todos.sql @@ -1,10 +1,35 @@ -WITH todo AS ( +WITH init_todo AS ( SELECT - distinct - {{ coalesce_defaults(ref('src__tasks_raw')) }} + DISTINCT {{ coalesce_defaults(ref('src__tasks_raw')) }} 
FROM {{ ref('src__tasks_raw') }} ), +todo AS ( + -- handle flagging habits + SELECT + *, + CASE + WHEN ( + todo_status <> '0' + AND EXISTS ( + SELECT + todo_id + FROM + init_todo A + WHERE + A.todo_id = b.todo_repeattaskid + AND A.todo_repeatflag <> 'default' + ) + ) + OR ( + todo_status = '0' + AND todo_repeatflag <> 'default' + ) THEN 'recurring' + ELSE 'default' + END AS todo_derived__recurring + FROM + init_todo b +), lists AS ( SELECT {{ coalesce_defaults(ref('src__lists_raw')) }} @@ -51,7 +76,7 @@ joined AS ( t.*, case when -- build the flag window - -- case1: the records from due_lookahead + -- case1: the records from due_lookahead dl.date_id is not null then true when -- case2: the left records facts; grabs dummy records within the window From 016cfde38348a1fc7b6d96ee928f8195578ada32 Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Thu, 1 Aug 2024 01:18:44 +0000 Subject: [PATCH 10/29] update model : promoted the field todo_derived__is_repeat from stg to endpoint --- dbt_project/models/marts/core/fact_todos.yml | 15 ++++----------- dbt_project/models/staging/stg_todos.sql | 6 +++--- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/dbt_project/models/marts/core/fact_todos.yml b/dbt_project/models/marts/core/fact_todos.yml index 2c5760a..627783d 100644 --- a/dbt_project/models/marts/core/fact_todos.yml +++ b/dbt_project/models/marts/core/fact_todos.yml @@ -640,15 +640,6 @@ models: tags_count: type: count description: "" - - name: todo_derived__is_repeat - meta: - dimension: - groups: - - derived - label: is repeat - type: boolean - sql: case when ${todo_repeatflag} <> 'default' then true else false end - description: "" - name: todo_derived__is_task description: derived from custom sql logic meta: @@ -752,8 +743,10 @@ models: dimension: type: date hidden: true - - name: todo_derived__recurring + - name: todo_derived__is_repeat description: "flag to identify true recurring tasks" meta: dimension: - type: string \ No newline at end of file + type: boolean + groups: + - derived \ No newline at end of file diff --git a/dbt_project/models/staging/stg_todos.sql b/dbt_project/models/staging/stg_todos.sql index 0476084..177e568 100644 --- a/dbt_project/models/staging/stg_todos.sql +++ b/dbt_project/models/staging/stg_todos.sql @@ -24,9 +24,9 @@ todo AS ( OR ( todo_status = '0' AND todo_repeatflag <> 'default' - ) THEN 'recurring' - ELSE 'default' - END AS todo_derived__recurring + ) THEN true + ELSE false + END AS todo_derived__is_repeat FROM init_todo b ), From 3e7fdc9c7ff82d97c3a296cff4f1d52b04f87e70 Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Mon, 22 Jul 2024 05:23:06 +0700 Subject: [PATCH 11/29] house keeping - cleanup : update cutoff start date - patch : added job to deploy LD at 0:00 to fix the anchor issue of metrics "done_cnt_today" - misc update : schema fact todo now has a humanized due date field --- dbt_project/models/marts/core/fact_todos.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dbt_project/models/marts/core/fact_todos.yml b/dbt_project/models/marts/core/fact_todos.yml index 2c5760a..21285ea 100644 --- a/dbt_project/models/marts/core/fact_todos.yml +++ b/dbt_project/models/marts/core/fact_todos.yml @@ -411,6 +411,12 @@ models: meta: dimension: type: string + - name: todo_duedate_humanize + description: "" + meta: + dimension: + # hidden: true + type: string - name: todo_etag description: "" meta: From c35a44c6b7e83be51cc58fdfd834540937a743ba Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Thu, 1 Aug 2024 09:12:48 +0700 Subject: [PATCH 12/29] 
hotfix : unhide the raw duedate --- dbt_project/models/marts/core/fact_todos.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt_project/models/marts/core/fact_todos.yml b/dbt_project/models/marts/core/fact_todos.yml index 21285ea..33cef10 100644 --- a/dbt_project/models/marts/core/fact_todos.yml +++ b/dbt_project/models/marts/core/fact_todos.yml @@ -289,7 +289,7 @@ models: description: "" meta: dimension: - hidden: true + # hidden: true type: timestamp additional_dimensions: todo_is_time_sensitive: From 9176d5dcecb56030d79d94daa14ec95f31819ac5 Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Sat, 17 Aug 2024 07:28:22 +0700 Subject: [PATCH 13/29] pipeline optimization - dbt cli resources run in quiet mode seems to be snappier - todo models now have indexes for to optimize joins in incremental loads --- app/ETL/dbt_assets.py | 2 +- dbt_project/models/marts/core/fact_todos.sql | 18 ++++++++++++++++++ .../models/staging/stg_dates_lookahead.sql | 5 +++++ dbt_project/models/staging/stg_todos.sql | 18 ++++++++++++++++++ 4 files changed, 42 insertions(+), 1 deletion(-) diff --git a/app/ETL/dbt_assets.py b/app/ETL/dbt_assets.py index 31888e4..d8bdfef 100644 --- a/app/ETL/dbt_assets.py +++ b/app/ETL/dbt_assets.py @@ -19,7 +19,7 @@ @dbt_assets(manifest=dbt_manifest_path) def ticktick_dbt_assets(context: AssetExecutionContext, dbt: DbtCliResource): - dbt_invocation = dbt.cli(["run"], context=context) + dbt_invocation = dbt.cli(["run","-q"], context=context) yield from dbt_invocation.stream() #cleanup the dir after done diff --git a/dbt_project/models/marts/core/fact_todos.sql b/dbt_project/models/marts/core/fact_todos.sql index 827c2e3..fb18dd3 100644 --- a/dbt_project/models/marts/core/fact_todos.sql +++ b/dbt_project/models/marts/core/fact_todos.sql @@ -1,3 +1,21 @@ +{{ config( + materialized='incremental', + unique_key = ['todo_id'], + on_schema_change='append_new_columns', + indexes=[ + {'columns': ['list_key'], 'type': 'hash'}, + {'columns': ['folder_key'], 'type': 'hash'}, + {'columns': ['status_key'], 'type': 'hash'}, + {'columns': ['date_start_key'], 'type': 'hash'}, + {'columns': ['date_due_key'], 'type': 'hash'}, + {'columns': ['date_completed_key'], 'type': 'hash'}, + {'columns': ['date_created_key'], 'type': 'hash'}, + {'columns': ['date_modified_key'], 'type': 'hash'}, + {'columns': ['date_due_lookahead_key'], 'type': 'hash'}, + {'columns': ['todo_key'], 'unique': True}, + ], + unlogged=True +) }} WITH source AS ( SELECT * diff --git a/dbt_project/models/staging/stg_dates_lookahead.sql b/dbt_project/models/staging/stg_dates_lookahead.sql index b1cbfcd..e0287c4 100644 --- a/dbt_project/models/staging/stg_dates_lookahead.sql +++ b/dbt_project/models/staging/stg_dates_lookahead.sql @@ -1,3 +1,8 @@ +{{ + config( + materialized = 'table', + ) +}} {% set today_offset_5 = (modules.datetime.datetime.now() - modules.datetime.timedelta(days=1)).strftime("%Y-%m-%d")%} {% set today_lookahead = (modules.datetime.datetime.now() + modules.datetime.timedelta(days=90)).strftime("%Y-%m-%d")%} WITH source AS ( diff --git a/dbt_project/models/staging/stg_todos.sql b/dbt_project/models/staging/stg_todos.sql index 0476084..22eefb0 100644 --- a/dbt_project/models/staging/stg_todos.sql +++ b/dbt_project/models/staging/stg_todos.sql @@ -1,3 +1,21 @@ + {# materialized='incremental', #} + {# unique_key = ['todo_id'], #} + {# on_schema_change='append_new_columns', #} +{{ config( + materialized='table', + indexes=[ + {'columns': ['list_key'], 'type': 'hash'}, + {'columns': ['folder_key'], 
'type': 'hash'}, + {'columns': ['status_key'], 'type': 'hash'}, + {'columns': ['date_start_key'], 'type': 'hash'}, + {'columns': ['date_due_key'], 'type': 'hash'}, + {'columns': ['date_completed_key'], 'type': 'hash'}, + {'columns': ['date_created_key'], 'type': 'hash'}, + {'columns': ['date_modified_key'], 'type': 'hash'}, + {'columns': ['date_due_lookahead_key'], 'type': 'hash'}, + ], + unlogged=True +) }} WITH init_todo AS ( SELECT DISTINCT {{ coalesce_defaults(ref('src__tasks_raw')) }} From c84c2c746052186e2e95fe7823f5b30d1b536af9 Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Sat, 17 Aug 2024 07:28:47 +0700 Subject: [PATCH 14/29] init datahub recipe to enable data profiling --- datahub-recipe.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 datahub-recipe.yaml diff --git a/datahub-recipe.yaml b/datahub-recipe.yaml new file mode 100644 index 0000000..cb9691d --- /dev/null +++ b/datahub-recipe.yaml @@ -0,0 +1,19 @@ +source: + type: "dbt" + config: + # Coordinates + # To use this as-is, set the environment variable DBT_PROJECT_DIR to the root folder of your dbt project + manifest_path: "${DBT_PROJECT_DIR}/target/manifest.json" + catalog_path: "${DBT_PROJECT_DIR}/target/catalog.json" + sources_path: "${DBT_PROJECT_DIR}/target/sources.json" # optional for freshness + test_results_path: "${DBT_PROJECT_DIR}/target/run_results.json" # optional for recording dbt test results after running dbt test + + # Options + target_platform: "postgres" # e.g. bigquery/postgres/etc. + +# sink configs +sink: + type: "datahub-rest" + config: + server: "http://192.168.1.18:8080" + token: eyJhbGciOiJIUzI1NiJ9.eyJhY3RvclR5cGUiOiJVU0VSIiwiYWN0b3JJZCI6ImRhdGFodWIiLCJ0eXBlIjoiUEVSU09OQUwiLCJ2ZXJzaW9uIjoiMiIsImp0aSI6ImY3NGU3YzNmLTZmNzMtNGY0NC1iZGFjLTQ0YTA3NWI5Yzc0NyIsInN1YiI6ImRhdGFodWIiLCJpc3MiOiJkYXRhaHViLW1ldGFkYXRhLXNlcnZpY2UifQ.pPwBfGmpWezmP1Uzp8GpnUnFoaD5D2e9U1JZNw2CzVA \ No newline at end of file From 7a8dd6cad48bd3c0337df84fd4a32fb047b0946f Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Wed, 21 Aug 2024 09:47:29 +0700 Subject: [PATCH 15/29] metrics update - added metric habit streak : streak counter over all time - pseudo metric as average to viz at lightdash - reset streak to 0 if wont do, NULL if not done --- dbt_project/models/marts/core/dim_dates.yml | 4 +- dbt_project/models/marts/core/fact_todos.yml | 34 +++++- dbt_project/models/staging/stg_todos.sql | 110 ++++++++++++++----- 3 files changed, 117 insertions(+), 31 deletions(-) diff --git a/dbt_project/models/marts/core/dim_dates.yml b/dbt_project/models/marts/core/dim_dates.yml index ee41949..75757fd 100644 --- a/dbt_project/models/marts/core/dim_dates.yml +++ b/dbt_project/models/marts/core/dim_dates.yml @@ -57,13 +57,13 @@ models: description: "" meta: dimension: - hidden: true + # hidden: true type: date - name: last_day_of_week description: "" meta: dimension: - hidden: true + # hidden: true type: date - name: month_key description: "" diff --git a/dbt_project/models/marts/core/fact_todos.yml b/dbt_project/models/marts/core/fact_todos.yml index 33cef10..8a94462 100644 --- a/dbt_project/models/marts/core/fact_todos.yml +++ b/dbt_project/models/marts/core/fact_todos.yml @@ -267,6 +267,20 @@ models: - wont do - todo_derived__is_task: true - todo_derived__tag_semantic: next action + habit_streak_metric: + description: "helper pre-agg to allow hiding status in habit_streak when pivotting" + groups: ['habit'] + label: "streak" + type: average + sql: todo_derived__habit_streak + + - name: todo_derived__habit_streak + 
description: rolling streak of habit that +1 of each consecutive done items. + meta: + dimension: + groups: ['habit'] + hidden: true + type: number - name: todo_createdtime description: "" meta: @@ -762,4 +776,22 @@ models: description: "flag to identify true recurring tasks" meta: dimension: - type: string \ No newline at end of file + groups: + - derived + type: string + - name: todo_derived__habit_repeat_freq + description: "identifies as 3 categories : daily, monthly and all other" + meta: + dimension: + groups: + - derived + type: string + label: "repeat frequency" + sql: | + case when ${todo_repeatflag} like '%DAILY%' then 'daily' + when ${todo_repeatflag} like '%WEEKLY%' then 'weekly' + when ${todo_repeatflag} like '%MONTHLY%' then 'monthly' + when ${todo_repeatflag} like '%CUSTOM%' then 'custom' + else 'default' + end + \ No newline at end of file diff --git a/dbt_project/models/staging/stg_todos.sql b/dbt_project/models/staging/stg_todos.sql index 22eefb0..18c1a09 100644 --- a/dbt_project/models/staging/stg_todos.sql +++ b/dbt_project/models/staging/stg_todos.sql @@ -1,28 +1,29 @@ - {# materialized='incremental', #} - {# unique_key = ['todo_id'], #} - {# on_schema_change='append_new_columns', #} +{# materialized='incremental', #} +{# unique_key = ['todo_id'], #} +{# on_schema_change='append_new_columns', #} {{ config( - materialized='table', - indexes=[ - {'columns': ['list_key'], 'type': 'hash'}, - {'columns': ['folder_key'], 'type': 'hash'}, - {'columns': ['status_key'], 'type': 'hash'}, - {'columns': ['date_start_key'], 'type': 'hash'}, - {'columns': ['date_due_key'], 'type': 'hash'}, - {'columns': ['date_completed_key'], 'type': 'hash'}, - {'columns': ['date_created_key'], 'type': 'hash'}, - {'columns': ['date_modified_key'], 'type': 'hash'}, - {'columns': ['date_due_lookahead_key'], 'type': 'hash'}, - ], - unlogged=True + materialized = 'table', + indexes = [ {'columns': ['list_key'], + 'type': 'hash' },{ 'columns': ['folder_key'], + 'type': 'hash' },{ 'columns': ['status_key'], + 'type': 'hash' },{ 'columns': ['date_start_key'], + 'type': 'hash' },{ 'columns': ['date_due_key'], + 'type': 'hash' },{ 'columns': ['date_completed_key'], + 'type': 'hash' },{ 'columns': ['date_created_key'], + 'type': 'hash' },{ 'columns': ['date_modified_key'], + 'type': 'hash' },{ 'columns': ['date_due_lookahead_key'], + 'type': 'hash' },], + unlogged = True ) }} + WITH init_todo AS ( + SELECT DISTINCT {{ coalesce_defaults(ref('src__tasks_raw')) }} FROM {{ ref('src__tasks_raw') }} ), -todo AS ( +_todo__recurring AS ( -- handle flagging habits SELECT *, @@ -48,6 +49,60 @@ todo AS ( FROM init_todo b ), +_todo__habit_streak_init AS ( + -- create buckets + SELECT + *, + CASE + WHEN todo_status = '2' THEN SUM( + CASE + WHEN todo_status = '2' THEN 1 + ELSE 0 + END + ) over ( + PARTITION BY todo_repeattaskid + ORDER BY + todo_completedtime rows BETWEEN unbounded preceding + AND CURRENT ROW + ) - ROW_NUMBER() over ( + PARTITION BY todo_repeattaskid + ORDER BY + todo_completedtime + ) + 1 + ELSE NULL + END AS _todo__habit_streak_bucket_id + FROM + _todo__recurring +), +_todo__habit_streak AS ( + -- add additional column for rolling streak counter all time + SELECT + *, + CASE + WHEN todo_status = '2' THEN ROW_NUMBER() over( + PARTITION BY todo_repeattaskid, + _todo__habit_streak_bucket_id + ORDER BY + todo_completedtime ASC + ) + when todo_status = '0' then NULL + when todo_status = '-1' then 0 + END AS todo_derived__habit_streak + FROM + _todo__habit_streak_init + WHERE + todo_derived__recurring = 
'recurring' +), +todo AS ( + SELECT + r.*, + h.todo_derived__habit_streak, + h._todo__habit_streak_bucket_id + FROM + _todo__recurring r + LEFT JOIN _todo__habit_streak h + ON r.todo_id = h.todo_id +), lists AS ( SELECT {{ coalesce_defaults(ref('src__lists_raw')) }} @@ -92,15 +147,14 @@ joined AS ( {{ dbt_utils.generate_surrogate_key(['folder_id']) }} AS folder_key, {{ dbt_utils.generate_surrogate_key(['status_id']) }} AS status_key, t.*, - case when - -- build the flag window + CASE + WHEN -- build the flag window -- case1: the records from due_lookahead - dl.date_id is not null then true - when - -- case2: the left records facts; grabs dummy records within the window - todo_id is null then true - else false - end as lookahead_flag, + dl.date_id IS NOT NULL THEN TRUE + WHEN -- case2: the left records facts; grabs dummy records within the window + todo_id IS NULL THEN TRUE + ELSE FALSE + END AS lookahead_flag, COALESCE( l.list_id, 'default' @@ -130,11 +184,11 @@ joined AS ( LEFT JOIN dates ddcm ON ddcm.date_id = t.todo_completedtime_derived_date LEFT JOIN dates ddm - ON ddm.date_id = t.todo_modifiedtime_derived_date - FULL OUTER JOIN dates_lookahead dl + ON ddm.date_id = t.todo_modifiedtime_derived_date full + OUTER JOIN dates_lookahead dl ON dl.date_id = t.todo_duedate_derived_date ) SELECT * FROM - joined \ No newline at end of file + joined From 37eecb6f5679bff60b66d69b1f88d37809ca21a0 Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Sat, 24 Aug 2024 06:00:31 +0700 Subject: [PATCH 16/29] hotfix stg todo broken due to merge --- dbt_project/models/staging/stg_todos.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbt_project/models/staging/stg_todos.sql b/dbt_project/models/staging/stg_todos.sql index bc27875..18c1a09 100644 --- a/dbt_project/models/staging/stg_todos.sql +++ b/dbt_project/models/staging/stg_todos.sql @@ -43,9 +43,9 @@ _todo__recurring AS ( OR ( todo_status = '0' AND todo_repeatflag <> 'default' - ) THEN true - ELSE false - END AS todo_derived__is_repeat + ) THEN 'recurring' + ELSE 'default' + END AS todo_derived__recurring FROM init_todo b ), From 4a48d6e3f67aff21efc23def82e43419be2d434c Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Sat, 24 Aug 2024 06:30:44 +0700 Subject: [PATCH 17/29] optimized raw load step : drop tbl before load --- app/ETL/EL.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/app/ETL/EL.py b/app/ETL/EL.py index 2e2056e..34a4b20 100644 --- a/app/ETL/EL.py +++ b/app/ETL/EL.py @@ -5,7 +5,7 @@ import sys; sys.path.append('..') # to allow import helper which is 1 dir away from helper.source_env import raw_path,dw_path,ETL_workdir,db_url,target_schema import time -from sqlalchemy import create_engine +from sqlalchemy import create_engine,text from datetime import datetime import pytz import humanize @@ -43,16 +43,26 @@ def init_extract(): compute_kind='python',deps=[init_extract] ) def raw_data(): - for name in names: - raw_file_path = os.path.join(raw_path,name+'.json') - df = pd.read_json(raw_file_path,dtype=str) - df.columns = df.columns.str.lower() - if name == 'tasks_raw': - df['modifiedtime_humanize'] = df['modifiedtime'].apply(humanize_timestamp) - df['duedate_humanize'] = df['duedate'].apply(humanize_timestamp) - df.to_sql(name, engine, if_exists='replace', index=False, schema=target_schema+'_raw') - - yield Output(value=df,output_name=name) + with engine.connect() as conn: + for name in names: + raw_file_path = os.path.join(raw_path, name + '.json') + df = 
pd.read_json(raw_file_path, dtype=str) + df.columns = df.columns.str.lower() + if name == 'tasks_raw': + df['modifiedtime_humanize'] = df['modifiedtime'].apply(humanize_timestamp) + df['duedate_humanize'] = df['duedate'].apply(humanize_timestamp) + + # Use text() to execute the raw SQL command + conn.execute(text(f"DROP TABLE IF EXISTS {target_schema+'_raw'}.{name}")) + conn.commit() + + # Insert the data + df.to_sql(name, engine, index=False, schema=target_schema+'_raw') + + yield Output(value=df, output_name=name) + conn.close() + + From f5afb00855c7a81daece6aefeb077531766bce18 Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Sun, 25 Aug 2024 11:08:17 +0700 Subject: [PATCH 18/29] hotfix swapping field name to address broken dash --- dbt_project/models/marts/core/fact_todos.yml | 2 +- dbt_project/models/staging/stg_todos.sql | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/dbt_project/models/marts/core/fact_todos.yml b/dbt_project/models/marts/core/fact_todos.yml index 9c9bf13..62240b0 100644 --- a/dbt_project/models/marts/core/fact_todos.yml +++ b/dbt_project/models/marts/core/fact_todos.yml @@ -769,7 +769,7 @@ models: dimension: groups: - derived - type: string + type: boolean - name: todo_derived__habit_repeat_freq description: "identifies as 3 categories : daily, monthly and all other" meta: diff --git a/dbt_project/models/staging/stg_todos.sql b/dbt_project/models/staging/stg_todos.sql index 18c1a09..b302046 100644 --- a/dbt_project/models/staging/stg_todos.sql +++ b/dbt_project/models/staging/stg_todos.sql @@ -43,9 +43,9 @@ _todo__recurring AS ( OR ( todo_status = '0' AND todo_repeatflag <> 'default' - ) THEN 'recurring' - ELSE 'default' - END AS todo_derived__recurring + ) THEN True + ELSE False + END AS todo_derived__is_repeat FROM init_todo b ), @@ -91,11 +91,12 @@ _todo__habit_streak AS ( FROM _todo__habit_streak_init WHERE - todo_derived__recurring = 'recurring' + todo_derived__is_repeat is True ), todo AS ( SELECT r.*, + {# case when r.todo_derived__is_repeat is True then 'recurring' else 'default' end as todo_derived__recurring, #} h.todo_derived__habit_streak, h._todo__habit_streak_bucket_id FROM From 9b1040f9015e9ab34f119efa2a2e679cfcebef9b Mon Sep 17 00:00:00 2001 From: luutuankiet <56199834+luutuankiet@users.noreply.github.com> Date: Sun, 25 Aug 2024 12:07:17 +0700 Subject: [PATCH 19/29] Create start-preview.yml --- .github/workflows/start-preview.yml | 77 +++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 .github/workflows/start-preview.yml diff --git a/.github/workflows/start-preview.yml b/.github/workflows/start-preview.yml new file mode 100644 index 0000000..17df662 --- /dev/null +++ b/.github/workflows/start-preview.yml @@ -0,0 +1,77 @@ +name: start-preview + +on: + push: + branches-ignore: [ "main", "master", "BETA_prod" ] + +env: + DBT_VERSION: "1.7.10" + +jobs: + preview: + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-node@v3.4.1 + - uses: actions/setup-python@v1 + with: + python-version: "3.9.x" + + - name: Copy Google credentials file + env: + GOOGLE_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }} + if: "${{ env.GOOGLE_CREDENTIALS != '' }}" + id: create-json + uses: jsdaniell/create-json@1.1.2 + with: + name: "googlecredentials.json" + json: ${{ env.GOOGLE_CREDENTIALS }} + + - name: Move credentials to /tmp + run: mv googlecredentials.json /tmp || true + + - name: Locate dbt_project.yml + run: echo 
"PROJECT_DIR=$(find . -name "dbt_project.yml" | sed 's/dbt_project.yml//g')" >> $GITHUB_ENV + + - name: Get lightdash version + uses: sergeysova/jq-action@v2 + id: version + env: + LIGHTDASH_URL: ${{ secrets.LIGHTDASH_URL }} + with: + cmd: curl -s "${LIGHTDASH_URL}/api/v1/health" | jq -r '.results.version' + + - name: Copy profiles.yml + env: + config: ${{ secrets.DBT_PROFILES }} + run: echo -e "$config" > profiles.yml + + - name: Install dbt + run: | + pip install dbt-core==$DBT_VERSION dbt-postgres==$DBT_VERSION dbt-redshift==$DBT_VERSION dbt-snowflake==$DBT_VERSION dbt-bigquery==$DBT_VERSION + dbt deps --project-dir "$PROJECT_DIR" + - name: Install lightdash CLI + run: npm install -g "@lightdash/cli@${{ steps.version.outputs.value }}" || npm install -g @lightdash/cli@latest + + - name: Lightdash CLI start preview + id: start-preview + env: + LIGHTDASH_API_KEY: ${{ secrets.LIGHTDASH_API_KEY }} + LIGHTDASH_PROJECT: ${{ secrets.LIGHTDASH_PROJECT }} + LIGHTDASH_URL: ${{ secrets.LIGHTDASH_URL }} + GOOGLE_APPLICATION_CREDENTIALS: '/tmp/googlecredentials.json' + + run: lightdash start-preview --project-dir "$PROJECT_DIR" --profiles-dir . --name ${GITHUB_REF##*/} + + - uses: jwalton/gh-find-current-pr@v1 + id: finder + + - name: Leave a comment after deployment + uses: marocchino/sticky-pull-request-comment@v2 + with: + number: ${{ steps.finder.outputs.pr }} + message: | + :rocket: Deployed ${{ github.sha }} to ${{ steps.start-preview.outputs.url }} From f316d74991851e8359107a8d9c8e403eb2d7bbcf Mon Sep 17 00:00:00 2001 From: luutuankiet <56199834+luutuankiet@users.noreply.github.com> Date: Sun, 25 Aug 2024 12:12:27 +0700 Subject: [PATCH 20/29] Create close-preview.yml --- .github/workflows/close-preview.yml | 33 +++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 .github/workflows/close-preview.yml diff --git a/.github/workflows/close-preview.yml b/.github/workflows/close-preview.yml new file mode 100644 index 0000000..7c5625b --- /dev/null +++ b/.github/workflows/close-preview.yml @@ -0,0 +1,33 @@ + +name: close-preview + +on: + pull_request: + types: [closed, merged] + +jobs: + preview: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-node@v3.4.1 + + - name: Get lightdash version + uses: sergeysova/jq-action@v2 + id: version + env: + LIGHTDASH_URL: ${{ secrets.LIGHTDASH_URL }} + with: + cmd: curl -s "${LIGHTDASH_URL}/api/v1/health" | jq -r '.results.version' + + - name: Install lightdash CLI + run: npm install -g "@lightdash/cli@${{ steps.version.outputs.value }}" || npm install -g @lightdash/cli@latest + + - name: Lightdash CLI stop preview + env: + LIGHTDASH_API_KEY: ${{ secrets.LIGHTDASH_API_KEY }} + LIGHTDASH_PROJECT: ${{ secrets.LIGHTDASH_PROJECT }} + LIGHTDASH_URL: ${{ secrets.LIGHTDASH_URL }} + + run: lightdash stop-preview --name ${GITHUB_HEAD_REF##*/} From c7d110995ccdf1c2ad7e9281eee2c06b8322ee4f Mon Sep 17 00:00:00 2001 From: luutuankiet <56199834+luutuankiet@users.noreply.github.com> Date: Sun, 25 Aug 2024 12:07:17 +0700 Subject: [PATCH 21/29] Create start-preview.yml --- .github/workflows/start-preview.yml | 77 +++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 .github/workflows/start-preview.yml diff --git a/.github/workflows/start-preview.yml b/.github/workflows/start-preview.yml new file mode 100644 index 0000000..17df662 --- /dev/null +++ b/.github/workflows/start-preview.yml @@ -0,0 +1,77 @@ +name: start-preview + +on: + push: + branches-ignore: [ "main", "master", 
"BETA_prod" ] + +env: + DBT_VERSION: "1.7.10" + +jobs: + preview: + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-node@v3.4.1 + - uses: actions/setup-python@v1 + with: + python-version: "3.9.x" + + - name: Copy Google credentials file + env: + GOOGLE_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }} + if: "${{ env.GOOGLE_CREDENTIALS != '' }}" + id: create-json + uses: jsdaniell/create-json@1.1.2 + with: + name: "googlecredentials.json" + json: ${{ env.GOOGLE_CREDENTIALS }} + + - name: Move credentials to /tmp + run: mv googlecredentials.json /tmp || true + + - name: Locate dbt_project.yml + run: echo "PROJECT_DIR=$(find . -name "dbt_project.yml" | sed 's/dbt_project.yml//g')" >> $GITHUB_ENV + + - name: Get lightdash version + uses: sergeysova/jq-action@v2 + id: version + env: + LIGHTDASH_URL: ${{ secrets.LIGHTDASH_URL }} + with: + cmd: curl -s "${LIGHTDASH_URL}/api/v1/health" | jq -r '.results.version' + + - name: Copy profiles.yml + env: + config: ${{ secrets.DBT_PROFILES }} + run: echo -e "$config" > profiles.yml + + - name: Install dbt + run: | + pip install dbt-core==$DBT_VERSION dbt-postgres==$DBT_VERSION dbt-redshift==$DBT_VERSION dbt-snowflake==$DBT_VERSION dbt-bigquery==$DBT_VERSION + dbt deps --project-dir "$PROJECT_DIR" + - name: Install lightdash CLI + run: npm install -g "@lightdash/cli@${{ steps.version.outputs.value }}" || npm install -g @lightdash/cli@latest + + - name: Lightdash CLI start preview + id: start-preview + env: + LIGHTDASH_API_KEY: ${{ secrets.LIGHTDASH_API_KEY }} + LIGHTDASH_PROJECT: ${{ secrets.LIGHTDASH_PROJECT }} + LIGHTDASH_URL: ${{ secrets.LIGHTDASH_URL }} + GOOGLE_APPLICATION_CREDENTIALS: '/tmp/googlecredentials.json' + + run: lightdash start-preview --project-dir "$PROJECT_DIR" --profiles-dir . 
--name ${GITHUB_REF##*/} + + - uses: jwalton/gh-find-current-pr@v1 + id: finder + + - name: Leave a comment after deployment + uses: marocchino/sticky-pull-request-comment@v2 + with: + number: ${{ steps.finder.outputs.pr }} + message: | + :rocket: Deployed ${{ github.sha }} to ${{ steps.start-preview.outputs.url }} From 289902389bc0f801aa9819c1e34638287b51d751 Mon Sep 17 00:00:00 2001 From: luutuankiet <56199834+luutuankiet@users.noreply.github.com> Date: Sun, 25 Aug 2024 12:12:27 +0700 Subject: [PATCH 22/29] Create close-preview.yml --- .github/workflows/close-preview.yml | 33 +++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 .github/workflows/close-preview.yml diff --git a/.github/workflows/close-preview.yml b/.github/workflows/close-preview.yml new file mode 100644 index 0000000..7c5625b --- /dev/null +++ b/.github/workflows/close-preview.yml @@ -0,0 +1,33 @@ + +name: close-preview + +on: + pull_request: + types: [closed, merged] + +jobs: + preview: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-node@v3.4.1 + + - name: Get lightdash version + uses: sergeysova/jq-action@v2 + id: version + env: + LIGHTDASH_URL: ${{ secrets.LIGHTDASH_URL }} + with: + cmd: curl -s "${LIGHTDASH_URL}/api/v1/health" | jq -r '.results.version' + + - name: Install lightdash CLI + run: npm install -g "@lightdash/cli@${{ steps.version.outputs.value }}" || npm install -g @lightdash/cli@latest + + - name: Lightdash CLI stop preview + env: + LIGHTDASH_API_KEY: ${{ secrets.LIGHTDASH_API_KEY }} + LIGHTDASH_PROJECT: ${{ secrets.LIGHTDASH_PROJECT }} + LIGHTDASH_URL: ${{ secrets.LIGHTDASH_URL }} + + run: lightdash stop-preview --name ${GITHUB_HEAD_REF##*/} From fc8ca6caa14273f7727ba87c6d360ed5f2f0a1b3 Mon Sep 17 00:00:00 2001 From: luutuankiet <56199834+luutuankiet@users.noreply.github.com> Date: Sun, 25 Aug 2024 13:38:48 +0700 Subject: [PATCH 23/29] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 47f7dd1..d8cb78d 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +[![start-preview](https://github.com/luutuankiet/scrape-ticktick/actions/workflows/start-preview.yml/badge.svg)](https://github.com/luutuankiet/scrape-ticktick/actions/workflows/start-preview.yml) + # prequisite download `service_account.json` put it to /workspaces/scrape-ticktick/app/env @@ -43,4 +45,4 @@ run dbt models - install webhook to allow run dagstger from a url : `sudo apt-get install webhook` # development -- after each model update, should do a full dagster reload definitions for it to parse new models \ No newline at end of file +- after each model update, should do a full dagster reload definitions for it to parse new models From 4eec7973482e264c7ee969319d692486fc23fc58 Mon Sep 17 00:00:00 2001 From: luutuankiet <56199834+luutuankiet@users.noreply.github.com> Date: Sun, 25 Aug 2024 13:40:09 +0700 Subject: [PATCH 24/29] Update close-preview.yml --- .github/workflows/close-preview.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/close-preview.yml b/.github/workflows/close-preview.yml index 7c5625b..fc402b5 100644 --- a/.github/workflows/close-preview.yml +++ b/.github/workflows/close-preview.yml @@ -2,6 +2,7 @@ name: close-preview on: + workflow_dispatch: pull_request: types: [closed, merged] From 9f380382a0b2ee6cf802fd2ebe209b938426b459 Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Sun, 25 Aug 2024 12:15:25 +0700 Subject: [PATCH 25/29] added CI CD GHA workflows --- 
.github/workflows/start-preview.yml | 52 +++++++++---- .gitignore | 2 +- dbt_project/models/staging/stg_folders.sql | 2 +- dbt_project/seeds/list_goal_mapping.csv | 91 ++++++++++++++++++++++ 4 files changed, 131 insertions(+), 16 deletions(-) create mode 100644 dbt_project/seeds/list_goal_mapping.csv diff --git a/.github/workflows/start-preview.yml b/.github/workflows/start-preview.yml index 17df662..1b5697f 100644 --- a/.github/workflows/start-preview.yml +++ b/.github/workflows/start-preview.yml @@ -5,7 +5,9 @@ on: branches-ignore: [ "main", "master", "BETA_prod" ] env: - DBT_VERSION: "1.7.10" + DBT_VERSION: "1.8.1" + PROJECT_DIR: "./dbt_project" + DW_DBNAME: "gtd_dash" jobs: preview: @@ -15,11 +17,44 @@ jobs: pull-requests: write steps: - uses: actions/checkout@v3 - - uses: actions/setup-node@v3.4.1 + + - name: Cache Python packages + uses: actions/cache@v3 + with: + path: | + ~/.cache/pip + ~/.local + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + - uses: actions/setup-python@v1 with: python-version: "3.9.x" + - name: Install Python dependencies + run: | + pip install -r requirements.txt + pip install dbt-core==$DBT_VERSION dbt-postgres==$DBT_VERSION dbt-redshift==$DBT_VERSION dbt-snowflake==$DBT_VERSION dbt-bigquery==$DBT_VERSION + dbt deps --project-dir "$PROJECT_DIR" + + - name: Cache npm packages + uses: actions/cache@v3 + with: + path: | + ~/.npm + ~/.cache/npm + key: ${{ runner.os }}-npm-${{ hashFiles('**/package-lock.json') }} + restore-keys: | + ${{ runner.os }}-npm- + + - uses: actions/setup-node@v3.4.1 + with: + node-version: '20' + + - name: Install npm dependencies + run: npm install -g "@lightdash/cli@${{ steps.version.outputs.value }}" || npm install -g @lightdash/cli@latest + - name: Copy Google credentials file env: GOOGLE_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }} @@ -33,9 +68,6 @@ jobs: - name: Move credentials to /tmp run: mv googlecredentials.json /tmp || true - - name: Locate dbt_project.yml - run: echo "PROJECT_DIR=$(find . -name "dbt_project.yml" | sed 's/dbt_project.yml//g')" >> $GITHUB_ENV - - name: Get lightdash version uses: sergeysova/jq-action@v2 id: version @@ -49,13 +81,6 @@ jobs: config: ${{ secrets.DBT_PROFILES }} run: echo -e "$config" > profiles.yml - - name: Install dbt - run: | - pip install dbt-core==$DBT_VERSION dbt-postgres==$DBT_VERSION dbt-redshift==$DBT_VERSION dbt-snowflake==$DBT_VERSION dbt-bigquery==$DBT_VERSION - dbt deps --project-dir "$PROJECT_DIR" - - name: Install lightdash CLI - run: npm install -g "@lightdash/cli@${{ steps.version.outputs.value }}" || npm install -g @lightdash/cli@latest - - name: Lightdash CLI start preview id: start-preview env: @@ -63,7 +88,6 @@ jobs: LIGHTDASH_PROJECT: ${{ secrets.LIGHTDASH_PROJECT }} LIGHTDASH_URL: ${{ secrets.LIGHTDASH_URL }} GOOGLE_APPLICATION_CREDENTIALS: '/tmp/googlecredentials.json' - run: lightdash start-preview --project-dir "$PROJECT_DIR" --profiles-dir . 
--name ${GITHUB_REF##*/} - uses: jwalton/gh-find-current-pr@v1 @@ -74,4 +98,4 @@ jobs: with: number: ${{ steps.finder.outputs.pr }} message: | - :rocket: Deployed ${{ github.sha }} to ${{ steps.start-preview.outputs.url }} + :rocket: Deployed ${{ github.sha }} to ${{ steps.start-preview.outputs.url }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 31f7afc..0b2114b 100644 --- a/.gitignore +++ b/.gitignore @@ -20,7 +20,7 @@ __pycache__/ #streamlit secrets.toml service_account.json -dbt_project/seeds/list_goal_mapping.csv +# dbt_project/seeds/list_goal_mapping.csv app.log # database/raw # database diff --git a/dbt_project/models/staging/stg_folders.sql b/dbt_project/models/staging/stg_folders.sql index f8bf8f6..53004cb 100644 --- a/dbt_project/models/staging/stg_folders.sql +++ b/dbt_project/models/staging/stg_folders.sql @@ -8,7 +8,7 @@ WITH source AS ( 'src__folders_raw' ) }} ) - + SELECT {{ dbt_utils.generate_surrogate_key(['folder_id']) }} AS folder_key,* FROM diff --git a/dbt_project/seeds/list_goal_mapping.csv b/dbt_project/seeds/list_goal_mapping.csv new file mode 100644 index 0000000..a2b6854 --- /dev/null +++ b/dbt_project/seeds/list_goal_mapping.csv @@ -0,0 +1,91 @@ +folder_name,list_name,goal_ids +▶️lvl2 - Innovature,-----------------------, +▶️lvl2 - Innovature,1H 2024 comm pi SOP dev handbook,"5,4" +▶️lvl2 - Innovature,1H 2024 commitment doc mngmnt system,"5,4" +▶️lvl2 - Innovature,ATT NSS worker issue in fFinancial,"5,4" +▶️lvl2 - Innovature,ATT conform first revision fields naming,"5,4" +▶️lvl2 - Innovature,ATT update supplier distro,"5,4" +▶️lvl2 - Innovature,Innovature check out for Joons,"5,4" +▶️lvl2 - Innovature,UA BR change snap at 1000 2000 hrs, +▶️lvl2 - Innovature,UA GL codes phase 1, +▶️lvl2 - Innovature,UA SOW service fee AND PreID, +▶️lvl2 - Innovature,UA inconsistent financial codes at historical level, +▶️lvl2 - Innovature,UA ingest analytics user data,"5,4" +▶️lvl2 - Innovature,UA phase 2 GL code, +▶️lvl2 - Innovature,address integrity issues in inno dw,"5,4" +▶️lvl2 - Innovature,apply recursive query to inno hierrachy needs,"5,11" +▶️lvl2 - Innovature,beef up pi scripting for deployment,"11,5" +▶️lvl2 - Innovature,beef up testing united (h2 comm sub list),"5,11,4" +▶️lvl2 - Innovature,biweekly notes,5 +▶️lvl2 - Innovature,build logic for applicants and requests activity inno dw,"5,4" +▶️lvl2 - Innovature,inno capturing historical employee hierrachy,"5,4" +▶️lvl2 - Innovature,inno dw prep schema for HR metrics,"5,3" +▶️lvl2 - Innovature,python for ad hoc csv,"5,11" +▶️lvl2 - Innovature,reverse engineer & learn Pi etl design,5 +▶️lvl2 - Innovature,setup pi VPC for scripting tasks,11 +▶️lvl2 - Innovature,update inno ETL dag notif mechanism, +▶️lvl2 - Innovature,▶️BAU NEXT ACTION,5 +🏚lvl2 - Personal,-----------------------, +🏚lvl2 - Personal,a day in a day implementation, +🏚lvl2 - Personal,caress for my body and soul under soltitude,6 +🏚lvl2 - Personal,dai hoc sis mission control,"7,8" +🏚lvl2 - Personal,emergency plan for phone lost situations,1 +🏚lvl2 - Personal,keeping healthy habits in check,"6,10" +🏚lvl2 - Personal,list activities metric gtd dashboard,"10,6" +🏚lvl2 - Personal,nas backup, +🏚lvl2 - Personal,nas upgrade, +🏚lvl2 - Personal,peace of mind for the reduncany,"10,6" +🏚lvl2 - Personal,people observation,6 +🏚lvl2 - Personal,philosophy on our relationship,6 +🏚lvl2 - Personal,prep for her bday, +🏚lvl2 - Personal,room org,6 +🏚lvl2 - Personal,setup new fish tank, +🏚lvl2 - Personal,setup photo studio for mom,7 +🏚lvl2 - Personal,sis 
career and schooling path insights,"7,8" +🏚lvl2 - Personal,support her career search,7 +🏚lvl2 - Personal,⚓breaking a bad habit pmo,6 +💤on hold lists,-----------------------, +💤on hold lists,fix workflow cycle time UA,"5,4" +💵lvl2 - Finance,-----------------------, +💵lvl2 - Finance,financing for next 3 months,1 +💻lvl2 - Professional,-----------------------, +💻lvl2 - Professional,GTD implementation,10 +💻lvl2 - Professional,applicant dash,"10,6,5" +💻lvl2 - Professional,backup my mac workstation, +💻lvl2 - Professional,build scripts and utils with python,11 +💻lvl2 - Professional,checkout big data with pyspark,"2,3" +💻lvl2 - Professional,checkout serverless container migrate calibre, +💻lvl2 - Professional,choose a serverless db for my personal etls, +💻lvl2 - Professional,clean debt with gtd dash, +💻lvl2 - Professional,cloud DE,3 +💻lvl2 - Professional,data core,"2,3,4" +💻lvl2 - Professional,deep implement quota to shallow work endeavor in the month,"10,6" +💻lvl2 - Professional,deep work implementations,"6,10" +💻lvl2 - Professional,gtd activity dash for archivals, +💻lvl2 - Professional,gtd lvl3 quarterly review,"10,6" +💻lvl2 - Professional,gtd weekly review recurring tasks,"10,6" +💻lvl2 - Professional,hands on Azure synapse, +💻lvl2 - Professional,job hoppin - getting ready,"5,1,9" +💻lvl2 - Professional,joon itw, +💻lvl2 - Professional,lear graph db neo4j, +💻lvl2 - Professional,learn azure resource management, +💻lvl2 - Professional,learn cloud VPC and networking, +💻lvl2 - Professional,learn columnar db processing,2 +💻lvl2 - Professional,learn schema on read for DE, +💻lvl2 - Professional,learn snowflake and dbt, +💻lvl2 - Professional,learn stream processing, +💻lvl2 - Professional,learn terminal with games, +💻lvl2 - Professional,personal GTD dashboard, +💻lvl2 - Professional,scrape tictick habit data for data driven next action,"11,10" +💻lvl2 - Professional,set up learning a cert," 2,3,4,5" +💻lvl2 - Professional,start a pet project that involves cloud services, +💻lvl2 - Professional,take on dbt courses, +💻lvl2 - Professional,test driven development with python, +💻lvl2 - Professional,tryout a datalake service, +💻lvl2 - Professional,up my digital security game, +💻lvl2 - Professional,up my documentation skills,"9,5" +💻lvl2 - Professional,util fuzzy search gtd dash, +💻lvl2 - Professional,wildly important goal and any benefit mindset,"10,6" +📥lvl2 - organized inbox,-----------------------, +📥lvl2 - organized inbox,⏭NEXT,10 +📥lvl2 - organized inbox,⏲WAITING FOR, From 484911fbdfa44109b6bb14c1454b41cfcfb90a87 Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Sun, 25 Aug 2024 22:14:49 +0000 Subject: [PATCH 26/29] ignore dagster yml --- dagster.yaml | 47 ----------------------------------------------- 1 file changed, 47 deletions(-) delete mode 100644 dagster.yaml diff --git a/dagster.yaml b/dagster.yaml deleted file mode 100644 index 191eb40..0000000 --- a/dagster.yaml +++ /dev/null @@ -1,47 +0,0 @@ -# Postgres storage can be set using either credentials or a connection string. This requires that -# the `dagster-postgres` library be installed. 
- -storage: - postgres: - postgres_db: - username: - env: DAGSTER_PG_USERNAME - password: - env: DAGSTER_PG_PASSWORD - hostname: - env: DAGSTER_PG_HOST - db_name: - env: DAGSTER_PG_DB - port: 5433 - -telemetry: - enabled: false - - -# Configures how long Dagster keeps sensor / schedule tick data -retention: - schedule: - purge_after_days: 7 # sets retention policy for schedule ticks of all types - sensor: - purge_after_days: - skipped: 7 - failure: 7 - success: 7 # keep success ticks indefinitely - - -# there are two possible ways to configure LocalArtifactStorage - -# example local_artifact_storage setup -local_artifact_storage: - module: dagster.core.storage.root - class: LocalArtifactStorage - config: - base_dir: - env: DAGSTER_LOCAL_ARTIFACT_STORAGE_DIR - -compute_logs: - module: dagster.core.storage.local_compute_log_manager - class: LocalComputeLogManager - config: - base_dir: - env: DAGSTER_LOCAL_ARTIFACT_STORAGE_DIR \ No newline at end of file From 6cbee43f259992ae2901ddbf13f6422cda15118c Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Sun, 25 Aug 2024 22:16:49 +0000 Subject: [PATCH 27/29] ignored dagster yml for local dev --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 31f7afc..6f750a2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +dagster.yaml dagster_artifacts/* app/env/* .venv From 37cd90524bdc33b61e4562a45feeb9754cc81edb Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Tue, 27 Aug 2024 22:41:40 +0000 Subject: [PATCH 28/29] deactive unused workflows --- .github/workflows/{CD.yaml => CD.yaml.inactive} | 0 .../workflows/{python_app_ci.yml => python_app_ci.yml.inactive} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{CD.yaml => CD.yaml.inactive} (100%) rename .github/workflows/{python_app_ci.yml => python_app_ci.yml.inactive} (100%) diff --git a/.github/workflows/CD.yaml b/.github/workflows/CD.yaml.inactive similarity index 100% rename from .github/workflows/CD.yaml rename to .github/workflows/CD.yaml.inactive diff --git a/.github/workflows/python_app_ci.yml b/.github/workflows/python_app_ci.yml.inactive similarity index 100% rename from .github/workflows/python_app_ci.yml rename to .github/workflows/python_app_ci.yml.inactive From f60fd9ede4090e0f30cf73e919abf8b1933fbcfe Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Tue, 27 Aug 2024 23:10:34 +0000 Subject: [PATCH 29/29] added deployment workflow --- .github/workflows/close-preview.yml | 2 +- .github/workflows/gh_deploy.yml | 89 +++++++++++++++++++++++++++++ .vscode/settings.json | 2 +- 3 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/gh_deploy.yml diff --git a/.github/workflows/close-preview.yml b/.github/workflows/close-preview.yml index 7c5625b..a793554 100644 --- a/.github/workflows/close-preview.yml +++ b/.github/workflows/close-preview.yml @@ -3,7 +3,7 @@ name: close-preview on: pull_request: - types: [closed, merged] + types: [closed] jobs: preview: diff --git a/.github/workflows/gh_deploy.yml b/.github/workflows/gh_deploy.yml new file mode 100644 index 0000000..4d8709b --- /dev/null +++ b/.github/workflows/gh_deploy.yml @@ -0,0 +1,89 @@ +name: deploy-lightdash + +on: + push: + branches: [ "main", "master" ] + +env: + DBT_VERSION: "1.7.10" + +jobs: + deploy: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + # Cache Python packages + - name: Cache Python packages + uses: actions/cache@v3 + with: + path: | + ~/.cache/pip + ~/.local + key: ${{ runner.os }}-pip-${{ 
hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - uses: actions/setup-python@v1 + with: + python-version: "3.9.x" + + - name: Install Python dependencies + run: | + pip install -r requirements.txt + pip install dbt-core==$DBT_VERSION dbt-postgres==$DBT_VERSION dbt-redshift==$DBT_VERSION dbt-snowflake==$DBT_VERSION dbt-bigquery==$DBT_VERSION + dbt deps --project-dir "$PROJECT_DIR" + + # Cache npm packages + - name: Cache npm packages + uses: actions/cache@v3 + with: + path: | + ~/.npm + ~/.cache/npm + key: ${{ runner.os }}-npm-${{ hashFiles('**/package-lock.json') }} + restore-keys: | + ${{ runner.os }}-npm- + + - uses: actions/setup-node@v3.4.1 + + - name: Copy Google credentials file + env: + GOOGLE_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }} + if: "${{ env.GOOGLE_CREDENTIALS != '' }}" + id: create-json + uses: jsdaniell/create-json@1.1.2 + with: + name: "googlecredentials.json" + json: ${{ env.GOOGLE_CREDENTIALS }} + + - name: Move credentials to /tmp + run: mv googlecredentials.json /tmp || true + + - name: Locate dbt_project.yml + run: echo "PROJECT_DIR=$(find . -name "dbt_project.yml" | sed 's/dbt_project.yml//g')" >> $GITHUB_ENV + + - name: Get lightdash version + uses: sergeysova/jq-action@v2 + id: version + env: + LIGHTDASH_URL: ${{ secrets.LIGHTDASH_URL }} + with: + cmd: curl -s "${LIGHTDASH_URL}/api/v1/health" | jq -r '.results.version' + + - name: Copy profiles.yml + env: + config: ${{ secrets.DBT_PROFILES }} + run: echo -e "$config" > profiles.yml + + - name: Install lightdash CLI + run: npm install -g "@lightdash/cli@${{ steps.version.outputs.value }}" || npm install -g @lightdash/cli@latest + + - name: Lightdash CLI deploy + env: + LIGHTDASH_API_KEY: ${{ secrets.LIGHTDASH_API_KEY }} + LIGHTDASH_PROJECT: ${{ secrets.LIGHTDASH_PROJECT }} + LIGHTDASH_URL: ${{ secrets.LIGHTDASH_URL }} + GOOGLE_APPLICATION_CREDENTIALS: '/tmp/googlecredentials.json' + run: lightdash deploy --project-dir "$PROJECT_DIR" --profiles-dir . --profile prod || lightdash deploy --project-dir "$PROJECT_DIR" --profiles-dir . \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 889a46b..c06b228 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -35,7 +35,7 @@ ], "yaml.schemas": { "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/dbt_yml_files-latest.json": [ - "/**/*.yml", + "/dbt_project/**/*.yml", "!profiles.yml", "!dbt_project.yml", "!packages.yml",
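
The `gh_deploy.yml` workflow added in PATCH 29/29 installs dbt and the Lightdash CLI and then runs `lightdash deploy` directly against the production project. One way to fail fast would be to compile the dbt project before the deploy step, so Jinja or schema errors surface in CI rather than during the Lightdash deploy. The step below is only a sketch and is not part of the patch series: the step name, its placement (after "Copy profiles.yml"), and the assumption that the copied `profiles.yml` exposes a `prod` target are all assumptions.

```yaml
# Hypothetical extra step for the deploy job in gh_deploy.yml (not in the patches).
# Assumes PROJECT_DIR has already been exported to the environment and that
# profiles.yml has been copied to the repository root by the preceding steps.
- name: Compile dbt project before deploying
  run: |
    # Try the assumed "prod" target first, then fall back to the profile's
    # default target, mirroring the fallback pattern used by the deploy step.
    dbt compile --project-dir "$PROJECT_DIR" --profiles-dir . --target prod \
      || dbt compile --project-dir "$PROJECT_DIR" --profiles-dir .
```

If the copied `profiles.yml` does not define a `prod` target, the fallback invocation keeps the job green while still catching compilation errors before `lightdash deploy` runs.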