Skip to content

Commit

Permalink
Revert "update : promoted new spreadsheet workflow."
Browse files Browse the repository at this point in the history
  • Loading branch information
luutuankiet authored Nov 8, 2024
1 parent 961b23e commit ddb95ae
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 213 deletions.
86 changes: 36 additions & 50 deletions app/ETL/lvl3_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,10 @@
from sqlalchemy import create_engine,text
from EL import db_url
import pandas as pd
from constants import dbt
from dagster import Out, Nothing

#%%


goals_query = """
select todo_title from
prod.fact_todos where
todo_list_name = 'lvl3 - 1 - 2 years goals'
and todo_tags = 'default'
order by todo_sortorder
"""


analyses_path = os.path.join(dbt_project_dir,'target/compiled/todo_analytics/analyses')
Expand All @@ -28,60 +21,60 @@
with open(helper_query_path,'r') as f:
helper_query = f.read()

conn = create_engine(db_url)
# con = duckdb.connect(read_only=False)
# con.sql(f"IMPORT DATABASE '{os.path.dirname(dw_path)}/src';")

client = gspread.service_account(service_account_path)
workbook = client.open_by_url("https://docs.google.com/spreadsheets/d/1My7VU0GrAlYTa46Hj1ciOBXivcF7QKSYeRVXXbyV74o/edit#gid=0")
helper_sheet = workbook.get_worksheet(1)
mapping_sheet = workbook.get_worksheet(0)

conn = create_engine(db_url)


def load_df_to_sheet(sheet: "gspread.worksheet.Worksheet", df: pd.DataFrame, cell: str) -> None:
    """Write a DataFrame, header row included, to a worksheet.

    Args:
        sheet: Worksheet to write to; only its ``update`` method is used.
        df: Data to write. Column labels become the first row and missing
            values are written as empty strings.
        cell: Range handed to ``update`` as ``range_name``; callers in this
            module pass the A1 address of the top-left target cell.
    """
    header = [df.columns.tolist()]
    body = df.fillna("").values.tolist()
    # Keyword arguments keep the call independent of the positional order of
    # ``Worksheet.update``, which differs between gspread major versions.
    sheet.update(range_name=cell, values=header + body)




@op
def mapping_helper():
"""
grabs the following from db
- all lists, BOTH mapped and unmapped
- all goals
and writes them to the helper sheet
insert into the mapping_herlper sheet the goals selected from db
"""

with conn.connect() as con:
helper_df = pd.read_sql(helper_query,con=con)

# clears the sheet
helper_sheet.clear()

# writes the lists
helper_df = pd.read_sql(helper_query,con=con)
load_df_to_sheet(helper_sheet,helper_df,"A1")

# writes the lists
helper_sheet.update("A1",values =[helper_df.columns.tolist()] + helper_df.values.tolist())

# writes the latest goals
goals_query = "select goal_id,goal_name from init_duckdb__lvl3 order by 1"
goals_df = pd.read_sql(goals_query,con=con)
load_df_to_sheet(helper_sheet,goals_df,"D1")
helper_sheet.update("D1",values =[goals_df.columns.tolist()] + goals_df.values.tolist())


# @op
# def mapping_helper():
# """
# insert into the mapping_helper sheet the goals selected from db
# """
# helper_df = con.sql(helper_query).df()

# # clears the sheet
# helper_sheet.clear()


# # writes the lists
# helper_sheet.update("A1",values =[helper_df.columns.tolist()] + helper_df.values.tolist())

# # writes the latest goals
# goals_query = "select goal_id,goal_name from init_duckdb__lvl3 order by 1"
# goals_df = con.sql(goals_query).df()
# helper_sheet.update("D1",values =[goals_df.columns.tolist()] + goals_df.values.tolist())
# con.close()
@op
def load_mapping_to_stg():
stg_data = helper_sheet.get_values('A:C')
mapping_sheet.clear()
mapping_sheet.update("A1",stg_data)
mapping_sheet.update("A1",values =stg_data)


@op
Expand All @@ -91,21 +84,14 @@ def dump_mapping_to_csv(results=None):
writer.writerows(mapping_sheet.get_values())


@op(out=Out(Nothing))
def dbt_seeds(results=None):
    """Invoke ``dbt seed`` for the ``list_goal_mapping`` seed.

    Args:
        results: Unused; accepted so the op can be chained after an upstream
            op in a job graph.
    """
    # NOTE(review): assumes ``dbt`` is a dagster-dbt ``DbtCliResource``, whose
    # ``cli()`` only launches the process — confirm against ``constants``.
    # Waiting makes the op block until the seed finishes and surfaces a
    # failed run instead of silently reporting success.
    dbt.cli(["seed", "-s", "list_goal_mapping"]).wait()


@job(executor_def=in_process_executor)
def load_mapping_helper():
mapping_helper()


@job(executor_def=in_process_executor)
def load_new_lvl3_data():
dbt_seeds(dump_mapping_to_csv(load_mapping_to_stg()))
dump_mapping_to_csv(load_mapping_to_stg())

defs = Definitions(jobs=[load_new_lvl3_data,load_mapping_helper])
@job
def load_mapping_helper():
mapping_helper()

defs = Definitions(jobs=[load_new_lvl3_data,load_mapping_helper])
6 changes: 2 additions & 4 deletions dbt_project/analyses/lvl3_helper_list_extract.sql
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,14 @@ new_seeds AS (
SELECT
folder_name,
list_name,
'' AS goals
'' AS goal_ids
FROM
source
WHERE folder_name != 'default'

)
SELECT
n.folder_name,
n.list_name,
r.goals
r.goal_ids
FROM
new_seeds n
LEFT JOIN ref_seeds r
Expand Down
43 changes: 19 additions & 24 deletions dbt_project/models/marts/metrics/lvl3_progress.sql
Original file line number Diff line number Diff line change
@@ -1,28 +1,23 @@
WITH source AS (
SELECT
*
FROM
{{ ref('stg_duckdb__lvl3') }}
WHERE
goal IS NOT NULL
with source as (
select * from {{ ref('stg_duckdb__lvl3') }}
where goal_name is not null
),
progress AS (
SELECT
*
FROM
{{ ref('lvl1_lvl2_progress') }}

progress as (
select * from {{ ref('lvl1_lvl2_progress') }}
)
SELECT

select
goal_id,
goal AS lvl3_goal,
CAST(AVG(done_progress) over (PARTITION BY goal) AS DECIMAL(10, 2)) AS lvl3_done_progress,
CAST(AVG(clarify_progress) over (PARTITION BY goal) AS DECIMAL(10, 2)) AS lvl3_clarify_progress,
goal_name as lvl3_goal,
cast(avg(done_progress) over (partition by goal_name) as decimal(10,2)) as lvl3_done_progress,
cast(avg(clarify_progress) over (partition by goal_name) as decimal(10,2)) as lvl3_clarify_progress,
source.list_name,
progress.done_progress AS l_done_progress,
progress.clarify_progress AS l_clarify_progress
FROM
source
LEFT JOIN progress
ON source.list_name = progress.list_name
ORDER BY
1
progress.done_progress as l_done_progress,
progress.clarify_progress as l_clarify_progress


from source
left join progress
on source.list_name = progress.list_name
order by 1
26 changes: 14 additions & 12 deletions dbt_project/models/marts/metrics/lvl3_unmapped.sql
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
WITH source AS (
SELECT
*
FROM
{{ ref('stg_duckdb__lvl3') }}
with source as (
select * from {{ ref('lvl1_lvl2_progress') }}
),

map as (
select * from {{ ref('lvl3_progress') }}
)
SELECT
*
FROM
source
WHERE
goal_id IS NULL
AND list_name NOT LIKE '%-%'

select source.*


from source left join map on source.list_name = map.list_name
where
map.list_name is null
and source.list_name not like '%------%'
28 changes: 21 additions & 7 deletions dbt_project/models/staging/init_duckdb__lvl3.sql
Original file line number Diff line number Diff line change
@@ -1,15 +1,29 @@
WITH source AS (
SELECT
ROW_NUMBER() over(
todo_title AS goal_name,
ROW_NUMBER() over (
ORDER BY
todo_sortorder
) AS goal_id,
todo_title AS goal
todo_title
) AS goal_id
FROM
{{ ref("fact_todos") }}
{{ ref(
'stg_todos'
) }}
t
LEFT JOIN {{ ref(
'stg_lists'
) }}
l
ON l.list_key = t.list_key
LEFT JOIN {{ ref(
'stg_folders'
) }}
f
ON f.folder_key = t.folder_key
WHERE
todo_list_name = 'lvl3 - 1 - 2 years goals'
AND todo_tags = 'default'
f.folder_name = '🛩Horizon of focus'
AND l.list_name LIKE '%lvl3%'
AND t.todo_kind = 'TEXT'
)
SELECT
*
Expand Down
61 changes: 20 additions & 41 deletions dbt_project/models/staging/stg_duckdb__lvl3.sql
Original file line number Diff line number Diff line change
@@ -1,51 +1,30 @@
WITH MAP AS (
SELECT
*,
unnest(
regexp_split_to_array(
goals,
',(?=(?:[^"]*"[^"]*")*[^"]*$)'
)
) AS goal
FROM
{{ ref('list_goal_mapping') }}
with map as (
select * from {{ ref('list_goal_mapping') }}

),
goals AS (
SELECT
*
FROM
{{ ref('init_duckdb__lvl3') }}
),
unmapped AS (
SELECT
*
FROM
{{ ref("list_goal_mapping") }}
WHERE
goals IS NULL

goals as (
select * from {{ ref('init_duckdb__lvl3') }}
),
joined AS (
SELECT
goals.*,
MAP.folder_name,
MAP.list_name
map.folder_name,
map.list_name
FROM
goals
LEFT JOIN MAP {# ON goals.goal like '%' || MAP.goal || '%' #}
ON TRIM(goals.goal) LIKE '%' || REPLACE(TRIM(MAP.goal), '"', '') || '%'
UNION ALL
SELECT
NULL AS goal_name,
NULL AS goal_id,
folder_name,
list_name
FROM
unmapped
LEFT JOIN map ON ',' || goals.goal_id || ',' LIKE '%,' || map.goal_ids || ',%'
)

SELECT * FROM joined
UNION ALL
SELECT
*
goals.*,
map.folder_name,
map.list_name
FROM
joined
ORDER BY
folder_name,
list_name
map
LEFT JOIN
goals ON ',' || goals.goal_id || ',' LIKE '%,' || map.goal_ids || ',%'
WHERE
goals.goal_id IS NULL
Loading

0 comments on commit ddb95ae

Please sign in to comment.