Merge remote-tracking branch 'refs/remotes/origin/main'
luutuankiet committed Aug 27, 2024
2 parents b0f1708 + 997d264 commit 3e97279
Showing 37 changed files with 881 additions and 468 deletions.
File renamed without changes.
33 changes: 33 additions & 0 deletions .github/workflows/close-preview.yml
@@ -0,0 +1,33 @@

name: close-preview

on:
  pull_request:
    types: [closed]

jobs:
  preview:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-node@v3

      - name: Get lightdash version
        uses: sergeysova/jq-action@v2
        id: version
        env:
          LIGHTDASH_URL: ${{ secrets.LIGHTDASH_URL }}
        with:
          cmd: curl -s "${LIGHTDASH_URL}/api/v1/health" | jq -r '.results.version'

      - name: Install lightdash CLI
        run: npm install -g "@lightdash/cli@${{ steps.version.outputs.value }}" || npm install -g @lightdash/cli@latest

      - name: Lightdash CLI stop preview
        env:
          LIGHTDASH_API_KEY: ${{ secrets.LIGHTDASH_API_KEY }}
          LIGHTDASH_PROJECT: ${{ secrets.LIGHTDASH_PROJECT }}
          LIGHTDASH_URL: ${{ secrets.LIGHTDASH_URL }}

        run: lightdash stop-preview --name ${GITHUB_HEAD_REF##*/}
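This workflow pins the CLI to the server's own version: the health endpoint reports the running Lightdash version, and the install step falls back to @lightdash/cli@latest if the pinned install fails. A minimal sketch of the same version lookup in Python (illustrative only, assuming requests is installed and LIGHTDASH_URL is set in the environment):

```python
# Sketch: fetch the server version the workflow pins the CLI to.
import os
import requests

url = os.environ["LIGHTDASH_URL"]  # e.g. https://lightdash.example.com (assumption)
resp = requests.get(f"{url}/api/v1/health", timeout=10)
resp.raise_for_status()
version = resp.json()["results"]["version"]  # same path as jq '.results.version'
print(f"npm install -g @lightdash/cli@{version}")
```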
25 changes: 12 additions & 13 deletions .github/workflows/deployment.sh
@@ -6,22 +6,21 @@ WORKDIR=$(pwd)

. $WORKDIR/env.sh

# pip install --upgrade -q -r requirements.txt

# setup sessions for service
STREAMLIT="streamlit"
# # setup sessions for service
# STREAMLIT="streamlit"

# Kill the existing session if it exists
tmux has-session -t $STREAMLIT 2>/dev/null
# # Kill the existing session if it exists
# tmux has-session -t $STREAMLIT 2>/dev/null

if [ $? != 0 ]; then
# Session doesn't exist, create a new one
tmux new-session -s $STREAMLIT -d
else
# Session exists, kill the old one and create a new one
tmux kill-session -t $STREAMLIT
tmux new-session -s $STREAMLIT -d
fi
# if [ $? != 0 ]; then
# # Session doesn't exist, create a new one
# tmux new-session -s $STREAMLIT -d
# else
# # Session exists, kill the old one and create a new one
# tmux kill-session -t $STREAMLIT
# tmux new-session -s $STREAMLIT -d
# fi



89 changes: 89 additions & 0 deletions .github/workflows/gh_deploy.yml
@@ -0,0 +1,89 @@
name: deploy-lightdash

on:
  push:
    branches: [ "main", "master" ]

env:
  DBT_VERSION: "1.7.10"

jobs:
  deploy:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3

      # Cache Python packages
      - name: Cache Python packages
        uses: actions/cache@v3
        with:
          path: |
            ~/.cache/pip
            ~/.local
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      - uses: actions/setup-python@v1
        with:
          python-version: "3.9.x"

      - name: Install Python dependencies
        run: |
          pip install -r requirements.txt
          pip install dbt-core==$DBT_VERSION dbt-postgres==$DBT_VERSION dbt-redshift==$DBT_VERSION dbt-snowflake==$DBT_VERSION dbt-bigquery==$DBT_VERSION
          dbt deps --project-dir "$PROJECT_DIR"
      # Cache npm packages
      - name: Cache npm packages
        uses: actions/cache@v3
        with:
          path: |
            ~/.npm
            ~/.cache/npm
          key: ${{ runner.os }}-npm-${{ hashFiles('**/package-lock.json') }}
          restore-keys: |
            ${{ runner.os }}-npm-
      - uses: actions/setup-node@v3

      - name: Copy Google credentials file
        env:
          GOOGLE_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}
        if: "${{ env.GOOGLE_CREDENTIALS != '' }}"
        id: create-json
        uses: jsdaniell/create-json@v1.2.2
        with:
          name: "googlecredentials.json"
          json: ${{ env.GOOGLE_CREDENTIALS }}

      - name: Move credentials to /tmp
        run: mv googlecredentials.json /tmp || true

      - name: Locate dbt_project.yml
        run: echo "PROJECT_DIR=$(find . -name "dbt_project.yml" | sed 's/dbt_project.yml//g')" >> $GITHUB_ENV

      - name: Get lightdash version
        uses: sergeysova/jq-action@v2
        id: version
        env:
          LIGHTDASH_URL: ${{ secrets.LIGHTDASH_URL }}
        with:
          cmd: curl -s "${LIGHTDASH_URL}/api/v1/health" | jq -r '.results.version'

      - name: Copy profiles.yml
        env:
          config: ${{ secrets.DBT_PROFILES }}
        run: echo -e "$config" > profiles.yml

      - name: Install lightdash CLI
        run: npm install -g "@lightdash/cli@${{ steps.version.outputs.value }}" || npm install -g @lightdash/cli@latest

      - name: Lightdash CLI deploy
        env:
          LIGHTDASH_API_KEY: ${{ secrets.LIGHTDASH_API_KEY }}
          LIGHTDASH_PROJECT: ${{ secrets.LIGHTDASH_PROJECT }}
          LIGHTDASH_URL: ${{ secrets.LIGHTDASH_URL }}
          GOOGLE_APPLICATION_CREDENTIALS: '/tmp/googlecredentials.json'
        run: lightdash deploy --project-dir "$PROJECT_DIR" --profiles-dir . --profile prod || lightdash deploy --project-dir "$PROJECT_DIR" --profiles-dir .
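The deploy job derives PROJECT_DIR by searching the checkout for dbt_project.yml and stripping the filename with sed. A rough Python equivalent of that step, for reference (the workflow itself uses the shell pipeline above):

```python
# Sketch: locate the dbt project directory, mirroring
#   find . -name "dbt_project.yml" | sed 's/dbt_project.yml//g'
from pathlib import Path

matches = sorted(Path(".").rglob("dbt_project.yml"))
# The sed call only strips the filename, leaving the directory path.
project_dir = str(matches[0].parent) if matches else "."
print(project_dir)
```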
File renamed without changes.
101 changes: 101 additions & 0 deletions .github/workflows/start-preview.yml
@@ -0,0 +1,101 @@
name: start-preview

on:
  push:
    branches-ignore: [ "main", "master", "BETA_prod" ]

env:
  DBT_VERSION: "1.8.1"
  PROJECT_DIR: "./dbt_project"
  DW_DBNAME: "gtd_dash"

jobs:
  preview:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write
    steps:
      - uses: actions/checkout@v3

      - name: Cache Python packages
        uses: actions/cache@v3
        with:
          path: |
            ~/.cache/pip
            ~/.local
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      - uses: actions/setup-python@v1
        with:
          python-version: "3.9.x"

      - name: Install Python dependencies
        run: |
          pip install -r requirements.txt
          pip install dbt-core==$DBT_VERSION dbt-postgres==$DBT_VERSION dbt-redshift==$DBT_VERSION dbt-snowflake==$DBT_VERSION dbt-bigquery==$DBT_VERSION
          dbt deps --project-dir "$PROJECT_DIR"
      - name: Cache npm packages
        uses: actions/cache@v3
        with:
          path: |
            ~/.npm
            ~/.cache/npm
          key: ${{ runner.os }}-npm-${{ hashFiles('**/package-lock.json') }}
          restore-keys: |
            ${{ runner.os }}-npm-
      - uses: actions/setup-node@v3
        with:
          node-version: '20'

      - name: Install npm dependencies
        run: npm install -g "@lightdash/cli@${{ steps.version.outputs.value }}" || npm install -g @lightdash/cli@latest

      - name: Copy Google credentials file
        env:
          GOOGLE_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}
        if: "${{ env.GOOGLE_CREDENTIALS != '' }}"
        id: create-json
        uses: jsdaniell/create-json@v1.2.2
        with:
          name: "googlecredentials.json"
          json: ${{ env.GOOGLE_CREDENTIALS }}

      - name: Move credentials to /tmp
        run: mv googlecredentials.json /tmp || true

      - name: Get lightdash version
        uses: sergeysova/jq-action@v2
        id: version
        env:
          LIGHTDASH_URL: ${{ secrets.LIGHTDASH_URL }}
        with:
          cmd: curl -s "${LIGHTDASH_URL}/api/v1/health" | jq -r '.results.version'

      - name: Copy profiles.yml
        env:
          config: ${{ secrets.DBT_PROFILES }}
        run: echo -e "$config" > profiles.yml

      - name: Lightdash CLI start preview
        id: start-preview
        env:
          LIGHTDASH_API_KEY: ${{ secrets.LIGHTDASH_API_KEY }}
          LIGHTDASH_PROJECT: ${{ secrets.LIGHTDASH_PROJECT }}
          LIGHTDASH_URL: ${{ secrets.LIGHTDASH_URL }}
          GOOGLE_APPLICATION_CREDENTIALS: '/tmp/googlecredentials.json'
        run: lightdash start-preview --project-dir "$PROJECT_DIR" --profiles-dir . --name ${GITHUB_REF##*/}

      - uses: jwalton/gh-find-current-pr@v1
        id: finder

      - name: Leave a comment after deployment
        uses: marocchino/sticky-pull-request-comment@v2
        with:
          number: ${{ steps.finder.outputs.pr }}
          message: |
            :rocket: Deployed ${{ github.sha }} to ${{ steps.start-preview.outputs.url }}
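Unlike close-preview, which reads GITHUB_HEAD_REF, this workflow names the preview from the push ref. Note also that the "Install npm dependencies" step references steps.version.outputs.value before the "Get lightdash version" step has run, so the version expression is empty at that point and the install effectively resolves to the latest CLI. A hypothetical Python mirror of the ${GITHUB_REF##*/} expansion used for the preview name:

```python
# Sketch: ${GITHUB_REF##*/} strips the longest prefix ending in "/",
# keeping only the last path segment of the ref.
ref = "refs/heads/feature/preview-tweaks"  # example GITHUB_REF value (assumption)
preview_name = ref.rsplit("/", 1)[-1]
print(preview_name)  # -> "preview-tweaks"
```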
7 changes: 5 additions & 2 deletions .gitignore
@@ -1,3 +1,4 @@
dagster.yaml
dagster_artifacts/*
app/env/*
.venv
@@ -20,7 +21,7 @@ __pycache__/
#streamlit
secrets.toml
service_account.json
dbt_project/seeds/list_goal_mapping.csv
# dbt_project/seeds/list_goal_mapping.csv
app.log
# database/raw
# database
@@ -35,4 +36,6 @@ package.json
deactivate
app.log*

.devcontainer_prod
.devcontainer_prod

app.log.*
2 changes: 1 addition & 1 deletion .vscode/settings.json
@@ -35,7 +35,7 @@
],
"yaml.schemas": {
"https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/dbt_yml_files-latest.json": [
"/**/*.yml",
"/dbt_project/**/*.yml",
"!profiles.yml",
"!dbt_project.yml",
"!packages.yml",
5 changes: 2 additions & 3 deletions Makefile
@@ -5,9 +5,6 @@ init_deploy:
dagster:
	tmux send-keys -t dagster.0 ". ./.venv/bin/activate && . ./env.sh && dagster dev -m ETL -h 0.0.0.0 -p 60001" ENTER

streamlit:
	tmux send-keys -t streamlit.0 ". ./.venv/bin/activate && . ./env.sh && cd app/charts && streamlit run main.py" ENTER

sleeper:
	sleep 10

@@ -42,3 +39,5 @@ loader_helper:
loader_rerun: loader_helper loader


# command to run after each reboot of the vm
up: init_deploy dagster loader
4 changes: 3 additions & 1 deletion README.md
@@ -1,3 +1,5 @@
[![start-preview](https://github.com/luutuankiet/scrape-ticktick/actions/workflows/start-preview.yml/badge.svg)](https://github.com/luutuankiet/scrape-ticktick/actions/workflows/start-preview.yml)

# prerequisite

download `service_account.json` and put it in /workspaces/scrape-ticktick/app/env
@@ -43,4 +45,4 @@ run dbt models
- install webhook to allow running dagster from a URL: `sudo apt-get install webhook`

# development
- after each model update, do a full dagster reload of definitions so it parses the new models
36 changes: 24 additions & 12 deletions app/ETL/EL.py
@@ -5,10 +5,11 @@
import sys; sys.path.append('..') # to allow import helper which is 1 dir away
from helper.source_env import raw_path,dw_path,ETL_workdir,db_url,target_schema
import time
from sqlalchemy import create_engine
from sqlalchemy import create_engine,text
from datetime import datetime
import pytz
import humanize
import numpy as np
#%%

engine = create_engine(db_url)
@@ -42,21 +43,32 @@ def init_extract():
    compute_kind='python',deps=[init_extract]
)
def raw_data():
    for name in names:
        raw_file_path = os.path.join(raw_path,name+'.json')
        df = pd.read_json(raw_file_path,dtype=str)
        df.columns = df.columns.str.lower()
        if name == 'tasks_raw':
            df['modifiedtime_humanize'] = df['modifiedtime'].apply(humanize_timestamp)
        df.to_sql(name, engine, if_exists='replace', index=False, schema=target_schema+'_raw')

        yield Output(value=df,output_name=name)
    with engine.connect() as conn:
        for name in names:
            raw_file_path = os.path.join(raw_path, name + '.json')
            df = pd.read_json(raw_file_path, dtype=str)
            df.columns = df.columns.str.lower()
            if name == 'tasks_raw':
                df['modifiedtime_humanize'] = df['modifiedtime'].apply(humanize_timestamp)
                df['duedate_humanize'] = df['duedate'].apply(humanize_timestamp)

            # Use text() to execute the raw SQL command
            conn.execute(text(f"DROP TABLE IF EXISTS {target_schema+'_raw'}.{name}"))
            conn.commit()

            # Insert the data
            df.to_sql(name, engine, index=False, schema=target_schema+'_raw')

            yield Output(value=df, output_name=name)
        conn.close()





def humanize_timestamp(ts):
    if pd.isnull(ts):
        return 'No modified time'
    if pd.isnull(ts) or ts == '' or ts == 'nan':
        return 'default'
    # Parse the timestamp
    dt = datetime.strptime(ts, '%Y-%m-%dT%H:%M:%S.%f%z')
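The diff truncates humanize_timestamp just after the strptime call. A minimal sketch of a complete version, assuming the elided body uses humanize.naturaltime (the humanize module is imported at the top of EL.py); the delta handling and return value here are assumptions:

```python
# Sketch of the full helper under the assumptions stated above.
from datetime import datetime
import humanize
import pandas as pd

def humanize_timestamp(ts):
    # Empty, NaN, or missing timestamps fall back to a sentinel value.
    if pd.isnull(ts) or ts == '' or ts == 'nan':
        return 'default'
    dt = datetime.strptime(ts, '%Y-%m-%dT%H:%M:%S.%f%z')
    # naturaltime renders the elapsed time as e.g. "3 days ago".
    return humanize.naturaltime(datetime.now(dt.tzinfo) - dt)
```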
