From 267c0b1769ca052ea0fdda6172f521bb7895cb89 Mon Sep 17 00:00:00 2001 From: "Zoltan C. Toth" Date: Mon, 20 Jan 2025 10:12:10 +0100 Subject: [PATCH] removing dagster + version updates --- .devcontainer/devcontainer.json | 26 +---- .gitignore | 3 + .vscode/launch.json | 19 ++++ dbtlearn.code-workspace | 8 ++ dbtlearn/analyses/full_moon_no_sleep.sql | 4 +- dbtlearn/dbt_project.yml | 2 - dbtlearn/models/mart/unit_tests.yml | 18 ++++ dbtlearn/models/schema.yml | 34 ++++--- dbtlearn/packages.yml | 4 +- profiles.yml | 3 +- requirements.in | 3 + requirements.txt | 121 ++++++++++++++++++++++- scripts/setup_codespace_env.sh | 5 +- 13 files changed, 203 insertions(+), 47 deletions(-) create mode 100644 .vscode/launch.json create mode 100644 dbtlearn.code-workspace create mode 100644 dbtlearn/models/mart/unit_tests.yml create mode 100644 requirements.in diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 959a93a..64cdaf2 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,35 +1,19 @@ // For format details, see https://aka.ms/devcontainer.json. For config options, see the // README at: https://github.com/devcontainers/templates/tree/main/src/python { - "name": "Python 3", - // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile + "name": "dbtlearn", "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye", - "features": { - "ghcr.io/devcontainers/features/python:1": { - "installTools": true, - "version": "3.11" - } - }, - - // Features to add to the dev container. More info: https://containers.dev/features. - // "features": {}, - - // Use 'forwardPorts' to make a list of ports inside the container available locally. "forwardPorts": [5000], - // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": "bash scripts/setup_codespace_env.sh", + "postAttachCommand": "bash scripts/setup_codespace_env.sh", "customizations": { "vscode": { "extensions": [ "innoverio.vscode-dbt-power-user" ] + }, + "codespaces": { + "openFiles":["profiles.yml"] } } - - // Configure tool-specific properties. - // "customizations": {}, - - // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. - // "remoteUser": "root" } \ No newline at end of file diff --git a/.gitignore b/.gitignore index b52ddf1..7cfb90b 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,6 @@ venv/ **/__pycache__/ *.pyc tmp*/ + +.user.yml +.DS_Store diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..8fc58ce --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,19 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: dagster", + "type": "debugpy", + "request": "launch", + "module": "dagster", + "args": ["dev"], + "cwd": "${workspaceFolder}/dbt_dagster_project_dev", + "env": { + "PYDEVD_DISABLE_FILE_VALIDATION": "1" + } + } + ] +} \ No newline at end of file diff --git a/dbtlearn.code-workspace b/dbtlearn.code-workspace new file mode 100644 index 0000000..876a149 --- /dev/null +++ b/dbtlearn.code-workspace @@ -0,0 +1,8 @@ +{ + "folders": [ + { + "path": "." + } + ], + "settings": {} +} \ No newline at end of file diff --git a/dbtlearn/analyses/full_moon_no_sleep.sql b/dbtlearn/analyses/full_moon_no_sleep.sql index 9fba886..154b75d 100644 --- a/dbtlearn/analyses/full_moon_no_sleep.sql +++ b/dbtlearn/analyses/full_moon_no_sleep.sql @@ -1,4 +1,4 @@ -WITH fullmoon_reviews AS ( +WITH mart_fullmoon_reviews AS ( SELECT * FROM {{ ref('mart_fullmoon_reviews') }} ) SELECT @@ -6,7 +6,7 @@ SELECT review_sentiment, COUNT(*) as reviews FROM - fullmoon_reviews + mart_fullmoon_reviews GROUP BY is_full_moon, review_sentiment diff --git a/dbtlearn/dbt_project.yml b/dbtlearn/dbt_project.yml index 3f10653..fba75aa 100644 --- a/dbtlearn/dbt_project.yml +++ b/dbtlearn/dbt_project.yml @@ -4,7 +4,6 @@ # name or the intended use of these models name: 'dbtlearn' version: '1.0.0' -config-version: 2 # This setting configures which "profile" dbt uses for this project. profile: 'dbtlearn' @@ -20,7 +19,6 @@ macro-paths: ["macros"] snapshot-paths: ["snapshots"] asset-paths: ["assets"] -target-path: "target" # directory which will store compiled SQL files clean-targets: # directories to be removed by `dbt clean` - "target" - "dbt_packages" diff --git a/dbtlearn/models/mart/unit_tests.yml b/dbtlearn/models/mart/unit_tests.yml new file mode 100644 index 0000000..701c586 --- /dev/null +++ b/dbtlearn/models/mart/unit_tests.yml @@ -0,0 +1,18 @@ +unit_tests: + - name: unittest_fullmoon_matcher + description: This test checks if the full moon review matches the full moon review in the source table. + model: mart_fullmoon_reviews + given: + - input: ref('fct_reviews') + rows: + - {review_date: '2025-01-13'} + - {review_date: '2025-01-14'} + - {review_date: '2025-01-15'} + - input: ref('seed_full_moon_dates') + rows: + - {full_moon_date: '2025-01-13'} + expect: + rows: + - {review_date: '2025-01-13', is_full_moon: "not full moon"} + - {review_date: '2025-01-14', is_full_moon: "full moon"} + - {review_date: '2025-01-15', is_full_moon: "not full moon"} diff --git a/dbtlearn/models/schema.yml b/dbtlearn/models/schema.yml index e59e464..88dd386 100644 --- a/dbtlearn/models/schema.yml +++ b/dbtlearn/models/schema.yml @@ -1,5 +1,15 @@ version: 2 +################################################# +# +# Since dbt-core 1.8 tests are called data tests. +# +# you'll see the `data_tests` key in this file +# but the `tests` key works equally well. +# +# Most of the course recording uses `tests` +# +################################################# models: - name: dim_listings_cleansed description: Cleansed table which contains Airbnb listings. @@ -7,13 +17,13 @@ models: - name: listing_id description: Primary key for the listing - tests: + data_tests: - unique - not_null - name: host_id description: The hosts's id. References the host table. - tests: + data_tests: - not_null - relationships: to: ref('dim_hosts_cleansed') @@ -21,7 +31,7 @@ models: - name: room_type description: Type of the apartment / room - tests: + data_tests: - accepted_values: values: ['Entire home/apt', 'Private room', @@ -30,7 +40,7 @@ models: - name: minimum_nights description: '{{ doc("dim_listing_cleansed__minimum_nights") }}' - tests: + data_tests: - positive_value - name: dim_hosts_cleansed @@ -38,28 +48,28 @@ models: columns: - name: host_id description: The id of the host. This is the primary key. - tests: + data_tests: - not_null - unique - name: host_name description: The name of the host - tests: + data_tests: - not_null - name: is_superhost description: Defines whether the hosts is a superhost. - tests: + data_tests: - accepted_values: values: ['t', 'f'] - name: dim_listings_w_hosts - tests: + data_tests: - dbt_expectations.expect_table_row_count_to_equal_other_table: compare_model: source('airbnb', 'listings') columns: - name: price - tests: + data_tests: - dbt_expectations.expect_column_values_to_be_of_type: column_type: number - dbt_expectations.expect_column_quantile_values_to_be_between: @@ -74,16 +84,16 @@ models: - name: fct_reviews columns: - name: listing_id - tests: + data_tests: - relationships: to: ref('dim_listings_cleansed') field: listing_id - name: reviewer_name - tests: + data_tests: - not_null - name: review_sentiment - tests: + data_tests: - accepted_values: values: ['positive', 'neutral', 'negative'] diff --git a/dbtlearn/packages.yml b/dbtlearn/packages.yml index e0f977e..67d3dc4 100644 --- a/dbtlearn/packages.yml +++ b/dbtlearn/packages.yml @@ -1,5 +1,5 @@ packages: - package: dbt-labs/dbt_utils - version: 1.1.1 + version: 1.3.0 - package: calogica/dbt_expectations - version: 0.10.1 \ No newline at end of file + version: 0.10.4 \ No newline at end of file diff --git a/profiles.yml b/profiles.yml index 41b86c0..606d448 100644 --- a/profiles.yml +++ b/profiles.yml @@ -1,7 +1,7 @@ dbtlearn: outputs: dev: - account: # Fill in the Snowflake Account Name here (someting like: yczmxn-dk96978) + account: yqxejkg-cz71618 database: AIRBNB password: dbtPassword123 role: transform @@ -11,4 +11,3 @@ dbtlearn: user: dbt warehouse: COMPUTE_WH target: dev - \ No newline at end of file diff --git a/requirements.in b/requirements.in new file mode 100644 index 0000000..4ab1f33 --- /dev/null +++ b/requirements.in @@ -0,0 +1,3 @@ +dbt-snowflake==1.9.0 +dagster-dbt +dagster-webserver diff --git a/requirements.txt b/requirements.txt index 80dce40..96203a1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,118 @@ -dbt-snowflake==1.7.1 -dagster-dbt==0.22.0 -dagster-webserver==1.6.0 +agate==1.9.1 +alembic==1.14.0 +annotated-types==0.7.0 +antlr4-python3-runtime==4.13.2 +anyio==4.8.0 +asn1crypto==1.5.1 +attrs==24.3.0 +babel==2.16.0 +backoff==2.2.1 +certifi==2024.12.14 +cffi==1.17.1 +charset-normalizer==3.4.1 +click==8.1.8 +colorama==0.4.6 +coloredlogs==14.0 +croniter==5.0.1 +cryptography==44.0.0 +daff==1.3.46 +dagster==1.9.8 +dagster-dbt==0.25.8 +dagster-graphql==1.9.8 +dagster-pipes==1.9.8 +dagster-webserver==1.9.8 +dbt-adapters==1.13.1 +dbt-common==1.14.0 +dbt-core==1.9.1 +dbt-extractor==0.5.1 +dbt-semantic-interfaces==0.7.4 +dbt-snowflake==1.9.0 +deepdiff==7.0.1 +docstring_parser==0.16 +filelock==3.16.1 +fsspec==2024.12.0 +gql==3.5.0 +graphene==3.4.3 +graphql-core==3.2.5 +graphql-relay==3.2.0 +grpcio==1.69.0 +grpcio-health-checking==1.69.0 +h11==0.14.0 +httptools==0.6.4 +humanfriendly==10.0 +idna==3.10 +importlib-metadata==6.11.0 +isodate==0.6.1 +jaraco.classes==3.4.0 +jaraco.context==6.0.1 +jaraco.functools==4.1.0 +Jinja2==3.1.5 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +keyring==25.6.0 +leather==0.4.0 +Mako==1.3.8 +markdown-it-py==3.0.0 +MarkupSafe==3.0.2 +mashumaro==3.14 +mdurl==0.1.2 +more-itertools==10.5.0 +msgpack==1.1.0 +multidict==6.1.0 +networkx==3.4.2 +ordered-set==4.1.0 +orjson==3.10.14 +packaging==24.2 +parsedatetime==2.6 +pathspec==0.12.1 +platformdirs==4.3.6 +propcache==0.2.1 +protobuf==5.29.3 +pycparser==2.22 +pydantic==2.10.5 +pydantic_core==2.27.2 +Pygments==2.19.1 +PyJWT==2.10.1 +pyOpenSSL==24.3.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-slugify==8.0.4 +pytimeparse==1.1.8 +pytz==2024.2 +PyYAML==6.0.2 +referencing==0.35.1 +requests==2.32.3 +requests-toolbelt==1.0.0 +rich==13.9.4 +rpds-py==0.22.3 +setuptools==75.8.0 +shellingham==1.5.4 +six==1.17.0 +sniffio==1.3.1 +snowflake-connector-python==3.12.4 +snowplow-tracker==1.0.4 +sortedcontainers==2.4.0 +SQLAlchemy==2.0.37 +sqlglot==26.1.3 +sqlglotrs==0.3.4 +sqlparse==0.5.3 +starlette==0.45.2 +structlog==24.4.0 +tabulate==0.9.0 +text-unidecode==1.3 +tomli==2.2.1 +tomlkit==0.13.2 +toposort==1.10 +tqdm==4.67.1 +typer==0.15.1 +types-requests==2.32.0.20241016 +typing_extensions==4.12.2 +universal_pathlib==0.2.6 +urllib3==2.3.0 +uvicorn==0.34.0 +uvloop==0.21.0 +watchdog==5.0.3 +watchfiles==1.0.4 +websockets==14.1 +yarl==1.18.3 +zipp==3.21.0 diff --git a/scripts/setup_codespace_env.sh b/scripts/setup_codespace_env.sh index 9d1ee19..8e0fd30 100755 --- a/scripts/setup_codespace_env.sh +++ b/scripts/setup_codespace_env.sh @@ -3,6 +3,5 @@ set -x mkdir -p ~/.dbt ln -sf $(readlink -f profiles.yml) ~/.dbt/profiles.yml -pip3 install --user -r requirements.txt - -code profiles.yml +pip3 install --upgrade pip +pip3 install --user -r requirements.in