Skip to content
This repository has been archived by the owner on May 17, 2024. It is now read-only.

Commit

Permalink
Track data-diff usage in MotherDuck (#800)
Browse files Browse the repository at this point in the history
* Update DuckDB connection parameters

* remove submods

* tracking logic

* conditional connection

* semver parsing

* motherduck test configs

* remove submods

* add motherduck dbt test

* passing motherduck tests

* more readable config

* remove submods

* user agent spec

* previous presto version

---------

Co-authored-by: Sung Won Chung <[email protected]>
  • Loading branch information
sungchun12 and Sung Won Chung authored Dec 18, 2023
1 parent 71a1b3d commit 0b74046
Show file tree
Hide file tree
Showing 9 changed files with 1,112 additions and 959 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ jobs:
DATADIFF_CLICKHOUSE_URI: 'clickhouse://clickhouse:Password1@localhost:9000/clickhouse'
DATADIFF_VERTICA_URI: 'vertica://vertica:Password1@localhost:5433/vertica'
DATADIFF_REDSHIFT_URI: '${{ secrets.DATADIFF_REDSHIFT_URI }}'
MOTHERDUCK_TOKEN: '${{ secrets.MOTHERDUCK_TOKEN }}'
run: |
chmod +x tests/waiting_for_stack_up.sh
./tests/waiting_for_stack_up.sh && TEST_ACROSS_ALL_DBS=0 poetry run unittest-parallel -j 16
1 change: 1 addition & 0 deletions .github/workflows/ci_full.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ jobs:
DATADIFF_VERTICA_URI: 'vertica://vertica:Password1@localhost:5433/vertica'
# DATADIFF_BIGQUERY_URI: '${{ secrets.DATADIFF_BIGQUERY_URI }}'
DATADIFF_REDSHIFT_URI: '${{ secrets.DATADIFF_REDSHIFT_URI }}'
MOTHERDUCK_TOKEN: '${{ secrets.MOTHERDUCK_TOKEN }}'
run: |
chmod +x tests/waiting_for_stack_up.sh
./tests/waiting_for_stack_up.sh && poetry run unittest-parallel -j 16
16 changes: 15 additions & 1 deletion data_diff/databases/duckdb.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Any, ClassVar, Dict, Union, Type

import attrs
from packaging.version import parse as parse_version

from data_diff.utils import match_regexps
from data_diff.abcs.database_types import (
Expand All @@ -27,6 +28,7 @@
CHECKSUM_OFFSET,
)
from data_diff.databases.base import MD5_HEXDIGITS, CHECKSUM_HEXDIGITS
from data_diff.version import __version__


@import_helper("duckdb")
Expand Down Expand Up @@ -148,9 +150,21 @@ def close(self):
def create_connection(self):
    """Open a DuckDB connection to the database at ``self._args["filepath"]``.

    On duckdb >= 0.9.2 the connection is tagged with a ``data-diff/v<version>``
    custom user agent, and the tag is read back through ``PRAGMA USER_AGENT``
    to confirm it was applied. Older duckdb versions connect without it.

    Returns:
        The connection object returned by ``duckdb.connect``.

    Raises:
        ConnectError: if duckdb raises ``OperationalError`` while connecting,
            or if the custom user agent is not reported back by the connection.
    """
    ddb = import_duckdb()
    filepath = self._args["filepath"]
    try:
        # custom_user_agent is only available in duckdb >= 0.9.2
        if parse_version(ddb.__version__) < parse_version("0.9.2"):
            return ddb.connect(database=filepath)

        custom_user_agent = f"data-diff/v{__version__}"
        connection = ddb.connect(database=filepath, config={"custom_user_agent": custom_user_agent})
        reported_rows = connection.sql("PRAGMA USER_AGENT;").fetchall()

        # Explicit check instead of `assert`: asserts are stripped under
        # `python -O`, which would silently skip this validation.
        if not reported_rows or custom_user_agent not in reported_rows[0][0]:
            # Do not leak the open connection when validation fails.
            connection.close()
            raise ConnectError("Assertion failed: Custom user agent is invalid.")
        return connection
    except ddb.OperationalError as e:
        raise ConnectError(*e.args) from e

def select_table_schema(self, path: DbPath) -> str:
database, schema, table = self._normalize_table_path(path)
Expand Down
2,020 changes: 1,068 additions & 952 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ psycopg2 = "*"
snowflake-connector-python = ">=3.0.2,<4.0.0"
cryptography = "*"
trino = "^0.314.0"
presto-python-client = "*"
presto-python-client = "0.8.3"
clickhouse-driver = "*"
vertica-python = "*"
duckdb = "^0.7.0"
duckdb = "^0.9.0"
dbt-core = "^1.0.0"
ruff = "^0.1.4"
# google-cloud-bigquery = "*"
Expand Down
Binary file modified tests/dbt_artifacts/jaffle_shop.duckdb
Binary file not shown.
7 changes: 7 additions & 0 deletions tests/dbt_artifacts/motherduck/profiles.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
jaffle_shop:
target: dev_motherduck
outputs:
dev_motherduck:
type: duckdb
path: 'md:jaffle_shop?motherduck_token={{ env_var("MOTHERDUCK_TOKEN") }}'
schema: dev
4 changes: 0 additions & 4 deletions tests/dbt_artifacts/profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,3 @@ jaffle_shop:
type: duckdb
path: "./tests/dbt_artifacts/jaffle_shop.duckdb"
schema: dev
different_dev:
type: duckdb
path: "./tests/dbt_artifacts/jaffle_shop.duckdb"
schema: "{{ env_var('some_env_var') }}"
18 changes: 18 additions & 0 deletions tests/test_dbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,24 @@ def test_integration_basic_dbt(self):
# 1 with a diff
assert diff_string.count(" Rows Added Rows Removed") == 1

def test_integration_motherduck_dbt(self):
    """Run the data-diff CLI in dbt mode using the MotherDuck profiles directory.

    The project directory (and the profiles directory) are taken from the
    ``DATA_DIFF_DBT_PROJ`` environment variable when it is set; otherwise the
    bundled ``tests/dbt_artifacts`` project and its ``motherduck`` profiles
    are used. Output assertions only run for the bundled project.
    """
    bundled_artifacts_dir = os.getcwd() + "/tests/dbt_artifacts"
    env_project_dir = os.environ.get("DATA_DIFF_DBT_PROJ")

    project_dir = env_project_dir or bundled_artifacts_dir
    profiles_dir = env_project_dir or bundled_artifacts_dir + "/motherduck"

    cli_output = run_datadiff_cli(
        "--dbt",
        "--dbt-project-dir",
        project_dir,
        "--dbt-profiles-dir",
        profiles_dir,
    )

    # The expected counts below describe the bundled jaffle_shop artifacts only.
    if project_dir != bundled_artifacts_dir:
        return

    output_text = b"".join(cli_output).decode("utf-8")
    expected_counts = (
        ("<>", 5),  # 5 diffs were run
        ("No row differences", 4),  # 4 of them found no differences
        (" Rows Added Rows Removed", 1),  # 1 of them found a difference
    )
    for marker, expected in expected_counts:
        assert output_text.count(marker) == expected

def test_integration_cloud_dbt(self):
project_dir = os.environ.get("DATA_DIFF_DBT_PROJ")
if project_dir is not None:
Expand Down

0 comments on commit 0b74046

Please sign in to comment.