Skip to content

Commit

Permalink
(fix): MotherDuck config should set SaaS mode at the end (#446)
Browse files Browse the repository at this point in the history
* make sure to set saas_mode after the MD token is set
* generalize how config settings are parsed from input
* add tests to confirm that SaaS mode also works when I attach a MD database
* pin DuckDB to 1.1.1 for MD tests
  • Loading branch information
guenp authored Sep 26, 2024
1 parent acf622d commit 47d447c
Show file tree
Hide file tree
Showing 9 changed files with 251 additions and 159 deletions.
2 changes: 1 addition & 1 deletion dbt/adapters/duckdb/environments/motherduck.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def motherduck_saas_mode(self, handle: DuckDBConnectionWrapper):
# Get SaaS mode from DuckDB config
con = handle.cursor()
(motherduck_saas_mode,) = con.sql(MOTHERDUCK_SAAS_MODE_QUERY).fetchone()
if motherduck_saas_mode.lower() in ["1", "true"]:
if str(motherduck_saas_mode).lower() in ["1", "true"]:
self._motherduck_saas_mode = True
return True
return False
Expand Down
2 changes: 1 addition & 1 deletion dbt/adapters/duckdb/plugins/glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def _add_partition_columns(
# Remove columns from StorageDescriptor if they match with partition columns to avoid duplicate columns
for p_column in partition_columns:
table_def["StorageDescriptor"]["Columns"] = [
column
column # type: ignore
for column in table_def["StorageDescriptor"]["Columns"]
if not (column["Name"] == p_column["Name"] and column["Type"] == p_column["Type"])
]
Expand Down
98 changes: 53 additions & 45 deletions dbt/adapters/duckdb/plugins/motherduck.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,72 +10,80 @@
from dbt.adapters.duckdb.credentials import DuckDBCredentials
from dbt.version import __version__

TOKEN = "token"
MOTHERDUCK_TOKEN = "motherduck_token"
CUSTOM_USER_AGENT = "custom_user_agent"
MOTHERDUCK_EXT = "motherduck"
MOTHERDUCK_CONFIG_OPTIONS = [MOTHERDUCK_TOKEN]
# MotherDuck config options, in order in which they need to be set
# (SaaS mode is last because it locks other config options)
MOTHERDUCK_CONFIG_OPTIONS = [
"motherduck_token",
"motherduck_attach_mode",
"motherduck_saas_mode",
]


class Plugin(BasePlugin):
def initialize(self, plugin_config: Dict[str, Any]):
self._config = plugin_config
self._token = self.token_from_config(plugin_config)

@staticmethod
def get_config_from_path(path):
return {key: value[0] for key, value in parse_qs(urlparse(path).query).items()}

@staticmethod
def get_md_config_settings(config):
    """Extract the MotherDuck options found in ``config``.

    Every option listed in ``MOTHERDUCK_CONFIG_OPTIONS`` may appear in
    ``config`` under four spellings: the canonical name, the name without
    the ``motherduck_`` prefix, and the upper-case form of each. If more
    than one spelling is present, the one checked last wins (the check
    order is canonical, short, canonical upper-case, short upper-case).

    :param config: mapping of option names to values
    :return: a new dict keyed by canonical option name, ordered as in
        ``MOTHERDUCK_CONFIG_OPTIONS`` so that SaaS mode is applied last
    """
    md_config = {}
    # Walking the options in their declared order fills the dict in that
    # same order (dicts preserve insertion order, and overwriting a key
    # does not move it), so no sort is needed afterwards.
    for name in MOTHERDUCK_CONFIG_OPTIONS:
        short_name = name.replace("motherduck_", "")
        for alias in (name, short_name, name.upper(), short_name.upper()):
            if alias in config:
                md_config[name] = config[alias]
    return md_config

def configure_connection(self, conn: DuckDBPyConnection):
conn.load_extension(MOTHERDUCK_EXT)
# If a MotherDuck database is in attachments,
# set config options *before* attaching
if self.creds is not None and self.creds.is_motherduck_attach:
# Check if the config options are specified in the path
config = {}

# add config options specified in the path
for attachment in self.creds.motherduck_attach:
parsed = urlparse(attachment.path)
qs = parse_qs(parsed.query)
for KEY in MOTHERDUCK_CONFIG_OPTIONS:
value = qs.get(KEY)
if value:
conn.execute(f"SET {KEY} = '{value[0]}'")
# If config options are specified via plugin config, set them here
if self._config:
conn.execute(f"SET {MOTHERDUCK_TOKEN} = '{self._token}'")
elif self.creds.settings:
if MOTHERDUCK_TOKEN in self.creds.settings:
token = self.creds.settings.pop(MOTHERDUCK_TOKEN)
conn.execute(f"SET {MOTHERDUCK_TOKEN} = '{token}'")
config.update(self.get_config_from_path(attachment.path))

@staticmethod
def token_from_config(config: Dict[str, Any]) -> str:
"""Load the token from the MotherDuck plugin config
If not specified, this returns an empty string
# add config options specified via plugin config
config.update(self._config)

:param str: MotherDuck token
"""
if (
TOKEN in config
or TOKEN.upper() in config
or MOTHERDUCK_TOKEN in config
or MOTHERDUCK_TOKEN.upper() in config
):
token = (
config.get(TOKEN)
or config.get(TOKEN.upper())
or config.get(MOTHERDUCK_TOKEN)
or config.get(MOTHERDUCK_TOKEN.upper())
)
return str(token)
return ""
# add config options specified via settings
if self.creds.settings is not None:
config.update(self.creds.settings)

# set MD config options and remove from settings
for key, value in self.get_md_config_settings(config).items():
conn.execute(f"SET {key} = '{value}'")
if self.creds.settings is not None and key in self.creds.settings:
self.creds.settings.pop(key)

def update_connection_config(self, creds: DuckDBCredentials, config: Dict[str, Any]):
user_agent = f"dbt/{__version__} dbt-duckdb/{__plugin_version__}"
if CUSTOM_USER_AGENT in config:
user_agent = f"{user_agent} {config[CUSTOM_USER_AGENT]}"
settings: Dict[str, Any] = creds.settings or {}
if CUSTOM_USER_AGENT in settings:
user_agent = f"{user_agent} {settings.pop(CUSTOM_USER_AGENT)}"

custom_user_agent = config.get(CUSTOM_USER_AGENT) or settings.pop(CUSTOM_USER_AGENT, None)
if custom_user_agent:
user_agent = f"{user_agent} {custom_user_agent}"
config[CUSTOM_USER_AGENT] = user_agent

# If a user specified MotherDuck config options via the plugin config,
# pass it to the config kwarg in duckdb.connect.
if not creds.is_motherduck_attach and self._token:
config[MOTHERDUCK_TOKEN] = self._token
if not creds.is_motherduck_attach:
config.update(self.get_md_config_settings(self._config))
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ requires = ["setuptools >= 61.2", "pbr>=1.9"]
glue =
boto3
mypy-boto3-glue
md =
duckdb==1.1.1

[files]
packages =
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -90,20 +90,18 @@ def md_sql(self, database_name):

@pytest.fixture(autouse=True)
def run_dbt_scope(self, project, database_name):
# CREATE DATABASE does not work with SaaS mode on duckdb 1.0.0
# This will be fixed in duckdb 1.1.0
# project.run_sql(f"CREATE DATABASE IF NOT EXISTS {database_name}")
project.run_sql("CREATE OR REPLACE TABLE plugin_table (i integer, j string)")
project.run_sql("INSERT INTO plugin_table (i, j) VALUES (1, 'foo')")
project.run_sql(f"CREATE DATABASE IF NOT EXISTS {database_name}")
project.run_sql(f"CREATE OR REPLACE TABLE {database_name}.plugin_table (i integer, j string)")
project.run_sql(f"INSERT INTO {database_name}.plugin_table (i, j) VALUES (1, 'foo')")
yield
project.run_sql("DROP VIEW md_table")
project.run_sql("DROP TABLE random_logs_test")
project.run_sql("DROP TABLE summary_of_logs_test")
project.run_sql("DROP TABLE plugin_table")
project.run_sql(f"DROP TABLE {database_name}.plugin_table")
project.run_sql("DROP TABLE python_pyarrow_table_model")

def test_motherduck(self, project):
run_dbt(expect_pass=False)
run_dbt(expect_pass=True)


@pytest.mark.skip_profile("buenavista", "file", "memory")
Expand Down
188 changes: 188 additions & 0 deletions tests/functional/plugins/motherduck/test_motherduck_saas_mode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
from urllib.parse import urlparse
import pytest
from dbt.tests.util import (
run_dbt,
)
from dbt.artifacts.schemas.results import RunStatus

from dbt.adapters.duckdb.environments.motherduck import MOTHERDUCK_SAAS_MODE_QUERY

# dbt SQL model source, materialized as a table with the
# temp_schema_name meta set to 'dbt_temp_test'. It generates 10000 rows
# (one per generate_series value), each with a uuid log_id, a timestamp dt
# offset from 2023-10-01 by a random number of minutes, and a random user_id.
random_logs_sql = """
{{ config(materialized='table', meta=dict(temp_schema_name='dbt_temp_test')) }}
select
uuid()::varchar as log_id,
'2023-10-01'::timestamp + interval 1 minute * (random() * 20000)::int as dt ,
(random() * 4)::int64 as user_id
from generate_series(1, 10000) g(x)
"""

# dbt SQL model source, materialized incrementally with the
# temp_schema_name meta set to 'dbt_temp_test'. It counts rows of the
# random_logs_test model per (date, user_id); on incremental runs only
# rows with dt after 2023-10-08 are read.
summary_of_logs_sql = """
{{
config(
materialized='incremental',
meta=dict(temp_schema_name='dbt_temp_test'),
)
}}
select dt::date as dt, user_id, count(1) as c
from {{ ref('random_logs_test') }}
{% if is_incremental() %}
-- this filter will only be applied on an incremental run
-- (uses > to include records whose timestamp occurred since the last run of this model)
where dt > '2023-10-08'::timestamp
{% endif %}
group by all
"""

# Source of a minimal dbt Python model that returns a three-row pyarrow
# table. The body of model() must be indented for the embedded source to be
# valid Python.
python_pyarrow_table_model = """
import pyarrow as pa
def model(dbt, con):
    return pa.Table.from_pydict({"a": [1,2,3]})
"""

@pytest.mark.skip_profile("buenavista", "file", "memory")
class TestMDPluginSaaSMode:
    """Run the dbt project against MotherDuck with SaaS mode enabled.

    This base variant passes the token and the SaaS-mode flag through the
    profile's ``config_options``. Subclasses override
    ``profiles_config_update`` to supply the same options another way.
    """

    @pytest.fixture(scope="class")
    def profiles_config_update(self, dbt_profile_target):
        """Build a profile that sets the token and SaaS mode via ``config_options``."""
        # The "?user=1" suffix presumably gives this class its own
        # connection string -- TODO confirm.
        dev_output = {
            "type": "duckdb",
            "path": dbt_profile_target.get("path", ":memory:") + "?user=1",
            "config_options": {
                "motherduck_token": dbt_profile_target.get("token"),
                "motherduck_saas_mode": True,
            },
        }
        return {"test": {"outputs": {"dev": dev_output}, "target": "dev"}}

    @pytest.fixture(scope="class")
    def models(self, md_sql):
        """Map model file names to their sources."""
        return {
            "md_table.sql": md_sql,
            "random_logs_test.sql": random_logs_sql,
            "summary_of_logs_test.sql": summary_of_logs_sql,
            "python_pyarrow_table_model.py": python_pyarrow_table_model,
        }

    @pytest.fixture(scope="class")
    def database_name(self, dbt_profile_target):
        """Return the path component of the profile's ``path`` URL."""
        target_path = dbt_profile_target["path"]
        return urlparse(target_path).path

    @pytest.fixture(scope="class")
    def md_sql(self, database_name):
        """Return the SQL for a model that reads ``plugin_table`` from the MD database."""
        return f"\nselect * FROM {database_name}.main.plugin_table\n"

    @pytest.fixture(autouse=True)
    def run_dbt_scope(self, project, database_name):
        """Create and seed ``plugin_table`` before each test, then drop test objects."""
        # No CREATE DATABASE is issued here: the original note says it does
        # not work with SaaS mode on duckdb 1.0.0 and expects a fix in
        # duckdb 1.1.1.
        plugin_table = f"{database_name}.plugin_table"
        project.run_sql(f"CREATE OR REPLACE TABLE {plugin_table} (i integer, j string)")
        project.run_sql(f"INSERT INTO {plugin_table} (i, j) VALUES (1, 'foo')")
        yield
        for teardown_sql in (
            "DROP VIEW md_table",
            "DROP TABLE random_logs_test",
            "DROP TABLE summary_of_logs_test",
            f"DROP TABLE {plugin_table}",
        ):
            project.run_sql(teardown_sql)

    def test_motherduck(self, project):
        """Check SaaS mode is on and that the Python model fails with the expected message."""
        row = project.run_sql(MOTHERDUCK_SAAS_MODE_QUERY, fetch="one")
        (saas_mode_value,) = row
        if str(saas_mode_value).lower() not in ("1", "true"):
            raise ValueError("SaaS mode was not set")

        result = run_dbt(expect_pass=False)
        failures = [res for res in result.results if res.status != RunStatus.Success]
        expected_msg = "Python models are disabled when MotherDuck SaaS Mode is on."
        assert failures[0].message == expected_msg


@pytest.mark.skip_profile("buenavista", "file", "memory")
class TestMDPluginSaaSModeViaAttach(TestMDPluginSaaSMode):
    """SaaS-mode test with the MD database attached and options given via the plugin config."""

    @pytest.fixture(scope="class")
    def profiles_config_update(self, dbt_profile_target):
        """Build an in-memory profile that attaches MotherDuck and configures the plugin.

        The plugin config uses the short option names (``token``,
        ``saas_mode``) and an integer flag.
        """
        motherduck_plugin = {
            "module": "motherduck",
            "config": {
                "token": dbt_profile_target.get("token"),
                "saas_mode": 1,
            },
        }
        md_attachment = {
            "path": dbt_profile_target.get("path", ":memory:") + "?user=2",
            "type": "motherduck",
        }
        dev_output = {
            "type": "duckdb",
            "path": ":memory:",
            "plugins": [motherduck_plugin],
            "attach": [md_attachment],
        }
        return {"test": {"outputs": {"dev": dev_output}, "target": "dev"}}


@pytest.mark.skip_profile("buenavista", "file", "memory")
class TestMDPluginSaaSModeViaAttachWithSettings(TestMDPluginSaaSMode):
    """SaaS-mode test with the MD database attached and options given via ``settings``."""

    @pytest.fixture(scope="class")
    def profiles_config_update(self, dbt_profile_target):
        """Build an in-memory profile that attaches MotherDuck and sets options in ``settings``."""
        md_attachment = {
            "path": dbt_profile_target.get("path", ":memory:") + "?user=3",
            "type": "motherduck",
        }
        dev_output = {
            "type": "duckdb",
            "path": ":memory:",
            "attach": [md_attachment],
            "settings": {
                "motherduck_token": dbt_profile_target.get("token"),
                "motherduck_saas_mode": True,
            },
        }
        return {"test": {"outputs": {"dev": dev_output}, "target": "dev"}}


@pytest.mark.skip_profile("buenavista", "file", "memory")
class TestMDPluginSaaSModeViaAttachWithTokenInPath(TestMDPluginSaaSMode):
    """SaaS-mode test with the token and SaaS flag in the attach path's query string."""

    @pytest.fixture(scope="class")
    def profiles_config_update(self, dbt_profile_target):
        """Build an in-memory profile whose attach path carries the MotherDuck options."""
        token = dbt_profile_target.get("token")
        base_path = dbt_profile_target.get("path", ":memory:")
        md_attachment = {
            "path": base_path + f"?motherduck_token={token}&saas_mode=true&user=4",
            "type": "motherduck",
        }
        dev_output = {
            "type": "duckdb",
            "path": ":memory:",
            "attach": [md_attachment],
        }
        return {"test": {"outputs": {"dev": dev_output}, "target": "dev"}}
Loading

0 comments on commit 47d447c

Please sign in to comment.