diff --git a/.gitignore b/.gitignore index 1d25ea61fc..2607be83a4 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ experiments/* secrets.toml *.session.sql *.duckdb +*.wal # Byte-compiled / optimized / DLL files **/__pycache__/ diff --git a/dlt/destinations/duckdb/configuration.py b/dlt/destinations/duckdb/configuration.py index ee87c4dde8..176d6b380e 100644 --- a/dlt/destinations/duckdb/configuration.py +++ b/dlt/destinations/duckdb/configuration.py @@ -9,7 +9,8 @@ from dlt.common.destination.reference import DestinationClientDwhConfiguration from dlt.common.typing import DictStrAny, TSecretValue -DEFAULT_DUCK_DB_NAME = "quack.duckdb" +DUCK_DB_NAME = "%s.duckdb" +DEFAULT_DUCK_DB_NAME = DUCK_DB_NAME % "quack" LOCAL_STATE_KEY = "duckdb_database" @@ -117,7 +118,8 @@ def _path_from_pipeline(self, default_path: str) -> str: context = Container()[PipelineContext] if context.is_active(): try: - # get + # use pipeline name as default + default_path = DUCK_DB_NAME % context.pipeline().pipeline_name return context.pipeline().get_local_state_val(LOCAL_STATE_KEY) # type: ignore except KeyError: pass diff --git a/docs/website/docs/destinations.md b/docs/website/docs/destinations.md index 6a58ce4198..dfe5efdb3b 100644 --- a/docs/website/docs/destinations.md +++ b/docs/website/docs/destinations.md @@ -155,7 +155,7 @@ python3 chess.py ### Destination Configuration -By default, a DuckDB database will be created in the current working directory with a name `quack.duckdb`. After loading, it is available in `read/write` mode via `with pipeline.sql_client() as con:` which is a wrapper over `DuckDBPyConnection`. See [duckdb docs](https://duckdb.org/docs/api/python/overview#persistent-storage) for details. +By default, a DuckDB database will be created in the current working directory with a name `<pipeline_name>.duckdb` (`chess.duckdb` in the example above). 
After loading, it is available in `read/write` mode via `with pipeline.sql_client() as con:` which is a wrapper over `DuckDBPyConnection`. See [duckdb docs](https://duckdb.org/docs/api/python/overview#persistent-storage) for details. The `duckdb` credentials do not require any secret values. You are free to pass the configuration explicitly via the `credentials` parameter to `dlt.pipeline` or `pipeline.run` methods. For example: ```python diff --git a/tests/load/duckdb/test_duckdb_client.py b/tests/load/duckdb/test_duckdb_client.py index 8edca08868..06a32c1a81 100644 --- a/tests/load/duckdb/test_duckdb_client.py +++ b/tests/load/duckdb/test_duckdb_client.py @@ -42,12 +42,12 @@ def test_duckdb_open_conn_default() -> None: def test_duckdb_database_path() -> None: # resolve without any path provided c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset")) - assert c.credentials.database.lower() == os.path.abspath(DEFAULT_DUCK_DB_NAME).lower() + assert c.credentials.database.lower() == os.path.abspath("quack.duckdb").lower() # resolve without any path but with pipeline context p = dlt.pipeline(pipeline_name="quack_pipeline") c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset")) # still cwd - db_path = os.path.abspath(os.path.join(".", DEFAULT_DUCK_DB_NAME)) + db_path = os.path.abspath(os.path.join(".", "quack_pipeline.duckdb")) assert c.credentials.database.lower() == db_path.lower() # but it is kept in the local state assert p.get_local_state_val("duckdb_database").lower() == db_path.lower()