diff --git a/.gitignore b/.gitignore
index 1d25ea61fc..2607be83a4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,7 @@ experiments/*
 secrets.toml
 *.session.sql
 *.duckdb
+*.wal
 
 # Byte-compiled / optimized / DLL files
 **/__pycache__/
diff --git a/dlt/common/pipeline.py b/dlt/common/pipeline.py
index fce32262bf..07c80268c0 100644
--- a/dlt/common/pipeline.py
+++ b/dlt/common/pipeline.py
@@ -97,6 +97,12 @@ class SupportsPipeline(Protocol):
     def state(self) -> TPipelineState:
         """Returns dictionary with pipeline state"""
 
+    def set_local_state_val(self, key: str, value: Any) -> None:
+        """Sets value in local state. Local state is not synchronized with destination."""
+
+    def get_local_state_val(self, key: str) -> Any:
+        """Gets value from local state. Local state is not synchronized with destination."""
+
     def run(
         self,
         data: Any = None,
diff --git a/dlt/destinations/duckdb/configuration.py b/dlt/destinations/duckdb/configuration.py
index 59d766c83e..176d6b380e 100644
--- a/dlt/destinations/duckdb/configuration.py
+++ b/dlt/destinations/duckdb/configuration.py
@@ -9,7 +9,9 @@
 from dlt.common.destination.reference import DestinationClientDwhConfiguration
 from dlt.common.typing import DictStrAny, TSecretValue
 
-DEFAULT_DUCK_DB_NAME = "quack.duckdb"
+DUCK_DB_NAME = "%s.duckdb"
+DEFAULT_DUCK_DB_NAME = DUCK_DB_NAME % "quack"
+LOCAL_STATE_KEY = "duckdb_database"
 
 
 @configspec
@@ -25,7 +27,7 @@ class DuckDbCredentials(ConnectionStringCredentials):
 
     # __config_gen_annotations__: ClassVar[List[str]] = ["database"]
 
-    def borrow_conn(self, read_only: bool, config: DictStrAny = None) -> Any:
+    def borrow_conn(self, read_only: bool) -> Any:
         import duckdb
 
         if not hasattr(self, "_conn_lock"):
@@ -34,7 +36,7 @@ def borrow_conn(self, read_only: bool, config: DictStrAny = None) -> Any:
         # obtain a lock because duck releases the GIL and we have refcount concurrency
         with self._conn_lock:
             if not hasattr(self, "_conn"):
-                self._conn = duckdb.connect(database=self.database, read_only=read_only, config=config)
+                self._conn = duckdb.connect(database=self.database, read_only=read_only)
                 self._conn_owner = True
                 self._conn_borrows = 0
 
@@ -70,31 +72,59 @@ def parse_native_representation(self, native_value: Any) -> None:
         try:
             super().parse_native_representation(native_value)
         except InvalidConnectionString:
-            if is_valid_filepath(native_value, platform="auto"):
+            if native_value == ":pipeline:" or is_valid_filepath(native_value, platform="auto"):
                 self.database = native_value
             else:
                 raise
 
     def on_resolved(self) -> None:
-        # if database is not set, try the pipeline context
-        if not self.database:
+        # do not set any paths for external database
+        if self.database == ":external:":
+            return
+        # try the pipeline context
+        if self.database == ":pipeline:":
             self.database = self._path_in_pipeline(DEFAULT_DUCK_DB_NAME)
-        # if pipeline context was not present
+        # if pipeline context was not present or database was not set
         if not self.database:
             # create database locally
-            self.database = DEFAULT_DUCK_DB_NAME
+            self.database = self._path_from_pipeline(DEFAULT_DUCK_DB_NAME)
         # always make database an abs path
         self.database = os.path.abspath(self.database)
+        self._path_to_pipeline(self.database)
 
     def _path_in_pipeline(self, rel_path: str) -> str:
         from dlt.common.configuration.container import Container
         from dlt.common.pipeline import PipelineContext
+
         context = Container()[PipelineContext]
         if context.is_active():
             # pipeline is active, get the working directory
             return os.path.join(context.pipeline().working_dir, rel_path)
         return None
+
+    def _path_to_pipeline(self, abspath: str) -> None:
+        from dlt.common.configuration.container import Container
+        from dlt.common.pipeline import PipelineContext
+
+        context = Container()[PipelineContext]
+        if context.is_active():
+            context.pipeline().set_local_state_val(LOCAL_STATE_KEY, abspath)
+
+    def _path_from_pipeline(self, default_path: str) -> str:
+        from dlt.common.configuration.container import Container
+        from dlt.common.pipeline import PipelineContext
+
+        context = Container()[PipelineContext]
+        if context.is_active():
+            try:
+                # use pipeline name as default
+                default_path = DUCK_DB_NAME % context.pipeline().pipeline_name
+                return context.pipeline().get_local_state_val(LOCAL_STATE_KEY)  # type: ignore
+            except KeyError:
+                pass
+        return default_path
+
     def _delete_conn(self) -> None:
         # print("Closing conn because is owner")
         self._conn.close()
diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py
index 69841e1b22..acfa4541bf 100644
--- a/dlt/pipeline/pipeline.py
+++ b/dlt/pipeline/pipeline.py
@@ -11,7 +11,7 @@
 from dlt.common.configuration import inject_section, known_sections
 from dlt.common.configuration.specs import RunConfiguration, NormalizeVolumeConfiguration, SchemaVolumeConfiguration, LoadVolumeConfiguration, PoolRunnerConfiguration
 from dlt.common.configuration.container import Container
-from dlt.common.configuration.exceptions import ConfigFieldMissingException
+from dlt.common.configuration.exceptions import ConfigFieldMissingException, ContextDefaultCannotBeCreated
 from dlt.common.configuration.specs.config_section_context import ConfigSectionContext
 from dlt.common.exceptions import MissingDependencyException
 from dlt.common.normalizers import default_normalizers, import_normalizers
@@ -573,6 +573,29 @@ def sync_schema(self, schema_name: str = None, credentials: Any = None) -> None:
         client.initialize_storage()
         client.update_storage_schema()
 
+    def set_local_state_val(self, key: str, value: Any) -> None:
+        """Sets value in local state. Local state is not synchronized with destination."""
+        try:
+            # get managed state that is read/write
+            state = self._container[StateInjectableContext].state
+            state["_local"][key] = value  # type: ignore
+        except ContextDefaultCannotBeCreated:
+            state = self._get_state()
+            state["_local"][key] = value  # type: ignore
+            self._save_state(state)
+
+    def get_local_state_val(self, key: str) -> Any:
+        """Gets value from local state. Local state is not synchronized with destination."""
+        try:
+            # get managed state that is read/write
+            state = self._container[StateInjectableContext].state
+        except ContextDefaultCannotBeCreated:
+            state = self._get_state()
+        return state["_local"][key]  # type: ignore
+
     def sql_client(self, schema_name: str = None, credentials: Any = None) -> SqlClientBase[Any]:
         """Returns a sql connection configured to query/change the destination and dataset that were used to load the data."""
         # if not self.default_schema_name:
diff --git a/docs/website/docs/destinations.md b/docs/website/docs/destinations.md
index d08ca8fe39..dfe5efdb3b 100644
--- a/docs/website/docs/destinations.md
+++ b/docs/website/docs/destinations.md
@@ -155,9 +155,9 @@ python3 chess.py
 
 ### Destination Configuration
 
-By default, a DuckDB database will be created inside the pipeline working directory with a name `quack.duckdb`. It is available in `read/write` mode via `pipeline.sql_client`.
+By default, a DuckDB database will be created in the current working directory with a name `<pipeline_name>.duckdb` (`chess.duckdb` in the example above).
After loading, it is available in `read/write` mode via `with pipeline.sql_client() as con:` which is a wrapper over `DuckDBPyConnection`. See [duckdb docs](https://duckdb.org/docs/api/python/overview#persistent-storage) for details. -As the `duckdb` credentials do not require any secret values, you are free to pass the configuration explicitly via the `credentials` parameter to `dlt.pipeline` or `pipeline.run` methods. For example: +The `duckdb` credentials do not require any secret values. You are free to pass the configuration explicitly via the `credentials` parameter to `dlt.pipeline` or `pipeline.run` methods. For example: ```python # will load data to files/data.db database file p = dlt.pipeline(pipeline_name='chess', destination='duckdb', dataset_name='chess_data', full_refresh=False, credentials="files/data.db") diff --git a/poetry.lock b/poetry.lock index 97e5eecef9..2b005ce04f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -362,7 +362,7 @@ dates = ["pytz (>=2019.1)"] [[package]] name = "duckdb" -version = "0.7.0" +version = "0.7.1" description = "DuckDB embedded database" category = "main" optional = true @@ -2135,53 +2135,53 @@ domdf-python-tools = [ {file = "domdf_python_tools-3.6.0.tar.gz", hash = "sha256:0ac5efa2ac648dca5653e386fe73aa995e66b215c9d16b7ee87e931322a1e6c8"}, ] duckdb = [ - {file = "duckdb-0.7.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3fcae66586bd858c522559e85068d80414a27f23839705d5b400156c0fdbbeca"}, - {file = "duckdb-0.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d07d37edd7e2d1078bf500d11d068d6c1b62868bf372beca4d856cb3fa58c02"}, - {file = "duckdb-0.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5b86e4bff0f5d49b6ac0aaae5fd6d4bd3d65f1aeeb77529d717515a0e4e16e60"}, - {file = "duckdb-0.7.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4b9fce20c0e43089c6892f6af0112f6532ec9e163dbfd744ce6579a3ed5c63e6"}, - {file = "duckdb-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c730ccfef672b8fe04010862e5b766d1698c2a7401413958bd09a2640260966"}, - {file = "duckdb-0.7.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:36add2ce4747331bcc650d50d656604cc5442a1de9e88ab52481f51e53f10632"}, - {file = "duckdb-0.7.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:115e5e4ed087a129510c34dbac5bc8b30e05f95b05e34b3589771ccaab7215ec"}, - {file = "duckdb-0.7.0-cp310-cp310-win32.whl", hash = "sha256:5fa43a479c68937c7cad9ee13a6fd99417820a1f44aaaaf6d8675ea916278226"}, - {file = "duckdb-0.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:864432628afbc8c4309c2c26b60cd97fdfc7891acdbc633a09e7961bdf5a1dcf"}, - {file = "duckdb-0.7.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a09f52996921b8341a06a7425d8a4b6bc05fbdb8cf39ed1e915a0c9fdd3186ee"}, - {file = "duckdb-0.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e3956bed611b1a9b543f3fed1d01ee41cfcb68cc150bebd2b3152b817541ec01"}, - {file = "duckdb-0.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bc9ec4507b0c09aa163d27e3c4fcb8d3d904f0d02bb99f628534741cae45cf08"}, - {file = "duckdb-0.7.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ac9e94c402d149a894febe76012cf86f6ed2b56232dd89a9ea06ac0843aacf69"}, - {file = "duckdb-0.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d735ba4d054cd05f7cceb4bc591151f0934460bfc6346d84063c58f5156c16a"}, - {file = "duckdb-0.7.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:49d33c0beaf075a8347bcab7589066f30d537bf1cba44bc87b2812343a080691"}, - 
{file = "duckdb-0.7.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:264fd585c5a6ccd0fb6ca3a957a0b82fc195afa0533392c77f7ac6b58a8cc617"}, - {file = "duckdb-0.7.0-cp311-cp311-win32.whl", hash = "sha256:5f93ec36c1b038cc09ac57c58b8506c13c722eca68e9f24af04ca0ee76b6fb1b"}, - {file = "duckdb-0.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:cee0f977015dd7a35e7374ee341d56101c6561363adde213a87595e49719284c"}, - {file = "duckdb-0.7.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f0c9c54a6ce29ce45efc271ad4bd4d8f8b6ceb5e7e9ee66a236cda4c15ea1be6"}, - {file = "duckdb-0.7.0-cp36-cp36m-win32.whl", hash = "sha256:fe2ea12990ecc5eaf2de4f349d9c8c23b4855f8037c6e8fd1c659d22464d32ff"}, - {file = "duckdb-0.7.0-cp36-cp36m-win_amd64.whl", hash = "sha256:cc473a5ebd7efbe1c16fd72885031c167e925a2c7afbf6fb1ff66b9f4bfb93b9"}, - {file = "duckdb-0.7.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:462df10e74205ee45532315d239bcc37d5372c5169496ac5f227ab521d97a8a1"}, - {file = "duckdb-0.7.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22fb9d39b4cac968b2c80683be7be70138d49df0917cb6a3d141fb147afe4bb4"}, - {file = "duckdb-0.7.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:309ba76ac99f5a9a38579205406f13d117d9506e4a097b4bbb0a3484a103291c"}, - {file = "duckdb-0.7.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:78fb8e34d4bb73d21104c5c9cd170a0f2d79ab37bd1e7d03bd92ee65e50b80f1"}, - {file = "duckdb-0.7.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:5766c9d08d9cffd755153eca6c8f61f70c3b6a6d3c58ed2b2d4057b0a50f7850"}, - {file = "duckdb-0.7.0-cp37-cp37m-win32.whl", hash = "sha256:0d733bc19806e45782c79a0a6b9a6e88cd8502020533175b83db75d1fa564246"}, - {file = "duckdb-0.7.0-cp37-cp37m-win_amd64.whl", hash = "sha256:136bdc26d5f571d4ff3c09d867923e5f9ac03cc62db732d4b2ca2d4d9f84dd1d"}, - {file = "duckdb-0.7.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a22607efe25bc3b85e078fc5619095bf0d1cc8d39f0039a0e424075e2c77197b"}, - {file = "duckdb-0.7.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1db7cacd682f465a3770ca97928edb9d245ef3ebe4e195c3cb6b812f099fae4f"}, - {file = "duckdb-0.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ef71d92a738b1e4f64073e355dae90122793d5008c5e728e35b17bc023dbc7a7"}, - {file = "duckdb-0.7.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:65458c1fac678e1e03cf9e0aa00666b3990792adb318fe41bd49b15fe8a6e78a"}, - {file = "duckdb-0.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f21317cc8cc7f331936cc42294f52b8212a8e6a221b09672f0bfa7f290898a5"}, - {file = "duckdb-0.7.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b9e0010145b0abe510bb9ea34782fa82df4ef4f2e6c9b87877df99ff20b8c993"}, - {file = "duckdb-0.7.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0e308354b54d235cd262c52036da56b3d49969c7cad47f784aff618cb3bed7a"}, - {file = "duckdb-0.7.0-cp38-cp38-win32.whl", hash = "sha256:8ae0e5ec024de474f12613524b5c2466955b5e42a68ea2ec788ac50040d44b88"}, - {file = "duckdb-0.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:99ba303d81d8145edcbb5cae27e873e3c1c403532041cbe94b514af8e7db3009"}, - {file = "duckdb-0.7.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1ce42f93dd35e6b256abe1c7344445913536da4604f6e8ee3c886fe7c1eb5580"}, - {file = "duckdb-0.7.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b09ba609cf1744f87c1c7143db46c668e24bab893f422e22c7d059bc07a18b3d"}, - {file = "duckdb-0.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:fdb88cccd4a9446e67a375ac7e8058c35df62f4199b781e2e408ad95af10deb3"}, - {file = "duckdb-0.7.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:94d3cf46267a698f5e98dadf9cb0ce5c4f68e7a4c9702c2a37b57d56d2f944d7"}, - {file = "duckdb-0.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ff5ff3326faa087892d6529e700774878227c80ce8e16b018a62cb50d5bd4ac"}, - {file = "duckdb-0.7.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ee3333009b828faf767582b23e0b9c1c4f888e8f1ae42de614f3ed0c40b1fe49"}, - {file = "duckdb-0.7.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1521f430409ac6be7b959bb6f3ebfa9597f6a8c8837e13e060022fd9f6b45726"}, - {file = "duckdb-0.7.0-cp39-cp39-win32.whl", hash = "sha256:77e4607798d196c601795032b50cfb0d564ff42cbd10cf8ff47a5eb992bb442f"}, - {file = "duckdb-0.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:726a3d555c6387e6653bc46d94645eac8139f825adfd0b7876ea858b5717796b"}, - {file = "duckdb-0.7.0.tar.gz", hash = "sha256:08ebfcc67a6a073fa2efd0453de9a7e453637bba522e67968fe0c03ccbfc2ec5"}, + {file = "duckdb-0.7.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3e0170be6cc315c179169dfa3e06485ef7009ef8ce399cd2908f29105ef2c67b"}, + {file = "duckdb-0.7.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6360d41023e726646507d5479ba60960989a09f04527b36abeef3643c61d8c48"}, + {file = "duckdb-0.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:578c269d7aa27184e8d45421694f89deda3f41fe6bd2a8ce48b262b9fc975326"}, + {file = "duckdb-0.7.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:36aae9a923c9f78da1cf3fcf75873f62d32ea017d4cef7c706d16d3eca527ca2"}, + {file = "duckdb-0.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:630e0122a02f19bb1fafae00786350b2c31ae8422fce97c827bd3686e7c386af"}, + {file = "duckdb-0.7.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9b9ca2d294725e523ce207bc37f28787478ae6f7a223e2cf3a213a2d498596c3"}, + {file = "duckdb-0.7.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0bd89f388205b6c99b62650169efe9a02933555ee1d46ddf79fbd0fb9e62652b"}, + {file = "duckdb-0.7.1-cp310-cp310-win32.whl", hash = "sha256:a9e987565a268fd8da9f65e54621d28f39c13105b8aee34c96643074babe6d9c"}, + {file = "duckdb-0.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:5d986b5ad1307b069309f9707c0c5051323e29865aefa059eb6c3b22dc9751b6"}, + {file = "duckdb-0.7.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:54606dfd24d7181d3098030ca6858f6be52f3ccbf42fff05f7587f2d9cdf4343"}, + {file = "duckdb-0.7.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bd9367ae650b6605ffe00412183cf0edb688a5fc9fbb03ed757e8310e7ec3b6c"}, + {file = "duckdb-0.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:aaf33aeb543c7816bd915cd10141866d54f92f698e1b5712de9d8b7076da19df"}, + {file = "duckdb-0.7.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e56b0329c38c0356b40449917bab6fce6ac27d356257b9a9da613d2a0f064e0"}, + {file = "duckdb-0.7.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:604b8b476d6cc6bf91625d8c2722ef9c50c402b3d64bc518c838d6c279e6d93b"}, + {file = "duckdb-0.7.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:32a268508c6d7fdc99d5442736051de74c28a5166c4cc3dcbbf35d383299b941"}, + {file = "duckdb-0.7.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:90794406fa2111414877ee9db154fef940911f3920c312c1cf69947621737c8d"}, + {file = "duckdb-0.7.1-cp311-cp311-win32.whl", hash = 
"sha256:bf20c5ee62cbbf10b39ebdfd70d454ce914e70545c7cb6cb78cb5befef96328a"}, + {file = "duckdb-0.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:bb2700785cab37cd1e7a76c4547a5ab0f8a7c28ad3f3e4d02a8fae52be223090"}, + {file = "duckdb-0.7.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b09741cfa31388b8f9cdf5c5200e0995d55a5b54d2d1a75b54784e2f5c042f7f"}, + {file = "duckdb-0.7.1-cp36-cp36m-win32.whl", hash = "sha256:766e6390f7ace7f1e322085c2ca5d0ad94767bde78a38d168253d2b0b4d5cd5c"}, + {file = "duckdb-0.7.1-cp36-cp36m-win_amd64.whl", hash = "sha256:6a3f3315e2b553db3463f07324f62dfebaf3b97656a87558e59e2f1f816eaf15"}, + {file = "duckdb-0.7.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:278edb8c912d836b3b77fd1695887e1dbd736137c3912478af3608c9d7307bb0"}, + {file = "duckdb-0.7.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e189b558d10b58fe6ed85ce79f728e143eb4115db1e63147a44db613cd4dd0d9"}, + {file = "duckdb-0.7.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b91ec3544ee4dc9e6abbdf2669475d5adedaaea51987c67acf161673e6b7443"}, + {file = "duckdb-0.7.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:3fe3f3dbd62b76a773144eef31aa29794578c359da932e77fef04516535318ca"}, + {file = "duckdb-0.7.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:1e78c7f59325e99f0b3d9fe7c2bad4aaadf42d2c7711925cc26331d7647a91b2"}, + {file = "duckdb-0.7.1-cp37-cp37m-win32.whl", hash = "sha256:bc2a12d9f4fc8ef2fd1022d610287c9fc9972ea06b7510fc87387f1fa256a390"}, + {file = "duckdb-0.7.1-cp37-cp37m-win_amd64.whl", hash = "sha256:53e3db1bc0f445ee48b23cde47bfba08c7fa5a69976c740ec8cdf89543d2405d"}, + {file = "duckdb-0.7.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:1247cc11bac17f2585d11681329806c86295e32242f84a10a604665e697d5c81"}, + {file = "duckdb-0.7.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5feaff16a012075b49dfa09d4cb24455938d6b0e06b08e1404ec00089119dba2"}, + {file = "duckdb-0.7.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b411a0c361eab9b26dcd0d0c7a0d1bc0ad6b214068555de7e946fbdd2619961a"}, + {file = "duckdb-0.7.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7c76d8694ecdb579241ecfeaf03c51d640b984dbbe8e1d9f919089ebf3cdea6"}, + {file = "duckdb-0.7.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:193b896eed44d8751a755ccf002a137630020af0bc3505affa21bf19fdc90df3"}, + {file = "duckdb-0.7.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7da132ee452c80a3784b8daffd86429fa698e1b0e3ecb84660db96d36c27ad55"}, + {file = "duckdb-0.7.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5fd08c97c3e8cb5bec3822cf78b966b489213dcaab24b25c05a99f7caf8db467"}, + {file = "duckdb-0.7.1-cp38-cp38-win32.whl", hash = "sha256:9cb956f94fa55c4782352dac7cc7572a58312bd7ce97332bb14591d6059f0ea4"}, + {file = "duckdb-0.7.1-cp38-cp38-win_amd64.whl", hash = "sha256:289a5f65213e66d320ebcd51a94787e7097b9d1c3492d01a121a2c809812bf19"}, + {file = "duckdb-0.7.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8085ad58c9b5854ee3820804fa1797e6b3134429c1506c3faab3cb96e71b07e9"}, + {file = "duckdb-0.7.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b47c19d1f2f662a5951fc6c5f6939d0d3b96689604b529cdcffd9afdcc95bff2"}, + {file = "duckdb-0.7.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6a611f598226fd634b7190f509cc6dd668132ffe436b0a6b43847b4b32b99e4a"}, + {file = "duckdb-0.7.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6730f03b5b78f3943b752c90bdf37b62ae3ac52302282a942cc675825b4a8dc9"}, + {file = 
"duckdb-0.7.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe23e938d29cd8ea6953d77dc828b7f5b95a4dbc7cd7fe5bcc3531da8cec3dba"}, + {file = "duckdb-0.7.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:feffe503c2e2a99480e1e5e15176f37796b3675e4dadad446fe7c2cc672aed3c"}, + {file = "duckdb-0.7.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:72fceb06f5bf24ad6bb5974c60d397a7a7e61b3d847507a22276de076f3392e2"}, + {file = "duckdb-0.7.1-cp39-cp39-win32.whl", hash = "sha256:c4d5217437d20d05fe23317bbc161befa1f9363f3622887cd1d2f4719b407936"}, + {file = "duckdb-0.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:066885e1883464ce3b7d1fd844f9431227dcffe1ee39bfd2a05cd6d53f304557"}, + {file = "duckdb-0.7.1.tar.gz", hash = "sha256:a7db6da0366b239ea1e4541fcc19556b286872f5015c9a54c2e347146e25a2ad"}, ] flake8 = [ {file = "flake8-5.0.4-py2.py3-none-any.whl", hash = "sha256:7a1cf6b73744f5806ab95e526f6f0d8c01c66d7bbe349562d22dfca20610b248"}, diff --git a/pyproject.toml b/pyproject.toml index f51f30d6b7..fbdb00534a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "python-dlt" -version = "0.2.0a21" +version = "0.2.0a22" description = "DLT is an open-source python-native scalable data loading framework that does not require any devops efforts to run." authors = ["dltHub Inc. "] maintainers = [ "Marcin Rudolf ", "Adrian Brudaru ", "Ty Dunn "] diff --git a/tests/load/bigquery/test_bigquery_table_builder.py b/tests/load/bigquery/test_bigquery_table_builder.py index d81ff5b9c8..6612b37b21 100644 --- a/tests/load/bigquery/test_bigquery_table_builder.py +++ b/tests/load/bigquery/test_bigquery_table_builder.py @@ -51,7 +51,8 @@ def test_create_table(gcp_client: BigQueryClient) -> None: assert "`col6` NUMERIC(38,9) NOT NULL" in sql assert "`col7` BYTES" in sql assert "`col8` BIGNUMERIC" in sql - assert "`col9` JSON NOT NULL)" in sql + assert "`col9` JSON NOT NULL" in sql + assert "`col10` DATE" in sql assert "CLUSTER BY" not in sql assert "PARTITION BY" not in sql @@ -70,6 +71,7 @@ def test_alter_table(gcp_client: BigQueryClient) -> None: assert "ADD COLUMN `col7` BYTES" in sql assert "ADD COLUMN `col8` BIGNUMERIC" in sql assert "ADD COLUMN `col9` JSON NOT NULL" in sql + assert "ADD COLUMN `col10` DATE" in sql # table has col1 already in storage mod_table = deepcopy(TABLE_UPDATE) mod_table.pop(0) diff --git a/tests/load/duckdb/test_duckdb_client.py b/tests/load/duckdb/test_duckdb_client.py index f3916afc43..06a32c1a81 100644 --- a/tests/load/duckdb/test_duckdb_client.py +++ b/tests/load/duckdb/test_duckdb_client.py @@ -16,6 +16,8 @@ def delete_default_duckdb_credentials() -> None: # remove the default duckdb config # os.environ.pop("DESTINATION__DUCKDB__CREDENTIALS", None) os.environ.clear() + yield + delete_quack_db() def test_duckdb_open_conn_default() -> None: @@ -40,16 +42,33 @@ def test_duckdb_open_conn_default() -> None: def test_duckdb_database_path() -> None: # resolve without any path provided c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset")) - assert c.credentials.database.lower() == os.path.abspath(DEFAULT_DUCK_DB_NAME).lower() + assert c.credentials.database.lower() == os.path.abspath("quack.duckdb").lower() # resolve without any path but with pipeline context p = dlt.pipeline(pipeline_name="quack_pipeline") c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset")) + # still cwd + db_path = os.path.abspath(os.path.join(".", "quack_pipeline.duckdb")) + assert c.credentials.database.lower() == 
db_path.lower() + # but it is kept in the local state + assert p.get_local_state_val("duckdb_database").lower() == db_path.lower() + # connect + try: + conn = c.credentials.borrow_conn(read_only=False) + c.credentials.return_conn(conn) + assert os.path.isfile(db_path) + finally: + if os.path.isfile(db_path): + os.unlink(db_path) + + # test special :pipeline: path to create in pipeline folder + c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset", credentials=":pipeline:")) db_path = os.path.abspath(os.path.join(p.working_dir, DEFAULT_DUCK_DB_NAME)) assert c.credentials.database.lower() == db_path.lower() # connect conn = c.credentials.borrow_conn(read_only=False) c.credentials.return_conn(conn) assert os.path.isfile(db_path) + assert p.get_local_state_val("duckdb_database").lower() == db_path.lower() # provide relative path db_path = "_storage/test_quack.duckdb" @@ -92,6 +111,33 @@ def test_duckdb_database_path() -> None: conn = c.credentials.borrow_conn(read_only=False) +def test_keeps_initial_db_path() -> None: + db_path = "_storage/path_test_quack.duckdb" + p = dlt.pipeline(pipeline_name="quack_pipeline", credentials=db_path, destination="duckdb") + with p.sql_client() as conn: + # still cwd + assert conn.credentials.database.lower() == os.path.abspath(db_path).lower() + # but it is kept in the local state + assert p.get_local_state_val("duckdb_database").lower() == os.path.abspath(db_path).lower() + + # attach the pipeline + p = dlt.attach(pipeline_name="quack_pipeline") + assert p.get_local_state_val("duckdb_database").lower() == os.path.abspath(db_path).lower() + with p.sql_client() as conn: + # still cwd + assert p.get_local_state_val("duckdb_database").lower() == os.path.abspath(db_path).lower() + assert conn.credentials.database.lower() == os.path.abspath(db_path).lower() + + # now create a new pipeline + dlt.pipeline(pipeline_name="not_quack", destination="dummy") + with p.sql_client() as conn: + # still cwd + assert p.get_local_state_val("duckdb_database").lower() == os.path.abspath(db_path).lower() + # new pipeline context took over + # TODO: restore pipeline context on each call + assert conn.credentials.database.lower() != os.path.abspath(db_path).lower() + + def test_external_duckdb_database() -> None: import duckdb diff --git a/tests/load/duckdb/test_duckdb_table_builder.py b/tests/load/duckdb/test_duckdb_table_builder.py index 425d8f6c7e..9361dcdb32 100644 --- a/tests/load/duckdb/test_duckdb_table_builder.py +++ b/tests/load/duckdb/test_duckdb_table_builder.py @@ -32,6 +32,7 @@ def test_create_table_with_hints(client: DuckDbClient) -> None: assert '"col1" BIGINT NOT NULL' in sql assert '"col2" DOUBLE NOT NULL' in sql assert '"col5" VARCHAR ' in sql + assert '"col10" DATE ' in sql # no hints assert '"col3" BOOLEAN NOT NULL' in sql assert '"col4" TIMESTAMP WITH TIME ZONE NOT NULL' in sql diff --git a/tests/load/postgres/test_postgres_table_builder.py b/tests/load/postgres/test_postgres_table_builder.py index 982753e9de..1776a6bca7 100644 --- a/tests/load/postgres/test_postgres_table_builder.py +++ b/tests/load/postgres/test_postgres_table_builder.py @@ -34,6 +34,7 @@ def test_create_table(client: PostgresClient) -> None: assert '"col7" bytea' in sql assert '"col8" numeric(156,78)' in sql assert '"col9" jsonb NOT NULL' in sql + assert '"col10" date NOT NULL' in sql def test_alter_table(client: PostgresClient) -> None: @@ -52,6 +53,7 @@ def test_alter_table(client: PostgresClient) -> None: assert '"col7" bytea' in sql assert '"col8" numeric(156,78)' 
in sql
     assert '"col9" jsonb NOT NULL' in sql
+    assert '"col10" date NOT NULL' in sql
 
 
 def test_create_table_with_hints(client: PostgresClient) -> None:
diff --git a/tests/load/redshift/test_redshift_table_builder.py b/tests/load/redshift/test_redshift_table_builder.py
index ff279c51d8..056ea0a3cc 100644
--- a/tests/load/redshift/test_redshift_table_builder.py
+++ b/tests/load/redshift/test_redshift_table_builder.py
@@ -45,6 +45,7 @@ def test_create_table(client: RedshiftClient) -> None:
     assert '"col7" varbinary' in sql
     assert '"col8" numeric(38,0)' in sql
     assert '"col9" super NOT NULL' in sql
+    assert '"col10" date NOT NULL' in sql
 
 
 def test_alter_table(client: RedshiftClient) -> None:
@@ -63,6 +64,7 @@ def test_alter_table(client: RedshiftClient) -> None:
     assert '"col7" varbinary' in sql
     assert '"col8" numeric(38,0)' in sql
     assert '"col9" super NOT NULL' in sql
+    assert '"col10" date NOT NULL' in sql
 
 
 def test_create_table_with_hints(client: RedshiftClient) -> None:
diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py
index 69d6471310..d7560dc84d 100644
--- a/tests/pipeline/test_pipeline.py
+++ b/tests/pipeline/test_pipeline.py
@@ -415,4 +415,25 @@ def test_extract_exception() -> None:
 @pytest.mark.skip("Not implemented")
 def test_extract_all_data_types() -> None:
     # list, iterators, generators, resource, source, list of resources, list of sources
-    pass
\ No newline at end of file
+    pass
+
+
+def test_set_get_local_value() -> None:
+    p = dlt.pipeline(destination="dummy", full_refresh=True)
+    value = uniq_id()
+    # value is set
+    p.set_local_state_val(value, value)
+    assert p.get_local_state_val(value) == value
+    # check if this is actual local state
+    assert p.state["_local"][value] == value
+
+    new_val = uniq_id()
+    # check in context manager
+    @dlt.resource
+    def _w_local_state():
+        # join existing managed state
+        p.set_local_state_val(new_val, new_val)
+        yield 1
+
+    p.extract(_w_local_state)
+    assert p.state["_local"][new_val] == new_val
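
For reviewers, a minimal usage sketch of the two user-facing changes in this diff: the new DuckDB database path resolution (`<pipeline_name>.duckdb` in the current working directory, or the special `:pipeline:` value to place `quack.duckdb` in the pipeline working directory) and the `set_local_state_val` / `get_local_state_val` accessors. The sample data and table name are illustrative only, and passing `:pipeline:` through `dlt.pipeline(credentials=...)` is an assumption based on how a plain path credential is passed in `test_keeps_initial_db_path`; the remaining calls mirror the tests above.

```python
import dlt

# default path resolution: the database lands in the current working directory as
# <pipeline_name>.duckdb ("chess.duckdb" here); the resolved absolute path is kept
# in the pipeline's local state and is not synchronized with the destination
p = dlt.pipeline(pipeline_name="chess", destination="duckdb", dataset_name="chess_data")
p.run([{"id": 1}], table_name="games")  # illustrative data
print(p.get_local_state_val("duckdb_database"))  # e.g. /abs/path/chess.duckdb

# the special ":pipeline:" value creates quack.duckdb inside the pipeline working
# directory instead of the cwd (assumption: accepted via the credentials argument)
p2 = dlt.pipeline(
    pipeline_name="chess_wd",
    destination="duckdb",
    dataset_name="chess_data",
    credentials=":pipeline:",
)

# arbitrary values can be stored in local state as well
p.set_local_state_val("my_key", "my_value")
assert p.get_local_state_val("my_key") == "my_value"
```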