From 36ceed4e93bcf5e52f09d87a739ac42e6d20ec26 Mon Sep 17 00:00:00 2001 From: Max Mynter <32773644+maxmynter@users.noreply.github.com> Date: Thu, 18 Jan 2024 10:00:28 +0100 Subject: [PATCH] (docs): Add lakeFS-spec and lakeFS-SDK transaction differences (#252) * (docs): Inform about differences of lakeFS-spec and lakeFS-SDK transactions * Remove set _intrans=False in transaction complete method since that is already taken care of in the __exit__ method of the fsspec parent class --------- Co-authored-by: Nicholas Junge Co-authored-by: Adrian Rumpold --- docs/guides/transactions.md | 13 +++++++++++-- src/lakefs_spec/transaction.py | 2 -- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/docs/guides/transactions.md b/docs/guides/transactions.md index d6271ec3..7a961cdc 100644 --- a/docs/guides/transactions.md +++ b/docs/guides/transactions.md @@ -2,9 +2,18 @@ In addition to file operations, you can carry out versioning operations in your Python code using file system *transactions*. -A transaction is basically a context manager that collects all file uploads, defers them, and executes the uploads on completion of the transaction. +A transaction is essentially a context manager that collects all file uploads, defers them, and executes the uploads on completion of the transaction. They are an "all or nothing" proposition: If an error occurs during the transaction, none of the queued files are uploaded. -For more information on fsspec transactions, see the official [documentation](https://filesystem-spec.readthedocs.io/en/latest/features.html#transactions). + +!!! info + The transactions in lakeFS-spec are different from the transactions in the [high-level lakeFS SDK](https://docs.lakefs.io/integrations/python.html#transactions), which were added in v0.2.0. 
+ + *High-level lakeFS SDK* transactions create an ephemeral branch, perform the operations in the context block on that ephemeral branch, and merge it back into the source branch upon exiting the context manager. + + *lakeFS-spec* transactions collect the versioning operations and perform them one by one directly on the source branch once the context manager is exited. + This allows for more fine-grained control over the applied versioning operations. For example, multiple commits can be created in a single lakeFS-spec transaction. + +The lakeFS-spec transaction inherits from fsspec transactions. For more information on fsspec transactions, see the [official documentation](https://filesystem-spec.readthedocs.io/en/latest/features.html#transactions). The main features of the lakeFS file system transaction are: diff --git a/src/lakefs_spec/transaction.py b/src/lakefs_spec/transaction.py index 5bf2399d..6645a607 100644 --- a/src/lakefs_spec/transaction.py +++ b/src/lakefs_spec/transaction.py @@ -159,8 +159,6 @@ def complete(self, commit: bool = True) -> None: if isinstance(retval, Placeholder): retval.value = result - self.fs._intrans = False - def create_branch( self, repository: str | Repository, name: str, source: str | Branch, exist_ok: bool = True ) -> str: