From 514d4477dada4b08c2be2fc6a297c43fe30282a4 Mon Sep 17 00:00:00 2001 From: Aaron Taylor Date: Sat, 20 Jul 2024 19:41:35 -0700 Subject: [PATCH] allow for roundtrips of cloudpaths through pickle serialization `__getstate__` removes `_client` from the pickled state, so a CloudPath loaded from a pickle has no `_client` entry. Pickling that loaded object a second time made `__getstate__` raise a KeyError on `del state["_client"]`. Only delete the entry when it is present, so a path can be dumped and loaded repeatedly. Closes #450 --- HISTORY.md | 1 + cloudpathlib/cloudpath.py | 3 ++- tests/test_cloudpath_serialize.py | 14 ++++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 tests/test_cloudpath_serialize.py diff --git a/HISTORY.md b/HISTORY.md index 046ab12d..d7e716ca 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -2,6 +2,7 @@ ## UNRELEASED +- Allow `CloudPath` objects to be loaded/dumped through pickle format repeatedly. (Issue [#450](https://github.com/drivendataorg/cloudpathlib/issues/450)) - Fixes typo in `FileCacheMode` where values were being filled by envvar `CLOUPATHLIB_FILE_CACHE_MODE` instead of `CLOUDPATHLIB_FILE_CACHE_MODE`. (PR [#424](https://github.com/drivendataorg/cloudpathlib/pull/424) - Fix `CloudPath` cleanup via `CloudPath.__del__` when `Client` encounters an exception during initialization and does not create a `file_cache_mode` attribute. (Issue [#372](https://github.com/drivendataorg/cloudpathlib/issues/372), thanks to [@bryanwweber](https://github.com/bryanwweber)) - Drop support for Python 3.7; pin minimal `boto3` version to Python 3.8+ versions. 
(PR [#407](https://github.com/drivendataorg/cloudpathlib/pull/407)) diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index 8bad810d..e57c4af2 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -263,7 +263,8 @@ def __getstate__(self) -> Dict[str, Any]: state = self.__dict__.copy() # don't pickle client - del state["_client"] + if "_client" in state: + del state["_client"] return state diff --git a/tests/test_cloudpath_serialize.py b/tests/test_cloudpath_serialize.py new file mode 100644 index 00000000..ae8857af --- /dev/null +++ b/tests/test_cloudpath_serialize.py @@ -0,0 +1,14 @@ +import pickle + +from cloudpathlib import CloudPath + + +def test_pickle_roundtrip(): + path1 = CloudPath("s3://bucket/key") + pkl1 = pickle.dumps(path1) + + path2 = pickle.loads(pkl1) + pkl2 = pickle.dumps(path2) + + assert path1 == path2 + assert pkl1 == pkl2