From a429343126c505414e32765b85d9c33cdadde604 Mon Sep 17 00:00:00 2001 From: Jay Qi Date: Wed, 21 Aug 2024 20:26:21 -0400 Subject: [PATCH 1/2] Change LocalClient to not explicitly store default storage directory --- HISTORY.md | 10 ++++--- cloudpathlib/local/localclient.py | 43 ++++++++++++++++++++++--------- test.py | 7 +++++ tests/test_local.py | 22 ++++++++++++---- 4 files changed, 62 insertions(+), 20 deletions(-) create mode 100644 test.py diff --git a/HISTORY.md b/HISTORY.md index 190d5345..b5ec26b6 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -3,17 +3,21 @@ ## UNRELEASED - Fixes typo in `FileCacheMode` where values were being filled by envvar `CLOUPATHLIB_FILE_CACHE_MODE` instead of `CLOUDPATHLIB_FILE_CACHE_MODE`. (PR [#424](https://github.com/drivendataorg/cloudpathlib/pull/424) -- Fix `CloudPath` cleanup via `CloudPath.__del__` when `Client` encounters an exception during initialization and does not create a `file_cache_mode` attribute. (Issue [#372](https://github.com/drivendataorg/cloudpathlib/issues/372), thanks to [@bryanwweber](https://github.com/bryanwweber)) +- Fix `CloudPath` cleanup via `CloudPath.__del__` when `Client` encounters an exception during initialization and does not create a `file_cache_mode` attribute. (Issue [#372](https://github.com/drivendataorg/cloudpathlib/issues/372), thanks to [@bryanwweber](https://github.com/bryanwweber)) - Drop support for Python 3.7; pin minimal `boto3` version to Python 3.8+ versions. (PR [#407](https://github.com/drivendataorg/cloudpathlib/pull/407)) - fix: use native `exists()` method in `GSClient`. (PR [#420](https://github.com/drivendataorg/cloudpathlib/pull/420)) - Enhancement: lazy instantiation of default client (PR [#432](https://github.com/drivendataorg/cloudpathlib/issues/432), Issue [#428](https://github.com/drivendataorg/cloudpathlib/issues/428)) - Adds existence check before downloading in `download_to` (Issue [#430](https://github.com/drivendataorg/cloudpathlib/issues/430), PR [#432](https://github.com/drivendataorg/cloudpathlib/pull/432)) - Add env vars `CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD` and `CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD`. (Issue [#393](https://github.com/drivendataorg/cloudpathlib/issues/393), PR [#437](https://github.com/drivendataorg/cloudpathlib/pull/437)) -- Have `glob` for local clients match behavior of cloud versions for parity in testing; make `reset_default_storage_dir` work with default client. (Issue [#414](https://github.com/drivendataorg/cloudpathlib/issues/414), Issue [#415](https://github.com/drivendataorg/cloudpathlib/issues/415), [PR #436](https://github.com/drivendataorg/cloudpathlib/pull/436)) +- Fixed `glob` for `cloudpathlib.local.LocalPath` and subclass implementations to match behavior of cloud versions for parity in testing. (Issue [#415](https://github.com/drivendataorg/cloudpathlib/issues/415), [PR #436](https://github.com/drivendataorg/cloudpathlib/pull/436)) +- Changed how `cloudpathlib.local.LocalClient` and subclass implementations track the default local storage directory (used to simulate the cloud) used when no local storage directory is explicitly provided. ([PR #436](https://github.com/drivendataorg/cloudpathlib/pull/436), [PR #462](https://github.com/drivendataorg/cloudpathlib/pull/462)) + - Changed `LocalClient` so that client instances using the default storage access the default local storage directory through the `get_default_storage_dir` rather than having an explicit reference to the path set at instantiation. This means that calling `get_default_storage_dir` will reset the local storage for all clients using the default local storage, whether the client has already been instantiated or is instantiated after resetting. This fixes unintuitive behavior where `reset_local_storage` did not reset local storage when using the default client. (Issue [#414](https://github.com/drivendataorg/cloudpathlib/issues/414)) + - Added a new `local_storage_dir` property to `LocalClient`. This will return the current local storage directory used by that client instance. + by reference through the `get_default_ rather than with an explicit. ## v0.18.1 (2024-02-26) -- Fixed import error due to incompatible `google-cloud-storage` by not using `transfer_manager` if it is not available. ([Issue #408](https://github.com/drivendataorg/cloudpathlib/issues/408), [PR #410](https://github.com/drivendataorg/cloudpathlib/pull/410)) +- Fixed import error due to incompatible `google-cloud-storage` by not using `transfer_manager` if it is not available. ([Issue #408](https://github.com/drivendataorg/cloudpathlib/issues/408), [PR #410](https://github.com/drivendataorg/cloudpathlib/pull/410)) Includes all changes from v0.18.0. diff --git a/cloudpathlib/local/localclient.py b/cloudpathlib/local/localclient.py index f6a4d5a5..e057390c 100644 --- a/cloudpathlib/local/localclient.py +++ b/cloudpathlib/local/localclient.py @@ -6,7 +6,7 @@ import shutil from tempfile import TemporaryDirectory from time import sleep -from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union +from typing import Callable, ClassVar, Dict, Iterable, List, Optional, Tuple, Union from ..client import Client from ..enums import FileCacheMode @@ -17,7 +17,12 @@ class LocalClient(Client): """Abstract client for accessing objects the local filesystem. Subclasses are as a monkeypatch substitutes for normal Client subclasses when writing tests.""" - _default_storage_temp_dir = None + # Class-level variable to tracks the default storage directory for this client class + # that is used if a client is instantiated without a directory being explicitly provided + _default_storage_temp_dir: ClassVar[Optional[TemporaryDirectory]] = None + + # Instance-level variable that tracks the local storage directory for this client + _local_storage_dir: Optional[Union[str, os.PathLike]] def __init__( self, @@ -28,10 +33,7 @@ def __init__( content_type_method: Optional[Callable] = mimetypes.guess_type, **kwargs, ): - # setup caching and local versions of file. use default temp dir if not provided - if local_storage_dir is None: - local_storage_dir = self.get_default_storage_dir() - self._local_storage_dir = Path(local_storage_dir) + self._local_storage_dir = local_storage_dir super().__init__( local_cache_dir=local_cache_dir, @@ -41,6 +43,10 @@ def __init__( @classmethod def get_default_storage_dir(cls) -> Path: + """Return the default storage directory for this client class. This is used if a client + is instantiated without a storage directory being explicitly provided. In this usage, + "storage" refers to the local storage that simulates the cloud. + """ if cls._default_storage_temp_dir is None: cls._default_storage_temp_dir = TemporaryDirectory() _temp_dirs_to_clean.append(cls._default_storage_temp_dir) @@ -48,21 +54,34 @@ def get_default_storage_dir(cls) -> Path: @classmethod def reset_default_storage_dir(cls) -> Path: + """Reset the default storage directly. This tears down and recreates the directory used by + default for this client class when instantiating a client without explicitly providing + a storage directory. In this usage, "storage" refers to the local storage that simulates + the cloud. + """ cls._default_storage_temp_dir = None - - # Also reset storage on default client so it uses the new temp dir - cls.get_default_client()._local_storage_dir = cls.get_default_storage_dir() # type: ignore - return cls.get_default_storage_dir() + @property + def local_storage_dir(self) -> Path: + """The local directory where files are stored for this client. This storage directory is + the one that simulates the cloud. If no storage directory was provided on instantiating the + client, the default storage directory for this client class is used. + """ + if self._local_storage_dir is None: + # No explicit local storage was provided on instantiating the client. + # Use the default storage directory for this class. + return self.get_default_storage_dir() + return Path(self._local_storage_dir) + def _cloud_path_to_local(self, cloud_path: "LocalPath") -> Path: - return self._local_storage_dir / cloud_path._no_prefix + return self.local_storage_dir / cloud_path._no_prefix def _local_to_cloud_path(self, local_path: Union[str, os.PathLike]) -> "LocalPath": local_path = Path(local_path) cloud_prefix = self._cloud_meta.path_class.cloud_prefix return self.CloudPath( - f"{cloud_prefix}{PurePosixPath(local_path.relative_to(self._local_storage_dir))}" + f"{cloud_prefix}{PurePosixPath(local_path.relative_to(self.local_storage_dir))}" ) def _download_file(self, cloud_path: "LocalPath", local_path: Union[str, os.PathLike]) -> Path: diff --git a/test.py b/test.py new file mode 100644 index 00000000..139dd2d6 --- /dev/null +++ b/test.py @@ -0,0 +1,7 @@ +from cloudpathlib import S3Path, S3Client + +client = S3Client(profile_name="bad_profile") + +import gc + +gc.collect() diff --git a/tests/test_local.py b/tests/test_local.py index 5499ab29..a983fdd3 100644 --- a/tests/test_local.py +++ b/tests/test_local.py @@ -59,6 +59,8 @@ def test_default_storage_dir(client_class, monkeypatch): p1.write_text("hello") assert p1.exists() assert p1.read_text() == "hello" + + # p2 uses a new client, but the simulated "cloud" should be the same assert p2.exists() assert p2.read_text() == "hello" @@ -76,20 +78,30 @@ def test_reset_default_storage_dir(client_class, monkeypatch): cloud_prefix = client_class._cloud_meta.path_class.cloud_prefix p1 = client_class().CloudPath(f"{cloud_prefix}drive/file.txt") - client_class.reset_default_storage_dir() - p2 = client_class().CloudPath(f"{cloud_prefix}drive/file.txt") - assert not p1.exists() - assert not p2.exists() - p1.write_text("hello") assert p1.exists() assert p1.read_text() == "hello" + + client_class.reset_default_storage_dir() + + # We've reset the default storage directory, so the file should be gone + assert not p1.exists() + + # Also should be gone for p2, which uses a new client that is still using default storage dir + p2 = client_class().CloudPath(f"{cloud_prefix}drive/file.txt") assert not p2.exists() # clean up client_class.reset_default_storage_dir() + +def test_reset_default_storage_dir_with_default_client(): + """Test that reset_default_storage_dir resets the storage used by all clients that are using + the default storage directory, such as the default client. + + Regression test for https://github.com/drivendataorg/cloudpathlib/issues/414 + """ # try default client instantiation from cloudpathlib.local import LocalS3Path, LocalS3Client From 729d6e7a7e208fb9c7fc5298e5614331f8cba803 Mon Sep 17 00:00:00 2001 From: Jay Qi Date: Wed, 21 Aug 2024 21:56:50 -0400 Subject: [PATCH 2/2] Remove extraneous file --- test.py | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 test.py diff --git a/test.py b/test.py deleted file mode 100644 index 139dd2d6..00000000 --- a/test.py +++ /dev/null @@ -1,7 +0,0 @@ -from cloudpathlib import S3Path, S3Client - -client = S3Client(profile_name="bad_profile") - -import gc - -gc.collect()