diff --git a/HISTORY.md b/HISTORY.md index 9cf6a661..b5ec26b6 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -3,16 +3,21 @@ ## UNRELEASED - Fixes typo in `FileCacheMode` where values were being filled by envvar `CLOUPATHLIB_FILE_CACHE_MODE` instead of `CLOUDPATHLIB_FILE_CACHE_MODE`. (PR [#424](https://github.com/drivendataorg/cloudpathlib/pull/424) -- Fix `CloudPath` cleanup via `CloudPath.__del__` when `Client` encounters an exception during initialization and does not create a `file_cache_mode` attribute. (Issue [#372](https://github.com/drivendataorg/cloudpathlib/issues/372), thanks to [@bryanwweber](https://github.com/bryanwweber)) +- Fix `CloudPath` cleanup via `CloudPath.__del__` when `Client` encounters an exception during initialization and does not create a `file_cache_mode` attribute. (Issue [#372](https://github.com/drivendataorg/cloudpathlib/issues/372), thanks to [@bryanwweber](https://github.com/bryanwweber)) - Drop support for Python 3.7; pin minimal `boto3` version to Python 3.8+ versions. (PR [#407](https://github.com/drivendataorg/cloudpathlib/pull/407)) - fix: use native `exists()` method in `GSClient`. (PR [#420](https://github.com/drivendataorg/cloudpathlib/pull/420)) - Enhancement: lazy instantiation of default client (PR [#432](https://github.com/drivendataorg/cloudpathlib/issues/432), Issue [#428](https://github.com/drivendataorg/cloudpathlib/issues/428)) - Adds existence check before downloading in `download_to` (Issue [#430](https://github.com/drivendataorg/cloudpathlib/issues/430), PR [#432](https://github.com/drivendataorg/cloudpathlib/pull/432)) - Add env vars `CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD` and `CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD`. (Issue [#393](https://github.com/drivendataorg/cloudpathlib/issues/393), PR [#437](https://github.com/drivendataorg/cloudpathlib/pull/437)) +- Fixed `glob` for `cloudpathlib.local.LocalPath` and subclass implementations to match behavior of cloud versions for parity in testing. (Issue [#415](https://github.com/drivendataorg/cloudpathlib/issues/415), [PR #436](https://github.com/drivendataorg/cloudpathlib/pull/436)) +- Changed how `cloudpathlib.local.LocalClient` and subclass implementations track the default local storage directory (used to simulate the cloud) used when no local storage directory is explicitly provided. ([PR #436](https://github.com/drivendataorg/cloudpathlib/pull/436), [PR #462](https://github.com/drivendataorg/cloudpathlib/pull/462)) + - Changed `LocalClient` so that client instances using the default storage access the default local storage directory through the `get_default_storage_dir` rather than having an explicit reference to the path set at instantiation. This means that calling `get_default_storage_dir` will reset the local storage for all clients using the default local storage, whether the client has already been instantiated or is instantiated after resetting. This fixes unintuitive behavior where `reset_local_storage` did not reset local storage when using the default client. (Issue [#414](https://github.com/drivendataorg/cloudpathlib/issues/414)) + - Added a new `local_storage_dir` property to `LocalClient`. This will return the current local storage directory used by that client instance. + by reference through the `get_default_ rather than with an explicit. ## v0.18.1 (2024-02-26) -- Fixed import error due to incompatible `google-cloud-storage` by not using `transfer_manager` if it is not available. ([Issue #408](https://github.com/drivendataorg/cloudpathlib/issues/408), [PR #410](https://github.com/drivendataorg/cloudpathlib/pull/410)) +- Fixed import error due to incompatible `google-cloud-storage` by not using `transfer_manager` if it is not available. ([Issue #408](https://github.com/drivendataorg/cloudpathlib/issues/408), [PR #410](https://github.com/drivendataorg/cloudpathlib/pull/410)) Includes all changes from v0.18.0. diff --git a/cloudpathlib/local/localclient.py b/cloudpathlib/local/localclient.py index a0eadb0a..e057390c 100644 --- a/cloudpathlib/local/localclient.py +++ b/cloudpathlib/local/localclient.py @@ -6,7 +6,7 @@ import shutil from tempfile import TemporaryDirectory from time import sleep -from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union +from typing import Callable, ClassVar, Dict, Iterable, List, Optional, Tuple, Union from ..client import Client from ..enums import FileCacheMode @@ -17,7 +17,12 @@ class LocalClient(Client): """Abstract client for accessing objects the local filesystem. Subclasses are as a monkeypatch substitutes for normal Client subclasses when writing tests.""" - _default_storage_temp_dir = None + # Class-level variable to tracks the default storage directory for this client class + # that is used if a client is instantiated without a directory being explicitly provided + _default_storage_temp_dir: ClassVar[Optional[TemporaryDirectory]] = None + + # Instance-level variable that tracks the local storage directory for this client + _local_storage_dir: Optional[Union[str, os.PathLike]] def __init__( self, @@ -28,10 +33,7 @@ def __init__( content_type_method: Optional[Callable] = mimetypes.guess_type, **kwargs, ): - # setup caching and local versions of file. use default temp dir if not provided - if local_storage_dir is None: - local_storage_dir = self.get_default_storage_dir() - self._local_storage_dir = Path(local_storage_dir) + self._local_storage_dir = local_storage_dir super().__init__( local_cache_dir=local_cache_dir, @@ -41,6 +43,10 @@ def __init__( @classmethod def get_default_storage_dir(cls) -> Path: + """Return the default storage directory for this client class. This is used if a client + is instantiated without a storage directory being explicitly provided. In this usage, + "storage" refers to the local storage that simulates the cloud. + """ if cls._default_storage_temp_dir is None: cls._default_storage_temp_dir = TemporaryDirectory() _temp_dirs_to_clean.append(cls._default_storage_temp_dir) @@ -48,17 +54,34 @@ def get_default_storage_dir(cls) -> Path: @classmethod def reset_default_storage_dir(cls) -> Path: + """Reset the default storage directly. This tears down and recreates the directory used by + default for this client class when instantiating a client without explicitly providing + a storage directory. In this usage, "storage" refers to the local storage that simulates + the cloud. + """ cls._default_storage_temp_dir = None return cls.get_default_storage_dir() + @property + def local_storage_dir(self) -> Path: + """The local directory where files are stored for this client. This storage directory is + the one that simulates the cloud. If no storage directory was provided on instantiating the + client, the default storage directory for this client class is used. + """ + if self._local_storage_dir is None: + # No explicit local storage was provided on instantiating the client. + # Use the default storage directory for this class. + return self.get_default_storage_dir() + return Path(self._local_storage_dir) + def _cloud_path_to_local(self, cloud_path: "LocalPath") -> Path: - return self._local_storage_dir / cloud_path._no_prefix + return self.local_storage_dir / cloud_path._no_prefix def _local_to_cloud_path(self, local_path: Union[str, os.PathLike]) -> "LocalPath": local_path = Path(local_path) cloud_prefix = self._cloud_meta.path_class.cloud_prefix return self.CloudPath( - f"{cloud_prefix}{PurePosixPath(local_path.relative_to(self._local_storage_dir))}" + f"{cloud_prefix}{PurePosixPath(local_path.relative_to(self.local_storage_dir))}" ) def _download_file(self, cloud_path: "LocalPath", local_path: Union[str, os.PathLike]) -> Path: @@ -89,15 +112,9 @@ def _is_file(self, cloud_path: "LocalPath") -> bool: def _list_dir( self, cloud_path: "LocalPath", recursive=False ) -> Iterable[Tuple["LocalPath", bool]]: - if recursive: - return ( - (self._local_to_cloud_path(obj), obj.is_dir()) - for obj in self._cloud_path_to_local(cloud_path).glob("**/*") - ) - return ( - (self._local_to_cloud_path(obj), obj.is_dir()) - for obj in self._cloud_path_to_local(cloud_path).iterdir() - ) + pattern = "**/*" if recursive else "*" + for obj in self._cloud_path_to_local(cloud_path).glob(pattern): + yield (self._local_to_cloud_path(obj), obj.is_dir()) def _md5(self, cloud_path: "LocalPath") -> str: return md5(self._cloud_path_to_local(cloud_path).read_bytes()).hexdigest() diff --git a/tests/test_caching.py b/tests/test_caching.py index 2d0d0a76..71890c43 100644 --- a/tests/test_caching.py +++ b/tests/test_caching.py @@ -4,7 +4,7 @@ from pathlib import Path import pytest -from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed +from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_random_exponential from cloudpathlib.enums import FileCacheMode from cloudpathlib.exceptions import ( @@ -505,8 +505,9 @@ def test_manual_cache_clearing(rig: CloudProviderTestRig): # in CI there can be a lag before the cleanup actually happens @retry( retry=retry_if_exception_type(AssertionError), - wait=wait_fixed(1), - stop=stop_after_attempt(5), + wait=wait_random_exponential(multiplier=0.5, max=5), + stop=stop_after_attempt(10), + reraise=True, ) def _resilient_assert(): gc.collect() # force gc before asserting diff --git a/tests/test_cloudpath_instantiation.py b/tests/test_cloudpath_instantiation.py index b625a47b..64951495 100644 --- a/tests/test_cloudpath_instantiation.py +++ b/tests/test_cloudpath_instantiation.py @@ -92,6 +92,9 @@ def test_dependencies_not_loaded(rig, monkeypatch): with pytest.raises(MissingDependenciesError): rig.create_cloud_path("dir_0/file0_0.txt") + # manual reset for teardown order so teardown doesn't fail + monkeypatch.setattr(rig.path_class._cloud_meta, "dependencies_loaded", True) + def test_is_pathlike(rig): p = rig.create_cloud_path("dir_0") diff --git a/tests/test_local.py b/tests/test_local.py index e14a8586..a983fdd3 100644 --- a/tests/test_local.py +++ b/tests/test_local.py @@ -59,6 +59,8 @@ def test_default_storage_dir(client_class, monkeypatch): p1.write_text("hello") assert p1.exists() assert p1.read_text() == "hello" + + # p2 uses a new client, but the simulated "cloud" should be the same assert p2.exists() assert p2.read_text() == "hello" @@ -76,16 +78,51 @@ def test_reset_default_storage_dir(client_class, monkeypatch): cloud_prefix = client_class._cloud_meta.path_class.cloud_prefix p1 = client_class().CloudPath(f"{cloud_prefix}drive/file.txt") - client_class.reset_default_storage_dir() - p2 = client_class().CloudPath(f"{cloud_prefix}drive/file.txt") - assert not p1.exists() - assert not p2.exists() - p1.write_text("hello") assert p1.exists() assert p1.read_text() == "hello" + + client_class.reset_default_storage_dir() + + # We've reset the default storage directory, so the file should be gone + assert not p1.exists() + + # Also should be gone for p2, which uses a new client that is still using default storage dir + p2 = client_class().CloudPath(f"{cloud_prefix}drive/file.txt") assert not p2.exists() # clean up client_class.reset_default_storage_dir() + + +def test_reset_default_storage_dir_with_default_client(): + """Test that reset_default_storage_dir resets the storage used by all clients that are using + the default storage directory, such as the default client. + + Regression test for https://github.com/drivendataorg/cloudpathlib/issues/414 + """ + # try default client instantiation + from cloudpathlib.local import LocalS3Path, LocalS3Client + + s3p = LocalS3Path("s3://drive/file.txt") + assert not s3p.exists() + s3p.write_text("hello") + assert s3p.exists() + + LocalS3Client.reset_default_storage_dir() + s3p2 = LocalS3Path("s3://drive/file.txt") + assert not s3p2.exists() + + +@pytest.mark.parametrize("client_class", [LocalAzureBlobClient, LocalGSClient, LocalS3Client]) +def test_glob_matches(client_class, monkeypatch): + if client_class is LocalAzureBlobClient: + monkeypatch.setenv("AZURE_STORAGE_CONNECTION_STRING", "") + + cloud_prefix = client_class._cloud_meta.path_class.cloud_prefix + p = client_class().CloudPath(f"{cloud_prefix}drive/not/exist") + p.mkdir(parents=True) + + # match CloudPath, which returns empty; not glob module, which raises + assert list(p.glob("*")) == []