Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Env vars for force overwrite functions #437

Merged
merged 12 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- fix: use native `exists()` method in `GSClient`. (PR [#420](https://github.com/drivendataorg/cloudpathlib/pull/420))
- Enhancement: lazy instantiation of default client (PR [#432](https://github.com/drivendataorg/cloudpathlib/issues/432), Issue [#428](https://github.com/drivendataorg/cloudpathlib/issues/428))
- Adds existence check before downloading in `download_to` (Issue [#430](https://github.com/drivendataorg/cloudpathlib/issues/430), PR [#432](https://github.com/drivendataorg/cloudpathlib/pull/432))
- Add env vars `CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD` and `CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD`. (Issue [#393](https://github.com/drivendataorg/cloudpathlib/issues/393), PR [#437](https://github.com/drivendataorg/cloudpathlib/pull/437))

## v0.18.1 (2024-02-26)

Expand Down
30 changes: 20 additions & 10 deletions cloudpathlib/cloudpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@

def __fspath__(self) -> str:
if self.is_file():
self._refresh_cache(force_overwrite_from_cloud=False)
self._refresh_cache()
return str(self._local)

def __lt__(self, other: Any) -> bool:
Expand Down Expand Up @@ -549,7 +549,7 @@
encoding: Optional[str] = None,
errors: Optional[str] = None,
newline: Optional[str] = None,
force_overwrite_from_cloud: bool = False, # extra kwarg not in pathlib
force_overwrite_from_cloud: Optional[bool] = None, # extra kwarg not in pathlib
force_overwrite_to_cloud: bool = False, # extra kwarg not in pathlib
pjbull marked this conversation as resolved.
Show resolved Hide resolved
) -> IO[Any]:
# if trying to call open on a directory that exists
Expand Down Expand Up @@ -1112,19 +1112,24 @@

return self.client.CloudPath(path)

def _refresh_cache(self, force_overwrite_from_cloud: bool = False) -> None:
def _refresh_cache(self, force_overwrite_from_cloud: Optional[bool] = None) -> None:
try:
stats = self.stat()
except NoStatError:
# nothing to cache if the file does not exist; happens when creating
# new files that will be uploaded
return

if force_overwrite_from_cloud is None:
force_overwrite_from_cloud = os.environ.get(
"CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD", "False"
).lower() in ["1", "true"]

# if not exist or cloud newer
if (
not self._local.exists()
force_overwrite_from_cloud
or not self._local.exists()
or (self._local.stat().st_mtime < stats.st_mtime)
or force_overwrite_from_cloud
):
# ensure there is a home for the file
self._local.parent.mkdir(parents=True, exist_ok=True)
Expand All @@ -1138,7 +1143,7 @@
f"Local file ({self._local}) for cloud path ({self}) has been changed by your code, but "
f"is being requested for download from cloud. Either (1) push your changes to the cloud, "
f"(2) remove the local file, or (3) pass `force_overwrite_from_cloud=True` to "
f"overwrite."
f"overwrite; or set env var CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD=1."
)

# if local newer but not dirty, it was updated
Expand All @@ -1148,12 +1153,12 @@
f"Local file ({self._local}) for cloud path ({self}) is newer on disk, but "
f"is being requested for download from cloud. Either (1) push your changes to the cloud, "
f"(2) remove the local file, or (3) pass `force_overwrite_from_cloud=True` to "
f"overwrite."
f"overwrite; or set env var CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD=1."
)

def _upload_local_to_cloud(
self,
force_overwrite_to_cloud: bool = False,
force_overwrite_to_cloud: Optional[bool] = None,
) -> Self:
"""Uploads cache file at self._local to the cloud"""
# We should never try to be syncing entire directories; we should only
Expand All @@ -1178,11 +1183,16 @@
def _upload_file_to_cloud(
self,
local_path: Path,
force_overwrite_to_cloud: bool = False,
force_overwrite_to_cloud: Optional[bool] = None,
) -> Self:
"""Uploads file at `local_path` to the cloud if there is not a newer file
already there.
"""
if force_overwrite_to_cloud is None:
force_overwrite_to_cloud = os.environ.get(

Check warning on line 1192 in cloudpathlib/cloudpath.py

View check run for this annotation

Codecov / codecov/patch

cloudpathlib/cloudpath.py#L1192

Added line #L1192 was not covered by tests
"CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD", "False"
).lower() in ["1", "true"]

if force_overwrite_to_cloud:
# If we are overwriting no need to perform any checks, so we can save time
self.client._upload_file(
Expand Down Expand Up @@ -1210,7 +1220,7 @@
f"Local file ({self._local}) for cloud path ({self}) is newer in the cloud disk, but "
f"is being requested to be uploaded to the cloud. Either (1) redownload changes from the cloud or "
f"(2) pass `force_overwrite_to_cloud=True` to "
f"overwrite."
f"overwrite; or set env var CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD=1."
)

# =========== pydantic integration special methods ===============
Expand Down
6 changes: 5 additions & 1 deletion docs/docs/caching.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -398,10 +398,14 @@
"\n",
"The `CloudPath.open` method supports a `force_overwrite_from_cloud` kwarg to force overwriting your local version.\n",
"\n",
"You can make overwriting the cache with the cloud copy the default by setting the environment variable `CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD=1` or `CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD=True`.\n",
"\n",
"`OverwriteNewerCloudError`\n",
"This exception is raised if we are asked to upload a file, but the one on the cloud is newer than our local version. This likely means that a separate process has updated the cloud version, and we don't want to overwrite and lose that new data in the cloud.\n",
"\n",
"The `CloudPath.open` method supports a `force_overwrite_to_cloud` kwarg to force overwriting the cloud version.\n",
"\n",
"You can make overwriting the cloud copy with the local one being uploaded the default by setting the environment variable `CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD=1` or `CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD=True`.\n",
"\n"
]
},
Expand Down Expand Up @@ -773,7 +777,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.12.1"
},
"vscode": {
"interpreter": {
Expand Down
37 changes: 36 additions & 1 deletion tests/test_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest

from cloudpathlib.enums import FileCacheMode
from cloudpathlib.exceptions import InvalidConfigurationException
from cloudpathlib.exceptions import InvalidConfigurationException, OverwriteNewerLocalError
from tests.conftest import CloudProviderTestRig


Expand Down Expand Up @@ -344,6 +344,41 @@ def test_environment_variable_local_cache_dir(rig: CloudProviderTestRig, tmpdir)
os.environ["CLOUDPATHLIB_LOCAL_CACHE_DIR"] = original_env_setting


def test_environment_variables_force_overwrite(rig: CloudProviderTestRig, tmpdir):
    """Check that CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD controls cache refreshes.

    With the variable set to "False", refreshing the cache after the local
    copy has been touched raises ``OverwriteNewerLocalError``. With "1",
    "True" or "TRUE", the refresh replaces the touched local copy, which the
    test observes through the local file's modification time.
    """
    env_var = "CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD"

    # Keep None when the variable was not set at all, so the finally block can
    # restore the exact prior state instead of leaving an empty-string value
    # behind for later tests.
    original_env_setting = os.environ.get(env_var)

    try:
        # explicitly false overwrite
        os.environ[env_var] = "False"

        p = rig.create_cloud_path("dir_0/file0_0.txt")
        p._refresh_cache()  # dl to cache
        p._local.touch()  # update mod time

        with pytest.raises(OverwriteNewerLocalError):
            p._refresh_cache()

        for val in ["1", "True", "TRUE"]:
            os.environ[env_var] = val

            p = rig.create_cloud_path("dir_0/file0_0.txt")

            orig_mod_time = p.stat().st_mtime

            p._refresh_cache()  # dl to cache
            p._local.touch()  # update mod time

            new_mod_time = p._local.stat().st_mtime

            # the forced refresh must replace the touched local copy
            p._refresh_cache()
            assert p._local.stat().st_mtime == orig_mod_time
            assert p._local.stat().st_mtime < new_mod_time

    finally:
        if original_env_setting is None:
            # the variable did not exist before the test; remove it again
            os.environ.pop(env_var, None)
        else:
            os.environ[env_var] = original_env_setting


def test_manual_cache_clearing(rig: CloudProviderTestRig):
# use client that we can delete rather than default
client = rig.client_class(**rig.required_client_kwargs)
Expand Down
Loading