Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Env vars for force overwrite functions #437

Merged
merged 12 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- fix: use native `exists()` method in `GSClient`. (PR [#420](https://github.com/drivendataorg/cloudpathlib/pull/420))
- Enhancement: lazy instantiation of default client (PR [#432](https://github.com/drivendataorg/cloudpathlib/issues/432), Issue [#428](https://github.com/drivendataorg/cloudpathlib/issues/428))
- Adds existence check before downloading in `download_to` (Issue [#430](https://github.com/drivendataorg/cloudpathlib/issues/430), PR [#432](https://github.com/drivendataorg/cloudpathlib/pull/432))
- Add env vars `CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD` and `CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD`. (Issue [#393](https://github.com/drivendataorg/cloudpathlib/issues/393), PR [#437](https://github.com/drivendataorg/cloudpathlib/pull/437))

## v0.18.1 (2024-02-26)

Expand Down
55 changes: 35 additions & 20 deletions cloudpathlib/cloudpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ def __eq__(self, other: Any) -> bool:

def __fspath__(self) -> str:
if self.is_file():
self._refresh_cache(force_overwrite_from_cloud=False)
self._refresh_cache()
return str(self._local)

def __lt__(self, other: Any) -> bool:
Expand Down Expand Up @@ -549,8 +549,8 @@ def open(
encoding: Optional[str] = None,
errors: Optional[str] = None,
newline: Optional[str] = None,
force_overwrite_from_cloud: bool = False, # extra kwarg not in pathlib
force_overwrite_to_cloud: bool = False, # extra kwarg not in pathlib
force_overwrite_from_cloud: Optional[bool] = None, # extra kwarg not in pathlib
force_overwrite_to_cloud: Optional[bool] = None, # extra kwarg not in pathlib
) -> IO[Any]:
# if trying to call open on a directory that exists
if self.exists() and not self.is_file():
Expand Down Expand Up @@ -931,7 +931,7 @@ def rmtree(self) -> None:
def upload_from(
self,
source: Union[str, os.PathLike],
force_overwrite_to_cloud: bool = False,
force_overwrite_to_cloud: Optional[bool] = None,
) -> Self:
"""Upload a file or directory to the cloud path."""
source = Path(source)
Expand All @@ -956,24 +956,24 @@ def upload_from(
def copy(
self,
destination: Self,
force_overwrite_to_cloud: bool = False,
force_overwrite_to_cloud: Optional[bool] = None,
) -> Self: ...

@overload
def copy(
self,
destination: Path,
force_overwrite_to_cloud: bool = False,
force_overwrite_to_cloud: Optional[bool] = None,
) -> Path: ...

@overload
def copy(
self,
destination: str,
force_overwrite_to_cloud: bool = False,
force_overwrite_to_cloud: Optional[bool] = None,
) -> Union[Path, "CloudPath"]: ...

def copy(self, destination, force_overwrite_to_cloud=False):
def copy(self, destination, force_overwrite_to_cloud=None):
"""Copy self to destination folder of file, if self is a file."""
if not self.exists() or not self.is_file():
raise ValueError(
Expand All @@ -992,6 +992,11 @@ def copy(self, destination, force_overwrite_to_cloud=False):
if destination.exists() and destination.is_dir():
destination = destination / self.name

if force_overwrite_to_cloud is None:
force_overwrite_to_cloud = os.environ.get(
"CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD", "False"
).lower() in ["1", "true"]

if (
not force_overwrite_to_cloud
and destination.exists()
Expand Down Expand Up @@ -1019,27 +1024,27 @@ def copy(self, destination, force_overwrite_to_cloud=False):
def copytree(
self,
destination: Self,
force_overwrite_to_cloud: bool = False,
force_overwrite_to_cloud: Optional[bool] = None,
ignore: Optional[Callable[[str, Iterable[str]], Container[str]]] = None,
) -> Self: ...

@overload
def copytree(
self,
destination: Path,
force_overwrite_to_cloud: bool = False,
force_overwrite_to_cloud: Optional[bool] = None,
ignore: Optional[Callable[[str, Iterable[str]], Container[str]]] = None,
) -> Path: ...

@overload
def copytree(
self,
destination: str,
force_overwrite_to_cloud: bool = False,
force_overwrite_to_cloud: Optional[bool] = None,
ignore: Optional[Callable[[str, Iterable[str]], Container[str]]] = None,
) -> Union[Path, "CloudPath"]: ...

def copytree(self, destination, force_overwrite_to_cloud=False, ignore=None):
def copytree(self, destination, force_overwrite_to_cloud=None, ignore=None):
"""Copy self to a directory, if self is a directory."""
if not self.is_dir():
raise CloudPathNotADirectoryError(
Expand Down Expand Up @@ -1112,19 +1117,24 @@ def _new_cloudpath(self, path: Union[str, os.PathLike]) -> Self:

return self.client.CloudPath(path)

def _refresh_cache(self, force_overwrite_from_cloud: bool = False) -> None:
def _refresh_cache(self, force_overwrite_from_cloud: Optional[bool] = None) -> None:
try:
stats = self.stat()
except NoStatError:
# nothing to cache if the file does not exist; happens when creating
# new files that will be uploaded
return

if force_overwrite_from_cloud is None:
force_overwrite_from_cloud = os.environ.get(
"CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD", "False"
).lower() in ["1", "true"]

# if not exist or cloud newer
if (
not self._local.exists()
force_overwrite_from_cloud
or not self._local.exists()
or (self._local.stat().st_mtime < stats.st_mtime)
or force_overwrite_from_cloud
):
# ensure there is a home for the file
self._local.parent.mkdir(parents=True, exist_ok=True)
Expand All @@ -1138,7 +1148,7 @@ def _refresh_cache(self, force_overwrite_from_cloud: bool = False) -> None:
f"Local file ({self._local}) for cloud path ({self}) has been changed by your code, but "
f"is being requested for download from cloud. Either (1) push your changes to the cloud, "
f"(2) remove the local file, or (3) pass `force_overwrite_from_cloud=True` to "
f"overwrite."
f"overwrite; or set env var CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD=1."
)

# if local newer but not dirty, it was updated
Expand All @@ -1148,12 +1158,12 @@ def _refresh_cache(self, force_overwrite_from_cloud: bool = False) -> None:
f"Local file ({self._local}) for cloud path ({self}) is newer on disk, but "
f"is being requested for download from cloud. Either (1) push your changes to the cloud, "
f"(2) remove the local file, or (3) pass `force_overwrite_from_cloud=True` to "
f"overwrite."
f"overwrite; or set env var CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD=1."
)

def _upload_local_to_cloud(
self,
force_overwrite_to_cloud: bool = False,
force_overwrite_to_cloud: Optional[bool] = None,
) -> Self:
"""Uploads cache file at self._local to the cloud"""
# We should never try to be syncing entire directories; we should only
Expand All @@ -1178,11 +1188,16 @@ def _upload_local_to_cloud(
def _upload_file_to_cloud(
self,
local_path: Path,
force_overwrite_to_cloud: bool = False,
force_overwrite_to_cloud: Optional[bool] = None,
) -> Self:
"""Uploads file at `local_path` to the cloud if there is not a newer file
already there.
"""
if force_overwrite_to_cloud is None:
force_overwrite_to_cloud = os.environ.get(
"CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD", "False"
).lower() in ["1", "true"]

if force_overwrite_to_cloud:
# If we are overwriting no need to perform any checks, so we can save time
self.client._upload_file(
Expand Down Expand Up @@ -1210,7 +1225,7 @@ def _upload_file_to_cloud(
f"Local file ({self._local}) for cloud path ({self}) is newer in the cloud disk, but "
f"is being requested to be uploaded to the cloud. Either (1) redownload changes from the cloud or "
f"(2) pass `force_overwrite_to_cloud=True` to "
f"overwrite."
f"overwrite; or set env var CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD=1."
)

# =========== pydantic integration special methods ===============
Expand Down
6 changes: 5 additions & 1 deletion docs/docs/caching.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -398,10 +398,14 @@
"\n",
"The `CloudPath.open` method supports a `force_overwrite_from_cloud` kwarg to force overwriting your local version.\n",
"\n",
"You can make overwriting the cache with the cloud copy the default by setting the environment variable `CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD=1` or `CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD=True`.\n",
"\n",
"`OverwriteNewerCloudError`\n",
"This exception is raised if we are asked to upload a file, but the one on the cloud is newer than our local version. This likely means that a separate process has updated the cloud version, and we don't want to overwrite and lose that new data in the cloud.\n",
"\n",
"The `CloudPath.open` method supports a `force_overwrite_to_cloud` kwarg to force overwriting the cloud version.\n",
"\n",
"You can make overwriting the cloud copy with the local one being uploaded by setting the environment variable `CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD=1` or `CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD=True`.\n",
"\n"
]
},
Expand Down Expand Up @@ -773,7 +777,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.12.1"
},
"vscode": {
"interpreter": {
Expand Down
111 changes: 110 additions & 1 deletion tests/test_caching.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
import gc
import os
from time import sleep
from pathlib import Path

import pytest

from cloudpathlib.enums import FileCacheMode
from cloudpathlib.exceptions import InvalidConfigurationException
from cloudpathlib.exceptions import (
InvalidConfigurationException,
OverwriteNewerCloudError,
OverwriteNewerLocalError,
)
from tests.conftest import CloudProviderTestRig


Expand Down Expand Up @@ -344,6 +349,107 @@ def test_environment_variable_local_cache_dir(rig: CloudProviderTestRig, tmpdir)
os.environ["CLOUDPATHLIB_LOCAL_CACHE_DIR"] = original_env_setting


def test_environment_variables_force_overwrite_from(rig: CloudProviderTestRig, tmpdir):
# environment instantiation
original_env_setting = os.environ.get("CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD", "")

try:
pjbull marked this conversation as resolved.
Show resolved Hide resolved
# explicitly false overwrite
os.environ["CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD"] = "False"

p = rig.create_cloud_path("dir_0/file0_0.txt")
p._refresh_cache() # dl to cache
p._local.touch() # update mod time

with pytest.raises(OverwriteNewerLocalError):
p._refresh_cache()

for val in ["1", "True", "TRUE"]:
os.environ["CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD"] = val

p = rig.create_cloud_path("dir_0/file0_0.txt")

orig_mod_time = p.stat().st_mtime

p._refresh_cache() # dl to cache
p._local.touch() # update mod time

new_mod_time = p._local.stat().st_mtime

p._refresh_cache()
assert p._local.stat().st_mtime == orig_mod_time
assert p._local.stat().st_mtime < new_mod_time

finally:
os.environ["CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD"] = original_env_setting


def test_environment_variables_force_overwrite_to(rig: CloudProviderTestRig, tmpdir):
# environment instantiation
original_env_setting = os.environ.get("CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD", "")

try:
# explicitly false overwrite
os.environ["CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD"] = "False"

p = rig.create_cloud_path("dir_0/file0_0.txt")

new_local = Path((tmpdir / "new_content.txt").strpath)
new_local.write_text("hello")
new_also_cloud = rig.create_cloud_path("dir_0/another_cloud_file.txt")
new_also_cloud.write_text("newer")

# make cloud newer than local or other cloud file
os.utime(new_local, (new_local.stat().st_mtime - 2, new_local.stat().st_mtime - 2))

p.write_text("updated")

with pytest.raises(OverwriteNewerCloudError):
p._upload_file_to_cloud(new_local)

with pytest.raises(OverwriteNewerCloudError):
# copy short-circuits upload if same client, so we test separately

# raises if destination is newer
new_also_cloud.write_text("newest")
sleep(0.01)
p.copy(new_also_cloud)

for val in ["1", "True", "TRUE"]:
os.environ["CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD"] = val

p = rig.create_cloud_path("dir_0/file0_0.txt")

new_local.write_text("updated")

# make cloud newer than local
os.utime(new_local, (new_local.stat().st_mtime - 2, new_local.stat().st_mtime - 2))

p.write_text("updated")

orig_cloud_mod_time = p.stat().st_mtime

assert p.stat().st_mtime >= new_local.stat().st_mtime

# would raise if not set
sleep(1.01) # give time so not equal when rounded
p._upload_file_to_cloud(new_local)
assert p.stat().st_mtime > orig_cloud_mod_time # cloud now overwritten

new_also_cloud = rig.create_cloud_path("dir_0/another_cloud_file.txt")
sleep(1.01) # give time so not equal when rounded
new_also_cloud.write_text("newer")

new_cloud_mod_time = new_also_cloud.stat().st_mtime

assert p.stat().st_mtime < new_cloud_mod_time # would raise if not set
p.copy(new_also_cloud)
assert new_also_cloud.stat().st_mtime >= new_cloud_mod_time

finally:
os.environ["CLOUDPATHLIB_FORCE_OVERWRITE_TO_CLOUD"] = original_env_setting


def test_manual_cache_clearing(rig: CloudProviderTestRig):
# use client that we can delete rather than default
client = rig.client_class(**rig.required_client_kwargs)
Expand Down Expand Up @@ -395,6 +501,9 @@ def test_manual_cache_clearing(rig: CloudProviderTestRig):
del cp
del client

gc.collect() # force gc before asserting
sleep(0.5) # give time to delete

assert not local_cache_path.exists()
assert not client_cache_folder.exists()

Expand Down
Loading