Skip to content

Commit

Permalink
Revert "add & test verify_existing option to cache"
Browse files Browse the repository at this point in the history
This reverts commit c8e03ae.
  • Loading branch information
cisaacstern committed Oct 3, 2023
1 parent 086275a commit 8a0f3fe
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 46 deletions.
11 changes: 2 additions & 9 deletions pangeo_forge_recipes/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,26 +171,19 @@ def _full_path(self, path: str) -> str:
return os.path.join(self.root_path, new_path)


@dataclass
class CacheFSSpecTarget(FlatFSSpecTarget):
"""Alias for FlatFSSpecTarget"""

verify_existing: bool = True

def cache_file(self, fname: str, secrets: Optional[dict], **open_kwargs) -> None:
# check and see if the file already exists in the cache
logger.info(f"Caching file '{fname}'")
exists = self.exists(fname)
if exists and self.verify_existing:
if self.exists(fname):
cached_size = self.size(fname)
remote_size = _get_url_size(fname, secrets, **open_kwargs)
if cached_size == remote_size:
# TODO: add checksumming here
logger.info(f"File '{fname}' is already cached, and matches remote size.")
logger.info(f"File '{fname}' is already cached")
return
elif exists and not self.verify_existing:
logger.info(f"File '{fname}' is already cached, skipping verification.")
return

input_opener = _get_opener(fname, secrets, **open_kwargs)
target_opener = self.open(fname, mode="wb")
Expand Down
37 changes: 0 additions & 37 deletions tests/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,43 +91,6 @@ def test_caching_only_truncates_long_fnames_for_local_fs(fs_cls, fname_longer_th
assert len(fname_in_full_path) > POSIX_MAX_FNAME_LENGTH


@pytest.mark.parametrize("verify_existing", [True, False])
def test_cache_no_verify_existing(tmpdir_factory: pytest.TempdirFactory, verify_existing: bool):
    """Check how ``verify_existing`` affects re-caching of a changed source file.

    The source file is cached, overwritten with content of a different
    length, and cached a second time. The content found in the cache
    afterwards depends on the flag:

    * ``verify_existing=False``: the second call is skipped because the
      filename is already present in the cache, so the original data remains.
    * ``verify_existing=True``: the changed length of the source is noticed
      and the file is cached again, so the new data is present.
    """
    src_dir = tmpdir_factory.mktemp("src")
    dst_dir = tmpdir_factory.mktemp("dst")
    cache = CacheFSSpecTarget(LocalFileSystem(), dst_dir, verify_existing=verify_existing)
    src_fname = str(src_dir / "source.txt")

    # Write and cache the source twice; the second payload is longer, so the
    # source size differs from what was cached the first time around.
    for payload in ("0", "00"):
        with open(src_fname, mode="w") as f:
            f.write(payload)
        cache.cache_file(src_fname, secrets=None)

    # Read back whatever ended up in the cache.
    with open(cache._full_path(src_fname)) as f:
        cached_content = f.read()

    # Verification picks up the new data; without it the stale entry is kept.
    expected_content = "00" if verify_existing else "0"
    assert cached_content == expected_content


def test_suffix(tmp_path):
assert str((FSSpecTarget(LocalFileSystem(), tmp_path) / "test").root_path) == str(
tmp_path / "test"
Expand Down

0 comments on commit 8a0f3fe

Please sign in to comment.