Skip to content

Commit

Permalink
Merge pull request #243 from alxmrs/xr-zarr-metadata-local
Browse files Browse the repository at this point in the history
Skip re-computing metadata cache.
  • Loading branch information
rabernat authored Jan 31, 2022
2 parents d5dbbdb + 8d86aa5 commit 3b20ff6
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 5 deletions.
20 changes: 15 additions & 5 deletions pangeo_forge_recipes/recipes/xarray_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,17 +157,28 @@ def cache_input(input_key: InputKey, *, config: XarrayZarrRecipe) -> None:
if config.cache_metadata:
if config.metadata_cache is None:
raise ValueError("metadata_cache is not set.")
logger.info(f"Caching metadata for input '{input_key!s}'")
with open_input(input_key, config=config) as ds:
input_metadata = ds.to_dict(data=False)
config.metadata_cache[_input_metadata_fname(input_key)] = input_metadata

# Only open the input and extract its metadata when no entry for this input
# exists in the metadata cache yet; otherwise just report the cache hit.
metadata_fname = _input_metadata_fname(input_key)
if metadata_fname not in config.metadata_cache:
    with open_input(input_key, config=config) as ds:
        logger.info(f"Caching metadata for input '{input_key!s}'")
        # data=False: serialize the dataset description without array values.
        input_metadata = ds.to_dict(data=False)
        config.metadata_cache[metadata_fname] = input_metadata
else:
    logger.info(f"Metadata already cached for input '{input_key!s}'")

if config.open_input_with_fsspec_reference:
if config.file_pattern.is_opendap:
raise ValueError("Can't make references for opendap inputs")
if config.metadata_cache is None:
raise ValueError("Can't make references; no metadata_cache assigned")
fname = config.file_pattern[input_key]

ref_fname = _input_reference_fname(input_key)

if ref_fname in config.metadata_cache:
logger.info("Metadata is already cached with fsspec_reference.")
return

if config.input_cache is None:
protocol = fsspec.utils.get_protocol(fname)
url = unstrip_protocol(fname, protocol)
Expand All @@ -184,7 +195,6 @@ def cache_input(input_key: InputKey, *, config: XarrayZarrRecipe) -> None:
**config.file_pattern.fsspec_open_kwargs,
) as fp:
ref_data = create_hdf5_reference(fp, url, fname)
ref_fname = _input_reference_fname(input_key)
config.metadata_cache[ref_fname] = ref_data


Expand Down
3 changes: 3 additions & 0 deletions pangeo_forge_recipes/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,9 @@ def __setitem__(self, key: str, value: dict) -> None:
def __getitem__(self, key: str) -> dict:
    """Look up ``key`` in the mapper and return the JSON-decoded value."""
    raw_value = self.get_mapper()[key]
    return json.loads(raw_value)

def __contains__(self, item: str) -> bool:
    """Report whether ``item`` is a key of the mapper from ``get_mapper()``."""
    mapper = self.get_mapper()
    return item in mapper

def getitems(self, keys: Sequence[str]) -> dict:
mapper = self.get_mapper()
all_meta_raw = mapper.getitems(keys)
Expand Down
2 changes: 2 additions & 0 deletions tests/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ def test_cache(tmp_cache):
def test_metadata_target(tmp_metadata_target):
    """Round-trip one entry through the metadata target fixture."""
    stored_key, missing_key = "key1", "key2"
    payload = {"foo": 1, "bar": "baz"}
    tmp_metadata_target[stored_key] = payload

    # Membership: the written key is present, an unwritten key is not.
    assert stored_key in tmp_metadata_target
    assert missing_key not in tmp_metadata_target

    # Single-key and batched reads both return what was written.
    assert tmp_metadata_target[stored_key] == payload
    fetched = tmp_metadata_target.getitems([stored_key])
    assert fetched == {stored_key: payload}

Expand Down

0 comments on commit 3b20ff6

Please sign in to comment.