Skip redundant caching work when the metadata cache already holds an entry.

  * xarray_zarr.py, cache_input: input metadata is only opened and stored when
    its key is not yet present in config.metadata_cache; the fsspec-reference
    branch returns early when the reference key is already present.
  * storage.py: adds __contains__, delegating the membership test to
    self.get_mapper(), which is what makes the `in` checks above possible.
  * tests/test_storage.py: covers the new membership test for a present and
    an absent key.

diff --git a/pangeo_forge_recipes/recipes/xarray_zarr.py b/pangeo_forge_recipes/recipes/xarray_zarr.py
index 1ad006d9..fb20292a 100644
--- a/pangeo_forge_recipes/recipes/xarray_zarr.py
+++ b/pangeo_forge_recipes/recipes/xarray_zarr.py
@@ -157,10 +157,14 @@ def cache_input(input_key: InputKey, *, config: XarrayZarrRecipe) -> None:
     if config.cache_metadata:
         if config.metadata_cache is None:
             raise ValueError("metadata_cache is not set.")
-        logger.info(f"Caching metadata for input '{input_key!s}'")
-        with open_input(input_key, config=config) as ds:
-            input_metadata = ds.to_dict(data=False)
-            config.metadata_cache[_input_metadata_fname(input_key)] = input_metadata
+
+        if _input_metadata_fname(input_key) not in config.metadata_cache:
+            with open_input(input_key, config=config) as ds:
+                logger.info(f"Caching metadata for input '{input_key!s}'")
+                input_metadata = ds.to_dict(data=False)
+                config.metadata_cache[_input_metadata_fname(input_key)] = input_metadata
+        else:
+            logger.info(f"Metadata already cached for input '{input_key!s}'")
 
     if config.open_input_with_fsspec_reference:
         if config.file_pattern.is_opendap:
@@ -168,6 +172,13 @@ def cache_input(input_key: InputKey, *, config: XarrayZarrRecipe) -> None:
         if config.metadata_cache is None:
             raise ValueError("Can't make references; no metadata_cache assigned")
         fname = config.file_pattern[input_key]
+
+        ref_fname = _input_reference_fname(input_key)
+
+        if ref_fname in config.metadata_cache:
+            logger.info("Metadata is already cached with fsspec_reference.")
+            return
+
         if config.input_cache is None:
             protocol = fsspec.utils.get_protocol(fname)
             url = unstrip_protocol(fname, protocol)
@@ -184,7 +195,6 @@ def cache_input(input_key: InputKey, *, config: XarrayZarrRecipe) -> None:
                 **config.file_pattern.fsspec_open_kwargs,
             ) as fp:
                 ref_data = create_hdf5_reference(fp, url, fname)
-        ref_fname = _input_reference_fname(input_key)
         config.metadata_cache[ref_fname] = ref_data
 
 
diff --git a/pangeo_forge_recipes/storage.py b/pangeo_forge_recipes/storage.py
index bf6b2f07..90eaf1fa 100644
--- a/pangeo_forge_recipes/storage.py
+++ b/pangeo_forge_recipes/storage.py
@@ -173,6 +173,9 @@ def __setitem__(self, key: str, value: dict) -> None:
     def __getitem__(self, key: str) -> dict:
         return json.loads(self.get_mapper()[key])
 
+    def __contains__(self, item: str) -> bool:
+        return item in self.get_mapper()
+
     def getitems(self, keys: Sequence[str]) -> dict:
         mapper = self.get_mapper()
         all_meta_raw = mapper.getitems(keys)
diff --git a/tests/test_storage.py b/tests/test_storage.py
index e1548405..f9ea1602 100644
--- a/tests/test_storage.py
+++ b/tests/test_storage.py
@@ -35,6 +35,8 @@ def test_cache(tmp_cache):
 def test_metadata_target(tmp_metadata_target):
     data = {"foo": 1, "bar": "baz"}
     tmp_metadata_target["key1"] = data
+    assert "key1" in tmp_metadata_target
+    assert "key2" not in tmp_metadata_target
     assert tmp_metadata_target["key1"] == data
     assert tmp_metadata_target.getitems(["key1"]) == {"key1": data}
 