Skip to content

Commit

Permalink
Merge pull request #506 from maresb/allow-no-inline
Browse files Browse the repository at this point in the history
Allow disabling inlining for MultiZarrToZarr
  • Loading branch information
martindurant authored Sep 26, 2024
2 parents 36d807c + 0a77860 commit b59e0a6
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 4 deletions.
14 changes: 10 additions & 4 deletions kerchunk/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ class MultiZarrToZarr:
from scratch. Assumes the same coordinates are being concatenated.
"""

inline: int

def __init__(
self,
path,
Expand All @@ -106,7 +108,7 @@ def __init__(
target_options=None,
remote_protocol=None,
remote_options=None,
inline_threshold=500,
inline_threshold: int = 500,
preprocess=None,
postprocess=None,
out=None,
Expand Down Expand Up @@ -584,9 +586,13 @@ def second_pass(self):
key = key.rstrip(".")

ref = fs.references.get(fn)
if isinstance(ref, list) and (
(len(ref) > 1 and ref[2] < self.inline)
or fs.info(fn)["size"] < self.inline
if (
self.inline > 0
and isinstance(ref, list)
and (
(len(ref) > 1 and ref[2] < self.inline)
or fs.info(fn)["size"] < self.inline
)
):
to_download[key] = fn
else:
Expand Down
23 changes: 23 additions & 0 deletions kerchunk/tests/test_combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,29 @@ def test_inline(refs):
assert ref.references["data/0.0.0"].startswith("base64:")


def test_no_inline(refs):
"""Ensure that inline_threshold=0 disables MultiZarrToZarr checking file size.

With inline_threshold=0, translate() is expected to succeed even though one
reference points at a file that was engineered not to exist; with
inline_threshold=1 the same input is expected to raise FileNotFoundError.
"""
# NOTE(review): the ``refs`` argument is not referenced in this body.
# Write a small dataset to an in-memory zarr store and consolidate it into a
# reference set.
ds = xr.Dataset(dict(x=[1, 2, 3]))
ds["y"] = 3 + ds["x"]
store = fsspec.get_mapper("memory://zarr_store")
ds.to_zarr(store, mode="w", consolidated=False)
ref = kerchunk.utils.consolidate(store)
# This type of reference with no offset or total size is produced by
# kerchunk.zarr.single_zarr or equivalently ZarrToZarr.translate.
ref["refs"]["y/0"] = ["file:///tmp/some/data-that-shouldnt-be-accessed"]

mzz_no_inline = MultiZarrToZarr([ref], concat_dims=["x"], inline_threshold=0)
# Should be okay because inline_threshold=0 disables inlining, so we don't
# check the file size in order to see if it should be inlined
mzz_no_inline.translate()

mzz_inline = MultiZarrToZarr([ref], concat_dims=["x"], inline_threshold=1)
with pytest.raises(FileNotFoundError):
# Should raise because we check the file size to see if it should be inlined,
# and the example was engineered so that the file doesn't exist.
mzz_inline.translate()


def test_merge_vars():
a = dict({"version": 1, "refs": dict({"item1": 1})})
b = dict({"version": 1, "refs": dict({"item2": 2})})
Expand Down

0 comments on commit b59e0a6

Please sign in to comment.