Skip to content

Commit

Permalink
working with zarr in zip
Browse files Browse the repository at this point in the history
  • Loading branch information
martindurant committed Aug 15, 2023
1 parent f18c9b5 commit 634eeb4
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 5 deletions.
3 changes: 2 additions & 1 deletion kerchunk/netCDF3.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def __init__(
self.threshold = inline_threshold
self.max_chunk_size = max_chunk_size
self.out = {}
self.storage_options = storage_options
with fsspec.open(filename, **(storage_options or {})) as fp:
super().__init__(
fp, *args, mmap=False, mode="r", maskandscale=False, **kwargs
Expand Down Expand Up @@ -259,7 +260,7 @@ def translate(self):
)

if self.threshold > 0:
out = do_inline(out, self.threshold)
out = do_inline(out, self.threshold, remote_options=self.storage_options)
out = _encode_for_JSON(out)

return {"version": 1, "refs": out}
Expand Down
65 changes: 65 additions & 0 deletions kerchunk/tests/test_zarr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import xarray as xr
import pandas as pd
import pytest
import numpy as np

import kerchunk.zarr
import kerchunk.utils


@pytest.fixture(scope="module")
def ds():
ds = xr.Dataset(
{
"x": xr.DataArray(np.linspace(-np.pi, np.pi, 10), dims=["x"]),
"y": xr.DataArray(np.linspace(-np.pi / 2, np.pi / 2, 10), dims=["y"]),
"time": xr.DataArray(pd.date_range("2020", "2021"), dims=["time"]),
},
)
ds["temp"] = (
np.cos(ds.x)
* np.sin(ds.y)
* xr.ones_like(ds.time).astype("float")
* np.random.random(ds.time.shape)
)
return ds


@pytest.fixture
def zarr_in_zip(tmpdir, ds):
def _zip(file):
import os
import zipfile

filename = file + os.path.extsep + "zip"
with zipfile.ZipFile(
filename, "w", compression=zipfile.ZIP_STORED, allowZip64=True
) as fh:
for root, _, filenames in os.walk(file):
for each_filename in filenames:
each_filename = os.path.join(root, each_filename)
fh.write(each_filename, os.path.relpath(each_filename, file))
return filename

fn = f"{tmpdir}/test.zarr"
ds.to_zarr(fn, mode="w")
return _zip(fn)


def test_zarr_in_zip(zarr_in_zip, ds):
out = kerchunk.zarr.ZarrToZarr(
url="zip://", storage_options={"fo": zarr_in_zip}
).translate()
ds2 = xr.open_dataset(
"reference://",
engine="zarr",
backend_kwargs={
"storage_options": {
"fo": out,
"remote_protocol": "zip",
"remote_options": {"fo": zarr_in_zip},
},
"consolidated": False,
},
)
assert ds.equals(ds2)
9 changes: 7 additions & 2 deletions kerchunk/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,13 +134,18 @@ def _encode_for_JSON(store):
return store


def do_inline(store, threshold, remote_options=None):
def do_inline(store, threshold, remote_options=None, remote_protocol=None):
"""Replace short chunks with the value of that chunk
The chunk may need encoding with base64 if not ascii, so actual
length may be larger than threshold.
"""
fs = fsspec.filesystem("reference", fo=store, **(remote_options or {}))
fs = fsspec.filesystem(
"reference",
fo=store,
remote_options=remote_options,
remote_protocol=remote_protocol,
)
out = fs.references.copy()
get_keys = [
k
Expand Down
3 changes: 2 additions & 1 deletion kerchunk/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ def single_zarr(uri_or_store, storage_options=None, inline_threshold=100, inline
refs[k] = mapper[k]
else:
refs[k] = [fsspec.utils._unstrip_protocol(mapper._key_to_str(k), mapper.fs)]
refs = do_inline(refs, inline_threshold)
if inline_threshold:
refs = do_inline(refs, inline_threshold, remote_options=storage_options)
return refs


Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"fill_hdf_strings = kerchunk.codecs:FillStringsCodec",
"FITSAscii = kerchunk.codecs:AsciiTableCodec",
"FITSVarBintable = kerchunk.codecs:VarArrCodec",
"record_member = kerchunk.codecs.RecordArrayMember",
"record_member = kerchunk.codecs:RecordArrayMember",
],
},
zip_safe=False,
Expand Down

0 comments on commit 634eeb4

Please sign in to comment.