Skip to content

Commit

Permalink
Implement JSON embed for complex HDF5 arrays
Browse files: browse the repository at this point in the history
  • Loading branch information
martindurant committed Jul 11, 2023
1 parent f272c66 commit d112239
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 1 deletion.
21 changes: 20 additions & 1 deletion kerchunk/hdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import zarr
from zarr.meta import encode_fill_value
import numcodecs

from .codecs import FillStringsCodec
from .utils import _encode_for_JSON

Expand Down Expand Up @@ -358,8 +359,26 @@ def _translator(self, name: str, h5obj: Union[h5py.Dataset, h5py.Group]):
)
for v in h5obj.dtype.names
]
else:
elif self.vlen == "embed":
# embed fails due to https://github.com/zarr-developers/numcodecs/issues/333
data = h5obj[:].tolist()
data2 = []
for d in data:
data2.append(
[
(
_.decode(errors="ignore")
if isinstance(_, bytes)
else _
)
for _ in d
]
)
dt = "O"
kwargs["data"] = data2
kwargs["object_codec"] = numcodecs.JSON()
fill = None
else:
raise NotImplementedError
# Add filter for shuffle
if h5obj.shuffle and h5obj.dtype.kind != "O":
Expand Down
Binary file added kerchunk/tests/NEONDSTowerTemperatureData.hdf5
Binary file not shown.
20 changes: 20 additions & 0 deletions kerchunk/tests/test_hdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,3 +286,23 @@ def test_compact():
m = fsspec.get_mapper("reference://", fo=out)
g = zarr.open(m)
assert np.allclose(g.ancillary_data.atlas_sdp_gps_epoch[:], 1.19880002e09)


def test_embed():
    """Check that ``vlen_encode="embed"`` round-trips the first temperature record.

    The bundled NEON HDF5 file is translated to a reference set, opened
    through fsspec's "reference" filesystem with zarr, and the first element
    of the ``temperature`` array is compared against its expected list of
    strings.
    """
    hdf_path = osp.join(here, "NEONDSTowerTemperatureData.hdf5")
    refs = kerchunk.hdf.SingleHdf5ToZarr(hdf_path, vlen_encode="embed").translate()

    ref_fs = fsspec.filesystem("reference", fo=refs)
    root = zarr.open(ref_fs.get_mapper())
    temperature = root["Domain_10"]["STER"]["min_1"]["boom_1"]["temperature"]
    first_row = temperature[:][0].tolist()

    expected_first_row = [
        "2014-04-01 00:00:00.0",
        "60",
        "6.72064364129017",
        "6.667845743708792",
        "6.774491093631761",
        "0.0012746926446369846",
        "0.004609216572327277",
        "0.01298182345556785",
    ]
    assert first_row == expected_first_row

0 comments on commit d112239

Please sign in to comment.