Skip to content

Commit

Permalink
Support pcodec v0.3 (zarr-developers#639)
Browse files Browse the repository at this point in the history
* support delta_spec and paging_spec

* coverage, try to fix zarr v3

* explicitly test try_consecutive

* delta_spec default of auto

* pcodec upper bound

* revert zarr test change

* test parameterize, match cases

* docstring and matching cleanup

* add release notes

---------

Co-authored-by: David Stansby <[email protected]>
  • Loading branch information
slevang and dstansby authored Dec 3, 2024
1 parent 9bdbaf0 commit 4b95d66
Show file tree
Hide file tree
Showing 48 changed files with 122 additions and 23 deletions.
11 changes: 11 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ Release notes
Unreleased
----------

Breaking changes
~~~~~~~~~~~~~~~~
* All arguments to the ``PCodec`` constructor except for ``level``
are now keyword only, to support the updated API.
By :user:`Sam Levang <slevang>`, :issue:`623`


Fixes
~~~~~
* Fixes issue with ``Delta`` Zarr 3 codec not working with ``astype``.
Expand All @@ -24,6 +31,10 @@ Fixes

Improvements
~~~~~~~~~~~~
* Add support for ``pcodec`` 0.3. This exposes the new ``delta_spec``
and ``paging_spec`` arguments, but maintains full backwards
compatibility for data written with older package versions.
By :user:`Sam Levang <slevang>`, :issue:`623`
* If an import error is raised when trying to define a codec that is *not*
an optional dependency, it is no longer silently caught. Instead it will
be propagated to the user, as this indicates an issue with the installed
Expand Down
9 changes: 9 additions & 0 deletions fixture/pcodec/codec.06/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"delta_encoding_order": 2,
"delta_spec": "auto",
"equal_pages_up_to": 262144,
"id": "pcodec",
"level": 8,
"mode_spec": "auto",
"paging_spec": "equal_pages_up_to"
}
Binary file added fixture/pcodec/codec.06/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.09.dat
Binary file not shown.
9 changes: 9 additions & 0 deletions fixture/pcodec/codec.07/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"delta_encoding_order": null,
"delta_spec": "try_lookback",
"equal_pages_up_to": 262144,
"id": "pcodec",
"level": 8,
"mode_spec": "auto",
"paging_spec": "equal_pages_up_to"
}
Binary file added fixture/pcodec/codec.07/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.09.dat
Binary file not shown.
9 changes: 9 additions & 0 deletions fixture/pcodec/codec.08/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"delta_encoding_order": null,
"delta_spec": "none",
"equal_pages_up_to": 262144,
"id": "pcodec",
"level": 8,
"mode_spec": "auto",
"paging_spec": "equal_pages_up_to"
}
Binary file added fixture/pcodec/codec.08/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.09.dat
Binary file not shown.
9 changes: 9 additions & 0 deletions fixture/pcodec/codec.09/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"delta_encoding_order": 1,
"delta_spec": "try_consecutive",
"equal_pages_up_to": 262144,
"id": "pcodec",
"level": 8,
"mode_spec": "auto",
"paging_spec": "equal_pages_up_to"
}
Binary file added fixture/pcodec/codec.09/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.09.dat
Binary file not shown.
75 changes: 57 additions & 18 deletions numcodecs/pcodec.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from numcodecs.compat import ensure_contiguous_ndarray

try:
from pcodec import ChunkConfig, ModeSpec, PagingSpec, standalone
from pcodec import ChunkConfig, DeltaSpec, ModeSpec, PagingSpec, standalone
except ImportError: # pragma: no cover
standalone = None

Expand All @@ -27,14 +27,21 @@ class PCodec(Codec):
level : int
A compression level from 0-12, where 12 takes the longest and compresses
the most.
delta_encoding_order : init or None
Either a delta encoding level from 0-7 or None. If set to None, pcodec
will try to infer the optimal delta encoding order.
mode_spec : {'auto', 'classic'}
mode_spec : {"auto", "classic"}
Configures whether Pcodec should try to infer the best "mode" or
structure of the data (e.g. approximate multiples of 0.1) to improve
compression ratio, or skip this step and just use the numbers as-is
(Classic mode).
(Classic mode). Note that the "try*" specs are not currently supported.
delta_spec : {"auto", "none", "try_consecutive", "try_lookback"}
Configures the delta encoding strategy. By default, uses "auto" which
will try to infer the best encoding order.
paging_spec : {"equal_pages_up_to"}
Configures the paging strategy. Only "equal_pages_up_to" is currently
supported.
delta_encoding_order : int or None
Explicit delta encoding level from 0-7. Only valid if delta_spec is
"try_consecutive" or "auto"; with "auto", a non-None order is applied via
"try_consecutive" (backwards compatibility with older versions of this codec).
equal_pages_up_to : int
Divide the chunk into equal pages of up to this many numbers.
"""
Expand All @@ -44,39 +51,71 @@ class PCodec(Codec):
def __init__(
self,
level: int = 8,
*,
mode_spec: Literal["auto", "classic"] = "auto",
delta_spec: Literal["auto", "none", "try_consecutive", "try_lookback"] = "auto",
paging_spec: Literal["equal_pages_up_to"] = "equal_pages_up_to",
delta_encoding_order: Optional[int] = None,
equal_pages_up_to: int = 262144,
# TODO one day, add support for the Try* mode specs
mode_spec: Literal['auto', 'classic'] = 'auto',
equal_pages_up_to: int = DEFAULT_MAX_PAGE_N,
):
if standalone is None: # pragma: no cover
raise ImportError("pcodec must be installed to use the PCodec codec.")

# note that we use `level` instead of `compression_level` to
# match other codecs
self.level = level
self.mode_spec = mode_spec
self.delta_spec = delta_spec
self.paging_spec = paging_spec
self.delta_encoding_order = delta_encoding_order
self.equal_pages_up_to = equal_pages_up_to
self.mode_spec = mode_spec

def encode(self, buf):
buf = ensure_contiguous_ndarray(buf)

def _get_chunk_config(self):
match self.mode_spec:
case 'auto':
case "auto":
mode_spec = ModeSpec.auto()
case 'classic':
case "classic":
mode_spec = ModeSpec.classic()
case _:
raise ValueError(f"unknown value for mode_spec: {self.mode_spec}")
paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
raise ValueError(f"mode_spec {self.mode_spec} is not supported")

if self.delta_encoding_order is not None and self.delta_spec == "auto":
# backwards compat for before delta_spec was introduced
delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)
elif self.delta_encoding_order is not None and self.delta_spec != "try_consecutive":
raise ValueError(
"delta_encoding_order can only be set for delta_spec='try_consecutive'"
)
else:
match self.delta_spec:
case "auto":
delta_spec = DeltaSpec.auto()
case "none":
delta_spec = DeltaSpec.none()
case "try_consecutive":
delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)
case "try_lookback":
delta_spec = DeltaSpec.try_lookback()
case _:
raise ValueError(f"delta_spec {self.delta_spec} is not supported")

match self.paging_spec:
case "equal_pages_up_to":
paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
case _:
raise ValueError(f"paging_spec {self.paging_spec} is not supported")

config = ChunkConfig(
compression_level=self.level,
delta_encoding_order=self.delta_encoding_order,
delta_spec=delta_spec,
mode_spec=mode_spec,
paging_spec=paging_spec,
)
return config

def encode(self, buf):
    """Compress ``buf`` with pcodec.

    The input is first passed through ``ensure_contiguous_ndarray``; the
    chunk configuration is then built from this codec's settings by
    ``_get_chunk_config`` and both are handed to
    ``standalone.simple_compress``, whose result is returned unchanged.

    Raises
    ------
    ValueError
        Propagated from ``_get_chunk_config`` when the codec was
        constructed with an unsupported or inconsistent setting.
    """
    contiguous = ensure_contiguous_ndarray(buf)
    return standalone.simple_compress(contiguous, self._get_chunk_config())

def decode(self, buf, out=None):
Expand Down
21 changes: 17 additions & 4 deletions numcodecs/tests/test_pcodec.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,12 @@
PCodec(level=1),
PCodec(level=5),
PCodec(level=9),
PCodec(mode_spec='classic'),
PCodec(mode_spec="classic"),
PCodec(equal_pages_up_to=300),
PCodec(delta_encoding_order=2),
PCodec(delta_spec="try_lookback"),
PCodec(delta_spec="none"),
PCodec(delta_spec="try_consecutive", delta_encoding_order=1),
]


Expand Down Expand Up @@ -56,15 +60,24 @@ def test_config():
check_config(codec)


def test_invalid_config_error():
codec = PCodec(mode_spec='bogus')
@pytest.mark.parametrize("param", ["mode_spec", "delta_spec", "paging_spec"])
def test_invalid_config_error(param):
codec = PCodec(**{param: "bogus"})
with pytest.raises(ValueError):
check_encode_decode_array_to_bytes(arrays[0], codec)


def test_invalid_delta_encoding_combo():
    """Reject an explicit delta_encoding_order combined with delta_spec="none"."""
    # The constructor accepts the combination; the ValueError is expected
    # to surface when the codec is exercised by the round-trip check.
    conflicting = PCodec(delta_spec="none", delta_encoding_order=2)
    with pytest.raises(ValueError):
        check_encode_decode_array_to_bytes(arrays[0], conflicting)


def test_repr():
check_repr(
"PCodec(delta_encoding_order=None, equal_pages_up_to=262144, level=3, mode_spec='auto')"
"PCodec(delta_encoding_order=None, delta_spec='auto',"
" equal_pages_up_to=262144, level=3, mode_spec='auto',"
" paging_spec='equal_pages_up_to')"
)


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ zfpy = [
"numpy<2.0.0",
]
pcodec = [
"pcodec>=0.2,<0.3",
"pcodec>=0.3,<0.4",
]
crc32c = [
"crc32c>=2.7",
Expand Down

0 comments on commit 4b95d66

Please sign in to comment.