Skip to content

Commit b2c40a8

Browse files
committed
support delta_spec and paging_spec
1 parent 29995e3 commit b2c40a8

36 files changed

+95
-21
lines changed

fixture/pcodec/codec.06/config.json

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"delta_encoding_order": 2,
3+
"delta_spec": null,
4+
"equal_pages_up_to": 262144,
5+
"id": "pcodec",
6+
"level": 8,
7+
"mode_spec": "auto",
8+
"paging_spec": "equal_pages_up_to"
9+
}
30 Bytes
Binary file not shown.
42 Bytes
Binary file not shown.
30 Bytes
Binary file not shown.
42 Bytes
Binary file not shown.
237 Bytes
Binary file not shown.
202 Bytes
Binary file not shown.
5.73 KB
Binary file not shown.
5.73 KB
Binary file not shown.
7.58 KB
Binary file not shown.
814 Bytes
Binary file not shown.

fixture/pcodec/codec.07/config.json

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"delta_encoding_order": null,
3+
"delta_spec": "try_lookback",
4+
"equal_pages_up_to": 262144,
5+
"id": "pcodec",
6+
"level": 8,
7+
"mode_spec": "auto",
8+
"paging_spec": "equal_pages_up_to"
9+
}
34 Bytes
Binary file not shown.
42 Bytes
Binary file not shown.
34 Bytes
Binary file not shown.
42 Bytes
Binary file not shown.
159 Bytes
Binary file not shown.
126 Bytes
Binary file not shown.
5.74 KB
Binary file not shown.
5.72 KB
Binary file not shown.
7.65 KB
Binary file not shown.
728 Bytes
Binary file not shown.

fixture/pcodec/codec.08/config.json

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"delta_encoding_order": null,
3+
"delta_spec": "none",
4+
"equal_pages_up_to": 262144,
5+
"id": "pcodec",
6+
"level": 8,
7+
"mode_spec": "auto",
8+
"paging_spec": "equal_pages_up_to"
9+
}
1.24 KB
Binary file not shown.
1.25 KB
Binary file not shown.
1.24 KB
Binary file not shown.
1.25 KB
Binary file not shown.
1.74 KB
Binary file not shown.
5.28 KB
Binary file not shown.
5.57 KB
Binary file not shown.
5.57 KB
Binary file not shown.
7.35 KB
Binary file not shown.
579 Bytes
Binary file not shown.

numcodecs/pcodec.py

+52-17
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from numcodecs.compat import ensure_contiguous_ndarray
55

66
try:
7-
from pcodec import ChunkConfig, ModeSpec, PagingSpec, standalone
7+
from pcodec import ChunkConfig, DeltaSpec, ModeSpec, PagingSpec, standalone
88
except ImportError: # pragma: no cover
99
standalone = None
1010

@@ -27,14 +27,17 @@ class PCodec(Codec):
2727
level : int
2828
A compression level from 0-12, where 12 takes the longest and compresses
2929
the most.
30-
delta_encoding_order : init or None
31-
Either a delta encoding level from 0-7 or None. If set to None, pcodec
32-
will try to infer the optimal delta encoding order.
33-
mode_spec : {'auto', 'classic'}
30+
mode_spec : {"auto", "classic"}
3431
Configures whether Pcodec should try to infer the best "mode" or
3532
structure of the data (e.g. approximate multiples of 0.1) to improve
3633
compression ratio, or skip this step and just use the numbers as-is
3734
(Classic mode).
35+
delta_spec : {"auto", "none", "try_consecutive", "try_lookback"} or None
36+
Configures the delta encoding strategy. By default, uses "auto" which
37+
will try to infer the best encoding order.
38+
delta_encoding_order : int or None
39+
Explicit delta encoding order from 0-7. Only valid if delta_spec is
40+
"try_consecutive" or None.
3841
equal_pages_up_to : int
3942
Divide the chunk into equal pages of up to this many numbers.
4043
"""
@@ -44,39 +47,71 @@ class PCodec(Codec):
4447
def __init__(
4548
self,
4649
level: int = 8,
50+
mode_spec: Literal["auto", "classic"] = "auto",
51+
delta_spec: Literal["auto", "none", "try_consecutive", "try_lookback"] | None = None,
52+
paging_spec: Literal["equal_pages_up_to"] = "equal_pages_up_to",
4753
delta_encoding_order: Optional[int] = None,
48-
equal_pages_up_to: int = 262144,
49-
# TODO one day, add support for the Try* mode specs
50-
mode_spec: Literal['auto', 'classic'] = 'auto',
54+
equal_pages_up_to: int = DEFAULT_MAX_PAGE_N,
5155
):
5256
if standalone is None: # pragma: no cover
5357
raise ImportError("pcodec must be installed to use the PCodec codec.")
5458

5559
# note that we use `level` instead of `compression_level` to
5660
# match other codecs
5761
self.level = level
62+
self.mode_spec = mode_spec
63+
self.delta_spec = delta_spec
64+
self.paging_spec = paging_spec
5865
self.delta_encoding_order = delta_encoding_order
5966
self.equal_pages_up_to = equal_pages_up_to
60-
self.mode_spec = mode_spec
61-
62-
def encode(self, buf):
63-
buf = ensure_contiguous_ndarray(buf)
6467

68+
def _get_chunk_config(self):
6569
match self.mode_spec:
66-
case 'auto':
70+
case "auto" | None:
6771
mode_spec = ModeSpec.auto()
68-
case 'classic':
72+
case "classic":
6973
mode_spec = ModeSpec.classic()
7074
case _:
71-
raise ValueError(f"unknown value for mode_spec: {self.mode_spec}")
72-
paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
75+
raise ValueError(f"mode_spec {self.mode_spec} is not supported")
76+
77+
if self.delta_encoding_order is not None:
78+
# backwards compatibility: delta_encoding_order predates delta_spec
79+
if self.delta_spec in (None, "try_consecutive"):
80+
delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)
81+
else:
82+
raise ValueError(
83+
"delta_encoding_order can only be set for delta_spec='try_consecutive'"
84+
)
85+
else:
86+
match self.delta_spec:
87+
case "auto" | None:
88+
delta_spec = DeltaSpec.auto()
89+
case "none":
90+
delta_spec = DeltaSpec.none()
91+
case "try_consecutive":
92+
delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)
93+
case "try_lookback":
94+
delta_spec = DeltaSpec.try_lookback()
95+
case _:
96+
raise ValueError(f"delta_spec {self.delta_spec} is not supported")
97+
98+
match self.paging_spec:
99+
case "equal_pages_up_to" | None:
100+
paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
101+
case _:
102+
raise ValueError(f"paging_spec {self.paging_spec} is not supported")
73103

74104
config = ChunkConfig(
75105
compression_level=self.level,
76-
delta_encoding_order=self.delta_encoding_order,
106+
delta_spec=delta_spec,
77107
mode_spec=mode_spec,
78108
paging_spec=paging_spec,
79109
)
110+
return config
111+
112+
def encode(self, buf):
113+
buf = ensure_contiguous_ndarray(buf)
114+
config = self._get_chunk_config()
80115
return standalone.simple_compress(buf, config)
81116

82117
def decode(self, buf, out=None):

numcodecs/tests/test_pcodec.py

+15-3
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,11 @@
2323
PCodec(level=1),
2424
PCodec(level=5),
2525
PCodec(level=9),
26-
PCodec(mode_spec='classic'),
26+
PCodec(mode_spec="classic"),
2727
PCodec(equal_pages_up_to=300),
28+
PCodec(delta_encoding_order=2),
29+
PCodec(delta_spec="try_lookback"),
30+
PCodec(delta_spec="none"),
2831
]
2932

3033

@@ -57,14 +60,23 @@ def test_config():
5760

5861

5962
def test_invalid_config_error():
60-
codec = PCodec(mode_spec='bogus')
63+
for param in ["mode_spec", "delta_spec", "paging_spec"]:
64+
codec = PCodec(**{param: "bogus"})
65+
with pytest.raises(ValueError):
66+
check_encode_decode_array_to_bytes(arrays[0], codec)
67+
68+
69+
def test_invalid_delta_encoding_combo():
70+
codec = PCodec(delta_encoding_order=2, delta_spec="none")
6171
with pytest.raises(ValueError):
6272
check_encode_decode_array_to_bytes(arrays[0], codec)
6373

6474

6575
def test_repr():
6676
check_repr(
67-
"PCodec(delta_encoding_order=None, equal_pages_up_to=262144, level=3, mode_spec='auto')"
77+
"PCodec(delta_encoding_order=None, delta_spec='auto',"
78+
" equal_pages_up_to=262144, level=3, mode_spec='auto',"
79+
" paging_spec='equal_pages_up_to')"
6880
)
6981

7082

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ zfpy = [
6868
"numpy<2.0.0",
6969
]
7070
pcodec = [
71-
"pcodec>=0.2,<0.3",
71+
"pcodec>=0.3",
7272
]
7373
crc32c = [
7474
"crc32c>=2.7",

0 commit comments

Comments
 (0)