|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
3 | | -from dataclasses import dataclass |
| 3 | +from dataclasses import dataclass, field |
4 | 4 | from itertools import islice, pairwise |
5 | | -from typing import TYPE_CHECKING, Any, TypeVar |
| 5 | +from typing import TYPE_CHECKING, Any, TypeVar, cast |
6 | 6 | from warnings import warn |
7 | 7 |
|
8 | 8 | from zarr.abc.codec import ( |
|
13 | 13 | BytesBytesCodec, |
14 | 14 | Codec, |
15 | 15 | CodecPipeline, |
| 16 | + SupportsSyncCodec, |
16 | 17 | ) |
17 | 18 | from zarr.core.common import concurrent_map |
18 | 19 | from zarr.core.config import config |
@@ -68,6 +69,134 @@ def fill_value_or_default(chunk_spec: ArraySpec) -> Any: |
68 | 69 | return fill_value |
69 | 70 |
|
70 | 71 |
|
| 72 | +@dataclass(frozen=True) |
| 73 | +class CodecChain: |
| 74 | + """Lightweight codec chain: array-array -> array-bytes -> bytes-bytes. |
| 75 | +
|
| 76 | + Pure compute only -- no IO methods, no threading, no batching. |
| 77 | + """ |
| 78 | + |
| 79 | + array_array_codecs: tuple[ArrayArrayCodec, ...] |
| 80 | + array_bytes_codec: ArrayBytesCodec |
| 81 | + bytes_bytes_codecs: tuple[BytesBytesCodec, ...] |
| 82 | + |
| 83 | + _all_sync: bool = field(default=False, init=False, repr=False, compare=False) |
| 84 | + |
| 85 | + def __post_init__(self) -> None: |
| 86 | + object.__setattr__( |
| 87 | + self, |
| 88 | + "_all_sync", |
| 89 | + all(isinstance(c, SupportsSyncCodec) for c in self), |
| 90 | + ) |
| 91 | + |
| 92 | + def __iter__(self) -> Iterator[Codec]: |
| 93 | + yield from self.array_array_codecs |
| 94 | + yield self.array_bytes_codec |
| 95 | + yield from self.bytes_bytes_codecs |
| 96 | + |
| 97 | + @classmethod |
| 98 | + def from_codecs(cls, codecs: Iterable[Codec]) -> CodecChain: |
| 99 | + aa, ab, bb = codecs_from_list(list(codecs)) |
| 100 | + return cls(array_array_codecs=aa, array_bytes_codec=ab, bytes_bytes_codecs=bb) |
| 101 | + |
| 102 | + def resolve_metadata_chain( |
| 103 | + self, chunk_spec: ArraySpec |
| 104 | + ) -> tuple[ |
| 105 | + list[tuple[ArrayArrayCodec, ArraySpec]], |
| 106 | + tuple[ArrayBytesCodec, ArraySpec], |
| 107 | + list[tuple[BytesBytesCodec, ArraySpec]], |
| 108 | + ]: |
| 109 | + """Resolve metadata through the codec chain for a single chunk_spec.""" |
| 110 | + aa_codecs_with_spec: list[tuple[ArrayArrayCodec, ArraySpec]] = [] |
| 111 | + spec = chunk_spec |
| 112 | + for aa_codec in self.array_array_codecs: |
| 113 | + aa_codecs_with_spec.append((aa_codec, spec)) |
| 114 | + spec = aa_codec.resolve_metadata(spec) |
| 115 | + |
| 116 | + ab_codec_with_spec = (self.array_bytes_codec, spec) |
| 117 | + spec = self.array_bytes_codec.resolve_metadata(spec) |
| 118 | + |
| 119 | + bb_codecs_with_spec: list[tuple[BytesBytesCodec, ArraySpec]] = [] |
| 120 | + for bb_codec in self.bytes_bytes_codecs: |
| 121 | + bb_codecs_with_spec.append((bb_codec, spec)) |
| 122 | + spec = bb_codec.resolve_metadata(spec) |
| 123 | + |
| 124 | + return (aa_codecs_with_spec, ab_codec_with_spec, bb_codecs_with_spec) |
| 125 | + |
| 126 | + def decode_chunk( |
| 127 | + self, |
| 128 | + chunk_bytes: Buffer, |
| 129 | + chunk_spec: ArraySpec, |
| 130 | + aa_chain: Iterable[tuple[ArrayArrayCodec, ArraySpec]] | None = None, |
| 131 | + ab_pair: tuple[ArrayBytesCodec, ArraySpec] | None = None, |
| 132 | + bb_chain: Iterable[tuple[BytesBytesCodec, ArraySpec]] | None = None, |
| 133 | + ) -> NDBuffer: |
| 134 | + """Decode a single chunk through the full codec chain, synchronously. |
| 135 | +
|
| 136 | + Pure compute -- no IO. Only callable when all codecs support sync. |
| 137 | +
|
| 138 | + The optional ``aa_chain``, ``ab_pair``, ``bb_chain`` parameters allow |
| 139 | + pre-resolved metadata to be reused across many chunks with the same spec. |
| 140 | + If not provided, ``resolve_metadata_chain`` is called internally. |
| 141 | + """ |
| 142 | + if aa_chain is None or ab_pair is None or bb_chain is None: |
| 143 | + aa_chain, ab_pair, bb_chain = self.resolve_metadata_chain(chunk_spec) |
| 144 | + |
| 145 | + bb_out: Any = chunk_bytes |
| 146 | + for bb_codec, spec in reversed(list(bb_chain)): |
| 147 | + bb_out = cast("SupportsSyncCodec", bb_codec)._decode_sync(bb_out, spec) |
| 148 | + |
| 149 | + ab_codec, ab_spec = ab_pair |
| 150 | + ab_out: Any = cast("SupportsSyncCodec", ab_codec)._decode_sync(bb_out, ab_spec) |
| 151 | + |
| 152 | + for aa_codec, spec in reversed(list(aa_chain)): |
| 153 | + ab_out = cast("SupportsSyncCodec", aa_codec)._decode_sync(ab_out, spec) |
| 154 | + |
| 155 | + return ab_out # type: ignore[no-any-return] |
| 156 | + |
| 157 | + def encode_chunk( |
| 158 | + self, |
| 159 | + chunk_array: NDBuffer, |
| 160 | + chunk_spec: ArraySpec, |
| 161 | + ) -> Buffer | None: |
| 162 | + """Encode a single chunk through the full codec chain, synchronously. |
| 163 | +
|
| 164 | + Pure compute -- no IO. Only callable when all codecs support sync. |
| 165 | + """ |
| 166 | + spec = chunk_spec |
| 167 | + aa_out: Any = chunk_array |
| 168 | + |
| 169 | + for aa_codec in self.array_array_codecs: |
| 170 | + if aa_out is None: |
| 171 | + return None |
| 172 | + aa_out = cast("SupportsSyncCodec", aa_codec)._encode_sync(aa_out, spec) |
| 173 | + spec = aa_codec.resolve_metadata(spec) |
| 174 | + |
| 175 | + if aa_out is None: |
| 176 | + return None |
| 177 | + bb_out: Any = cast("SupportsSyncCodec", self.array_bytes_codec)._encode_sync(aa_out, spec) |
| 178 | + spec = self.array_bytes_codec.resolve_metadata(spec) |
| 179 | + |
| 180 | + for bb_codec in self.bytes_bytes_codecs: |
| 181 | + if bb_out is None: |
| 182 | + return None |
| 183 | + bb_out = cast("SupportsSyncCodec", bb_codec)._encode_sync(bb_out, spec) |
| 184 | + spec = bb_codec.resolve_metadata(spec) |
| 185 | + |
| 186 | + return bb_out # type: ignore[no-any-return] |
| 187 | + |
| 188 | + def compute_encoded_size(self, byte_length: int, array_spec: ArraySpec) -> int: |
| 189 | + for codec in self: |
| 190 | + byte_length = codec.compute_encoded_size(byte_length, array_spec) |
| 191 | + array_spec = codec.resolve_metadata(array_spec) |
| 192 | + return byte_length |
| 193 | + |
| 194 | + def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: |
| 195 | + for codec in self: |
| 196 | + chunk_spec = codec.resolve_metadata(chunk_spec) |
| 197 | + return chunk_spec |
| 198 | + |
| 199 | + |
71 | 200 | @dataclass(frozen=True) |
72 | 201 | class BatchedCodecPipeline(CodecPipeline): |
73 | 202 | """Default codec pipeline. |
|
0 commit comments