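fixup: remove SupportsChunkMapping, add optional chunk_shape to SupportsChunkCodec, always evolve the v3 pipeline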

d-v-b · d-v-b · commit ae0580c9442c · 2026-04-09T10:38:17.000+02:00
diff --git a/src/zarr/abc/codec.py b/src/zarr/abc/codec.py
@@ -14,7 +14,7 @@
 
 if TYPE_CHECKING:
     from collections.abc import Awaitable, Callable, Iterable
-    from typing import Any, Self
+    from typing import Self
 
     from zarr.abc.store import ByteGetter, ByteSetter, Store
     from zarr.core.array_spec import ArraySpec
@@ -36,7 +36,6 @@
     "GetResult",
     "PreparedWrite",
     "SupportsChunkCodec",
-    "SupportsChunkMapping",
     "SupportsSyncCodec",
 ]
 
@@ -89,117 +88,20 @@ def _encode_sync(self, chunk_data: CI, chunk_spec: ArraySpec) -> CO | None: ...
 class SupportsChunkCodec(Protocol):
     """Protocol for objects that can decode/encode whole chunks synchronously.
 
-    `ChunkTransform` satisfies this protocol.
+    `ChunkTransform` satisfies this protocol. The optional `chunk_shape`
+    parameter allows decoding/encoding chunks of different shapes (e.g.
+    rectilinear grids) without rebuilding the transform.
     """
 
     array_spec: ArraySpec
 
-    def decode_chunk(self, chunk_bytes: Buffer) -> NDBuffer: ...
+    def decode_chunk(
+        self, chunk_bytes: Buffer, chunk_shape: tuple[int, ...] | None = None
+    ) -> NDBuffer: ...
 
-    def encode_chunk(self, chunk_array: NDBuffer) -> Buffer | None: ...
-
-
-@runtime_checkable
-class SupportsChunkMapping(Protocol):
-    """Protocol for codecs that expose their stored data as a mapping
-    from chunk coordinates to encoded buffers.
-
-    A single store key holds a blob. This protocol defines how to
-    interpret that blob as a ``dict[tuple[int, ...], Buffer | None]`` —
-    a mapping from inner-chunk coordinates to their encoded bytes.
-
-    For a non-sharded codec (``BytesCodec``), the mapping is trivial:
-    one entry at ``(0,)`` containing the entire blob. For a sharded
-    codec, the mapping has one entry per inner chunk, derived from the
-    shard index embedded in the blob. The pipeline doesn't need to know
-    which case it's dealing with — it operates on the mapping uniformly.
-
-    This abstraction enables the three-phase IO/compute/IO pattern:
-
-    1. **IO**: fetch the blob from the store.
-    2. **Compute**: unpack the blob into the chunk mapping, decode/merge/
-       re-encode entries, pack back into a blob. All pure compute.
-    3. **IO**: write the blob to the store.
-    """
-
-    @property
-    def inner_codec_chain(self) -> SupportsChunkCodec | None:
-        """The codec chain for inner chunks, or `None` to use the pipeline's."""
-        ...
-
-    def unpack_chunks(
-        self,
-        raw: Buffer | None,
-        chunk_spec: ArraySpec,
-    ) -> dict[tuple[int, ...], Buffer | None]:
-        """Unpack a storage blob into per-inner-chunk encoded buffers."""
-        ...
-
-    def pack_chunks(
-        self,
-        chunk_dict: dict[tuple[int, ...], Buffer | None],
-        chunk_spec: ArraySpec,
-    ) -> Buffer | None:
-        """Pack per-inner-chunk encoded buffers into a single storage blob."""
-        ...
-
-    def prepare_read_sync(
-        self,
-        byte_getter: Any,
-        chunk_selection: SelectorTuple,
-        codec_chain: SupportsChunkCodec,
-    ) -> NDBuffer | None:
-        """Fetch and decode a chunk synchronously, returning the selected region."""
-        ...
-
-    def prepare_write_sync(
-        self,
-        byte_setter: Any,
-        codec_chain: SupportsChunkCodec,
-        chunk_selection: SelectorTuple,
-        out_selection: SelectorTuple,
-        replace: bool,
-    ) -> PreparedWrite:
-        """Prepare a synchronous write: fetch existing data if needed, unpack."""
-        ...
-
-    def finalize_write_sync(
-        self,
-        prepared: PreparedWrite,
-        chunk_spec: ArraySpec,
-        byte_setter: Any,
-    ) -> None:
-        """Pack the prepared chunk data and write it to the store."""
-        ...
-
-    async def prepare_read(
-        self,
-        byte_getter: Any,
-        chunk_selection: SelectorTuple,
-        codec_chain: SupportsChunkCodec,
-    ) -> NDBuffer | None:
-        """Async variant of `prepare_read_sync`."""
-        ...
-
-    async def prepare_write(
-        self,
-        byte_setter: Any,
-        codec_chain: SupportsChunkCodec,
-        chunk_selection: SelectorTuple,
-        out_selection: SelectorTuple,
-        replace: bool,
-    ) -> PreparedWrite:
-        """Async variant of `prepare_write_sync`."""
-        ...
-
-    async def finalize_write(
-        self,
-        prepared: PreparedWrite,
-        chunk_spec: ArraySpec,
-        byte_setter: Any,
-    ) -> None:
-        """Async variant of `finalize_write_sync`."""
-        ...
+    def encode_chunk(
+        self, chunk_array: NDBuffer, chunk_shape: tuple[int, ...] | None = None
+    ) -> Buffer | None: ...
 
 
 class BaseCodec[CI: CodecInput, CO: CodecOutput](Metadata):
diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py
@@ -5,16 +5,15 @@
 from enum import Enum
 from typing import TYPE_CHECKING
 
-from zarr.abc.codec import ArrayBytesCodec, PreparedWrite, SupportsChunkCodec
+from zarr.abc.codec import ArrayBytesCodec
 from zarr.core.buffer import Buffer, NDBuffer
 from zarr.core.common import JSON, parse_enum, parse_named_configuration
 from zarr.core.dtype.common import HasEndianness
 
 if TYPE_CHECKING:
-    from typing import Any, Self
+    from typing import Self
 
     from zarr.core.array_spec import ArraySpec
-    from zarr.core.indexing import SelectorTuple
 
 
 class Endian(Enum):
@@ -126,114 +125,3 @@ async def _encode_single(
 
     def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int:
         return input_byte_length
-
-    # -- SupportsChunkMapping --
-
-    @property
-    def inner_codec_chain(self) -> SupportsChunkCodec | None:
-        """Returns `None` — the pipeline should use its own codec chain."""
-        return None
-
-    def unpack_chunks(
-        self,
-        raw: Buffer | None,
-        chunk_spec: ArraySpec,
-    ) -> dict[tuple[int, ...], Buffer | None]:
-        """Single chunk keyed at `(0,)`."""
-        return {(0,): raw}
-
-    def pack_chunks(
-        self,
-        chunk_dict: dict[tuple[int, ...], Buffer | None],
-        chunk_spec: ArraySpec,
-    ) -> Buffer | None:
-        """Return the single chunk's bytes."""
-        return chunk_dict.get((0,))
-
-    def prepare_read_sync(
-        self,
-        byte_getter: Any,
-        chunk_selection: SelectorTuple,
-        codec_chain: SupportsChunkCodec,
-    ) -> NDBuffer | None:
-        """Fetch, decode, and return the selected region synchronously."""
-        raw = byte_getter.get_sync(prototype=codec_chain.array_spec.prototype)
-        if raw is None:
-            return None
-        chunk_array = codec_chain.decode_chunk(raw)
-        return chunk_array[chunk_selection]
-
-    def prepare_write_sync(
-        self,
-        byte_setter: Any,
-        codec_chain: SupportsChunkCodec,
-        chunk_selection: SelectorTuple,
-        out_selection: SelectorTuple,
-        replace: bool,
-    ) -> PreparedWrite:
-        """Fetch existing data if needed, unpack, return `PreparedWrite`."""
-        from zarr.core.indexing import ChunkProjection
-
-        existing: Buffer | None = None
-        if not replace:
-            existing = byte_setter.get_sync(prototype=codec_chain.array_spec.prototype)
-        chunk_dict = self.unpack_chunks(existing, codec_chain.array_spec)
-        indexer = [ChunkProjection((0,), chunk_selection, out_selection, replace)]  # type: ignore[arg-type]
-        return PreparedWrite(chunk_dict=chunk_dict, indexer=indexer)
-
-    def finalize_write_sync(
-        self,
-        prepared: PreparedWrite,
-        chunk_spec: ArraySpec,
-        byte_setter: Any,
-    ) -> None:
-        """Pack and write to store, or delete if empty."""
-        blob = self.pack_chunks(prepared.chunk_dict, chunk_spec)
-        if blob is None:
-            byte_setter.delete_sync()
-        else:
-            byte_setter.set_sync(blob)
-
-    async def prepare_read(
-        self,
-        byte_getter: Any,
-        chunk_selection: SelectorTuple,
-        codec_chain: SupportsChunkCodec,
-    ) -> NDBuffer | None:
-        """Async variant of `prepare_read_sync`."""
-        raw = await byte_getter.get(prototype=codec_chain.array_spec.prototype)
-        if raw is None:
-            return None
-        chunk_array = codec_chain.decode_chunk(raw)
-        return chunk_array[chunk_selection]
-
-    async def prepare_write(
-        self,
-        byte_setter: Any,
-        codec_chain: SupportsChunkCodec,
-        chunk_selection: SelectorTuple,
-        out_selection: SelectorTuple,
-        replace: bool,
-    ) -> PreparedWrite:
-        """Async variant of `prepare_write_sync`."""
-        from zarr.core.indexing import ChunkProjection
-
-        existing: Buffer | None = None
-        if not replace:
-            existing = await byte_setter.get(prototype=codec_chain.array_spec.prototype)
-        chunk_dict = self.unpack_chunks(existing, codec_chain.array_spec)
-        indexer = [ChunkProjection((0,), chunk_selection, out_selection, replace)]  # type: ignore[arg-type]
-        return PreparedWrite(chunk_dict=chunk_dict, indexer=indexer)
-
-    async def finalize_write(
-        self,
-        prepared: PreparedWrite,
-        chunk_spec: ArraySpec,
-        byte_setter: Any,
-    ) -> None:
-        """Async variant of `finalize_write_sync`."""
-        blob = self.pack_chunks(prepared.chunk_dict, chunk_spec)
-        if blob is None:
-            await byte_setter.delete()
-        else:
-            await byte_setter.set(blob)
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
@@ -229,29 +229,23 @@ def create_codec_pipeline(metadata: ArrayMetadata, *, store: Store | None = None
 
     if isinstance(metadata, ArrayV3Metadata):
         pipeline = get_pipeline_class().from_codecs(metadata.codecs)
-        # PhasedCodecPipeline needs evolve_from_array_spec to build its
-        # ChunkTransform and ShardLayout. BatchedCodecPipeline does not.
-        if hasattr(pipeline, "chunk_transform") and pipeline.chunk_transform is None:
-            from zarr.core.metadata.v3 import RegularChunkGridMetadata
-
-            # Use the regular chunk shape if available, otherwise use a
-            # placeholder shape. The ChunkTransform is shape-agnostic —
-            # the actual chunk shape is passed per-call at decode/encode time.
-            if isinstance(metadata.chunk_grid, RegularChunkGridMetadata):
-                chunk_shape = metadata.chunk_grid.chunk_shape
-            else:
-                # Rectilinear: use a 1-element shape per dimension as placeholder.
-                # Only dtype/fill_value/config matter for codec evolution.
-                chunk_shape = (1,) * len(metadata.shape)
-            chunk_spec = ArraySpec(
-                shape=chunk_shape,
-                dtype=metadata.data_type,
-                fill_value=metadata.fill_value,
-                config=ArrayConfig.from_dict({}),
-                prototype=default_buffer_prototype(),
-            )
-            pipeline = pipeline.evolve_from_array_spec(chunk_spec)
-        return pipeline
+        from zarr.core.metadata.v3 import RegularChunkGridMetadata
+
+        # Use the regular chunk shape if available, otherwise use a
+        # placeholder. The ChunkTransform is shape-agnostic — the actual
+        # chunk shape is passed per-call at decode/encode time.
+        if isinstance(metadata.chunk_grid, RegularChunkGridMetadata):
+            chunk_shape = metadata.chunk_grid.chunk_shape
+        else:
+            chunk_shape = (1,) * len(metadata.shape)
+        chunk_spec = ArraySpec(
+            shape=chunk_shape,
+            dtype=metadata.data_type,
+            fill_value=metadata.fill_value,
+            config=ArrayConfig.from_dict({}),
+            prototype=default_buffer_prototype(),
+        )
+        return pipeline.evolve_from_array_spec(chunk_spec)
     elif isinstance(metadata, ArrayV2Metadata):
         v2_codec = V2Codec(filters=metadata.filters, compressor=metadata.compressor)
         return get_pipeline_class().from_codecs([v2_codec])
diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py
@@ -656,11 +656,13 @@ def codecs_from_list(
 ) -> tuple[tuple[ArrayArrayCodec, ...], ArrayBytesCodec, tuple[BytesBytesCodec, ...]]:
     from zarr.codecs.sharding import ShardingCodec
 
+    codecs = tuple(codecs)  # materialize to avoid generator consumption issues
+
     array_array: tuple[ArrayArrayCodec, ...] = ()
     array_bytes_maybe: ArrayBytesCodec | None = None
     bytes_bytes: tuple[BytesBytesCodec, ...] = ()
 
-    if any(isinstance(codec, ShardingCodec) for codec in codecs) and len(tuple(codecs)) > 1:
+    if any(isinstance(codec, ShardingCodec) for codec in codecs) and len(codecs) > 1:
         warn(
             "Combining a `sharding_indexed` codec disables partial reads and "
             "writes, which may lead to inefficient performance.",