Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions changes/3897.misc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Bump the minimum version of `typing-extensions` to 4.13 to support the `extra_items`
keyword argument on `TypedDict` (PEP 728).
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ dependencies = [
'numpy>=2',
'numcodecs>=0.14',
'google-crc32c>=1.5',
'typing_extensions>=4.12',
'typing_extensions>=4.13',
'donfig>=0.8',
]

Expand Down Expand Up @@ -243,7 +243,7 @@ extra-dependencies = [
'fsspec==2023.10.0',
's3fs==2023.10.0',
'universal_pathlib==0.2.0',
'typing_extensions==4.12.*',
'typing_extensions==4.13.*',
'donfig==0.8.*',
'obstore==0.5.*',
]
Expand Down
49 changes: 35 additions & 14 deletions src/zarr/core/metadata/v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import json
from collections.abc import Iterable, Mapping, Sequence
from dataclasses import dataclass, field, replace
from typing import TYPE_CHECKING, Any, Literal, NotRequired, TypedDict, TypeGuard, cast
from typing import TYPE_CHECKING, Any, Final, Literal, NotRequired, TypeGuard, cast

from typing_extensions import TypedDict

from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec
from zarr.abc.metadata import Metadata
Expand Down Expand Up @@ -136,10 +138,11 @@ def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]:
)


class AllowedExtraField(TypedDict):
class AllowedExtraField(TypedDict, extra_items=JSON): # type: ignore[call-arg]
"""
This class models allowed extra fields in array metadata.
They are ignored by Zarr Python.
They must have ``must_understand`` set to ``False``, and may contain
arbitrary additional JSON data.
"""

must_understand: Literal[False]
Expand Down Expand Up @@ -411,25 +414,43 @@ def parse_chunk_grid(
raise ValueError(f"Unknown chunk grid name: {name!r}")


class ArrayMetadataJSON_V3(TypedDict):
class ArrayMetadataJSON_V3(TypedDict, extra_items=AllowedExtraField): # type: ignore[call-arg]
"""
A typed dictionary model for zarr v3 metadata.
A typed dictionary model for zarr v3 array metadata.

Extra keys are permitted if they conform to ``AllowedExtraField``
(i.e. they are mappings with ``must_understand: false``).
"""

zarr_format: Literal[3]
node_type: Literal["array"]
data_type: str | NamedConfig[str, Mapping[str, object]]
data_type: str | NamedConfig[str, Mapping[str, JSON]]
shape: tuple[int, ...]
chunk_grid: NamedConfig[str, Mapping[str, object]]
chunk_key_encoding: NamedConfig[str, Mapping[str, object]]
fill_value: object
codecs: tuple[str | NamedConfig[str, Mapping[str, object]], ...]
chunk_grid: str | NamedConfig[str, Mapping[str, JSON]]
chunk_key_encoding: str | NamedConfig[str, Mapping[str, JSON]]
fill_value: JSON
codecs: tuple[str | NamedConfig[str, Mapping[str, JSON]], ...]
attributes: NotRequired[Mapping[str, JSON]]
storage_transformers: NotRequired[tuple[NamedConfig[str, Mapping[str, object]], ...]]
storage_transformers: NotRequired[tuple[str | NamedConfig[str, Mapping[str, JSON]], ...]]
dimension_names: NotRequired[tuple[str | None, ...]]
Comment thread
d-v-b marked this conversation as resolved.
Outdated


ARRAY_METADATA_KEYS = set(ArrayMetadataJSON_V3.__annotations__.keys())
"""
The names of the fields of the array metadata document defined in the zarr V3 spec.
"""
ARRAY_METADATA_KEYS: Final[set[str]] = {
"zarr_format",
"node_type",
"data_type",
"shape",
"chunk_grid",
"chunk_key_encoding",
"fill_value",
"codecs",
"attributes",
"storage_transformers",
"dimension_names",
}
Comment thread
d-v-b marked this conversation as resolved.


@dataclass(frozen=True, kw_only=True)
Expand Down Expand Up @@ -617,8 +638,8 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:

return cls(
shape=_data_typed["shape"],
chunk_grid=_data_typed["chunk_grid"],
chunk_key_encoding=_data_typed["chunk_key_encoding"],
chunk_grid=_data_typed["chunk_grid"], # type: ignore[arg-type]
chunk_key_encoding=_data_typed["chunk_key_encoding"], # type: ignore[arg-type]
codecs=_data_typed["codecs"],
attributes=_data_typed.get("attributes", {}), # type: ignore[arg-type]
dimension_names=_data_typed.get("dimension_names", None),
Expand Down
19 changes: 19 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,25 @@
from zarr.core.dtype.wrapper import ZDType


@dataclass
class Expect[TIn, TOut]:
    """A test case with explicit input, expected output, and a human-readable id.

    Generic over the input type ``TIn`` and the expected-output type ``TOut``.
    """

    # Value handed to the code under test.
    input: TIn
    # Value the code under test is expected to produce for ``input``.
    output: TOut
    # Human-readable label identifying this case.
    # NOTE(review): presumably used as the pytest parametrize id — confirm against callers.
    id: str


@dataclass
class ExpectFail[TIn]:
    """A test case that should raise an exception.

    Generic over the input type ``TIn``.
    """

    # Value handed to the code under test.
    input: TIn
    # Exception class the code under test is expected to raise.
    exception: type[Exception]
    # Human-readable label identifying this case.
    id: str
    # Optional text associated with the expected error; defaults to None.
    # NOTE(review): presumably matched against the exception message
    # (e.g. via ``pytest.raises(match=...)``) — confirm against callers.
    msg: str | None = None


async def parse_store(
store: Literal["local", "memory", "fsspec", "zip", "memory_get_latency"], path: str
) -> LocalStore | MemoryStore | FsspecStore | ZipStore | LatencyStore:
Expand Down
41 changes: 41 additions & 0 deletions tests/test_metadata/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any

from zarr.codecs.bytes import BytesCodec

if TYPE_CHECKING:
from zarr.core.metadata.v3 import ArrayMetadataJSON_V3


def minimal_metadata_dict_v3(
    extra_fields: dict[str, Any] | None = None, **overrides: Any
) -> ArrayMetadataJSON_V3:
    """Return a small, valid V3 array metadata document as a plain dict.

    The result has the same set of keys that ``ArrayV3Metadata.to_dict()``
    always emits, describing a 4x4 ``uint8`` array stored as a single
    regular chunk with the bytes codec.

    Parameters
    ----------
    extra_fields : dict, optional
        Additional keys merged into the document last (e.g. extension
        fields). Skipped entirely when ``None``.
    **overrides
        Replacement values for any of the standard metadata fields. These
        are merged before ``extra_fields``, so ``extra_fields`` wins on a
        key collision.

    Returns
    -------
    ArrayMetadataJSON_V3
        A freshly built dict; callers may mutate it freely.
    """
    metadata: ArrayMetadataJSON_V3 = {
        "zarr_format": 3,
        "node_type": "array",
        "shape": (4, 4),
        "data_type": "uint8",
        "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (4, 4)}},
        "chunk_key_encoding": {"name": "default", "configuration": {"separator": "/"}},
        "fill_value": 0,
        "codecs": (BytesCodec().to_dict(),),  # type: ignore[typeddict-item]
        "attributes": {},
        "storage_transformers": (),
    }
    # Apply the layers in precedence order: standard-field overrides first,
    # then extension fields. ``overrides`` is always a dict; only
    # ``extra_fields`` can be None.
    for layer in (overrides, extra_fields):
        if layer is not None:
            metadata.update(layer)  # type: ignore[typeddict-item]
    return metadata
Loading
Loading