Skip to content

Commit 64820a7

Browse files
committed
Add a ManagedMemoryStore class that uses an internal registry of dictionaries to manage memory-based zarr storage.
Instances of `ManagedMemoryStore` have a URL representation based on the `id` of the backing dict, e.g. `memory://<id>/`. This means the same memory-backed store can be accessed without passing an explicit store reference.
1 parent 3a88eee commit 64820a7

4 files changed

Lines changed: 333 additions & 69 deletions

File tree

src/zarr/storage/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from zarr.storage._fsspec import FsspecStore
99
from zarr.storage._local import LocalStore
1010
from zarr.storage._logging import LoggingStore
11-
from zarr.storage._memory import GpuMemoryStore, MemoryStore
11+
from zarr.storage._memory import GpuMemoryStore, ManagedMemoryStore, MemoryStore
1212
from zarr.storage._obstore import ObjectStore
1313
from zarr.storage._wrapper import WrapperStore
1414
from zarr.storage._zip import ZipStore
@@ -18,6 +18,7 @@
1818
"GpuMemoryStore",
1919
"LocalStore",
2020
"LoggingStore",
21+
"ManagedMemoryStore",
2122
"MemoryStore",
2223
"ObjectStore",
2324
"StoreLike",

src/zarr/storage/_common.py

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
)
1818
from zarr.errors import ContainsArrayAndGroupError, ContainsArrayError, ContainsGroupError
1919
from zarr.storage._local import LocalStore
20-
from zarr.storage._memory import MemoryStore, _get_memory_store_from_url
20+
from zarr.storage._memory import ManagedMemoryStore, MemoryStore
2121
from zarr.storage._utils import normalize_path
2222

2323
_has_fsspec = importlib.util.find_spec("fsspec")
@@ -343,16 +343,7 @@ async def make_store(
343343
elif isinstance(store_like, str):
344344
# Check for memory:// URLs first (in-process registry lookup)
345345
if store_like.startswith("memory://"):
346-
memory_store = _get_memory_store_from_url(store_like)
347-
if memory_store is not None:
348-
if _read_only and not memory_store.read_only:
349-
return memory_store.with_read_only(read_only=True)
350-
return memory_store
351-
# Memory store not found in registry - it may have been garbage collected
352-
raise ValueError(
353-
f"Memory store not found for URL '{store_like}'. "
354-
"The store may have been garbage collected."
355-
)
346+
return ManagedMemoryStore.from_url(store_like, read_only=_read_only)
356347
# Either an FSSpec URI or a local filesystem path
357348
elif _is_fsspec_uri(store_like):
358349
return FsspecStore.from_url(
@@ -432,20 +423,18 @@ async def make_store_path(
432423

433424
elif isinstance(store_like, str) and store_like.startswith("memory://"):
434425
# Handle memory:// URLs specially - extract path from URL
435-
memory_store = _get_memory_store_from_url(store_like)
436-
if memory_store is None:
437-
raise ValueError(
438-
f"Memory store not found for URL '{store_like}'. "
439-
"The store may have been garbage collected."
440-
)
426+
_read_only = mode == "r"
427+
memory_store = ManagedMemoryStore.from_url(store_like, read_only=_read_only)
441428
# Extract path from URL: "memory://123456/path/to/node" -> "path/to/node"
442429
url_without_scheme = store_like[len("memory://") :]
443430
parts = url_without_scheme.split("/", 1)
444431
url_path = parts[1] if len(parts) > 1 else ""
445432
# Combine URL path with any additional path argument
446433
combined_path = normalize_path(url_path)
447434
if path_normalized:
448-
combined_path = f"{combined_path}/{path_normalized}" if combined_path else path_normalized
435+
combined_path = (
436+
f"{combined_path}/{path_normalized}" if combined_path else path_normalized
437+
)
449438
return await StorePath.open(memory_store, path=combined_path, mode=mode)
450439

451440
else:

src/zarr/storage/_memory.py

Lines changed: 176 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -19,53 +19,6 @@
1919
logger = getLogger(__name__)
2020

2121

22-
# -----------------------------------------------------------------------------
23-
# Memory store registry
24-
# -----------------------------------------------------------------------------
25-
# This registry allows memory stores to be looked up by their URL within the
26-
# same process. This enables specs containing memory:// URLs to be re-opened.
27-
# We use weakrefs so that stores can be garbage collected when no longer in use.
28-
29-
_memory_store_registry: weakref.WeakValueDictionary[int, MemoryStore] = weakref.WeakValueDictionary()
30-
31-
32-
def _register_memory_store(store: MemoryStore) -> None:
33-
"""Register a memory store in the registry."""
34-
store_id = id(store._store_dict)
35-
_memory_store_registry[store_id] = store
36-
37-
38-
def _get_memory_store_from_url(url: str) -> MemoryStore | None:
39-
"""
40-
Look up a memory store by its URL.
41-
42-
Parameters
43-
----------
44-
url : str
45-
A URL like "memory://123456" or "memory://123456/path/to/node"
46-
47-
Returns
48-
-------
49-
MemoryStore | None
50-
The store if found in the registry, None otherwise.
51-
"""
52-
if not url.startswith("memory://"):
53-
return None
54-
55-
# Parse the store ID from the URL (handle optional path)
56-
# "memory://123456" -> "123456"
57-
# "memory://123456/path" -> "123456"
58-
url_without_scheme = url[len("memory://") :]
59-
store_id_str = url_without_scheme.split("/")[0]
60-
61-
try:
62-
store_id = int(store_id_str)
63-
except ValueError:
64-
return None
65-
66-
return _memory_store_registry.get(store_id)
67-
68-
6922
class MemoryStore(Store):
7023
"""
7124
Store for local memory.
@@ -100,8 +53,6 @@ def __init__(
10053
if store_dict is None:
10154
store_dict = {}
10255
self._store_dict = store_dict
103-
# Register this store so it can be looked up by URL
104-
_register_memory_store(self)
10556

10657
def with_read_only(self, read_only: bool = False) -> MemoryStore:
10758
# docstring inherited
@@ -525,3 +476,179 @@ async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None
525476
# Convert to gpu.Buffer
526477
gpu_value = value if isinstance(value, gpu.Buffer) else gpu.Buffer.from_buffer(value)
527478
await super().set(key, gpu_value, byte_range=byte_range)
479+
480+
481+
# -----------------------------------------------------------------------------
482+
# ManagedMemoryStore and its registry
483+
# -----------------------------------------------------------------------------
484+
# ManagedMemoryStore owns the lifecycle of its backing dict, enabling proper
485+
# weakref-based tracking. This allows memory:// URLs to be resolved back to
486+
# the store's dict within the same process.
487+
488+
489+
class _ManagedStoreDict(dict[str, Buffer]):
490+
"""
491+
A dict subclass that supports weak references.
492+
493+
Regular dicts don't support weakrefs, but we need to track managed store dicts
494+
in a WeakValueDictionary so they can be garbage collected when no longer
495+
referenced. This subclass adds the necessary __weakref__ slot.
496+
"""
497+
498+
__slots__ = ("__weakref__",)
499+
500+
501+
class _ManagedStoreDictRegistry:
502+
"""
503+
Registry for managed store dicts.
504+
505+
This registry is the source of truth for managed store dicts. It creates
506+
new dicts, tracks them via weak references, and looks them up by URL.
507+
"""
508+
509+
def __init__(self) -> None:
510+
self._registry: weakref.WeakValueDictionary[int, _ManagedStoreDict] = (
511+
weakref.WeakValueDictionary()
512+
)
513+
514+
def create(self) -> _ManagedStoreDict:
515+
"""Create a new managed dict and register it."""
516+
store_dict = _ManagedStoreDict()
517+
self._registry[id(store_dict)] = store_dict
518+
return store_dict
519+
520+
def get_from_url(self, url: str) -> _ManagedStoreDict | None:
521+
"""
522+
Look up a managed store dict by its URL.
523+
524+
Parameters
525+
----------
526+
url : str
527+
A URL like "memory://123456" or "memory://123456/path/to/node"
528+
529+
Returns
530+
-------
531+
_ManagedStoreDict | None
532+
The store dict if found, None otherwise.
533+
"""
534+
if not url.startswith("memory://"):
535+
return None
536+
537+
# Parse the store ID from the URL (handle optional path)
538+
# "memory://123456" -> "123456"
539+
# "memory://123456/path" -> "123456"
540+
url_without_scheme = url[len("memory://") :]
541+
store_id_str = url_without_scheme.split("/")[0]
542+
543+
try:
544+
store_id = int(store_id_str)
545+
except ValueError:
546+
return None
547+
548+
return self._registry.get(store_id)
549+
550+
551+
_managed_store_dict_registry = _ManagedStoreDictRegistry()
552+
553+
554+
class ManagedMemoryStore(MemoryStore):
555+
"""
556+
A memory store that owns and manages the lifecycle of its backing dict.
557+
558+
Unlike ``MemoryStore`` which accepts any ``MutableMapping``, this store
559+
creates and owns its backing dict internally. This enables proper lifecycle
560+
management and allows the store to be looked up by its ``memory://`` URL
561+
within the same process.
562+
563+
Parameters
564+
----------
565+
read_only : bool
566+
Whether the store is read-only.
567+
568+
Notes
569+
-----
570+
The backing dict is tracked via weak references and will be garbage collected
571+
when no ``ManagedMemoryStore`` instances reference it. URLs pointing to a
572+
garbage-collected store will fail to resolve.
573+
574+
See Also
575+
--------
576+
MemoryStore : A memory store that accepts any MutableMapping.
577+
578+
Examples
579+
--------
580+
>>> store = ManagedMemoryStore()
581+
>>> url = str(store) # e.g., "memory://123456789"
582+
>>> # Later, resolve the URL back to the store's dict
583+
>>> store2 = ManagedMemoryStore.from_url(url)
584+
>>> store2._store_dict is store._store_dict
585+
True
586+
"""
587+
588+
_store_dict: _ManagedStoreDict
589+
590+
def __init__(self, *, read_only: bool = False) -> None:
591+
# Skip MemoryStore.__init__ and call Store.__init__ directly
592+
# because we need to set up _store_dict differently
593+
Store.__init__(self, read_only=read_only)
594+
595+
# Get a managed dict from the registry
596+
self._store_dict = _managed_store_dict_registry.create()
597+
598+
def __str__(self) -> str:
599+
return self._to_url()
600+
601+
def __repr__(self) -> str:
602+
return f"ManagedMemoryStore('{self}')"
603+
604+
@classmethod
605+
def _from_managed_dict(
606+
cls, managed_dict: _ManagedStoreDict, *, read_only: bool = False
607+
) -> ManagedMemoryStore:
608+
"""Internal: create a store from an existing managed dict."""
609+
store = object.__new__(cls)
610+
Store.__init__(store, read_only=read_only)
611+
store._store_dict = managed_dict
612+
return store
613+
614+
def with_read_only(self, read_only: bool = False) -> ManagedMemoryStore:
615+
# docstring inherited
616+
return type(self)._from_managed_dict(self._store_dict, read_only=read_only)
617+
618+
def _to_url(self) -> str:
619+
"""Return a URL representation of this store."""
620+
return f"memory://{id(self._store_dict)}"
621+
622+
@classmethod
623+
def from_url(cls, url: str, *, read_only: bool = False) -> ManagedMemoryStore:
624+
"""
625+
Create a ManagedMemoryStore from a memory:// URL.
626+
627+
This looks up the backing dict in the process-wide registry and creates
628+
a new store instance that shares the same dict.
629+
630+
Parameters
631+
----------
632+
url : str
633+
A URL like "memory://123456" identifying the store.
634+
read_only : bool
635+
Whether the new store should be read-only.
636+
637+
Returns
638+
-------
639+
ManagedMemoryStore
640+
A store sharing the same backing dict as the original.
641+
642+
Raises
643+
------
644+
ValueError
645+
If the URL is not a valid memory:// URL or the store has been
646+
garbage collected.
647+
"""
648+
managed_dict = _managed_store_dict_registry.get_from_url(url)
649+
if managed_dict is None:
650+
raise ValueError(
651+
f"Memory store not found for URL '{url}'. "
652+
"The store may have been garbage collected or the URL is invalid."
653+
)
654+
return cls._from_managed_dict(managed_dict, read_only=read_only)

0 commit comments

Comments
 (0)