Commit c0abcbb

Add support for thread-local caches
Doesn't seem super useful for now, as the GIL means a cache lock doesn't get contended much, but it might find utility with the free-threaded interpreter eventually. It's not huge and not very complex either, although the contextvars API is not great for lazy initialisation.

Even though the initialisation looks like it could lead to redundant inits (similar to the clearing cache, which can get multi-cleared), it should be safe: threads hitting `cache` concurrently will each hit their own lookup failure, initialise their own local cache, and set their personal contextvar. For a var to get double-initialised, the same thread would have to be concurrent with itself, which is not possible.

Fixes #180
1 parent 670fdf6 commit c0abcbb
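
Below is a minimal standalone sketch of that argument (stdlib only, not part of the commit; cache_var and local_cache are illustrative stand-ins for Local.cv and Local.cache): each thread misses its own ContextVar lookup and publishes its own cache, so the check-then-set can only race with code running on the same thread.

import threading
from contextvars import ContextVar

cache_var: ContextVar[dict] = ContextVar("local-cache")  # stand-in for Local.cv

def local_cache() -> dict:
    cache = cache_var.get(None)
    if cache is None:         # first access from this thread: lookup failure
        cache = {}            # initialise this thread's own cache...
        cache_var.set(cache)  # ...and set its personal contextvar
    return cache

caches = []  # keep references so the dicts stay alive and their ids distinct
def worker() -> None:
    caches.append(local_cache())

threads = [threading.Thread(target=worker) for _ in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()
assert len({id(c) for c in caches}) == 4  # every thread built its own cache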

2 files changed: 35 additions & 4 deletions


src/ua_parser/__main__.py

Lines changed: 4 additions & 3 deletions
@@ -22,7 +22,7 @@
     PartialParseResult,
     Resolver,
 )
-from .caching import Cache
+from .caching import Cache, Local
 from .loaders import load_builtins, load_yaml
 from .re2 import Resolver as Re2Resolver
 from .user_agent_parser import Parse
@@ -243,11 +243,12 @@ def run_threaded(args: argparse.Namespace) -> None:
     basic = BasicResolver(load_builtins())
     resolvers: List[Tuple[str, Resolver]] = [
         ("clearing", CachingResolver(basic, Clearing(CACHESIZE))),
-        ("LRU", CachingResolver(basic, Locking(LRU(CACHESIZE)))),
+        ("locking-lru", CachingResolver(basic, Locking(LRU(CACHESIZE)))),
+        ("local-lru", CachingResolver(basic, Local(lambda: LRU(CACHESIZE)))),
         ("re2", Re2Resolver(load_builtins())),
     ]
     for name, resolver in resolvers:
-        print(f"{name:10}: ", end="", flush=True)
+        print(f"{name:11}: ", end="", flush=True)
         # randomize the dataset for each thread, predictably, to
         # simulate distributed load (not great but better than
         # nothing, and probably better than reusing the exact same

src/ua_parser/caching.py

Lines changed: 31 additions & 1 deletion
@@ -1,7 +1,8 @@
 import abc
 import threading
 from collections import OrderedDict
-from typing import Dict, Optional, Protocol
+from contextvars import ContextVar
+from typing import Callable, Dict, Optional, Protocol
 
 from .core import Domain, PartialParseResult, Resolver
 
@@ -122,6 +123,35 @@ def __setitem__(self, key: str, value: PartialParseResult) -> None:
         self.cache[key] = value
 
 
+class Local:
+    """Thread local cache decorator. Takes a cache factory and lazily
+    instantiates a cache for each thread it's accessed from.
+
+    This means the cache capacity and memory consumption is
+    figuratively multiplied by however many threads the cache is used
+    from, but those threads don't share their caching.
+
+    """
+
+    def __init__(self, factory: Callable[[], Cache]) -> None:
+        self.cv: ContextVar[Cache] = ContextVar("local-cache")
+        self.factory = factory
+
+    @property
+    def cache(self) -> Cache:
+        c = self.cv.get(None)
+        if c is None:
+            c = self.factory()
+            self.cv.set(c)
+        return c
+
+    def __getitem__(self, key: str) -> Optional[PartialParseResult]:
+        return self.cache[key]
+
+    def __setitem__(self, key: str, value: PartialParseResult) -> None:
+        self.cache[key] = value
+
+
 class CachingResolver:
     """A wrapping parser which takes an underlying concrete :class:`Cache`
     for the actual caching and cache strategy.
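
For scale, a hedged usage sketch of the two cached variants benchmarked above (not from the commit; the import paths are assumed from the relative imports in the diffs, and CACHESIZE is an arbitrary capacity):

from ua_parser.basic import Resolver as BasicResolver  # path assumed
from ua_parser.caching import LRU, CachingResolver, Local, Locking  # Locking/LRU assumed to live here
from ua_parser.loaders import load_builtins

CACHESIZE = 200  # arbitrary capacity for the sketch
basic = BasicResolver(load_builtins())

# One shared LRU behind a lock: threads contend on the lock but share entries.
locking_lru = CachingResolver(basic, Locking(LRU(CACHESIZE)))

# One LRU per thread, created lazily on first access: no lock and no sharing,
# so capacity (and memory) is effectively CACHESIZE times the thread count.
local_lru = CachingResolver(basic, Local(lambda: LRU(CACHESIZE)))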
