Skip to content

Commit 9e667cb

Browse files
committed
Split Parser and reorganise package
Parser turns out to not really make sense as a superclass / ABC: it really only has one useful method, and because parsers use delegation there's no real way to override the utility methods / shortcuts, so they're only useful on the caller / client side but they constrain the implementor (who has to extend the ABC and then possibly deal with multiple-inheritance shenanigans).

Making the core object just a callable protocol instead makes the implementation somewhat simpler and more flexible (e.g. just a function or HoF can be a "parser"), however the convenient utility methods *are* important for end users and should not be discounted.

For that, keep a wrapper `Parser` object which can be wrapped around a "parser" in order to provide the additional convenience (similar to the free functions at the root). Importantly, `Parser` methods can also be used as free functions by passing a "parser" as `self`; they are intended to be compatible. It doesn't work super well from the typechecking perspective, but it works fine enough.

Consideration was given to making the free functions at the package root parametric on the parser e.g.

    def parse(ua: str, resolver: Optional[Resolver] = None, /) -> ParseResult:
        if resolver is None:
            from . import parser as resolver
        return resolver(ua, Domain.ALL).complete()

but that feels like it would be pretty error prone, in the sense that it would be too easy to forget to pass in the resolver, compared to consistently resolving via a bespoke parser, or just installing a parser globally.

Also move things around a bit:

- move matcher utility functions out of the core, un-prefix them since we're using `__all__` for visibility anyway
- move eager matchers out of the core, similar to the lazy matchers

Fixes #189
1 parent 8d4e624 commit 9e667cb

18 files changed

Lines changed: 404 additions & 369 deletions

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ class EagerWriter(Writer):
182182
__all__ = ["MATCHERS"]
183183
184184
from typing import Tuple, List
185-
from .core import UserAgentMatcher, OSMatcher, DeviceMatcher
185+
from .matchers import UserAgentMatcher, OSMatcher, DeviceMatcher
186186
187187
MATCHERS: Tuple[List[UserAgentMatcher], List[OSMatcher], List[DeviceMatcher]] = ([
188188
"""

src/ua_parser/__init__.py

Lines changed: 70 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -16,29 +16,25 @@
1616
This way importing anything but the top-level package should not be
1717
necessary unless you want to *implement* a parser.
1818
"""
19+
from __future__ import annotations
1920

2021
__all__ = [
21-
"BasicParser",
22-
"CachingParser",
22+
"BasicResolver",
23+
"CachingResolver",
2324
"Clearing",
2425
"DefaultedParseResult",
2526
"Device",
26-
"DeviceMatcher",
2727
"Domain",
2828
"LRU",
2929
"Locking",
3030
"Matchers",
3131
"OS",
32-
"OSMatcher",
3332
"ParseResult",
34-
"Parser",
33+
"Resolver",
3534
"PartialParseResult",
3635
"UserAgent",
37-
"UserAgentMatcher",
3836
"load_builtins",
3937
"load_lazy_builtins",
40-
"load_data",
41-
"load_yaml",
4238
"parse",
4339
"parse_device",
4440
"parse_os",
@@ -48,43 +44,89 @@
4844
import contextlib
4945
from typing import Callable, Optional
5046

51-
from .basic import Parser as BasicParser
52-
from .caching import CachingParser, Clearing, Locking, LRU
47+
from .basic import Resolver as BasicResolver
48+
from .caching import CachingResolver, Clearing, Locking, LRU
5349
from .core import (
5450
DefaultedParseResult,
5551
Device,
56-
DeviceMatcher,
5752
Domain,
5853
Matchers,
5954
OS,
60-
OSMatcher,
61-
Parser,
6255
ParseResult,
6356
PartialParseResult,
57+
Resolver,
6458
UserAgent,
65-
UserAgentMatcher,
6659
)
67-
from .loaders import load_builtins, load_data, load_lazy_builtins, load_yaml
60+
from .loaders import load_builtins, load_lazy_builtins
6861

69-
Re2Parser: Optional[Callable[[Matchers], Parser]] = None
62+
Re2Resolver: Optional[Callable[[Matchers], Resolver]] = None
7063
with contextlib.suppress(ImportError):
71-
from .re2 import Parser as Re2Parser
64+
from .re2 import Resolver as Re2Resolver
7265

7366

7467
VERSION = (1, 0, 0)
68+
69+
70+
class Parser:
71+
@classmethod
72+
def from_matchers(cls, m: Matchers, /) -> Parser:
73+
if Re2Resolver is not None:
74+
return cls(Re2Resolver(m))
75+
else:
76+
return cls(
77+
CachingResolver(
78+
BasicResolver(m),
79+
Locking(LRU(200)),
80+
)
81+
)
82+
83+
def __init__(self, resolver: Resolver) -> None:
84+
self.resolver = resolver
85+
86+
def __call__(self, ua: str, domains: Domain, /) -> PartialParseResult:
87+
"""Parses the ``ua`` string, returning a parse result with *at least*
88+
the requested :class:`domains <Domain>` resolved (whether to success or
89+
failure).
90+
91+
A parser may resolve more :class:`domains <Domain>` than
92+
requested, but it *must not* resolve less.
93+
"""
94+
return self.resolver(ua, domains)
95+
96+
def parse(self, ua: str) -> ParseResult:
97+
"""Convenience method for parsing all domains, and falling back to
98+
default values for all failures.
99+
"""
100+
return self(ua, Domain.ALL).complete()
101+
102+
def parse_user_agent(self, ua: str) -> Optional[UserAgent]:
103+
"""Convenience method for parsing the :class:`UserAgent` domain,
104+
falling back to the default value in case of failure.
105+
"""
106+
return self(ua, Domain.USER_AGENT).user_agent
107+
108+
def parse_os(self, ua: str) -> Optional[OS]:
109+
"""Convenience method for parsing the :class:`OS` domain, falling back
110+
to the default value in case of failure.
111+
"""
112+
return self(ua, Domain.OS).os
113+
114+
def parse_device(self, ua: str) -> Optional[Device]:
115+
"""Convenience method for parsing the :class:`Device` domain, falling
116+
back to the default value in case of failure.
117+
"""
118+
return self(ua, Domain.DEVICE).device
119+
120+
75121
parser: Parser
76122

77123

78124
def __getattr__(name: str) -> Parser:
79125
global parser
80126
if name == "parser":
81-
if Re2Parser is not None:
82-
parser = Re2Parser(load_lazy_builtins())
83-
else:
84-
parser = CachingParser(
85-
BasicParser(load_builtins()),
86-
Locking(LRU(200)),
87-
)
127+
parser = Parser.from_matchers(
128+
load_builtins() if Re2Resolver is None else load_lazy_builtins()
129+
)
88130
return parser
89131
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
90132

@@ -105,7 +147,7 @@ def parse(ua: str) -> ParseResult:
105147
# parser, a `global` access fails to and we get a NameError
106148
from . import parser
107149

108-
return parser.parse(ua)
150+
return parser(ua, Domain.ALL).complete()
109151

110152

111153
def parse_user_agent(ua: str) -> Optional[UserAgent]:
@@ -114,7 +156,7 @@ def parse_user_agent(ua: str) -> Optional[UserAgent]:
114156
"""
115157
from . import parser
116158

117-
return parser.parse_user_agent(ua)
159+
return parser(ua, Domain.USER_AGENT).user_agent
118160

119161

120162
def parse_os(ua: str) -> Optional[OS]:
@@ -123,7 +165,7 @@ def parse_os(ua: str) -> Optional[OS]:
123165
"""
124166
from . import parser
125167

126-
return parser.parse_os(ua)
168+
return parser(ua, Domain.OS).os
127169

128170

129171
def parse_device(ua: str) -> Optional[Device]:
@@ -132,4 +174,4 @@ def parse_device(ua: str) -> Optional[Device]:
132174
"""
133175
from . import parser
134176

135-
return parser.parse_device(ua)
177+
return parser(ua, Domain.DEVICE).device

src/ua_parser/_matchers.pyi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ __all__ = ["MATCHERS"]
22

33
from typing import List, Tuple
44

5-
from .core import DeviceMatcher, OSMatcher, UserAgentMatcher
5+
from .matchers import DeviceMatcher, OSMatcher, UserAgentMatcher
66

77
MATCHERS: Tuple[
88
List[UserAgentMatcher],

src/ua_parser/basic.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
__all__ = ["Resolver"]
2+
13
from operator import methodcaller
24
from typing import List
35

@@ -7,13 +9,12 @@
79
Matcher,
810
Matchers,
911
OS,
10-
Parser as AbstractParser,
1112
PartialParseResult,
1213
UserAgent,
1314
)
1415

1516

16-
class Parser(AbstractParser):
17+
class Resolver:
1718
"""A simple pure-python parser based around trying a number of regular
1819
expressions in sequence for each domain, and returning a result
1920
when one matches.
@@ -27,9 +28,7 @@ def __init__(
2728
self,
2829
matchers: Matchers,
2930
) -> None:
30-
self.user_agent_matchers = matchers[0]
31-
self.os_matchers = matchers[1]
32-
self.device_matchers = matchers[2]
31+
self.user_agent_matchers, self.os_matchers, self.device_matchers = matchers
3332

3433
def __call__(self, ua: str, domains: Domain, /) -> PartialParseResult:
3534
parse = methodcaller("__call__", ua)

src/ua_parser/bench.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,18 @@
77
from typing import Any, Callable, Iterable, List, Optional
88

99
from . import (
10-
BasicParser,
11-
CachingParser,
10+
BasicResolver,
11+
CachingResolver,
1212
Clearing,
1313
Locking,
1414
LRU,
1515
Matchers,
1616
Parser,
17-
load_builtins,
18-
load_yaml,
17+
Resolver,
1918
)
2019
from .caching import Cache
21-
from .re2 import Parser as Re2Parser
20+
from .loaders import load_builtins, load_yaml
21+
from .re2 import Resolver as Re2Resolver
2222
from .user_agent_parser import Parse
2323

2424
CACHEABLE = {
@@ -222,19 +222,19 @@ def run_csv(args: argparse.Namespace) -> None:
222222
def get_parser(
223223
parser: str, cache: str, cachesize: int, rules: Matchers
224224
) -> Callable[[str], Any]:
225-
p: Parser
225+
r: Resolver
226226
if parser == "legacy":
227227
return Parse
228228
elif parser == "basic":
229-
p = BasicParser(rules)
229+
r = BasicResolver(rules)
230230
elif parser == "re2":
231-
p = Re2Parser(rules)
231+
r = Re2Resolver(rules)
232232
else:
233233
sys.exit(f"unknown parser {parser!r}")
234234

235235
c: Callable[[int], Cache]
236236
if cache == "none":
237-
return p.parse
237+
return Parser(r).parse
238238
elif cache == "clearing":
239239
c = Clearing
240240
elif cache == "lru":
@@ -244,7 +244,7 @@ def get_parser(
244244
else:
245245
sys.exit(f"unknown cache algorithm {cache!r}")
246246

247-
return CachingParser(p, c(cachesize)).parse
247+
return Parser(CachingResolver(r, c(cachesize))).parse
248248

249249

250250
def run(

src/ua_parser/caching.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
11
import abc
22
import threading
33
from collections import OrderedDict
4-
from typing import Dict, Optional
4+
from typing import Dict, Optional, Protocol
55

6-
from .core import Domain, Parser, PartialParseResult
6+
from .core import Domain, PartialParseResult, Resolver
77

88
__all__ = [
9-
"CachingParser",
9+
"CachingResolver",
1010
"Cache",
1111
"Clearing",
1212
"Locking",
1313
"LRU",
1414
]
1515

1616

17-
class Cache(abc.ABC):
17+
class Cache(Protocol):
1818
"""Cache abstract protocol. The :class:`CachingResolver` will look
1919
values up, merge what was returned (possibly nothing) with what it
2020
got from its actual parser, and *re-set the result*.
@@ -33,7 +33,7 @@ def __getitem__(self, key: str) -> Optional[PartialParseResult]:
3333
...
3434

3535

36-
class Clearing(Cache):
36+
class Clearing:
3737
"""A clearing cache, if the cache is full, just remove all the entries
3838
and re-fill from scratch.
3939
@@ -62,7 +62,7 @@ def __setitem__(self, key: str, value: PartialParseResult) -> None:
6262
self.cache[key] = value
6363

6464

65-
class LRU(Cache):
65+
class LRU:
6666
"""Cache following a least-recently used replacement policy: when
6767
there is no more room in the cache, whichever entry was last seen
6868
the least recently is removed.
@@ -103,7 +103,7 @@ def __setitem__(self, key: str, value: PartialParseResult) -> None:
103103
self.cache.popitem(last=False)
104104

105105

106-
class Locking(Cache):
106+
class Locking:
107107
"""Locking cache decorator. Takes a non-thread-safe cache and
108108
ensures retrieving and setting entries is protected by a mutex.
109109
@@ -122,7 +122,7 @@ def __setitem__(self, key: str, value: PartialParseResult) -> None:
122122
self.cache[key] = value
123123

124124

125-
class CachingParser(Parser):
125+
class CachingResolver:
126126
"""A wrapping parser which takes an underlying concrete :class:`Cache`
127127
for the actual caching and cache strategy.
128128
@@ -134,8 +134,8 @@ class CachingParser(Parser):
134134
really, they're immutable).
135135
"""
136136

137-
def __init__(self, parser: Parser, cache: Cache):
138-
self.parser: Parser = parser
137+
def __init__(self, parser: Resolver, cache: Cache):
138+
self.parser: Resolver = parser
139139
self.cache: Cache = cache
140140

141141
def __call__(self, ua: str, domains: Domain, /) -> PartialParseResult:

0 commit comments

Comments
 (0)