1+ import re
12import logging
23from ast import literal_eval
34from functools import lru_cache
89GET = 'GET'
910
1011
12+ class _WildcardToken :
13+ """Sentinel object for wildcard path tokens.
14+
15+ Using a dedicated class (instead of plain strings) ensures that a literal
16+ dict key ``'*'`` (parsed from ``root['*']``) is never confused with the
17+ wildcard ``*`` (parsed from ``root[*]``).
18+ """
19+ def __init__ (self , symbol ):
20+ self ._symbol = symbol
21+
22+ def __repr__ (self ):
23+ return self ._symbol
24+
25+ def __eq__ (self , other ):
26+ return isinstance (other , _WildcardToken ) and self ._symbol == other ._symbol
27+
28+ def __hash__ (self ):
29+ return hash (('_WildcardToken' , self ._symbol ))
30+
31+
32+ SINGLE_WILDCARD = _WildcardToken ('*' )
33+ MULTI_WILDCARD = _WildcardToken ('**' )
34+
35+
class PathExtractionError(ValueError):
    """``ValueError`` subclass dedicated to path-extraction problems.

    NOTE(review): the raise sites are outside this view; presumably raised
    when a path string cannot be parsed into elements -- confirm.
    """
1338
@@ -21,6 +46,16 @@ def _add_to_elements(elements, elem, inside):
2146 if not elem :
2247 return
2348 if not elem .startswith ('__' ):
49+ # Handle wildcard tokens (* and **) as-is.
50+ # Unquoted root[*] arrives as bare '*' which matches the string check.
51+ # Quoted root['*'] arrives as "'*'" which does NOT match, so it falls
52+ # through to literal_eval and becomes the plain string '*' — which is
53+ # distinct from the _WildcardToken sentinel and thus treated as a
54+ # literal dict key.
55+ if elem in ('*' , '**' ):
56+ action = GETATTR if inside == '.' else GET
57+ elements .append ((SINGLE_WILDCARD if elem == '*' else MULTI_WILDCARD , action ))
58+ return
2459 remove_quotes = False
2560 if '𝆺𝅥𝅯' in elem or '\\ ' in elem :
2661 remove_quotes = True
@@ -321,3 +356,129 @@ def stringify_path(path, root_element=DEFAULT_FIRST_ELEMENT, quote_str="'{}'"):
321356 else :
322357 result .append (f".{ element } " )
323358 return '' .join (result )
359+
360+
# Regex to detect wildcard segments in a raw path string.
# Matches [*], [**], .*, .**.  The pattern itself knows nothing about quotes;
# path_has_wildcard() blanks out quoted keys before searching so that text
# inside quotes is never reported as a wildcard.
_WILDCARD_RE = re.compile(
    r'\[\*\*?\]'              # [*] or [**]
    r'|\.\*\*?(?=[.\[]|$)'    # .* or .** followed by . or [ or end of string
)


def _mask_quoted_sections(path):
    """Return *path* with the contents of every quoted section removed.

    The quote characters themselves are kept, so ``root['a[*]b'][*]`` becomes
    ``root[''][*]``.  A section opened by ``'`` is closed only by ``'`` (and
    likewise for ``"``), so the other quote character may appear inside a key.

    If a quote is never closed the original string is returned unchanged, so
    malformed input is searched exactly as it was before masking existed.

    NOTE(review): escaped quotes inside keys are not interpreted here --
    confirm whether the path parser's escape handling needs mirroring.
    """
    kept = []
    open_quote = ''
    for char in path:
        if open_quote:
            if char == open_quote:
                open_quote = ''
                kept.append(char)
            # Any other character is quoted key content: drop it.
        else:
            if char in ('"', "'"):
                open_quote = char
            kept.append(char)
    if open_quote:
        # Unbalanced quoting: do not guess, leave the text as it was.
        return path
    return ''.join(kept)


def path_has_wildcard(path):
    """Check if a path string contains wildcard segments (* or **).

    Only unquoted ``[*]``, ``[**]``, ``.*`` and ``.**`` count.  Text inside a
    quoted key (e.g. ``root['a[*]b']`` or ``root['x.*.y']``) is a literal key
    and is ignored.

    Args:
        path: The raw path string, e.g. ``"root['users'][*]['password']"``.

    Returns:
        True if at least one unquoted wildcard segment is present, else False.
    """
    if "'" in path or '"' in path:
        path = _mask_quoted_sections(path)
    return bool(_WILDCARD_RE.search(path))
372+
373+
class GlobPathMatcher:
    """Pre-compiled matcher for a single glob pattern path.

    Parses a pattern like ``root['users'][*]['password']`` into segments
    and matches concrete path strings against it.

    ``*`` matches exactly one path segment (any key, index, or attribute).
    ``**`` matches zero or more path segments.

    Only the value of each segment is compared; the access kind stored next
    to it (item vs. attribute) is ignored by the matcher.
    """

    def __init__(self, pattern_path):
        """Parse *pattern_path* once so it can be matched repeatedly.

        Args:
            pattern_path: The glob pattern as a path string.
        """
        self.original_pattern = pattern_path
        self._pattern = self._segments(pattern_path)

    @staticmethod
    def _segments(path_string):
        """Parse *path_string* and drop the leading root element."""
        elements = _path_to_elements(path_string, root_element=('root', GETATTR))
        return elements[1:]

    def match(self, path_string):
        """Return True if *path_string* matches this pattern exactly."""
        return self._match_segments(self._pattern, self._segments(path_string), 0, 0)

    def match_or_is_ancestor(self, path_string):
        """Return True if *path_string* matches OR is an ancestor of a potential match.

        This is needed for ``include_paths``: we must not prune a path that
        could lead to a matching descendant.
        """
        target = self._segments(path_string)
        return (self._match_segments(self._pattern, target, 0, 0) or
                self._could_match_descendant(self._pattern, target, 0, 0))

    def match_or_is_descendant(self, path_string):
        """Return True if *path_string* matches OR is a descendant of a matching path.

        This checks whether the pattern matches the full path or any shorter
        prefix of it, meaning the path is "inside" a matched subtree.
        """
        target = self._segments(path_string)
        # Full length first (the exact match), then ever shorter prefixes.
        return any(
            self._match_segments(self._pattern, target[:length], 0, 0)
            for length in range(len(target), -1, -1)
        )

    @staticmethod
    def _match_segments(pattern, target, pi, ti):
        """Return True if ``pattern[pi:]`` matches ``target[ti:]`` completely.

        Uses the classic glob algorithm with a single backtrack point: only
        the most recent ``**`` is remembered, and on a mismatch that ``**``
        is made to absorb one more target segment.  This gives the same
        answers as trying every suffix recursively for each ``**``, but the
        work is bounded by ``len(pattern) * len(target)`` instead of growing
        exponentially with the number of ``**`` segments.

        Args:
            pattern: Sequence of pattern elements; index 0 of each element is
                the segment value or a wildcard token.
            target: Sequence of concrete path elements, same layout.
            pi: Index in *pattern* to start from.
            ti: Index in *target* to start from.
        """
        n_pat, n_tgt = len(pattern), len(target)
        star_pi = None   # position of the most recent ** in the pattern
        star_ti = ti     # first target index NOT absorbed by that **

        while ti < n_tgt:
            if pi < n_pat:
                pat_elem = pattern[pi][0]
                if pat_elem == MULTI_WILDCARD:
                    # Start by letting ** match zero segments; remember where
                    # to resume if that turns out to be too few.
                    star_pi, star_ti = pi, ti
                    pi += 1
                    continue
                if pat_elem == SINGLE_WILDCARD or pat_elem == target[ti][0]:
                    # * takes any one segment; a literal must be equal.
                    pi += 1
                    ti += 1
                    continue
            # Mismatch, or pattern exhausted while target segments remain.
            if star_pi is None:
                return False
            # Let the last ** absorb one more segment and retry after it.
            star_ti += 1
            pi, ti = star_pi + 1, star_ti

        # Target exhausted: any trailing ** can match zero segments.
        while pi < n_pat and pattern[pi][0] == MULTI_WILDCARD:
            pi += 1

        return pi == n_pat

    @staticmethod
    def _could_match_descendant(pattern, target, pi, ti):
        """Check if *target* is a prefix that could lead to a match deeper down.

        Walks pattern and target in lockstep.  The target is an ancestor when
        it runs out while pattern segments remain, or as soon as a ``**`` is
        reached: ``**`` can absorb the rest of the target, leaving the rest of
        the pattern to be satisfied further down.

        Args:
            pattern: Sequence of pattern elements (value or wildcard token at
                index 0 of each element).
            target: Sequence of concrete path elements, same layout.
            pi: Index in *pattern* to start from.
            ti: Index in *target* to start from.
        """
        n_pat, n_tgt = len(pattern), len(target)
        while ti < n_tgt:
            if pi >= n_pat:
                # Pattern used up but the target goes deeper: not an ancestor.
                return False
            pat_elem = pattern[pi][0]
            if pat_elem == MULTI_WILDCARD:
                return True
            is_single = pat_elem == SINGLE_WILDCARD
            if not is_single and pat_elem != target[ti][0]:
                return False
            pi += 1
            ti += 1
        # Target exhausted -- it's an ancestor if pattern has remaining segments.
        return pi < n_pat
475+
476+
def compile_glob_paths(paths):
    """Build one ``GlobPathMatcher`` per glob pattern string in *paths*.

    Args:
        paths: An iterable of glob pattern strings, or a falsy value
            (``None``, empty list, ...).

    Returns:
        A list of ``GlobPathMatcher`` objects in the same order as *paths*,
        or ``None`` when *paths* is falsy.
    """
    if paths:
        return list(map(GlobPathMatcher, paths))
    return None
0 commit comments