Skip to content

Commit 5d9c952

Browse files
committed
Add wildcard/glob pattern support for exclude_paths and include_paths
1 parent 1b25960 commit 5d9c952

10 files changed

Lines changed: 1773 additions & 2013 deletions

File tree

deepdiff/deephash.py

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
convert_item_or_items_into_compiled_regexes_else_none,
1515
get_id, type_is_subclass_of_type_group, type_in_type_group,
1616
number_to_string, datetime_normalize, KEY_TO_VAL_STR,
17-
get_truncate_datetime, dict_, add_root_to_paths, PydanticBaseModel)
17+
get_truncate_datetime, dict_, add_root_to_paths, PydanticBaseModel,
18+
separate_wildcard_and_exact_paths)
1819

1920
from deepdiff.base import Base
2021

@@ -189,6 +190,7 @@ def __init__(self,
189190
custom_operators: Optional[List[Any]] = None,
190191
default_timezone: Union[datetime.timezone, "BaseTzInfo"] = datetime.timezone.utc,
191192
encodings: Optional[List[str]] = None,
193+
exclude_glob_paths: Optional[List[Any]] = None,
192194
exclude_obj_callback: Optional[Callable[[Any, str], bool]] = None,
193195
exclude_paths: Optional[PathType] = None,
194196
exclude_regex_paths: Optional[RegexType] = None,
@@ -205,6 +207,7 @@ def __init__(self,
205207
ignore_type_in_groups: Any = None,
206208
ignore_type_subclasses: bool = False,
207209
ignore_uuid_types: bool = False,
210+
include_glob_paths: Optional[List[Any]] = None,
208211
include_paths: Optional[PathType] = None,
209212
number_format_notation: str = "f",
210213
number_to_string_func: Optional[NumberToStringFunc] = None,
@@ -231,8 +234,14 @@ def __init__(self,
231234
exclude_types = set() if exclude_types is None else set(exclude_types)
232235
self.exclude_types_tuple = tuple(exclude_types) # we need tuple for checking isinstance
233236
self.ignore_repetition = ignore_repetition
234-
self.exclude_paths = add_root_to_paths(convert_item_or_items_into_set_else_none(exclude_paths))
235-
self.include_paths = add_root_to_paths(convert_item_or_items_into_set_else_none(include_paths))
237+
_exclude_set = convert_item_or_items_into_set_else_none(exclude_paths)
238+
_exclude_exact, _exclude_globs = separate_wildcard_and_exact_paths(_exclude_set)
239+
self.exclude_paths = add_root_to_paths(_exclude_exact)
240+
self.exclude_glob_paths = exclude_glob_paths or _exclude_globs
241+
_include_set = convert_item_or_items_into_set_else_none(include_paths)
242+
_include_exact, _include_globs = separate_wildcard_and_exact_paths(_include_set)
243+
self.include_paths = add_root_to_paths(_include_exact)
244+
self.include_glob_paths = include_glob_paths or _include_globs
236245
self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths)
237246
self.hasher = default_hasher if hasher is None else hasher
238247
self.hashes[UNPROCESSED_KEY] = [] # type: ignore
@@ -461,11 +470,21 @@ def _skip_this(self, obj: Any, parent: str) -> bool:
461470
skip = False
462471
if self.exclude_paths and parent in self.exclude_paths:
463472
skip = True
464-
if self.include_paths and parent != 'root':
465-
if parent not in self.include_paths:
466-
skip = True
467-
for prefix in self.include_paths:
468-
if parent.startswith(prefix):
473+
elif self.exclude_glob_paths and any(gp.match(parent) for gp in self.exclude_glob_paths):
474+
skip = True
475+
if (self.include_paths or self.include_glob_paths) and parent != 'root':
476+
skip = True
477+
if self.include_paths:
478+
if parent in self.include_paths:
479+
skip = False
480+
else:
481+
for prefix in self.include_paths:
482+
if parent.startswith(prefix):
483+
skip = False
484+
break
485+
if skip and self.include_glob_paths:
486+
for gp in self.include_glob_paths:
487+
if gp.match_or_is_ancestor(parent):
469488
skip = False
470489
break
471490
elif self.exclude_regex_paths and any(

deepdiff/diff.py

Lines changed: 418 additions & 1946 deletions
Large diffs are not rendered by default.

deepdiff/helper.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,30 @@ def add_root_to_paths(paths: Optional[Iterable[str]]) -> Optional[SetOrdered]:
352352
return result
353353

354354

355+
def separate_wildcard_and_exact_paths(paths):
    """Partition *paths* into literal paths and compiled wildcard matchers.

    Returns a ``(exact, globs)`` pair:

    * ``exact`` is a ``set`` of the paths that contain no wildcard segment,
      or ``None`` when there are none.
    * ``globs`` is the result of ``compile_glob_paths`` for the wildcard
      paths, or ``None`` when there are none.

    A falsy *paths* (``None`` or an empty collection) yields ``(None, None)``.

    Raises ``ValueError`` if a wildcard path does not start with ``root``.
    """
    if not paths:
        return None, None

    # Imported at call time rather than at module import time
    # (presumably to avoid a circular import with deepdiff.path -- confirm).
    from deepdiff.path import path_has_wildcard, compile_glob_paths

    literal_paths = set()
    glob_patterns = []
    for candidate in paths:
        if not path_has_wildcard(candidate):
            literal_paths.add(candidate)
            continue
        # Shorthand (root-less) paths are only accepted for literal paths.
        if not candidate.startswith('root'):
            raise ValueError(
                "Wildcard paths must start with 'root'. Got: {}".format(candidate))
        glob_patterns.append(candidate)

    compiled = compile_glob_paths(glob_patterns) if glob_patterns else None
    return (literal_paths or None), compiled
377+
378+
355379
RE_COMPILED_TYPE = type(re.compile(''))
356380

357381

deepdiff/path.py

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import re
12
import logging
23
from ast import literal_eval
34
from functools import lru_cache
@@ -8,6 +9,30 @@
89
GET = 'GET'
910

1011

12+
class _WildcardToken:
    """Marker type for wildcard segments inside a parsed path.

    A dedicated type keeps the wildcard ``*`` (written ``root[*]``) distinct
    from a literal dict key ``'*'`` (written ``root['*']``): a token never
    compares equal to a plain string.
    """

    def __init__(self, symbol):
        # The textual form of the wildcard, e.g. '*' or '**'.
        self._symbol = symbol

    def __repr__(self):
        return self._symbol

    def __eq__(self, other):
        # Only another token carrying the same symbol is considered equal.
        if not isinstance(other, _WildcardToken):
            return False
        return other._symbol == self._symbol

    def __hash__(self):
        # Consistent with __eq__: tokens with the same symbol hash alike.
        return hash(('_WildcardToken', self._symbol))


# Matches exactly one path segment.
SINGLE_WILDCARD = _WildcardToken('*')
# Matches zero or more path segments.
MULTI_WILDCARD = _WildcardToken('**')
34+
35+
1136
class PathExtractionError(ValueError):
1237
pass
1338

@@ -21,6 +46,16 @@ def _add_to_elements(elements, elem, inside):
2146
if not elem:
2247
return
2348
if not elem.startswith('__'):
49+
# Handle wildcard tokens (* and **) as-is.
50+
# Unquoted root[*] arrives as bare '*' which matches the string check.
51+
# Quoted root['*'] arrives as "'*'" which does NOT match, so it falls
52+
# through to literal_eval and becomes the plain string '*' — which is
53+
# distinct from the _WildcardToken sentinel and thus treated as a
54+
# literal dict key.
55+
if elem in ('*', '**'):
56+
action = GETATTR if inside == '.' else GET
57+
elements.append((SINGLE_WILDCARD if elem == '*' else MULTI_WILDCARD, action))
58+
return
2459
remove_quotes = False
2560
if '𝆺𝅥𝅯' in elem or '\\' in elem:
2661
remove_quotes = True
@@ -321,3 +356,129 @@ def stringify_path(path, root_element=DEFAULT_FIRST_ELEMENT, quote_str="'{}'"):
321356
else:
322357
result.append(f".{element}")
323358
return ''.join(result)
359+
360+
361+
# Regex to detect wildcard segments in a raw path string.
# Matches [*], [**], .*, .**. It does not know about quoting by itself, so
# path_has_wildcard() blanks out quoted keys before applying it.
_WILDCARD_RE = re.compile(
    r'\[\*\*?\]'              # [*] or [**]
    r'|\.\*\*?(?=[.\[]|$)'    # .* or .** followed by . or [ or end of string
)

# A single- or double-quoted key such as 'name' or "it's".
_QUOTED_KEY_RE = re.compile(r"'[^']*'|\"[^\"]*\"")


def path_has_wildcard(path):
    """Check if a path string contains wildcard segments (* or **).

    Only unquoted segments count: ``root[*]`` and ``root.**`` are wildcards,
    whereas quoted literal keys such as ``root['*']``, ``root['[*]']`` or
    ``root['a.*.b']`` are not, even though their text contains
    wildcard-looking characters.
    """
    # Replace every quoted key with an empty quoted key so that its content
    # can never be mistaken for a wildcard segment.
    unquoted = _QUOTED_KEY_RE.sub("''", path)
    return bool(_WILDCARD_RE.search(unquoted))
372+
373+
374+
class GlobPathMatcher:
    """Matcher for one glob-style path pattern, parsed once at construction.

    A pattern such as ``root['users'][*]['password']`` is split into segments
    (the leading ``root`` is dropped) and concrete path strings are compared
    against those segments.

    ``*`` stands for exactly one segment (any key, index, or attribute).
    ``**`` stands for zero or more segments.
    """

    def __init__(self, pattern_path):
        self.original_pattern = pattern_path
        parsed = _path_to_elements(pattern_path, root_element=('root', GETATTR))
        # The root element is the same for every path, so it is not matched.
        self._pattern = parsed[1:]

    @staticmethod
    def _segments_of(path_string):
        """Parse *path_string* and return its segments without the root."""
        parsed = _path_to_elements(path_string, root_element=('root', GETATTR))
        return parsed[1:]

    def match(self, path_string):
        """Return True if *path_string* matches this pattern exactly."""
        segments = self._segments_of(path_string)
        return self._match_segments(self._pattern, segments, 0, 0)

    def match_or_is_ancestor(self, path_string):
        """Return True if *path_string* matches OR could lead to a match.

        Needed for ``include_paths``: a path whose descendants may match
        must not be pruned.
        """
        segments = self._segments_of(path_string)
        if self._match_segments(self._pattern, segments, 0, 0):
            return True
        return self._could_match_descendant(self._pattern, segments, 0, 0)

    def match_or_is_descendant(self, path_string):
        """Return True if *path_string* matches OR lies below a matching path.

        The pattern is tried against the full path and then against every
        proper prefix of it; a matching prefix means the path is inside a
        matched subtree.
        """
        segments = self._segments_of(path_string)
        pattern = self._pattern
        if self._match_segments(pattern, segments, 0, 0):
            return True
        return any(
            self._match_segments(pattern, segments[:end], 0, 0)
            for end in range(len(segments))
        )

    @staticmethod
    def _match_segments(pattern, target, pi, ti):
        """Match pattern[pi:] against target[ti:], backtracking on ``**``."""
        pattern_len = len(pattern)
        target_len = len(target)

        while pi < pattern_len and ti < target_len:
            token = pattern[pi][0]

            if token == MULTI_WILDCARD:
                # ** may swallow any number of segments: try each resume point.
                return any(
                    GlobPathMatcher._match_segments(pattern, target, pi + 1, resume)
                    for resume in range(ti, target_len + 1)
                )

            # * accepts any single segment; a literal must equal the segment
            # value (the access action is not compared).
            if token != SINGLE_WILDCARD and token != target[ti][0]:
                return False
            pi += 1
            ti += 1

        # Trailing ** tokens can match zero segments, so skip over them.
        while pi < pattern_len and pattern[pi][0] == MULTI_WILDCARD:
            pi += 1

        return pi == pattern_len and ti == target_len

    @staticmethod
    def _could_match_descendant(pattern, target, pi, ti):
        """Check whether *target* is a prefix of some path the pattern matches."""
        if ti == len(target):
            # Target used up: it is an ancestor only if pattern segments remain.
            return pi < len(pattern)
        if pi >= len(pattern):
            return False

        descend = GlobPathMatcher._could_match_descendant
        token = pattern[pi][0]

        if token == MULTI_WILDCARD:
            # Either ** matches nothing here, or it absorbs this segment.
            return (descend(pattern, target, pi + 1, ti) or
                    descend(pattern, target, pi, ti + 1))
        if token == SINGLE_WILDCARD or token == target[ti][0]:
            return descend(pattern, target, pi + 1, ti + 1)
        return False
475+
476+
477+
def compile_glob_paths(paths):
    """Turn glob pattern strings into ``GlobPathMatcher`` objects.

    Returns a list with one ``GlobPathMatcher`` per pattern, in input order,
    or ``None`` when *paths* is empty or ``None``.
    """
    if paths:
        return [GlobPathMatcher(pattern) for pattern in paths]
    return None

deepdiff/search.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
import logging
77

88
from deepdiff.helper import (
9-
strings, numbers, add_to_frozen_set, get_doc, dict_, RE_COMPILED_TYPE, ipranges
9+
strings, numbers, add_to_frozen_set, get_doc, dict_, RE_COMPILED_TYPE, ipranges,
10+
separate_wildcard_and_exact_paths,
1011
)
1112

1213

@@ -106,7 +107,8 @@ def __init__(self,
106107
self.obj: Any = obj
107108
self.case_sensitive: bool = case_sensitive if isinstance(item, strings) else True
108109
item = item if self.case_sensitive else (item.lower() if isinstance(item, str) else item)
109-
self.exclude_paths: SetOrdered = SetOrdered(exclude_paths)
110+
_exclude_exact, self.exclude_glob_paths = separate_wildcard_and_exact_paths(set(exclude_paths) if exclude_paths else None)
111+
self.exclude_paths: SetOrdered = SetOrdered(_exclude_exact) if _exclude_exact else SetOrdered()
110112
self.exclude_regex_paths: List[Pattern[str]] = [re.compile(exclude_regex_path) for exclude_regex_path in exclude_regex_paths]
111113
self.exclude_types: SetOrdered = SetOrdered(exclude_types)
112114
self.exclude_types_tuple: tuple[type, ...] = tuple(
@@ -193,6 +195,8 @@ def __skip_this(self, item: Any, parent: str) -> bool:
193195
skip = False
194196
if parent in self.exclude_paths:
195197
skip = True
198+
elif self.exclude_glob_paths and any(gp.match(parent) for gp in self.exclude_glob_paths):
199+
skip = True
196200
elif self.exclude_regex_paths and any(
197201
[exclude_regex_path.search(parent) for exclude_regex_path in self.exclude_regex_paths]):
198202
skip = True

docs/deephash_doc.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,12 @@ exclude_types: list, default = None
3232

3333
exclude_paths: list, default = None
3434
List of paths to exclude from the report. If only one item, you can pass it as a string instead of a list containing only one path.
35+
Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth.
3536

3637

3738
include_paths: list, default = None
3839
List of the only paths to include in the report. If only one item, you can pass it as a string.
40+
Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth.
3941

4042

4143
exclude_regex_paths: list, default = None

docs/diff_doc.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ encodings: List, default = None
5555

5656
exclude_paths: list, default = None
5757
:ref:`exclude_paths_label`
58-
List of paths to exclude from the report. If only one item, you can path it as a string.
58+
List of paths to exclude from the report. If only one item, you can pass it as a string.
59+
Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth.
5960

6061
exclude_regex_paths: list, default = None
6162
:ref:`exclude_regex_paths_label`
@@ -77,6 +78,7 @@ exclude_obj_callback_strict: function, default = None
7778
include_paths: list, default = None
7879
:ref:`include_paths_label`
7980
List of the only paths to include in the report. If only one item is in the list, you can pass it as a string.
81+
Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth.
8082

8183
include_obj_callback: function, default = None
8284
:ref:`include_obj_callback_label`

docs/exclude_paths.rst

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,49 @@ Example
5959
{'values_changed': {"root['foo']['bar']": {'new_value': 'banana', 'old_value': 'potato'}}}
6060

6161

62+
.. _wildcard_paths_label:
63+
64+
Wildcard (Glob) Paths
65+
---------------------
66+
67+
Both ``exclude_paths`` and ``include_paths`` support wildcard patterns for matching multiple paths at once:
68+
69+
- ``[*]`` or ``.*`` matches exactly **one** path segment (any key, index, or attribute).
70+
- ``[**]`` or ``.**`` matches **zero or more** path segments at any depth.
71+
72+
Wildcard patterns must use the full ``root`` prefix (shorthand keys are not supported for wildcards).
73+
74+
Exclude all ``password`` fields regardless of the parent key:
75+
>>> t1 = {"users": {"alice": {"name": "Alice", "password": "s1"}, "bob": {"name": "Bob", "password": "s2"}}}
76+
>>> t2 = {"users": {"alice": {"name": "Alice", "password": "x1"}, "bob": {"name": "Bob", "password": "x2"}}}
77+
>>> DeepDiff(t1, t2, exclude_paths=["root['users'][*]['password']"])
78+
{}
79+
80+
Include only the ``name`` field of each top-level key (``[*]`` matches exactly one segment):
81+
>>> t1 = {"a": {"name": "A", "secret": 1}, "b": {"name": "B", "secret": 2}}
82+
>>> t2 = {"a": {"name": "X", "secret": 1}, "b": {"name": "Y", "secret": 2}}
83+
>>> result = DeepDiff(t1, t2, include_paths=["root[*]['name']"])
84+
>>> set(result.get('values_changed', {}).keys()) == {"root['a']['name']", "root['b']['name']"}
85+
True
86+
87+
Use ``[**]`` to match at any depth:
88+
>>> t1 = {"config": {"db": {"password": "old"}, "cache": {"password": "old"}}}
89+
>>> t2 = {"config": {"db": {"password": "new"}, "cache": {"password": "new"}}}
90+
>>> DeepDiff(t1, t2, exclude_paths=["root[**]['password']"])
91+
{}
92+
93+
Literal keys named ``*`` or ``**`` are not treated as wildcards when quoted:
94+
>>> t1 = {"*": 1, "a": 2}
95+
>>> t2 = {"*": 10, "a": 20}
96+
>>> result = DeepDiff(t1, t2, exclude_paths=["root['*']"])
97+
>>> "root['a']" in result.get('values_changed', {})
98+
True
99+
100+
When both ``exclude_paths`` and ``include_paths`` apply to the same path, exclusion takes precedence.
101+
102+
Wildcards also work with ``DeepHash`` and ``DeepSearch`` exclude_paths.
103+
104+
62105
.. _exclude_regex_paths_label:
63106

64107
Exclude Regex Paths

0 commit comments

Comments
 (0)