Skip to content

Commit 85dbe9e

Browse files
fix: copy over globs translate (#619)
* fix: copy over globs translate * fix: update comment
1 parent b75406d commit 85dbe9e

2 files changed

Lines changed: 157 additions & 3 deletions

File tree

codecov_cli/helpers/folder_searcher.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
import functools
2+
import logging
23
import os
34
import pathlib
45
import re
5-
from fnmatch import translate
66
from typing import Generator, List, Optional, Pattern
77

8+
from codecov_cli.helpers.glob import translate
9+
10+
logger = logging.getLogger("codecovcli")
11+
812

913
def _is_included(
1014
filename_include_regex: Pattern,
@@ -99,5 +103,9 @@ def globs_to_regex(patterns: List[str]) -> Optional[Pattern]:
99103
if not patterns:
100104
return None
101105

102-
regex_str = ["(" + translate(pattern) + ")" for pattern in patterns]
103-
return re.compile("|".join(regex_str))
106+
regex_patterns = []
107+
for pattern in patterns:
108+
regex_pattern = translate(pattern, recursive=True, include_hidden=True)
109+
logger.debug(f"Translating `{pattern}` into `{regex_pattern}`")
110+
regex_patterns.append(regex_pattern)
111+
return re.compile("|".join(regex_patterns))

codecov_cli/helpers/glob.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
"""
2+
This is a copy of the function of the same name in the python
3+
standard library. The reason for its inclusion is that it has
4+
been added in python3.13, but not earlier versions
5+
https://github.com/python/cpython/blob/main/Lib/glob.py
6+
https://github.com/python/cpython/blob/main/Lib/fnmatch.py
7+
https://github.com/python/cpython/blob/main/Lib/functools.py
8+
"""
9+
10+
import os
11+
import re
12+
13+
from glob import fnmatch
14+
15+
16+
def translate(pat, *, recursive=False, include_hidden=False, seps=None):
    """Build a regular expression string from a shell-style path pattern.

    The pattern is split on path separators and each segment is converted
    on its own:

    * a segment that is exactly ``*`` matches one non-empty path segment;
    * when `recursive` is true, a segment that is exactly ``**`` matches
      any number of path segments;
    * any other non-empty segment is converted by ``_translate``, with
      wildcards restricted so that they never match a separator.

    Unless `include_hidden` is true, wildcards are prevented from matching
    a path segment that begins with a dot ('.').

    `seps` is a sequence of separator characters used both to split the
    pattern and to match separators in paths. When it is empty or omitted,
    os.path.sep and (where available) os.path.altsep are used.

    The returned string has the form ``(?s:...)\\Z``.
    """
    if not seps:
        seps = (os.path.sep, os.path.altsep) if os.path.altsep else os.path.sep

    sep_chars = ''.join(re.escape(sep) for sep in seps)
    # A single separator needs no character class around it.
    sep_re = f'[{sep_chars}]' if len(seps) > 1 else sep_chars
    non_sep_re = f'[^{sep_chars}]'

    if include_hidden:
        final_segment = f'{non_sep_re}+'
        inner_segment = f'{final_segment}{sep_re}'
        deep_prefix = f'(?:.+{sep_re})?'
        deep_suffix = '.*'
    else:
        # The first character of every matched segment must not be a dot.
        final_segment = f'[^{sep_chars}.]{non_sep_re}*'
        inner_segment = f'{final_segment}{sep_re}'
        deep_prefix = f'(?:{inner_segment})*'
        deep_suffix = f'{deep_prefix}(?:{final_segment})?'

    segments = re.split(sep_re, pat)
    final_idx = len(segments) - 1
    pieces = []
    for idx, segment in enumerate(segments):
        is_final = idx == final_idx
        if segment == '*':
            pieces.append(final_segment if is_final else inner_segment)
        elif recursive and segment == '**':
            if is_final:
                pieces.append(deep_suffix)
            elif segments[idx + 1] != '**':
                # In a run of consecutive '**' segments only the last one
                # emits anything; the earlier ones are skipped.
                pieces.append(deep_prefix)
        else:
            if segment:
                if not include_hidden and segment[0] in '*?':
                    # A leading wildcard must not swallow a leading dot.
                    pieces.append(r'(?!\.)')
                pieces.extend(
                    _translate(segment, f'{non_sep_re}*', non_sep_re)[0]
                )
            if not is_final:
                pieces.append(sep_re)

    body = ''.join(pieces)
    return fr'(?s:{body})\Z'
69+
70+
71+
_re_setops_sub = re.compile(r'([&~|])').sub
72+
def _translate(pat, star, question_mark):
73+
res = []
74+
add = res.append
75+
star_indices = []
76+
77+
i, n = 0, len(pat)
78+
while i < n:
79+
c = pat[i]
80+
i = i+1
81+
if c == '*':
82+
# store the position of the wildcard
83+
star_indices.append(len(res))
84+
add(star)
85+
# compress consecutive `*` into one
86+
while i < n and pat[i] == '*':
87+
i += 1
88+
elif c == '?':
89+
add(question_mark)
90+
elif c == '[':
91+
j = i
92+
if j < n and pat[j] == '!':
93+
j = j+1
94+
if j < n and pat[j] == ']':
95+
j = j+1
96+
while j < n and pat[j] != ']':
97+
j = j+1
98+
if j >= n:
99+
add('\\[')
100+
else:
101+
stuff = pat[i:j]
102+
if '-' not in stuff:
103+
stuff = stuff.replace('\\', r'\\')
104+
else:
105+
chunks = []
106+
k = i+2 if pat[i] == '!' else i+1
107+
while True:
108+
k = pat.find('-', k, j)
109+
if k < 0:
110+
break
111+
chunks.append(pat[i:k])
112+
i = k+1
113+
k = k+3
114+
chunk = pat[i:j]
115+
if chunk:
116+
chunks.append(chunk)
117+
else:
118+
chunks[-1] += '-'
119+
# Remove empty ranges -- invalid in RE.
120+
for k in range(len(chunks)-1, 0, -1):
121+
if chunks[k-1][-1] > chunks[k][0]:
122+
chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
123+
del chunks[k]
124+
# Escape backslashes and hyphens for set difference (--).
125+
# Hyphens that create ranges shouldn't be escaped.
126+
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
127+
for s in chunks)
128+
i = j+1
129+
if not stuff:
130+
# Empty range: never match.
131+
add('(?!)')
132+
elif stuff == '!':
133+
# Negated empty range: match any character.
134+
add('.')
135+
else:
136+
# Escape set operations (&&, ~~ and ||).
137+
stuff = _re_setops_sub(r'\\\1', stuff)
138+
if stuff[0] == '!':
139+
stuff = '^' + stuff[1:]
140+
elif stuff[0] in ('^', '['):
141+
stuff = '\\' + stuff
142+
add(f'[{stuff}]')
143+
else:
144+
add(re.escape(c))
145+
assert i == n
146+
return res, star_indices

0 commit comments

Comments
 (0)