fix: copy over globs translate (#619)

thomasrockhu-codecov · web-flow · commit 85dbe9e5f11a · 2025-02-13T12:22:38.000-03:00
* fix: copy over globs translate

* fix: update comment
diff --git a/codecov_cli/helpers/folder_searcher.py b/codecov_cli/helpers/folder_searcher.py
@@ -1,10 +1,14 @@
 import functools
+import logging
 import os
 import pathlib
 import re
-from fnmatch import translate
 from typing import Generator, List, Optional, Pattern
 
+from codecov_cli.helpers.glob import translate
+
+logger = logging.getLogger("codecovcli")
+
 
 def _is_included(
     filename_include_regex: Pattern,
@@ -99,5 +103,9 @@ def globs_to_regex(patterns: List[str]) -> Optional[Pattern]:
     if not patterns:
         return None
 
-    regex_str = ["(" + translate(pattern) + ")" for pattern in patterns]
-    return re.compile("|".join(regex_str))
+    regex_patterns = []
+    for pattern in patterns:
+        regex_pattern = translate(pattern, recursive=True, include_hidden=True)
+        logger.debug(f"Translating `{pattern}` into `{regex_pattern}`")
+        regex_patterns.append(regex_pattern)
+    return re.compile("|".join(regex_patterns))
diff --git a/codecov_cli/helpers/glob.py b/codecov_cli/helpers/glob.py
@@ -0,0 +1,146 @@
+"""
+This is a copy of the function of the same name in the python
+standard library. The reason for its inclusion is that it has
+been added in python3.13, but not earlier versions
+https://github.com/python/cpython/blob/main/Lib/glob.py
+https://github.com/python/cpython/blob/main/Lib/fnmatch.py
+https://github.com/python/cpython/blob/main/Lib/functools.py
+"""
+
+import os
+import re
+
+from glob import fnmatch
+
+
+def translate(pat, *, recursive=False, include_hidden=False, seps=None):
+    """Translate a pathname with shell wildcards to a regular expression.
+
+    If `recursive` is true, the pattern segment '**' will match any number of
+    path segments.
+
+    If `include_hidden` is true, wildcards can match path segments beginning
+    with a dot ('.').
+
+    If a sequence of separator characters is given to `seps`, they will be
+    used to split the pattern into segments and match path separators. If not
+    given, os.path.sep and os.path.altsep (where available) are used.
+    """
+    if not seps:
+        if os.path.altsep:
+            seps = (os.path.sep, os.path.altsep)
+        else:
+            seps = os.path.sep
+    escaped_seps = ''.join(map(re.escape, seps))
+    any_sep = f'[{escaped_seps}]' if len(seps) > 1 else escaped_seps
+    not_sep = f'[^{escaped_seps}]'
+    if include_hidden:
+        one_last_segment = f'{not_sep}+'
+        one_segment = f'{one_last_segment}{any_sep}'
+        any_segments = f'(?:.+{any_sep})?'
+        any_last_segments = '.*'
+    else:
+        one_last_segment = f'[^{escaped_seps}.]{not_sep}*'
+        one_segment = f'{one_last_segment}{any_sep}'
+        any_segments = f'(?:{one_segment})*'
+        any_last_segments = f'{any_segments}(?:{one_last_segment})?'
+
+    results = []
+    parts = re.split(any_sep, pat)
+    last_part_idx = len(parts) - 1
+    for idx, part in enumerate(parts):
+        if part == '*':
+            results.append(one_segment if idx < last_part_idx else one_last_segment)
+        elif recursive and part == '**':
+            if idx < last_part_idx:
+                if parts[idx + 1] != '**':
+                    results.append(any_segments)
+            else:
+                results.append(any_last_segments)
+        else:
+            if part:
+                if not include_hidden and part[0] in '*?':
+                    results.append(r'(?!\.)')
+                results.extend(_translate(part, f'{not_sep}*', not_sep)[0])
+            if idx < last_part_idx:
+                results.append(any_sep)
+    res = ''.join(results)
+    return fr'(?s:{res})\Z'
+
+
+_re_setops_sub = re.compile(r'([&~|])').sub
+def _translate(pat, star, question_mark):
+    res = []
+    add = res.append
+    star_indices = []
+
+    i, n = 0, len(pat)
+    while i < n:
+        c = pat[i]
+        i = i+1
+        if c == '*':
+            # store the position of the wildcard
+            star_indices.append(len(res))
+            add(star)
+            # compress consecutive `*` into one
+            while i < n and pat[i] == '*':
+                i += 1
+        elif c == '?':
+            add(question_mark)
+        elif c == '[':
+            j = i
+            if j < n and pat[j] == '!':
+                j = j+1
+            if j < n and pat[j] == ']':
+                j = j+1
+            while j < n and pat[j] != ']':
+                j = j+1
+            if j >= n:
+                add('\\[')
+            else:
+                stuff = pat[i:j]
+                if '-' not in stuff:
+                    stuff = stuff.replace('\\', r'\\')
+                else:
+                    chunks = []
+                    k = i+2 if pat[i] == '!' else i+1
+                    while True:
+                        k = pat.find('-', k, j)
+                        if k < 0:
+                            break
+                        chunks.append(pat[i:k])
+                        i = k+1
+                        k = k+3
+                    chunk = pat[i:j]
+                    if chunk:
+                        chunks.append(chunk)
+                    else:
+                        chunks[-1] += '-'
+                    # Remove empty ranges -- invalid in RE.
+                    for k in range(len(chunks)-1, 0, -1):
+                        if chunks[k-1][-1] > chunks[k][0]:
+                            chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
+                            del chunks[k]
+                    # Escape backslashes and hyphens for set difference (--).
+                    # Hyphens that create ranges shouldn't be escaped.
+                    stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
+                                     for s in chunks)
+                i = j+1
+                if not stuff:
+                    # Empty range: never match.
+                    add('(?!)')
+                elif stuff == '!':
+                    # Negated empty range: match any character.
+                    add('.')
+                else:
+                    # Escape set operations (&&, ~~ and ||).
+                    stuff = _re_setops_sub(r'\\\1', stuff)
+                    if stuff[0] == '!':
+                        stuff = '^' + stuff[1:]
+                    elif stuff[0] in ('^', '['):
+                        stuff = '\\' + stuff
+                    add(f'[{stuff}]')
+        else:
+            add(re.escape(c))
+    assert i == n
+    return res, star_indices