-
Notifications
You must be signed in to change notification settings - Fork 59
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
264 additions
and
44 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,205 @@ | ||
import fnmatch | ||
import functools | ||
import re | ||
|
||
# | ||
# Globbing helpers | ||
# | ||
|
||
|
||
@functools.cache | ||
def _is_case_sensitive(flavour): | ||
return flavour.normcase("Aa") == "Aa" | ||
|
||
|
||
# fnmatch.translate() returns a regular expression that includes a prefix and | ||
# a suffix, which enable matching newlines and ensure the end of the string is | ||
# matched, respectively. These features are undesirable for our implementation | ||
# of PurePatch.match(), which represents path separators as newlines and joins | ||
# pattern segments together. As a workaround, we define a slice object that | ||
# can remove the prefix and suffix from any translate() result. See the | ||
# _compile_pattern_lines() function for more details. | ||
_FNMATCH_PREFIX, _FNMATCH_SUFFIX = fnmatch.translate("_").split("_") | ||
_FNMATCH_SLICE = slice(len(_FNMATCH_PREFIX), -len(_FNMATCH_SUFFIX)) | ||
_SWAP_SEP_AND_NEWLINE = { | ||
"/": str.maketrans({"/": "\n", "\n": "/"}), | ||
"\\": str.maketrans({"\\": "\n", "\n": "\\"}), | ||
} | ||
|
||
|
||
@functools.lru_cache() | ||
def _make_selector(pattern_parts, flavour, case_sensitive): | ||
pat = pattern_parts[0] | ||
if not pat: | ||
return _TerminatingSelector() | ||
if pat == "**": | ||
child_parts_idx = 1 | ||
while child_parts_idx < len(pattern_parts) and pattern_parts[child_parts_idx] == "**": | ||
child_parts_idx += 1 | ||
child_parts = pattern_parts[child_parts_idx:] | ||
if "**" in child_parts: | ||
cls = _DoubleRecursiveWildcardSelector | ||
else: | ||
cls = _RecursiveWildcardSelector | ||
else: | ||
child_parts = pattern_parts[1:] | ||
if pat == "..": | ||
cls = _ParentSelector | ||
elif "**" in pat: | ||
raise ValueError("Invalid pattern: '**' can only be an entire path component") | ||
else: | ||
cls = _WildcardSelector | ||
return cls(pat, child_parts, flavour, case_sensitive) | ||
|
||
|
||
@functools.lru_cache(maxsize=256) | ||
def _compile_pattern(pat, case_sensitive): | ||
flags = re.NOFLAG if case_sensitive else re.IGNORECASE | ||
return re.compile(fnmatch.translate(pat), flags).match | ||
|
||
|
||
@functools.lru_cache() | ||
def _compile_pattern_lines(pattern_lines, case_sensitive): | ||
"""Compile the given pattern lines to an `re.Pattern` object. | ||
The *pattern_lines* argument is a glob-style pattern (e.g. '*/*.py') with | ||
its path separators and newlines swapped (e.g. '*\n*.py`). By using | ||
newlines to separate path components, and not setting `re.DOTALL`, we | ||
ensure that the `*` wildcard cannot match path separators. | ||
The returned `re.Pattern` object may have its `match()` method called to | ||
match a complete pattern, or `search()` to match from the right. The | ||
argument supplied to these methods must also have its path separators and | ||
newlines swapped. | ||
""" | ||
|
||
# Match the start of the path, or just after a path separator | ||
parts = ["^"] | ||
for part in pattern_lines.splitlines(keepends=True): | ||
if part == "*\n": | ||
part = r".+\n" | ||
elif part == "*": | ||
part = r".+" | ||
else: | ||
# Any other component: pass to fnmatch.translate(). We slice off | ||
# the common prefix and suffix added by translate() to ensure that | ||
# re.DOTALL is not set, and the end of the string not matched, | ||
# respectively. With DOTALL not set, '*' wildcards will not match | ||
# path separators, because the '.' characters in the pattern will | ||
# not match newlines. | ||
part = fnmatch.translate(part)[_FNMATCH_SLICE] | ||
parts.append(part) | ||
# Match the end of the path, always. | ||
parts.append(r"\Z") | ||
flags = re.MULTILINE | ||
if not case_sensitive: | ||
flags |= re.IGNORECASE | ||
return re.compile("".join(parts), flags=flags) | ||
|
||
|
||
class _Selector: | ||
"""A selector matches a specific glob pattern part against the children | ||
of a given path.""" | ||
|
||
def __init__(self, child_parts, flavour, case_sensitive): | ||
self.child_parts = child_parts | ||
if child_parts: | ||
self.successor = _make_selector(child_parts, flavour, case_sensitive) | ||
self.dironly = True | ||
else: | ||
self.successor = _TerminatingSelector() | ||
self.dironly = False | ||
|
||
def select_from(self, parent_path): | ||
"""Iterate over all child paths of `parent_path` matched by this | ||
selector. This can contain parent_path itself.""" | ||
path_cls = type(parent_path) | ||
scandir = path_cls._scandir | ||
if not parent_path.is_dir(): | ||
return iter([]) | ||
return self._select_from(parent_path, scandir) | ||
|
||
|
||
class _TerminatingSelector: | ||
|
||
def _select_from(self, parent_path, scandir): | ||
yield parent_path | ||
|
||
|
||
class _ParentSelector(_Selector): | ||
|
||
def __init__(self, name, child_parts, flavour, case_sensitive): | ||
_Selector.__init__(self, child_parts, flavour, case_sensitive) | ||
|
||
def _select_from(self, parent_path, scandir): | ||
path = parent_path._make_child_relpath("..") | ||
for p in self.successor._select_from(path, scandir): | ||
yield p | ||
|
||
|
||
class _WildcardSelector(_Selector): | ||
|
||
def __init__(self, pat, child_parts, flavour, case_sensitive): | ||
_Selector.__init__(self, child_parts, flavour, case_sensitive) | ||
if case_sensitive is None: | ||
# TODO: evaluate case-sensitivity of each directory in _select_from() | ||
case_sensitive = _is_case_sensitive(flavour) | ||
self.match = _compile_pattern(pat, case_sensitive) | ||
|
||
def _select_from(self, parent_path, scandir): | ||
try: | ||
# We must close the scandir() object before proceeding to | ||
# avoid exhausting file descriptors when globbing deep trees. | ||
with scandir(parent_path) as scandir_it: | ||
entries = list(scandir_it) | ||
except OSError: | ||
pass | ||
else: | ||
for entry in entries: | ||
if self.dironly: | ||
try: | ||
if not entry.is_dir(): | ||
continue | ||
except OSError: | ||
continue | ||
name = entry.name | ||
if self.match(name): | ||
path = parent_path._make_child_relpath(name) | ||
for p in self.successor._select_from(path, scandir): | ||
yield p | ||
|
||
|
||
class _RecursiveWildcardSelector(_Selector): | ||
|
||
def __init__(self, pat, child_parts, flavour, case_sensitive): | ||
_Selector.__init__(self, child_parts, flavour, case_sensitive) | ||
|
||
def _iterate_directories(self, parent_path): | ||
yield parent_path | ||
for dirpath, dirnames, _ in parent_path.walk(): | ||
for dirname in dirnames: | ||
yield dirpath._make_child_relpath(dirname) | ||
|
||
def _select_from(self, parent_path, scandir): | ||
successor_select = self.successor._select_from | ||
for starting_point in self._iterate_directories(parent_path): | ||
for p in successor_select(starting_point, scandir): | ||
yield p | ||
|
||
|
||
class _DoubleRecursiveWildcardSelector(_RecursiveWildcardSelector): | ||
""" | ||
Like _RecursiveWildcardSelector, but also de-duplicates results from | ||
successive selectors. This is necessary if the pattern contains | ||
multiple non-adjacent '**' segments. | ||
""" | ||
|
||
def _select_from(self, parent_path, scandir): | ||
yielded = set() | ||
try: | ||
for p in super()._select_from(parent_path, scandir): | ||
if p not in yielded: | ||
yield p | ||
yielded.add(p) | ||
finally: | ||
yielded.clear() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.