Skip to content

Commit a844a9b

Browse files
author
atollk
committed
Added filter_glob and exclude_glob to fs.walk.Walker.
These extend the class with options to include or exclude resources by their entire path, not just by their last component. To do so, `fs.wildcard` had to be reworked to remove its dependency on the `re` module. Unit tests were added for all new and changed code.
1 parent 50b1c99 commit a844a9b

File tree

8 files changed

+536
-132
lines changed

8 files changed

+536
-132
lines changed

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,15 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
88

99
## Unreleased
1010

11+
### Added
12+
13+
- To `fs.walk.Walker`, added parameters `filter_glob` and `exclude_glob`.
14+
Closes [#459](https://github.com/PyFilesystem/pyfilesystem2/issues/459).
15+
16+
### Fixed
17+
- Elaborated documentation of `filter_dirs` and `exclude_dirs` in `fs.walk.Walker`.
18+
Closes [#371](https://github.com/PyFilesystem/pyfilesystem2/issues/371).
19+
1120

1221
## [2.4.16] - 2022-05-02
1322

fs/base.py

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from contextlib import closing
2222
from functools import partial, wraps
2323

24-
from . import copy, errors, fsencode, iotools, tools, walk, wildcard
24+
from . import copy, errors, fsencode, iotools, tools, walk, wildcard, glob
2525
from .copy import copy_modified_time
2626
from .glob import BoundGlobber
2727
from .mode import validate_open_mode
@@ -1653,8 +1653,8 @@ def check(self):
16531653
if self.isclosed():
16541654
raise errors.FilesystemClosed()
16551655

1656-
def match(self, patterns, name):
1657-
# type: (Optional[Iterable[Text]], Text) -> bool
1656+
def match(self, patterns, name, accept_prefix=False):
1657+
# type: (Optional[Iterable[Text]], Text, bool) -> bool
16581658
"""Check if a name matches any of a list of wildcards.
16591659
16601660
If a filesystem is case *insensitive* (such as Windows) then
@@ -1696,6 +1696,61 @@ def match(self, patterns, name):
16961696
matcher = wildcard.get_matcher(patterns, case_sensitive)
16971697
return matcher(name)
16981698

1699+
def match_glob(self, patterns, path, accept_prefix=False):
    # type: (Optional[Iterable[Text]], Text, bool) -> bool
    """Check if a path matches any of a list of glob patterns.

    The comparison honours the ``case_insensitive`` entry of the
    filesystem meta information: when it is set, ``*.py`` matches the
    same paths as ``*.PY``; otherwise the two patterns match
    different paths.

    Arguments:
        patterns (list, optional): A list of patterns, e.g.
            ``['*.py']``, or `None` to match everything.
        path (str): A resource path, starting with "/".
        accept_prefix (bool): If ``True``, the path does not have
            to match a pattern itself; it is enough for it to be a
            prefix of a string that does.

    Returns:
        bool: `True` if ``path`` matches any of the patterns.

    Raises:
        TypeError: If ``patterns`` is a single string instead of
            a list (or `None`).
        ValueError: If ``path`` is not a string starting with "/".

    Example:
        >>> my_fs.match_glob(['*.py'], '/__init__.py')
        True
        >>> my_fs.match_glob(['*.jpg', '*.png'], '/foo.gif')
        False
        >>> my_fs.match_glob(['dir/file.txt'], '/dir/', accept_prefix=True)
        True
        >>> my_fs.match_glob(['dir/file.txt'], '/dir/gile.txt', accept_prefix=True)
        False

    Note:
        If ``patterns`` is `None`, this method always returns
        `True`; the path is not validated in that case.

    """
    # ``None`` means "no filter": accept before validating anything else.
    if patterns is None:
        return True
    if not (path and path[0] == "/"):
        raise ValueError("%s needs to be a string starting with /" % path)
    # A bare string would silently be iterated character by character.
    if isinstance(patterns, six.text_type):
        raise TypeError("patterns must be a list or sequence")
    meta = self.getmeta()
    case_insensitive = typing.cast(bool, meta.get("case_insensitive", False))
    is_match = glob.get_matcher(
        patterns, not case_insensitive, accept_prefix=accept_prefix
    )
    return is_match(path)
1753+
16991754
def tree(self, **kwargs):
17001755
# type: (**Any) -> None
17011756
"""Render a tree view of the filesystem to stdout or a file.

fs/errors.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
"OperationFailed",
4242
"OperationTimeout",
4343
"PathError",
44+
"PatternError",
4445
"PermissionDenied",
4546
"RemoteConnectionError",
4647
"RemoveRootError",
@@ -346,3 +347,19 @@ class UnsupportedHash(ValueError):
346347
not supported by hashlib.
347348
348349
"""
350+
351+
352+
class PatternError(ValueError):
    """A string pattern with invalid syntax was given.

    Attributes:
        pattern (str): The offending pattern.
        position (int): Index in ``pattern`` at which it is invalid.
        exc (Exception, optional): The underlying exception, if any.

    """

    default_message = "pattern '{pattern}' is invalid at position {position}"

    def __init__(self, pattern, position, exc=None, msg=None):  # noqa: D107
        # type: (Text, int, Optional[Exception], Optional[Text]) -> None
        self.pattern = pattern
        self.position = position
        self.exc = exc
        # Keep the *template* (not the rendered text) so that
        # ``__reduce__`` can hand it back to ``__init__`` unchanged.
        self._msg = msg or self.default_message
        # ``msg`` is a ``str.format`` template; ``{pattern}``,
        # ``{position}`` and ``{exc}`` are available as placeholders.
        super(PatternError, self).__init__(
            self._msg.format(pattern=pattern, position=position, exc=exc)
        )

    def __reduce__(self):
        # Rebuild through ``__init__`` with the original arguments so the
        # exception survives pickling (e.g. across process boundaries).
        return type(self), (self.pattern, self.position, self.exc, self._msg)

fs/glob.py

Lines changed: 173 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,31 @@
44
from __future__ import unicode_literals
55

66
import typing
7+
from functools import partial
78

89
import re
910
from collections import namedtuple
1011

11-
from . import wildcard
1212
from ._repr import make_repr
1313
from .lrucache import LRUCache
1414
from .path import iteratepath
1515

16+
1617
GlobMatch = namedtuple("GlobMatch", ["path", "info"])
1718
Counts = namedtuple("Counts", ["files", "directories", "data"])
1819
LineCounts = namedtuple("LineCounts", ["lines", "non_blank"])
1920

2021
if typing.TYPE_CHECKING:
21-
from typing import Iterator, List, Optional, Pattern, Text, Tuple
22-
22+
from typing import (
23+
Iterator,
24+
List,
25+
Optional,
26+
Pattern,
27+
Text,
28+
Tuple,
29+
Iterable,
30+
Callable,
31+
)
2332
from .base import FS
2433

2534

@@ -28,17 +37,87 @@
2837
) # type: LRUCache[Tuple[Text, bool], Tuple[int, bool, Pattern]]
2938

3039

40+
def _split_pattern_by_rec(pattern):
    # type: (Text) -> List[Text]
    """Split a glob pattern at its directory separators (``/``).

    A ``/`` that appears after an opening ``[`` and before the next
    ``]`` (for example in ``[/]``) belongs to the current component and
    does not cause a split.

    Arguments:
        pattern (str): A glob pattern.

    Returns:
        list: The components of ``pattern``; a leading or trailing
        separator yields an empty first or last component.

    """
    components = []
    start = 0  # index of the first character of the current component
    in_bracket = False
    for index, char in enumerate(pattern):
        if char == "/":
            if not in_bracket:
                components.append(pattern[start:index])
                start = index + 1
        elif char == "[":
            in_bracket = True
        elif char == "]":
            in_bracket = False
    # Whatever follows the last separator is the final component.
    components.append(pattern[start:])
    return components
58+
59+
60+
def _translate(pattern, case_sensitive=True):
    # type: (Text, bool) -> Text
    """Translate a single-component wildcard pattern to a regular expression.

    ``*`` and ``?`` become ``[^/]*`` and ``[^/]``, so neither can match a
    path separator. ``[...]`` is turned into a regex character class
    (with a leading ``!`` meaning negation), an unterminated ``[`` is
    taken literally, and every other character is escaped.

    There is no way to quote meta-characters.

    Arguments:
        pattern (str): A wildcard pattern.
        case_sensitive (bool): Set to `False` to lower-case the
            pattern before translating it (default `True`).

    Returns:
        str: A regex equivalent to the given pattern.

    """
    if not case_sensitive:
        pattern = pattern.lower()

    simple = {"*": "[^/]*", "?": "[^/]"}
    length = len(pattern)
    pieces = []
    pos = 0
    while pos < length:
        char = pattern[pos]
        pos += 1

        if char in simple:
            pieces.append(simple[char])
            continue
        if char != "[":
            pieces.append(re.escape(char))
            continue

        # Look for the "]" closing this class. A "!" right after the "["
        # and a "]" right after that are part of the class, not its end.
        scan = pos
        if pattern[scan : scan + 1] == "!":
            scan += 1
        if pattern[scan : scan + 1] == "]":
            scan += 1
        close = pattern.find("]", scan)
        if close == -1:
            # Unterminated class: the "[" is an ordinary character.
            pieces.append("\\[")
            continue

        body = pattern[pos:close].replace("\\", "\\\\")
        pos = close + 1
        if body[0] == "!":
            body = "^" + body[1:]
        elif body[0] == "^":
            # A literal "^" must not be read as regex negation.
            body = "\\" + body
        pieces.append("[" + body + "]")

    return "".join(pieces)
106+
107+
31108
def _translate_glob(pattern, case_sensitive=True):
32109
levels = 0
33110
recursive = False
34111
re_patterns = [""]
35112
for component in iteratepath(pattern):
36-
if component == "**":
37-
re_patterns.append(".*/?")
113+
if "**" in component:
38114
recursive = True
115+
split = component.split("**")
116+
split_re = [_translate(s, case_sensitive=case_sensitive) for s in split]
117+
re_patterns.append("/?" + ".*/?".join(split_re))
39118
else:
40119
re_patterns.append(
41-
"/" + wildcard._translate(component, case_sensitive=case_sensitive)
120+
"/" + _translate(component, case_sensitive=case_sensitive)
42121
)
43122
levels += 1
44123
re_glob = "(?ms)^" + "".join(re_patterns) + ("/$" if pattern.endswith("/") else "$")
@@ -72,6 +151,8 @@ def match(pattern, path):
72151
except KeyError:
73152
levels, recursive, re_pattern = _translate_glob(pattern, case_sensitive=True)
74153
_PATTERN_CACHE[(pattern, True)] = (levels, recursive, re_pattern)
154+
if path and path[0] != "/":
155+
path = "/" + path
75156
return bool(re_pattern.match(path))
76157

77158

@@ -92,9 +173,95 @@ def imatch(pattern, path):
92173
except KeyError:
93174
levels, recursive, re_pattern = _translate_glob(pattern, case_sensitive=True)
94175
_PATTERN_CACHE[(pattern, False)] = (levels, recursive, re_pattern)
176+
if path and path[0] != "/":
177+
path = "/" + path
95178
return bool(re_pattern.match(path))
96179

97180

181+
def match_any(patterns, path):
    # type: (Iterable[Text], Text) -> bool
    """Test if a path matches any of a list of patterns.

    Will return `True` if ``patterns`` is an empty list.

    Arguments:
        patterns (list): A list of glob patterns, e.g. ``["*.py",
            "*.pyc"]``.
        path (str): A resource path.

    Returns:
        bool: `True` if the path matches at least one of the patterns.

    """
    if patterns:
        for candidate in patterns:
            if match(candidate, path):
                return True
        return False
    # No patterns at all means "no filter".
    return True
199+
200+
201+
def imatch_any(patterns, path):
    # type: (Iterable[Text], Text) -> bool
    """Test if a path matches any of a list of patterns (case insensitive).

    Will return `True` if ``patterns`` is an empty list.

    Arguments:
        patterns (list): A list of glob patterns, e.g. ``["*.py",
            "*.pyc"]``.
        path (str): A resource path.

    Returns:
        bool: `True` if the path matches at least one of the patterns.

    """
    if patterns:
        for candidate in patterns:
            if imatch(candidate, path):
                return True
        return False
    # No patterns at all means "no filter".
    return True
219+
220+
221+
def get_matcher(patterns, case_sensitive, accept_prefix=False):
    # type: (Iterable[Text], bool, bool) -> Callable[[Text], bool]
    """Get a callable that matches paths against the given patterns.

    Arguments:
        patterns (list): An iterable of glob patterns, e.g. ``["*.py",
            "*.pyc"]``. It is consumed once, when the matcher is built.
        case_sensitive (bool): If ``True``, then the callable will be case
            sensitive, otherwise it will be case insensitive.
        accept_prefix (bool): If ``True``, the path is
            not required to match the patterns themselves
            but only needs to be a prefix of a string that does.

    Returns:
        callable: a matcher that will return `True` if the path given as
        an argument matches any of the given patterns. If ``patterns``
        is empty, the matcher accepts every path.

    Example:
        >>> from fs import glob
        >>> is_python = glob.get_matcher(['*.py'], True)
        >>> is_python('/__init__.py')
        True
        >>> is_python('/foo.txt')
        False

    """
    # Materialise the patterns up front: a generator is always truthy and
    # would be exhausted by the first call of the returned matcher, making
    # every later call return ``False``.
    patterns = [] if patterns is None else list(patterns)
    if not patterns:
        return lambda name: True

    if accept_prefix:
        # For every pattern also accept each of its leading directory
        # prefixes, both with and without a trailing separator.
        expanded = []
        for pattern in patterns:
            components = _split_pattern_by_rec(pattern)
            for count in range(1, len(components)):
                prefix = "/".join(components[:count])
                expanded.append(prefix)
                expanded.append(prefix + "/")
            expanded.append(pattern)
        patterns = expanded

    matcher = match_any if case_sensitive else imatch_any
    return partial(matcher, patterns)
263+
264+
98265
class Globber(object):
99266
"""A generator of glob results."""
100267

0 commit comments

Comments
 (0)