Skip to content

Commit 8b9e4e1

Browse files
author
atollk
committed
Added filter_glob and exclude_glob to fs.walk.Walker.
These extend the class with options to include/exclude resources by their entire path, not just the last component. To do so, fs.wildcard had to undergo a rework to remove the dependency on the `re` module. Unit tests were added for all new/changed code.
1 parent 5f73778 commit 8b9e4e1

File tree

7 files changed

+449
-189
lines changed

7 files changed

+449
-189
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,15 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
1010

1111
### Added
1212

13+
- To `fs.walk.Walker`, added parameters `filter_glob` and `exclude_glob`.
14+
Closes [#459](https://github.com/PyFilesystem/pyfilesystem2/issues/459).
1315
- Added `fs.copy.copy_file_if`, `fs.copy.copy_dir_if`, and `fs.copy.copy_fs_if`.
1416
Closes [#458](https://github.com/PyFilesystem/pyfilesystem2/issues/458).
1517

1618
### Fixed
1719

20+
- Elaborated documentation of `filter_dirs` and `exclude_dirs` in `fs.walk.Walker`.
21+
Closes [#371](https://github.com/PyFilesystem/pyfilesystem2/issues/371).
1822
- Fixed performance bugs in `fs.copy.copy_dir_if_newer`. Test cases were adapted to catch those bugs in the future.
1923

2024

fs/base.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1604,8 +1604,8 @@ def check(self):
16041604
if self.isclosed():
16051605
raise errors.FilesystemClosed()
16061606

1607-
def match(self, patterns, name):
1608-
# type: (Optional[Iterable[Text]], Text) -> bool
1607+
def match(self, patterns, name, accept_prefix=False):
1608+
# type: (Optional[Iterable[Text]], Text, bool) -> bool
16091609
"""Check if a name matches any of a list of wildcards.
16101610
16111611
If a filesystem is case *insensitive* (such as Windows) then
@@ -1618,6 +1618,9 @@ def match(self, patterns, name):
16181618
patterns (list, optional): A list of patterns, e.g.
16191619
``['*.py']``, or `None` to match everything.
16201620
name (str): A file or directory name (not a path)
1621+
accept_prefix (bool): If ``True``, the name is
1622+
not required to match the wildcards themselves
1623+
but only needs to be a prefix of a string that does.
16211624
16221625
Returns:
16231626
bool: `True` if ``name`` matches any of the patterns.
@@ -1644,7 +1647,9 @@ def match(self, patterns, name):
16441647
case_sensitive = not typing.cast(
16451648
bool, self.getmeta().get("case_insensitive", False)
16461649
)
1647-
matcher = wildcard.get_matcher(patterns, case_sensitive)
1650+
matcher = wildcard.get_matcher(
1651+
patterns, case_sensitive, accept_prefix=accept_prefix
1652+
)
16481653
return matcher(name)
16491654

16501655
def tree(self, **kwargs):

fs/glob.py

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from .lrucache import LRUCache
1111
from ._repr import make_repr
1212
from .path import iteratepath
13-
from . import wildcard
1413

1514

1615
GlobMatch = namedtuple("GlobMatch", ["path", "info"])
@@ -27,6 +26,54 @@
2726
) # type: LRUCache[Tuple[Text, bool], Tuple[int, bool, Pattern]]
2827

2928

29+
def _translate(pattern, case_sensitive=True):
30+
# type: (Text, bool) -> Text
31+
"""Translate a wildcard pattern to a regular expression.
32+
33+
There is no way to quote meta-characters.
34+
Arguments:
35+
pattern (str): A wildcard pattern.
36+
case_sensitive (bool): Set to `False` to use a case
37+
insensitive regex (default `True`).
38+
39+
Returns:
40+
str: A regex equivalent to the given pattern.
41+
42+
"""
43+
if not case_sensitive:
44+
pattern = pattern.lower()
45+
i, n = 0, len(pattern)
46+
res = ""
47+
while i < n:
48+
c = pattern[i]
49+
i = i + 1
50+
if c == "*":
51+
res = res + "[^/]*"
52+
elif c == "?":
53+
res = res + "."
54+
elif c == "[":
55+
j = i
56+
if j < n and pattern[j] == "!":
57+
j = j + 1
58+
if j < n and pattern[j] == "]":
59+
j = j + 1
60+
while j < n and pattern[j] != "]":
61+
j = j + 1
62+
if j >= n:
63+
res = res + "\\["
64+
else:
65+
stuff = pattern[i:j].replace("\\", "\\\\")
66+
i = j + 1
67+
if stuff[0] == "!":
68+
stuff = "^" + stuff[1:]
69+
elif stuff[0] == "^":
70+
stuff = "\\" + stuff
71+
res = "%s[%s]" % (res, stuff)
72+
else:
73+
res = res + re.escape(c)
74+
return res
75+
76+
3077
def _translate_glob(pattern, case_sensitive=True):
3178
levels = 0
3279
recursive = False
@@ -37,7 +84,7 @@ def _translate_glob(pattern, case_sensitive=True):
3784
recursive = True
3885
else:
3986
re_patterns.append(
40-
"/" + wildcard._translate(component, case_sensitive=case_sensitive)
87+
"/" + _translate(component, case_sensitive=case_sensitive)
4188
)
4289
levels += 1
4390
re_glob = "(?ms)^" + "".join(re_patterns) + ("/$" if pattern.endswith("/") else "$")

fs/walk.py

Lines changed: 54 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ def __init__(
6262
filter_dirs=None, # type: Optional[List[Text]]
6363
exclude_dirs=None, # type: Optional[List[Text]]
6464
max_depth=None, # type: Optional[int]
65+
filter_glob=None, # type: Optional[List[Text]]
66+
exclude_glob=None, # type: Optional[List[Text]]
6567
):
6668
# type: (...) -> None
6769
"""Create a new `Walker` instance.
@@ -85,11 +87,22 @@ def __init__(
8587
any of these patterns will be removed from the walk.
8688
filter_dirs (list, optional): A list of patterns that will be used
8789
to match directory paths. The walk will only open directories
88-
that match at least one of these patterns.
90+
that match at least one of these patterns. Directories will
91+
only be returned if the final component matches one of the
92+
patterns.
8993
exclude_dirs (list, optional): A list of patterns that will be
9094
used to filter out directories from the walk. e.g.
91-
``['*.svn', '*.git']``.
95+
``['*.svn', '*.git']``. Directories matching any of these
96+
patterns will be removed from the walk.
9297
max_depth (int, optional): Maximum directory depth to walk.
98+
filter_glob (list, optional): If supplied, this parameter
99+
should be a list of path patterns e.g. ``["foo/**/*.py"]``.
100+
Resources will only be returned if their global path or
101+
an extension of it matches one of the patterns.
102+
exclude_glob (list, optional): If supplied, this parameter
103+
should be a list of path patterns e.g. ``["foo/**/*.py"]``.
104+
Resources will not be returned if their global path or
105+
an extension of it matches one of the patterns.
93106
94107
"""
95108
if search not in ("breadth", "depth"):
@@ -109,6 +122,8 @@ def __init__(
109122
self.exclude = exclude
110123
self.filter_dirs = filter_dirs
111124
self.exclude_dirs = exclude_dirs
125+
self.filter_glob = filter_glob
126+
self.exclude_glob = exclude_glob
112127
self.max_depth = max_depth
113128
super(Walker, self).__init__()
114129

@@ -180,6 +195,8 @@ def __repr__(self):
180195
filter_dirs=(self.filter_dirs, None),
181196
exclude_dirs=(self.exclude_dirs, None),
182197
max_depth=(self.max_depth, None),
198+
filter_glob=(self.filter_glob, None),
199+
exclude_glob=(self.exclude_glob, None),
183200
)
184201

185202
def _iter_walk(
@@ -198,9 +215,18 @@ def _iter_walk(
198215
def _check_open_dir(self, fs, path, info):
199216
# type: (FS, Text, Info) -> bool
200217
"""Check if a directory should be considered in the walk."""
218+
full_path = ("" if path == "/" else path) + "/" + info.name
201219
if self.exclude_dirs is not None and fs.match(self.exclude_dirs, info.name):
202220
return False
203-
if self.filter_dirs is not None and not fs.match(self.filter_dirs, info.name):
221+
if self.exclude_glob is not None and fs.match(self.exclude_glob, full_path):
222+
return False
223+
if self.filter_dirs is not None and not fs.match(
224+
self.filter_dirs, info.name, accept_prefix=True
225+
):
226+
return False
227+
if self.filter_glob is not None and not fs.match(
228+
self.filter_glob, full_path, accept_prefix=True
229+
):
204230
return False
205231
return self.check_open_dir(fs, path, info)
206232

@@ -247,6 +273,26 @@ def check_scan_dir(self, fs, path, info):
247273
"""
248274
return True
249275

276+
def _check_file(self, fs, dir_path, info):
277+
# type: (FS, Text, Info) -> bool
278+
"""Check if a filename should be included."""
279+
# Weird check required for backwards compatibility,
280+
# when _check_file did not exist.
281+
if Walker._check_file == type(self)._check_file:
282+
if self.exclude is not None and fs.match(self.exclude, info.name):
283+
return False
284+
if self.exclude_glob is not None and fs.match(
285+
self.exclude_glob, dir_path + "/" + info.name
286+
):
287+
return False
288+
if self.filter is not None and not fs.match(self.filter, info.name):
289+
return False
290+
if self.filter_glob is not None and not fs.match(
291+
self.filter_glob, dir_path + "/" + info.name, accept_prefix=True
292+
):
293+
return False
294+
return self.check_file(fs, info)
295+
250296
def check_file(self, fs, info):
251297
# type: (FS, Info) -> bool
252298
"""Check if a filename should be included.
@@ -261,9 +307,7 @@ def check_file(self, fs, info):
261307
bool: `True` if the file should be included.
262308
263309
"""
264-
if self.exclude is not None and fs.match(self.exclude, info.name):
265-
return False
266-
return fs.match(self.filter, info.name)
310+
return True
267311

268312
def _scan(
269313
self,
@@ -420,7 +464,7 @@ def _walk_breadth(
420464
_calculate_depth = self._calculate_depth
421465
_check_open_dir = self._check_open_dir
422466
_check_scan_dir = self._check_scan_dir
423-
_check_file = self.check_file
467+
_check_file = self._check_file
424468

425469
depth = _calculate_depth(path)
426470

@@ -434,7 +478,7 @@ def _walk_breadth(
434478
if _check_scan_dir(fs, dir_path, info, _depth):
435479
push(_combine(dir_path, info.name))
436480
else:
437-
if _check_file(fs, info):
481+
if _check_file(fs, dir_path, info):
438482
yield dir_path, info # Found a file
439483
yield dir_path, None # End of directory
440484

@@ -453,7 +497,7 @@ def _walk_depth(
453497
_calculate_depth = self._calculate_depth
454498
_check_open_dir = self._check_open_dir
455499
_check_scan_dir = self._check_scan_dir
456-
_check_file = self.check_file
500+
_check_file = self._check_file
457501
depth = _calculate_depth(path)
458502

459503
stack = [
@@ -485,7 +529,7 @@ def _walk_depth(
485529
else:
486530
yield dir_path, info
487531
else:
488-
if _check_file(fs, info):
532+
if _check_file(fs, dir_path, info):
489533
yield dir_path, info
490534

491535

0 commit comments

Comments
 (0)