Skip to content

Commit d85d9db

Browse files
author
atollk
committed
Changes from code review.
1 parent a844a9b commit d85d9db

File tree

5 files changed

+116
-58
lines changed

5 files changed

+116
-58
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
1010

1111
### Added
1212

13-
- To `fs.walk.Walker`, added parameters `filter_glob` and `exclude_glob`.
13+
- Added `filter_glob` and `exclude_glob` parameters to `fs.walk.Walker`.
1414
Closes [#459](https://github.com/PyFilesystem/pyfilesystem2/issues/459).
1515

1616
### Fixed

fs/base.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1653,8 +1653,8 @@ def check(self):
16531653
if self.isclosed():
16541654
raise errors.FilesystemClosed()
16551655

1656-
def match(self, patterns, name, accept_prefix=False):
1657-
# type: (Optional[Iterable[Text]], Text, bool) -> bool
1656+
def match(self, patterns, name):
1657+
# type: (Optional[Iterable[Text]], Text) -> bool
16581658
"""Check if a name matches any of a list of wildcards.
16591659
16601660
If a filesystem is case *insensitive* (such as Windows) then
@@ -1711,7 +1711,7 @@ def match_glob(self, patterns, path, accept_prefix=False):
17111711
``['*.py']``, or `None` to match everything.
17121712
path (str): A resource path, starting with "/".
17131713
accept_prefix (bool): If ``True``, the path is
1714-
not required to match the wildcards themselves
1714+
not required to match the patterns themselves
17151715
but only need to be a prefix of a string that does.
17161716
17171717
Returns:
@@ -1729,6 +1729,8 @@ def match_glob(self, patterns, path, accept_prefix=False):
17291729
False
17301730
>>> my_fs.match_glob(['dir/file.txt'], '/dir/', accept_prefix=True)
17311731
True
1732+
>>> my_fs.match_glob(['dir/file.txt'], '/dir/', accept_prefix=False)
1733+
False
17321734
>>> my_fs.match_glob(['dir/file.txt'], '/dir/gile.txt', accept_prefix=True)
17331735
False
17341736

fs/glob.py

Lines changed: 51 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,10 @@
3434

3535
_PATTERN_CACHE = LRUCache(
3636
1000
37-
) # type: LRUCache[Tuple[Text, bool], Tuple[int, bool, Pattern]]
37+
) # type: LRUCache[Tuple[Text, bool], Tuple[Optional[int], Pattern]]
3838

3939

40-
def _split_pattern_by_rec(pattern):
40+
def _split_pattern_by_sep(pattern):
4141
# type: (Text) -> List[Text]
4242
"""Split a glob pattern at its directory seperators (/).
4343
@@ -57,28 +57,27 @@ def _split_pattern_by_rec(pattern):
5757
return [pattern[i + 1 : j] for i, j in zip(indices[:-1], indices[1:])]
5858

5959

60-
def _translate(pattern, case_sensitive=True):
61-
# type: (Text, bool) -> Text
62-
"""Translate a wildcard pattern to a regular expression.
60+
def _translate(pattern):
61+
# type: (Text) -> Text
62+
"""Translate a glob pattern without '**' to a regular expression.
6363
6464
There is no way to quote meta-characters.
65+
6566
Arguments:
66-
pattern (str): A wildcard pattern.
67-
case_sensitive (bool): Set to `False` to use a case
68-
insensitive regex (default `True`).
67+
pattern (str): A glob pattern.
6968
7069
Returns:
7170
str: A regex equivalent to the given pattern.
7271
7372
"""
74-
if not case_sensitive:
75-
pattern = pattern.lower()
7673
i, n = 0, len(pattern)
7774
res = []
7875
while i < n:
7976
c = pattern[i]
8077
i = i + 1
8178
if c == "*":
79+
if i < n and pattern[i] == "*":
80+
raise ValueError("glob._translate does not support '**' patterns.")
8281
res.append("[^/]*")
8382
elif c == "?":
8483
res.append("[^/]")
@@ -96,7 +95,7 @@ def _translate(pattern, case_sensitive=True):
9695
stuff = pattern[i:j].replace("\\", "\\\\")
9796
i = j + 1
9897
if stuff[0] == "!":
99-
stuff = "^" + stuff[1:]
98+
stuff = "^/" + stuff[1:]
10099
elif stuff[0] == "^":
101100
stuff = "\\" + stuff
102101
res.append("[%s]" % stuff)
@@ -105,27 +104,35 @@ def _translate(pattern, case_sensitive=True):
105104
return "".join(res)
106105

107106

108-
def _translate_glob(pattern, case_sensitive=True):
109-
levels = 0
107+
def _translate_glob(pattern):
108+
# type: (Text) -> Tuple[Optional[int], Text]
109+
"""Translate a glob pattern to a regular expression.
110+
111+
There is no way to quote meta-characters.
112+
113+
Arguments:
114+
pattern (str): A glob pattern.
115+
116+
Returns:
117+
Tuple[Optional[int], Text]: The first component describes the levels
118+
of depth this glob pattern goes to; basically the number of "/" in
119+
the pattern. If there is a "**" in the glob pattern, the depth is
120+
basically unbounded, and this component is `None` instead.
121+
The second component is the regular expression.
122+
123+
"""
110124
recursive = False
111125
re_patterns = [""]
112126
for component in iteratepath(pattern):
113127
if "**" in component:
114128
recursive = True
115129
split = component.split("**")
116-
split_re = [_translate(s, case_sensitive=case_sensitive) for s in split]
130+
split_re = [_translate(s) for s in split]
117131
re_patterns.append("/?" + ".*/?".join(split_re))
118132
else:
119-
re_patterns.append(
120-
"/" + _translate(component, case_sensitive=case_sensitive)
121-
)
122-
levels += 1
133+
re_patterns.append("/" + _translate(component))
123134
re_glob = "(?ms)^" + "".join(re_patterns) + ("/$" if pattern.endswith("/") else "$")
124-
return (
125-
levels,
126-
recursive,
127-
re.compile(re_glob, 0 if case_sensitive else re.IGNORECASE),
128-
)
135+
return pattern.count("/") + 1 if not recursive else None, re_glob
129136

130137

131138
def match(pattern, path):
@@ -147,10 +154,11 @@ def match(pattern, path):
147154
148155
"""
149156
try:
150-
levels, recursive, re_pattern = _PATTERN_CACHE[(pattern, True)]
157+
levels, re_pattern = _PATTERN_CACHE[(pattern, True)]
151158
except KeyError:
152-
levels, recursive, re_pattern = _translate_glob(pattern, case_sensitive=True)
153-
_PATTERN_CACHE[(pattern, True)] = (levels, recursive, re_pattern)
159+
levels, re_str = _translate_glob(pattern)
160+
re_pattern = re.compile(re_str)
161+
_PATTERN_CACHE[(pattern, True)] = (levels, re_pattern)
154162
if path and path[0] != "/":
155163
path = "/" + path
156164
return bool(re_pattern.match(path))
@@ -169,10 +177,11 @@ def imatch(pattern, path):
169177
170178
"""
171179
try:
172-
levels, recursive, re_pattern = _PATTERN_CACHE[(pattern, False)]
180+
levels, re_pattern = _PATTERN_CACHE[(pattern, False)]
173181
except KeyError:
174-
levels, recursive, re_pattern = _translate_glob(pattern, case_sensitive=True)
175-
_PATTERN_CACHE[(pattern, False)] = (levels, recursive, re_pattern)
182+
levels, re_str = _translate_glob(pattern)
183+
re_pattern = re.compile(re_str, re.IGNORECASE)
184+
_PATTERN_CACHE[(pattern, False)] = (levels, re_pattern)
176185
if path and path[0] != "/":
177186
path = "/" + path
178187
return bool(re_pattern.match(path))
@@ -187,7 +196,7 @@ def match_any(patterns, path):
187196
Arguments:
188197
patterns (list): A list of wildcard pattern, e.g ``["*.py",
189198
"*.pyc"]``
190-
name (str): A filename.
199+
path (str): A resource path.
191200
192201
Returns:
193202
bool: `True` if the path matches at least one of the patterns.
@@ -207,7 +216,7 @@ def imatch_any(patterns, path):
207216
Arguments:
208217
patterns (list): A list of wildcard pattern, e.g ``["*.py",
209218
"*.pyc"]``
210-
name (str): A filename.
219+
path (str): A resource path.
211220
212221
Returns:
213222
bool: `True` if the path matches at least one of the patterns.
@@ -228,29 +237,30 @@ def get_matcher(patterns, case_sensitive, accept_prefix=False):
228237
case_sensitive (bool): If ``True``, then the callable will be case
229238
sensitive, otherwise it will be case insensitive.
230239
accept_prefix (bool): If ``True``, the name is
231-
not required to match the wildcards themselves
240+
not required to match the patterns themselves
232241
but only need to be a prefix of a string that does.
233242
234243
Returns:
235244
callable: a matcher that will return `True` if the paths given as
236-
an argument matches any of the given patterns.
245+
an argument matches any of the given patterns, or if no patterns
246+
exist.
237247
238248
Example:
239-
>>> from fs import wildcard
240-
>>> is_python = wildcard.get_matcher(['*.py'], True)
249+
>>> from fs import glob
250+
>>> is_python = glob.get_matcher(['*.py'], True)
241251
>>> is_python('__init__.py')
242252
True
243253
>>> is_python('foo.txt')
244254
False
245255
246256
"""
247257
if not patterns:
248-
return lambda name: True
258+
return lambda path: True
249259

250260
if accept_prefix:
251261
new_patterns = []
252262
for pattern in patterns:
253-
split = _split_pattern_by_rec(pattern)
263+
split = _split_pattern_by_sep(pattern)
254264
for i in range(1, len(split)):
255265
new_pattern = "/".join(split[:i])
256266
new_patterns.append(new_pattern)
@@ -310,18 +320,15 @@ def __repr__(self):
310320
def _make_iter(self, search="breadth", namespaces=None):
311321
# type: (str, List[str]) -> Iterator[GlobMatch]
312322
try:
313-
levels, recursive, re_pattern = _PATTERN_CACHE[
314-
(self.pattern, self.case_sensitive)
315-
]
323+
levels, re_pattern = _PATTERN_CACHE[(self.pattern, self.case_sensitive)]
316324
except KeyError:
317-
levels, recursive, re_pattern = _translate_glob(
318-
self.pattern, case_sensitive=self.case_sensitive
319-
)
325+
levels, re_str = _translate_glob(self.pattern)
326+
re_pattern = re.compile(re_str, 0 if self.case_sensitive else re.IGNORECASE)
320327

321328
for path, info in self.fs.walk.info(
322329
path=self.path,
323330
namespaces=namespaces or self.namespaces,
324-
max_depth=None if recursive else levels,
331+
max_depth=levels,
325332
search=search,
326333
exclude_dirs=self.exclude_dirs,
327334
):

fs/walk.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,21 +84,21 @@ def __init__(
8484
a list of filename patterns, e.g. ``["~*"]``. Files matching
8585
any of these patterns will be removed from the walk.
8686
filter_dirs (list, optional): A list of patterns that will be used
87-
to match directories paths. The walk will only open directories
87+
to match directories names. The walk will only open directories
8888
that match at least one of these patterns. Directories will
8989
only be returned if the final component matches one of the
9090
patterns.
9191
exclude_dirs (list, optional): A list of patterns that will be
9292
used to filter out directories from the walk. e.g.
93-
``['*.svn', '*.git']``. Directories matching any of these
93+
``['*.svn', '*.git']``. Directory names matching any of these
9494
patterns will be removed from the walk.
9595
max_depth (int, optional): Maximum directory depth to walk.
9696
filter_glob (list, optional): If supplied, this parameter
9797
should be a list of path patterns e.g. ``["foo/**/*.py"]``.
9898
Resources will only be returned if their global path or
9999
an extension of it matches one of the patterns.
100100
exclude_glob (list, optional): If supplied, this parameter
101-
should be a list of path patterns e.g. ``["foo/**/*.py"]``.
101+
should be a list of path patterns e.g. ``["foo/**/*.pyc"]``.
102102
Resources will not be returned if their global path or
103103
an extension of it matches one of the patterns.
104104
@@ -213,7 +213,7 @@ def _iter_walk(
213213
def _check_open_dir(self, fs, path, info):
214214
# type: (FS, Text, Info) -> bool
215215
"""Check if a directory should be considered in the walk."""
216-
full_path = ("" if path == "/" else path) + "/" + info.name
216+
full_path = combine(path, info.name)
217217
if self.exclude_dirs is not None and fs.match(self.exclude_dirs, info.name):
218218
return False
219219
if self.exclude_glob is not None and fs.match_glob(

tests/test_glob.py

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
from __future__ import unicode_literals
22

3+
import re
34
import unittest
45

6+
from parameterized import parameterized
7+
58
from fs import glob, open_fs
69

710

@@ -17,8 +20,8 @@ def setUp(self):
1720
fs.makedirs("a/b/c/").writetext("foo.py", "import fs")
1821
repr(fs.glob)
1922

20-
def test_match(self):
21-
tests = [
23+
@parameterized.expand(
24+
[
2225
("*.?y", "/test.py", True),
2326
("*.py", "/test.py", True),
2427
("*.py", "__init__.py", True),
@@ -41,11 +44,11 @@ def test_match(self):
4144
("**/", "/test/", True),
4245
("**/", "/test.py", False),
4346
]
44-
for pattern, path, expected in tests:
45-
self.assertEqual(glob.match(pattern, path), expected, msg=(pattern, path))
47+
)
48+
def test_match(self, pattern, path, expected):
49+
self.assertEqual(glob.match(pattern, path), expected, msg=(pattern, path))
4650
# Run a second time to test cache
47-
for pattern, path, expected in tests:
48-
self.assertEqual(glob.match(pattern, path), expected, msg=(pattern, path))
51+
self.assertEqual(glob.match(pattern, path), expected, msg=(pattern, path))
4952

5053
def test_count_1dir(self):
5154
globber = glob.BoundGlobber(self.fs)
@@ -99,3 +102,49 @@ def test_remove_all(self):
99102
globber = glob.BoundGlobber(self.fs)
100103
globber("**").remove()
101104
self.assertEqual(sorted(self.fs.listdir("/")), [])
105+
106+
translate_test_cases = [
107+
("foo.py", ["foo.py"], ["Foo.py", "foo_py", "foo", ".py"]),
108+
("foo?py", ["foo.py", "fooapy"], ["foo/py", "foopy", "fopy"]),
109+
("bar/foo.py", ["bar/foo.py"], []),
110+
("bar?foo.py", ["barafoo.py"], ["bar/foo.py"]),
111+
("???.py", ["foo.py", "bar.py", "FOO.py"], [".py", "foo.PY"]),
112+
("bar/*.py", ["bar/.py", "bar/foo.py"], ["bar/foo"]),
113+
("bar/foo*.py", ["bar/foo.py", "bar/foobaz.py"], ["bar/foo", "bar/.py"]),
114+
("*/[bar]/foo.py", ["/b/foo.py", "x/a/foo.py", "/r/foo.py"], ["b/foo.py", "/bar/foo.py"]),
115+
("[!bar]/foo.py", ["x/foo.py"], ["//foo.py"]),
116+
("[.py", ["[.py"], [".py", "."]),
117+
]
118+
119+
@parameterized.expand(translate_test_cases)
120+
def test_translate(self, glob_pattern, expected_matches, expected_not_matches):
121+
translated = glob._translate(glob_pattern)
122+
for m in expected_matches:
123+
self.assertTrue(re.match(translated, m))
124+
for m in expected_not_matches:
125+
self.assertFalse(re.match(translated, m))
126+
127+
@parameterized.expand(translate_test_cases)
128+
def test_translate_glob_simple(self, glob_pattern, expected_matches, expected_not_matches):
129+
levels, translated = glob._translate_glob(glob_pattern)
130+
self.assertEqual(levels, glob_pattern.count("/") + 1)
131+
for m in expected_matches:
132+
self.assertTrue(re.match(translated, "/" + m))
133+
for m in expected_not_matches:
134+
self.assertFalse(re.match(translated, m))
135+
self.assertFalse(re.match(translated, "/" + m))
136+
137+
@parameterized.expand(
138+
[
139+
("foo/**/bar", ["/foo/bar", "/foo/baz/bar", "/foo/baz/qux/bar"], ["/foo"]),
140+
("**/*/bar", ["/foo/bar", "/foo/bar"], ["/bar", "/bar"]),
141+
("/**/foo/**/bar", ["/baz/foo/qux/bar", "/foo/bar"], ["/bar"]),
142+
]
143+
)
144+
def test_translate_glob(self, glob_pattern, expected_matches, expected_not_matches):
145+
levels, translated = glob._translate_glob(glob_pattern)
146+
self.assertIsNone(levels)
147+
for m in expected_matches:
148+
self.assertTrue(re.match(translated, m))
149+
for m in expected_not_matches:
150+
self.assertFalse(re.match(translated, m))

0 commit comments

Comments
 (0)