gh-74598: add fnmatch.filterfalse for excluding names matching a pattern (#121185)

picnixz · web-flow · commit 3eda1460359c · 2025-04-08T10:11:25.000Z
diff --git a/Doc/library/fnmatch.rst b/Doc/library/fnmatch.rst
@@ -90,6 +90,16 @@ functions: :func:`fnmatch`, :func:`fnmatchcase`, :func:`.filter`.
    but implemented more efficiently.
 
 
+.. function:: filterfalse(names, pat)
+
+   Construct a list from those elements of the :term:`iterable` of filename
+   strings *names* that do not match the pattern string *pat*.
+   It is the same as ``[n for n in names if not fnmatch(n, pat)]``,
+   but implemented more efficiently.
+
+   .. versionadded:: next
+
+
 .. function:: translate(pat)
 
    Return the shell-style pattern *pat* converted to a regular expression for
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
@@ -677,6 +677,13 @@ errno
   (Contributed by James Roy in :gh:`126585`.)
 
 
+fnmatch
+-------
+
+* Added :func:`fnmatch.filterfalse` for excluding names matching a pattern.
+  (Contributed by Bénédikt Tran in :gh:`74598`.)
+
+
 fractions
 ---------
 
diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
@@ -9,12 +9,15 @@
 The function translate(PATTERN) returns a regular expression
 corresponding to PATTERN.  (It does not compile it.)
 """
+
+import functools
+import itertools
 import os
 import posixpath
 import re
-import functools
 
-__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
+__all__ = ["filter", "filterfalse", "fnmatch", "fnmatchcase", "translate"]
+
 
 def fnmatch(name, pat):
     """Test whether FILENAME matches PATTERN.
@@ -35,6 +38,7 @@ def fnmatch(name, pat):
     pat = os.path.normcase(pat)
     return fnmatchcase(name, pat)
 
+
 @functools.lru_cache(maxsize=32768, typed=True)
 def _compile_pattern(pat):
     if isinstance(pat, bytes):
@@ -45,6 +49,7 @@ def _compile_pattern(pat):
         res = translate(pat)
     return re.compile(res).match
 
+
 def filter(names, pat):
     """Construct a list from those elements of the iterable NAMES that match PAT."""
     result = []
@@ -61,6 +66,22 @@ def filter(names, pat):
                 result.append(name)
     return result
 
+
+def filterfalse(names, pat):
+    """Construct a list from those elements of the iterable NAMES that do not match PAT."""
+    pat = os.path.normcase(pat)
+    match = _compile_pattern(pat)
+    if os.path is posixpath:
+        # normcase on posix is NOP. Optimize it away from the loop.
+        return list(itertools.filterfalse(match, names))
+
+    result = []
+    for name in names:
+        if match(os.path.normcase(name)) is None:
+            result.append(name)
+    return result
+
+
 def fnmatchcase(name, pat):
     """Test whether FILENAME matches PATTERN, including case.
 
@@ -80,9 +101,11 @@ def translate(pat):
     parts, star_indices = _translate(pat, '*', '.')
     return _join_translated_parts(parts, star_indices)
 
+
 _re_setops_sub = re.compile(r'([&~|])').sub
 _re_escape = functools.lru_cache(maxsize=512)(re.escape)
 
+
 def _translate(pat, star, question_mark):
     res = []
     add = res.append
diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
@@ -1,11 +1,15 @@
 """Test cases for the fnmatch module."""
 
-import unittest
 import os
 import string
+import unittest
 import warnings
+from fnmatch import fnmatch, fnmatchcase, translate, filter, filterfalse
+
+
+IGNORECASE = os.path.normcase('P') == os.path.normcase('p')
+NORMSEP = os.path.normcase('\\') == os.path.normcase('/')
 
-from fnmatch import fnmatch, fnmatchcase, translate, filter
 
 class FnmatchTestCase(unittest.TestCase):
 
@@ -77,35 +81,32 @@ def test_bytes(self):
         self.check_match(b'foo\nbar', b'foo*')
 
     def test_case(self):
-        ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
         check = self.check_match
         check('abc', 'abc')
-        check('AbC', 'abc', ignorecase)
-        check('abc', 'AbC', ignorecase)
+        check('AbC', 'abc', IGNORECASE)
+        check('abc', 'AbC', IGNORECASE)
         check('AbC', 'AbC')
 
     def test_sep(self):
-        normsep = os.path.normcase('\\') == os.path.normcase('/')
         check = self.check_match
         check('usr/bin', 'usr/bin')
-        check('usr\\bin', 'usr/bin', normsep)
-        check('usr/bin', 'usr\\bin', normsep)
+        check('usr\\bin', 'usr/bin', NORMSEP)
+        check('usr/bin', 'usr\\bin', NORMSEP)
         check('usr\\bin', 'usr\\bin')
 
     def test_char_set(self):
-        ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
         check = self.check_match
         tescases = string.ascii_lowercase + string.digits + string.punctuation
         for c in tescases:
             check(c, '[az]', c in 'az')
             check(c, '[!az]', c not in 'az')
         # Case insensitive.
         for c in tescases:
-            check(c, '[AZ]', (c in 'az') and ignorecase)
-            check(c, '[!AZ]', (c not in 'az') or not ignorecase)
+            check(c, '[AZ]', (c in 'az') and IGNORECASE)
+            check(c, '[!AZ]', (c not in 'az') or not IGNORECASE)
         for c in string.ascii_uppercase:
-            check(c, '[az]', (c in 'AZ') and ignorecase)
-            check(c, '[!az]', (c not in 'AZ') or not ignorecase)
+            check(c, '[az]', (c in 'AZ') and IGNORECASE)
+            check(c, '[!az]', (c not in 'AZ') or not IGNORECASE)
         # Repeated same character.
         for c in tescases:
             check(c, '[aa]', c == 'a')
@@ -120,8 +121,6 @@ def test_char_set(self):
         check('[!]', '[!]')
 
     def test_range(self):
-        ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
-        normsep = os.path.normcase('\\') == os.path.normcase('/')
         check = self.check_match
         tescases = string.ascii_lowercase + string.digits + string.punctuation
         for c in tescases:
@@ -131,11 +130,11 @@ def test_range(self):
             check(c, '[!b-dx-z]', c not in 'bcdxyz')
         # Case insensitive.
         for c in tescases:
-            check(c, '[B-D]', (c in 'bcd') and ignorecase)
-            check(c, '[!B-D]', (c not in 'bcd') or not ignorecase)
+            check(c, '[B-D]', (c in 'bcd') and IGNORECASE)
+            check(c, '[!B-D]', (c not in 'bcd') or not IGNORECASE)
         for c in string.ascii_uppercase:
-            check(c, '[b-d]', (c in 'BCD') and ignorecase)
-            check(c, '[!b-d]', (c not in 'BCD') or not ignorecase)
+            check(c, '[b-d]', (c in 'BCD') and IGNORECASE)
+            check(c, '[!b-d]', (c not in 'BCD') or not IGNORECASE)
         # Upper bound == lower bound.
         for c in tescases:
             check(c, '[b-b]', c == 'b')
@@ -144,7 +143,7 @@ def test_range(self):
             check(c, '[!-#]', c not in '-#')
             check(c, '[!--.]', c not in '-.')
             check(c, '[^-`]', c in '^_`')
-            if not (normsep and c == '/'):
+            if not (NORMSEP and c == '/'):
                 check(c, '[[-^]', c in r'[\]^')
                 check(c, r'[\-^]', c in r'\]^')
             check(c, '[b-]', c in '-b')
@@ -160,47 +159,45 @@ def test_range(self):
             check(c, '[d-bx-z]', c in 'xyz')
             check(c, '[!d-bx-z]', c not in 'xyz')
             check(c, '[d-b^-`]', c in '^_`')
-            if not (normsep and c == '/'):
+            if not (NORMSEP and c == '/'):
                 check(c, '[d-b[-^]', c in r'[\]^')
 
     def test_sep_in_char_set(self):
-        normsep = os.path.normcase('\\') == os.path.normcase('/')
         check = self.check_match
         check('/', r'[/]')
         check('\\', r'[\]')
-        check('/', r'[\]', normsep)
-        check('\\', r'[/]', normsep)
+        check('/', r'[\]', NORMSEP)
+        check('\\', r'[/]', NORMSEP)
         check('[/]', r'[/]', False)
         check(r'[\\]', r'[/]', False)
         check('\\', r'[\t]')
-        check('/', r'[\t]', normsep)
+        check('/', r'[\t]', NORMSEP)
         check('t', r'[\t]')
         check('\t', r'[\t]', False)
 
     def test_sep_in_range(self):
-        normsep = os.path.normcase('\\') == os.path.normcase('/')
         check = self.check_match
-        check('a/b', 'a[.-0]b', not normsep)
+        check('a/b', 'a[.-0]b', not NORMSEP)
         check('a\\b', 'a[.-0]b', False)
-        check('a\\b', 'a[Z-^]b', not normsep)
+        check('a\\b', 'a[Z-^]b', not NORMSEP)
         check('a/b', 'a[Z-^]b', False)
 
-        check('a/b', 'a[/-0]b', not normsep)
+        check('a/b', 'a[/-0]b', not NORMSEP)
         check(r'a\b', 'a[/-0]b', False)
         check('a[/-0]b', 'a[/-0]b', False)
         check(r'a[\-0]b', 'a[/-0]b', False)
 
         check('a/b', 'a[.-/]b')
-        check(r'a\b', 'a[.-/]b', normsep)
+        check(r'a\b', 'a[.-/]b', NORMSEP)
         check('a[.-/]b', 'a[.-/]b', False)
         check(r'a[.-\]b', 'a[.-/]b', False)
 
         check(r'a\b', r'a[\-^]b')
-        check('a/b', r'a[\-^]b', normsep)
+        check('a/b', r'a[\-^]b', NORMSEP)
         check(r'a[\-^]b', r'a[\-^]b', False)
         check('a[/-^]b', r'a[\-^]b', False)
 
-        check(r'a\b', r'a[Z-\]b', not normsep)
+        check(r'a\b', r'a[Z-\]b', not NORMSEP)
         check('a/b', r'a[Z-\]b', False)
         check(r'a[Z-\]b', r'a[Z-\]b', False)
         check('a[Z-/]b', r'a[Z-\]b', False)
@@ -332,18 +329,41 @@ def test_mix_bytes_str(self):
         self.assertRaises(TypeError, filter, [b'test'], '*')
 
     def test_case(self):
-        ignorecase = os.path.normcase('P') == os.path.normcase('p')
         self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.p*'),
-                         ['Test.py', 'Test.PL'] if ignorecase else ['Test.py'])
+                         ['Test.py', 'Test.PL'] if IGNORECASE else ['Test.py'])
         self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.P*'),
-                         ['Test.py', 'Test.PL'] if ignorecase else ['Test.PL'])
+                         ['Test.py', 'Test.PL'] if IGNORECASE else ['Test.PL'])
 
     def test_sep(self):
-        normsep = os.path.normcase('\\') == os.path.normcase('/')
         self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr/*'),
-                         ['usr/bin', 'usr\\lib'] if normsep else ['usr/bin'])
+                         ['usr/bin', 'usr\\lib'] if NORMSEP else ['usr/bin'])
         self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr\\*'),
-                         ['usr/bin', 'usr\\lib'] if normsep else ['usr\\lib'])
+                         ['usr/bin', 'usr\\lib'] if NORMSEP else ['usr\\lib'])
+
+
+class FilterFalseTestCase(unittest.TestCase):
+
+    def test_filterfalse(self):
+        actual = filterfalse(['Python', 'Ruby', 'Perl', 'Tcl'], 'P*')
+        self.assertListEqual(actual, ['Ruby', 'Tcl'])
+        actual = filterfalse([b'Python', b'Ruby', b'Perl', b'Tcl'], b'P*')
+        self.assertListEqual(actual, [b'Ruby', b'Tcl'])
+
+    def test_mix_bytes_str(self):
+        self.assertRaises(TypeError, filterfalse, ['test'], b'*')
+        self.assertRaises(TypeError, filterfalse, [b'test'], '*')
+
+    def test_case(self):
+        self.assertEqual(filterfalse(['Test.py', 'Test.rb', 'Test.PL'], '*.p*'),
+                         ['Test.rb'] if IGNORECASE else ['Test.rb', 'Test.PL'])
+        self.assertEqual(filterfalse(['Test.py', 'Test.rb', 'Test.PL'], '*.P*'),
+                         ['Test.rb'] if IGNORECASE else ['Test.py', 'Test.rb',])
+
+    def test_sep(self):
+        self.assertEqual(filterfalse(['usr/bin', 'usr', 'usr\\lib'], 'usr/*'),
+                         ['usr'] if NORMSEP else ['usr', 'usr\\lib'])
+        self.assertEqual(filterfalse(['usr/bin', 'usr', 'usr\\lib'], 'usr\\*'),
+                         ['usr'] if NORMSEP else ['usr/bin', 'usr'])
 
 
 if __name__ == "__main__":
diff --git a/Misc/NEWS.d/next/Library/2024-06-30-17-00-00.gh-issue-74598.1gVy_8.rst b/Misc/NEWS.d/next/Library/2024-06-30-17-00-00.gh-issue-74598.1gVy_8.rst
@@ -0,0 +1,2 @@
+Add :func:`fnmatch.filterfalse` for excluding names matching a pattern.
+Patch by Bénédikt Tran.

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	+Add :func:`fnmatch.filterfalse` for excluding names matching a pattern.
	`2`	`+Patch by Bénédikt Tran.`