Skip to content

Commit

Permalink
enhance apply_regex_substitutions to support multi-line matches
Browse files Browse the repository at this point in the history
It may be necessary to match patterns with more context, e.g. the content
of the next or previous line, to disambiguate matches that would otherwise be too generic.

Add parameter `single_line`: when enabled (the default), the old behavior of
matching line by line is kept; when disabled, patterns are matched against the whole text.
Add parameter `match_all` to require that all patterns match in each file,
not just at least one.
  • Loading branch information
Flamefire committed Feb 7, 2025
1 parent b37f707 commit c14b549
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 39 deletions.
81 changes: 54 additions & 27 deletions easybuild/tools/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -1643,16 +1643,22 @@ def apply_patch(patch_file, dest, fn=None, copy=False, level=None, use_git_am=Fa
return True


def apply_regex_substitutions(paths, regex_subs, backup='.orig.eb', on_missing_match=None):
def apply_regex_substitutions(paths, regex_subs, backup='.orig.eb',
on_missing_match=None, match_all=False, single_line=True):
"""
Apply specified list of regex substitutions.
:param paths: list of paths to files to patch (or just a single filepath)
:param regex_subs: list of substitutions to apply, specified as (<regexp pattern>, <replacement string>)
:param regex_subs: list of substitutions to apply,
specified as (<regexp pattern or regex instance>, <replacement string>)
:param backup: create backup of original file with specified suffix (no backup if value evaluates to False)
:param on_missing_match: Define what to do when no match was found in the file.
Can be 'error' to raise an error, 'warn' to print a warning or 'ignore' to do nothing
Defaults to the value of --strict
:param match_all: Expect to match all patterns in all files
instead of at least one per file for error/warning reporting
:param single_line: Replace first match of each pattern for each line in the order of the patterns.
If False the patterns are applied in order to the full text and may match line breaks.
"""
if on_missing_match is None:
on_missing_match = build_option('strict')
Expand All @@ -1664,18 +1670,22 @@ def apply_regex_substitutions(paths, regex_subs, backup='.orig.eb', on_missing_m
if isinstance(paths, string_type):
paths = [paths]

flags = 0 if single_line else re.M
compiled_regex_subs = [(re.compile(regex, flags) if isinstance(regex, str) else regex, subtxt)
for (regex, subtxt) in regex_subs]

# only report when in 'dry run' mode
if build_option('extended_dry_run'):
paths_str = ', '.join(paths)
dry_run_msg("applying regex substitutions to file(s): %s" % paths_str, silent=build_option('silent'))
for regex, subtxt in regex_subs:
dry_run_msg(" * regex pattern '%s', replacement string '%s'" % (regex, subtxt))
for regex, subtxt in compiled_regex_subs:
dry_run_msg(" * regex pattern '%s', replacement string '%s'" % (regex.pattern, subtxt))

else:
_log.info("Applying following regex substitutions to %s: %s", paths, regex_subs)

compiled_regex_subs = [(re.compile(regex), subtxt) for (regex, subtxt) in regex_subs]
_log.info("Applying following regex substitutions to %s: %s",
paths, [(regex.pattern, subtxt) for regex, subtxt in compiled_regex_subs])

replacement_failed_msgs = []
for path in paths:
try:
# make sure that file can be opened in text mode;
Expand All @@ -1695,32 +1705,49 @@ def apply_regex_substitutions(paths, regex_subs, backup='.orig.eb', on_missing_m
if backup:
copy_file(path, path + backup)
replacement_msgs = []
replaced = [False] * len(compiled_regex_subs)
with open_file(path, 'w') as out_file:
lines = txt_utf8.split('\n')
del txt_utf8
for line_id, line in enumerate(lines):
for regex, subtxt in compiled_regex_subs:
match = regex.search(line)
if match:
if single_line:
lines = txt_utf8.split('\n')
del txt_utf8
for line_id, line in enumerate(lines):
for i, (regex, subtxt) in enumerate(compiled_regex_subs):
match = regex.search(line)
if match:
origtxt = match.group(0)
replacement_msgs.append("Replaced in line %d: '%s' -> '%s'" %
(line_id + 1, origtxt, subtxt))
replaced[i] = True
line = regex.sub(subtxt, line)
lines[line_id] = line
out_file.write('\n'.join(lines))
else:
for i, (regex, subtxt) in enumerate(compiled_regex_subs):
def do_replace(match):
origtxt = match.group(0)
replacement_msgs.append("Replaced in line %d: '%s' -> '%s'" %
(line_id + 1, origtxt, subtxt))
line = regex.sub(subtxt, line)
lines[line_id] = line
out_file.write('\n'.join(lines))
# pylint: disable=cell-var-from-loop
cur_subtxt = match.expand(subtxt)
# pylint: disable=cell-var-from-loop
replacement_msgs.append("Replaced: '%s' -> '%s'" % (origtxt, cur_subtxt))
return cur_subtxt
txt_utf8, replaced[i] = regex.subn(do_replace, txt_utf8)
out_file.write(txt_utf8)
if replacement_msgs:
_log.info('Applied the following substitutions to %s:\n%s', path, '\n'.join(replacement_msgs))
else:
msg = 'Nothing found to replace in %s' % path
if on_missing_match == ERROR:
raise EasyBuildError(msg)
elif on_missing_match == WARN:
_log.warning(msg)
else:
_log.info(msg)

if (match_all and not all(replaced)) or (not match_all and not any(replaced)):
errors = ["Nothing found to replace '%s'" % regex.pattern
for cur_replaced, (regex, _) in zip(replaced, compiled_regex_subs) if not cur_replaced]
replacement_failed_msgs.append(', '.join(errors) + ' in ' + path)
except (IOError, OSError) as err:
raise EasyBuildError("Failed to patch %s: %s", path, err)
if replacement_failed_msgs:
msg = '\n'.join(replacement_failed_msgs)
if on_missing_match == ERROR:
raise EasyBuildError(msg)
elif on_missing_match == WARN:
_log.warning(msg)
else:
_log.info(msg)


def modify_env(old, new):
Expand Down
45 changes: 33 additions & 12 deletions test/framework/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -1443,16 +1443,24 @@ def test_apply_regex_substitutions(self):
# Check handling of on_missing_match
ft.write_file(testfile, testtxt)
regex_subs_no_match = [('Not there', 'Not used')]
error_pat = 'Nothing found to replace in %s' % testfile
error_pat = "Nothing found to replace 'Not there' in %s" % testfile
# Error
self.assertErrorRegex(EasyBuildError, error_pat, ft.apply_regex_substitutions, testfile, regex_subs_no_match,
on_missing_match=run.ERROR)
# First matches, but 2nd not
regex_subs_part_match = [regex_subs[0], ('Not there', 'Not used')]
self.assertErrorRegex(EasyBuildError, error_pat, ft.apply_regex_substitutions, testfile, regex_subs_part_match,
on_missing_match=run.ERROR)

# Warn
with self.log_to_testlogfile():
ft.apply_regex_substitutions(testfile, regex_subs_no_match, on_missing_match=run.WARN)
logtxt = ft.read_file(self.logfile)
self.assertIn('WARNING ' + error_pat, logtxt)
with self.log_to_testlogfile():
ft.apply_regex_substitutions(testfile, regex_subs_part_match, on_missing_match=run.WARN)
logtxt = ft.read_file(self.logfile)
self.assertIn('WARNING ' + error_pat, logtxt)

# Ignore
with self.log_to_testlogfile():
Expand All @@ -1465,6 +1473,21 @@ def test_apply_regex_substitutions(self):
path = os.path.join(self.test_prefix, 'nosuchfile.txt')
self.assertErrorRegex(EasyBuildError, error_pat, ft.apply_regex_substitutions, path, regex_subs)

# Replace multi-line strings
testtxt = "This si wrong\nBut mkae right\nLeave this!"
expected_testtxt = 'This is wrong.\nBut make right\nLeave this!'
ft.write_file(testfile, testtxt)
repl = ('This si( .*)\n(.*)mkae right$', 'This is wrong.\nBut make right')
ft.apply_regex_substitutions(testfile, [repl], backup=False, on_missing_match=ERROR, single_line=False)
new_testtxt = ft.read_file(testfile)
self.assertEqual(new_testtxt, expected_testtxt)
# Supports capture groups
ft.write_file(testfile, testtxt)
repl = ('This si( .*)\n(.*)mkae right$', r'This is\1.\n\2make right')
ft.apply_regex_substitutions(testfile, [repl], backup=False, on_missing_match=ERROR, single_line=False)
new_testtxt = ft.read_file(testfile)
self.assertEqual(new_testtxt, expected_testtxt)

# make sure apply_regex_substitutions can patch files that include UTF-8 characters
testtxt = b"foo \xe2\x80\x93 bar" # This is an UTF-8 "-"
ft.write_file(testfile, testtxt)
Expand All @@ -1485,34 +1508,32 @@ def test_apply_regex_substitutions(self):

# also test apply_regex_substitutions with a *list* of paths
# cfr. https://github.com/easybuilders/easybuild-framework/issues/3493
# and a compiled regex
test_dir = os.path.join(self.test_prefix, 'test_dir')
test_file1 = os.path.join(test_dir, 'one.txt')
test_file2 = os.path.join(test_dir, 'two.txt')
ft.write_file(test_file1, "Donald is an elephant")
ft.write_file(test_file2, "2 + 2 = 5")
regexs = [
('Donald', 'Dumbo'),
(re.compile('donald', re.I), 'Dumbo'), # Only matches if this is used as-is
('= 5', '= 4'),
]
ft.apply_regex_substitutions([test_file1, test_file2], regexs)

# also check dry run mode
init_config(build_options={'extended_dry_run': True})
self.mock_stderr(True)
self.mock_stdout(True)
ft.apply_regex_substitutions([test_file1, test_file2], regexs)
stderr, stdout = self.get_stderr(), self.get_stdout()
self.mock_stderr(False)
self.mock_stdout(False)
with self.mocked_stdout_stderr():
ft.apply_regex_substitutions([test_file1, test_file2], regexs)
stderr, stdout = self.get_stderr(), self.get_stdout()

self.assertFalse(stderr)
regex = re.compile('\n'.join([
regex = '\n'.join([
r"applying regex substitutions to file\(s\): .*/test_dir/one.txt, .*/test_dir/two.txt",
r" \* regex pattern 'Donald', replacement string 'Dumbo'",
r" \* regex pattern 'donald', replacement string 'Dumbo'",
r" \* regex pattern '= 5', replacement string '= 4'",
'',
]))
self.assertTrue(regex.search(stdout), "Pattern '%s' should be found in: %s" % (regex.pattern, stdout))
])
self.assertRegex(stdout, regex)

def test_find_flexlm_license(self):
"""Test find_flexlm_license function."""
Expand Down

0 comments on commit c14b549

Please sign in to comment.