Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Truncate diff of very long texts if not in verbose mode (fix #12406) #12634

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ CrazyMerlyn
Cristian Vera
Cyrus Maden
Damian Skrzypczak
Daniel Diniz
Daniel Grana
Daniel Hahler
Daniel Miller
Expand Down
2 changes: 2 additions & 0 deletions changelog/12406.improvement.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
The ``_diff_text()`` function now truncates texts before diffing them
if not in verbose mode. -- by :user:`devdanzin`.
34 changes: 26 additions & 8 deletions src/_pytest/assertion/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
from _pytest._io.pprint import PrettyPrinter
from _pytest._io.saferepr import saferepr
from _pytest._io.saferepr import saferepr_unlimited
from _pytest.assertion.truncate import DEFAULT_MAX_CHARS
from _pytest.assertion.truncate import DEFAULT_MAX_LINES
from _pytest.config import Config


Expand Down Expand Up @@ -285,17 +287,18 @@
explanation: list[str] = []

if verbose < 1:
i = 0 # just in case left or right has zero length
for i in range(min(len(left), len(right))):
if left[i] != right[i]:
leading_skipped = 0 # just in case left or right has zero length
for leading_skipped in range(min(len(left), len(right))):
if left[leading_skipped] != right[leading_skipped]:
break
if i > 42:
i -= 10 # Provide some context
if leading_skipped > 42:
leading_skipped -= 10 # Provide some context
explanation = [
f"Skipping {i} identical leading characters in diff, use -v to show"
f"Skipping {leading_skipped} identical leading characters in diff, use -v to show"
]
left = left[i:]
right = right[i:]
left = left[leading_skipped:]
right = right[leading_skipped:]
i = 0
if len(left) == len(right):
for i in range(len(left)):
if left[-i] != right[-i]:
Expand All @@ -308,6 +311,21 @@
]
left = left[:-i]
right = right[:-i]
shortest = min(left, right, key=lambda x: len(x))
lines = j = start = 0
if shortest.count("\n") >= DEFAULT_MAX_LINES:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm afraid this PR got outdated after #12766.

I think the path forward would be for _diff_text() to receive both "max lines" and "max chars" by parameter instead (they being int | None, with None meaning "no limits").

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm under the impression the code can be made simpler by skipping max chars first, then dealing with max lines... 🤔

# Keep only DEFAULT_MAX_LINES, usually 8, lines
if 10 < leading_skipped < 42: # We didn't skip equal leading characters
start += leading_skipped - 10

Check warning on line 319 in src/_pytest/assertion/util.py

View check run for this annotation

Codecov / codecov/patch

src/_pytest/assertion/util.py#L319

Added line #L319 was not covered by tests
for j, c in enumerate(shortest[start:], start=start - 1):
if c == "\n":
lines += 1
if lines > DEFAULT_MAX_LINES:
break
else:
j = len(max(left, right, key=lambda x: len(x)))
left = left[start : min(DEFAULT_MAX_CHARS, len(left), j)]
right = right[start : min(DEFAULT_MAX_CHARS, len(right), j)]
keepends = True
if left.isspace() or right.isspace():
left = repr(str(left))
Expand Down
36 changes: 36 additions & 0 deletions testing/test_assertion.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,42 @@ def test_multiline_text_diff(self) -> None:
assert "- eggs" in diff
assert "+ spam" in diff

def test_long_text_diff_same_trailing(self) -> None:
left = "foo\nbar\n" * 100 + "spam\n" * 100 + "spam\n" * 160
right = "foo\nbar\n" * 100 + "eggs\n" * 100 + "spam\n" * 160
diff = callequal(left, right, verbose=0)
assert diff is not None
assert "- eggs" in diff
assert len(diff) == 19

def test_long_text_diff_different_trailing(self) -> None:
left = "foo\nbar\n" * 100 + "spam\n" * 100 + "spam\n" * 160
right = "foo\nbar\n" * 100 + "eggs\n" * 100 + "eggs\n" * 160
diff = callequal(left, right, verbose=0)
assert diff is not None
assert "- eggs" in diff
assert len(diff) == 18

def test_long_text_diff_short_trailing(self) -> None:
left = "foo\nbar\n" * 5 + "spam\n" * 5 + "spam\n" * 5
right = "foo\nbar\n" * 5 + "eggs\n" * 5 + "spam\n" * 5
diff = callequal(left, right, verbose=0)
assert diff is not None
assert "- eggs" in diff
assert len(diff) == 16

def test_long_text_diff_long_lines(self) -> None:
long_line1 = "foo" * 50 + "\n"
long_line2 = "bar" * 50 + "\n"
left = long_line1 * 3 + "spam\n" * 3 + long_line1 * 10
right = long_line1 * 3 + "eggs\n" * 3 + long_line2 * 10
diff = callequal(left, right, verbose=0)
assert diff is not None
assert "- eggs" in diff
assert "+ " + long_line1[:-1] in diff
assert "- " + long_line2[:-1] in diff
assert len(diff) == 20

def test_bytes_diff_normal(self) -> None:
"""Check special handling for bytes diff (#5260)"""
diff = callequal(b"spam", b"eggs")
Expand Down
Loading