From 9a3c3841c33b07ba3d2d48c937befe4cf2bc0a81 Mon Sep 17 00:00:00 2001 From: devdanzin <74280297+devdanzin@users.noreply.github.com> Date: Sat, 20 Jul 2024 10:37:39 -0300 Subject: [PATCH 1/8] Avoid creating a diff of very long texts if not in verbose mode. --- src/_pytest/assertion/util.py | 16 ++++++++++++++++ testing/test_assertion.py | 24 ++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/src/_pytest/assertion/util.py b/src/_pytest/assertion/util.py index 4dc1af4af03..e7a9483f8a8 100644 --- a/src/_pytest/assertion/util.py +++ b/src/_pytest/assertion/util.py @@ -308,6 +308,22 @@ def _diff_text(left: str, right: str, verbose: int = 0) -> list[str]: ] left = left[:-i] right = right[:-i] + shortest = min(left, right, key=lambda x: len(x)) + lines = j = 0 + if shortest.count("\n") >= 8: + # Keep only 8 lines + for j, c in enumerate(shortest): + if c == "\n": + lines += 1 + if lines >= 8: + break + else: + j = len(max(left, right, key=lambda x: len(x))) + # Not sure if this is right + if i < 32: # We didn't skip equal trailing characters + j += i + left = left[:min(640, len(left), j)] + right = right[:min(640, len(right), j)] keepends = True if left.isspace() or right.isspace(): left = repr(str(left)) diff --git a/testing/test_assertion.py b/testing/test_assertion.py index 69ca0f73ff2..8c996d6671f 100644 --- a/testing/test_assertion.py +++ b/testing/test_assertion.py @@ -417,6 +417,30 @@ def test_multiline_text_diff(self) -> None: assert "- eggs" in diff assert "+ spam" in diff + def test_long_text_diff_same_trailing(self) -> None: + left = "foo\nbar\n" * 100 + "spam\n" * 100 + "spam\n" * 160 + right = "foo\nbar\n" * 100 + "eggs\n" * 100 + "spam\n" * 160 + diff = callequal(left, right, verbose=0) + assert diff is not None + assert "- eggs" in diff + assert len(diff) == 17 + + def test_long_text_diff_different_trailing(self) -> None: + left = "foo\nbar\n" * 100 + "spam\n" * 100 + "spam\n" * 160 + right = "foo\nbar\n" * 100 + "eggs\n" * 100 + "eggs\n" * 160 + diff = callequal(left, right, verbose=0) + assert diff is not None + assert "- eggs" in diff + assert len(diff) == 18 + + def test_long_text_diff_short_trailing(self) -> None: + left = "foo\nbar\n" * 5 + "spam\n" * 5 + "spam\n" * 5 + right = "foo\nbar\n" * 5 + "eggs\n" * 5 + "spam\n" * 5 + diff = callequal(left, right, verbose=0) + assert diff is not None + assert "- eggs" in diff + assert len(diff) == 20 + def test_bytes_diff_normal(self) -> None: """Check special handling for bytes diff (#5260)""" diff = callequal(b"spam", b"eggs") From 5a239d7e927ea5342f8b0ceae78ed45db1654e9f Mon Sep 17 00:00:00 2001 From: devdanzin <74280297+devdanzin@users.noreply.github.com> Date: Sat, 20 Jul 2024 10:46:06 -0300 Subject: [PATCH 2/8] Add changelog entry. --- changelog/12406.improvement.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 changelog/12406.improvement.rst diff --git a/changelog/12406.improvement.rst b/changelog/12406.improvement.rst new file mode 100644 index 00000000000..c082193e524 --- /dev/null +++ b/changelog/12406.improvement.rst @@ -0,0 +1,2 @@ +The ``_diff_text()`` function now truncates texts before diffing them +if not in verbose mode. -- by :user:`devdanzin`. From c06185e85b738f80da1fb8df3d21fa886c96c890 Mon Sep 17 00:00:00 2001 From: devdanzin <74280297+devdanzin@users.noreply.github.com> Date: Sat, 20 Jul 2024 10:48:02 -0300 Subject: [PATCH 3/8] Add myself to AUTHORS. --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 9b6cb6a9d23..9d642b9446c 100644 --- a/AUTHORS +++ b/AUTHORS @@ -101,6 +101,7 @@ CrazyMerlyn Cristian Vera Cyrus Maden Damian Skrzypczak +Daniel Diniz Daniel Grana Daniel Hahler Daniel Miller From c17e498a63d4bfce696e76b837082310c0a4950b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 20 Jul 2024 13:54:36 +0000 Subject: [PATCH 4/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/_pytest/assertion/util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/_pytest/assertion/util.py b/src/_pytest/assertion/util.py index e7a9483f8a8..f64dc355ee1 100644 --- a/src/_pytest/assertion/util.py +++ b/src/_pytest/assertion/util.py @@ -322,8 +322,8 @@ def _diff_text(left: str, right: str, verbose: int = 0) -> list[str]: # Not sure if this is right if i < 32: # We didn't skip equal trailing characters j += i - left = left[:min(640, len(left), j)] - right = right[:min(640, len(right), j)] + left = left[: min(640, len(left), j)] + right = right[: min(640, len(right), j)] keepends = True if left.isspace() or right.isspace(): left = repr(str(left)) From 6e22c94bea3556dadaf47782415e57434076a681 Mon Sep 17 00:00:00 2001 From: devdanzin <74280297+devdanzin@users.noreply.github.com> Date: Sat, 20 Jul 2024 11:38:15 -0300 Subject: [PATCH 5/8] Use constants from truncate.py instead of magical numbers in _diff_text(). --- src/_pytest/assertion/util.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/_pytest/assertion/util.py b/src/_pytest/assertion/util.py index e7a9483f8a8..b27700c45f5 100644 --- a/src/_pytest/assertion/util.py +++ b/src/_pytest/assertion/util.py @@ -22,6 +22,7 @@ from _pytest._io.saferepr import saferepr from _pytest._io.saferepr import saferepr_unlimited from _pytest.config import Config +from _pytest.assertion.truncate import DEFAULT_MAX_CHARS, DEFAULT_MAX_LINES # The _reprcompare attribute on the util module is used by the new assertion @@ -310,20 +311,20 @@ def _diff_text(left: str, right: str, verbose: int = 0) -> list[str]: right = right[:-i] shortest = min(left, right, key=lambda x: len(x)) lines = j = 0 - if shortest.count("\n") >= 8: - # Keep only 8 lines + if shortest.count("\n") >= DEFAULT_MAX_LINES: + # Keep only DEFAULT_MAX_LINES, usually 8, lines for j, c in enumerate(shortest): if c == "\n": lines += 1 - if lines >= 8: + if lines >= DEFAULT_MAX_LINES: break else: j = len(max(left, right, key=lambda x: len(x))) # Not sure if this is right if i < 32: # We didn't skip equal trailing characters j += i - left = left[:min(640, len(left), j)] - right = right[:min(640, len(right), j)] + left = left[:min(DEFAULT_MAX_CHARS, len(left), j)] + right = right[:min(DEFAULT_MAX_CHARS, len(right), j)] keepends = True if left.isspace() or right.isspace(): left = repr(str(left)) From 57e0c78af10d4ae22af26df2e3e69304f81815c7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 20 Jul 2024 14:40:12 +0000 Subject: [PATCH 6/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/_pytest/assertion/util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/_pytest/assertion/util.py b/src/_pytest/assertion/util.py index 271978488e0..0925739f283 100644 --- a/src/_pytest/assertion/util.py +++ b/src/_pytest/assertion/util.py @@ -21,8 +21,9 @@ from _pytest._io.pprint import PrettyPrinter from _pytest._io.saferepr import saferepr from _pytest._io.saferepr import saferepr_unlimited +from _pytest.assertion.truncate import DEFAULT_MAX_CHARS +from _pytest.assertion.truncate import DEFAULT_MAX_LINES from _pytest.config import Config -from _pytest.assertion.truncate import DEFAULT_MAX_CHARS, DEFAULT_MAX_LINES # The _reprcompare attribute on the util module is used by the new assertion From 83164fb578c22f3f40897ba59ce3507801a6dcb6 Mon Sep 17 00:00:00 2001 From: devdanzin <74280297+devdanzin@users.noreply.github.com> Date: Mon, 22 Jul 2024 13:01:34 -0300 Subject: [PATCH 7/8] Fix the logic in _diff_text, add a test and update others. --- src/_pytest/assertion/util.py | 32 ++++++++++++++++---------------- testing/test_assertion.py | 14 +++++++++++++- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/src/_pytest/assertion/util.py b/src/_pytest/assertion/util.py index 0925739f283..2199c54024a 100644 --- a/src/_pytest/assertion/util.py +++ b/src/_pytest/assertion/util.py @@ -287,17 +287,18 @@ def _diff_text(left: str, right: str, verbose: int = 0) -> list[str]: explanation: list[str] = [] if verbose < 1: - i = 0 # just in case left or right has zero length - for i in range(min(len(left), len(right))): - if left[i] != right[i]: + leading_skipped = 0 # just in case left or right has zero length + for leading_skipped in range(min(len(left), len(right))): + if left[leading_skipped] != right[leading_skipped]: break - if i > 42: - i -= 10 # Provide some context + if leading_skipped > 42: + leading_skipped -= 10 # Provide some context explanation = [ - f"Skipping {i} identical leading characters in diff, use -v to show" + f"Skipping {leading_skipped} identical leading characters in diff, use -v to show" ] - left = left[i:] - right = right[i:] + left = left[leading_skipped:] + right = right[leading_skipped:] + i = 0 if len(left) == len(right): for i in range(len(left)): if left[-i] != right[-i]: @@ -311,21 +312,20 @@ def _diff_text(left: str, right: str, verbose: int = 0) -> list[str]: left = left[:-i] right = right[:-i] shortest = min(left, right, key=lambda x: len(x)) - lines = j = 0 + lines = j = start = 0 if shortest.count("\n") >= DEFAULT_MAX_LINES: # Keep only DEFAULT_MAX_LINES, usually 8, lines - for j, c in enumerate(shortest): + if 10 < leading_skipped < 42: # We didn't skip equal leading characters + start += leading_skipped - 10 + for j, c in enumerate(shortest[start:], start=start - 1): if c == "\n": lines += 1 - if lines >= DEFAULT_MAX_LINES: + if lines > DEFAULT_MAX_LINES: break else: j = len(max(left, right, key=lambda x: len(x))) - # Not sure if this is right - if i < 32: # We didn't skip equal trailing characters - j += i - left = left[: min(DEFAULT_MAX_CHARS, len(left), j)] - right = right[: min(DEFAULT_MAX_CHARS, len(right), j)] + left = left[start : min(DEFAULT_MAX_CHARS, len(left), j)] + right = right[start : min(DEFAULT_MAX_CHARS, len(right), j)] keepends = True if left.isspace() or right.isspace(): left = repr(str(left)) diff --git a/testing/test_assertion.py b/testing/test_assertion.py index 8c996d6671f..0052a10fcba 100644 --- a/testing/test_assertion.py +++ b/testing/test_assertion.py @@ -423,7 +423,7 @@ def test_long_text_diff_same_trailing(self) -> None: diff = callequal(left, right, verbose=0) assert diff is not None assert "- eggs" in diff - assert len(diff) == 17 + assert len(diff) == 19 def test_long_text_diff_different_trailing(self) -> None: left = "foo\nbar\n" * 100 + "spam\n" * 100 + "spam\n" * 160 @@ -439,6 +439,18 @@ def test_long_text_diff_short_trailing(self) -> None: diff = callequal(left, right, verbose=0) assert diff is not None assert "- eggs" in diff + assert len(diff) == 16 + + def test_long_text_diff_long_lines(self) -> None: + long_line1 = "foo" * 50 + "\n" + long_line2 = "bar" * 50 + "\n" + left = long_line1 * 3 + "spam\n" * 3 + long_line1 * 10 + right = long_line1 * 3 + "eggs\n" * 3 + long_line2 * 10 + diff = callequal(left, right, verbose=0) + assert diff is not None + assert "- eggs" in diff + assert "+ " + long_line1[:-1] in diff + assert "- " + long_line2[:-1] in diff assert len(diff) == 20 def test_bytes_diff_normal(self) -> None: From a48c40db3cba1dbe4b55eb5c47005ede0f3c0aff Mon Sep 17 00:00:00 2001 From: devdanzin <74280297+devdanzin@users.noreply.github.com> Date: Mon, 22 Jul 2024 13:25:22 -0300 Subject: [PATCH 8/8] Formatting. --- testing/test_assertion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/test_assertion.py b/testing/test_assertion.py index 0052a10fcba..8ea6f51a020 100644 --- a/testing/test_assertion.py +++ b/testing/test_assertion.py @@ -445,7 +445,7 @@ def test_long_text_diff_long_lines(self) -> None: long_line1 = "foo" * 50 + "\n" long_line2 = "bar" * 50 + "\n" left = long_line1 * 3 + "spam\n" * 3 + long_line1 * 10 - right = long_line1 * 3 + "eggs\n" * 3 + long_line2 * 10 + right = long_line1 * 3 + "eggs\n" * 3 + long_line2 * 10 diff = callequal(left, right, verbose=0) assert diff is not None assert "- eggs" in diff