The-PR-Agent · gvago · Apr 14, 2026 · Apr 15, 2026 · Apr 16, 2026 · qodo-free-for-open-source-projects
diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py
@@ -354,8 +354,9 @@ def decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
 
         if line.startswith('@@'):
             header_line = line
+            prev_match = match  # save previous match before overwriting
             match = RE_HUNK_HEADER.match(line)
-            if match and (new_content_lines or old_content_lines):  # found a new hunk, split the previous lines
+            if prev_match and (new_content_lines or old_content_lines):  # flush the previous hunk
                 if prev_header_line:
                     patch_with_lines_str += f'\n{prev_header_line}\n'
                 is_plus_lines = is_minus_lines = False
@@ -371,12 +372,16 @@ def decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
                     patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__old hunk__\n'
                     for line_old in old_content_lines:
                         patch_with_lines_str += f"{line_old}\n"
-                new_content_lines = []
-                old_content_lines = []
+            # Always reset buffers on any @@ line — whether prev_match flushed
+            # or not. This prevents orphan lines (collected after a malformed @@
+            # that set match=None) from leaking into the next valid hunk.
+            new_content_lines = []
+            old_content_lines = []
             if match:
                 prev_header_line = header_line
-
-            section_header, size1, size2, start1, start2 = extract_hunk_headers(match)
+                section_header, size1, size2, start1, start2 = extract_hunk_headers(match)
+            else:
+                continue  # skip lines that start with @@ but don't match the hunk header pattern
 
         elif line.startswith('+'):
             new_content_lines.append(line)
@@ -391,9 +396,11 @@ def decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
             new_content_lines.append(line)
             old_content_lines.append(line)
 
-    # finishing last hunk
-    if match and new_content_lines:
-        patch_with_lines_str += f'\n{header_line}\n'
+    # finishing last hunk — use prev_header_line (not match/header_line) because
+    # match may have been set to None by a trailing malformed @@ line, and
+    # header_line may point to that malformed line instead of the last valid hunk
+    if prev_header_line and new_content_lines:
+        patch_with_lines_str += f'\n{prev_header_line}\n'
         is_plus_lines = is_minus_lines = False
         if new_content_lines:
             is_plus_lines = any([line.startswith('+') for line in new_content_lines])
@@ -430,6 +437,9 @@ def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, s
                 header_line = line
 
                 match = RE_HUNK_HEADER.match(line)
+                if not match:
+                    skip_hunk = True
+                    continue
 
                 section_header, size1, size2, start1, start2 = extract_hunk_headers(match)
 

diff --git a/tests/unittest/test_malformed_hunk_header.py b/tests/unittest/test_malformed_hunk_header.py
@@ -0,0 +1,151 @@
+"""
+Tests for malformed @@ hunk headers in decouple_and_convert_to_hunks_with_lines_numbers().
+
+Verifies that:
+1. A malformed @@ line does not crash the function.
+2. Content from valid hunks before and after a malformed @@ is preserved.
+3. When a malformed @@ is the last @@-starting line, the preceding hunk is
+   still finalized correctly.
+"""
+
+from pr_agent.algo.git_patch_processing import decouple_and_convert_to_hunks_with_lines_numbers
+from pr_agent.config_loader import get_settings
+
+get_settings(use_context=False).set("CONFIG.CLI_MODE", True)
+
+
+class _FakeFile:
+    """Minimal file object expected by the function under test."""
+
+    def __init__(self, filename="test_file.py"):
+        self.filename = filename
+
+
+class TestMalformedHunkHeader:
+    def test_malformed_hunk_does_not_crash(self):
+        """A patch whose only @@ line is malformed should not raise."""
+        patch = "@@ THIS IS NOT A VALID HUNK HEADER @@\n+added line\n-removed line"
+        result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile())
+        assert isinstance(result, str)
+
+    def test_valid_hunk_before_malformed_is_preserved(self):
+        """Content from a valid hunk that precedes a malformed @@ must appear in output."""
+        patch = (
+            "@@ -1,3 +1,4 @@ section\n"
+            " context\n"
+            "+added_line\n"
+            " more_context\n"
+            "@@ MALFORMED @@ not a real header\n"
+        )
+        result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile())
+        # The valid hunk's added line must be present
+        assert "+added_line" in result
+        assert "__new hunk__" in result
+
+    def test_malformed_hunk_between_two_valid_hunks(self):
+        """A malformed @@ between two valid hunks must not drop either hunk's content."""
+        patch = (
+            "@@ -1,3 +1,4 @@ first_section\n"
+            " ctx1\n"
+            "+add1\n"
+            " ctx2\n"
+            "@@ GARBAGE @@ not valid\n"
+            "@@ -10,2 +11,3 @@ second_section\n"
+            " ctx3\n"
+            "+add2\n"
+        )
+        result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile())
+        assert "+add1" in result, "First hunk content was dropped"
+        assert "+add2" in result, "Second hunk content was dropped"
+
+    def test_trailing_malformed_hunk_does_not_drop_last_valid(self):
+        """When a malformed @@ is the LAST @@-line, the previous hunk must still be finalized."""
+        patch = (
+            "@@ -5,3 +5,4 @@ my_func\n"
+            " existing_line\n"
+            "+new_line\n"
+            " another_existing\n"
+            "@@ INVALID HEADER\n"
+        )
+        result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile())
+        # The valid hunk before the malformed trailing @@ must be present
+        assert "+new_line" in result
+        assert "__new hunk__" in result
+        # The malformed header text should NOT appear as a hunk header in the output
+        assert "INVALID HEADER" not in result
+
+    def test_line_numbers_correct_with_malformed_between(self):
+        """Line numbers from valid hunks are correct even when a malformed @@ sits between them."""
+        patch = (
+            "@@ -1,2 +1,3 @@ header1\n"
+            " line_a\n"
+            "+line_b\n"
+            "@@ NOT_VALID\n"
+            "@@ -10,2 +11,3 @@ header2\n"
+            " line_c\n"
+            "+line_d\n"
+        )
+        result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile())
+        # First hunk starts at new-line 1: "1  line_a", "2 +line_b"
+        assert "1  line_a" in result
+        assert "2 +line_b" in result
+        # Second hunk starts at new-line 11: "11  line_c", "12 +line_d"
+        assert "11  line_c" in result
+        assert "12 +line_d" in result
+
+    def test_only_malformed_hunks_returns_file_header_only(self):
+        """A patch with ONLY malformed @@ lines should return just the file header."""
+        patch = (
+            "@@ BROKEN1 @@\n"
+            "+orphan_add\n"
+            "@@ BROKEN2 @@\n"
+            "-orphan_del\n"
+        )
+        result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile())
+        # No valid hunk was ever parsed, so no __new hunk__ / __old hunk__ sections
+        assert "__new hunk__" not in result
+        assert "__old hunk__" not in result
+
+    def test_deletion_only_hunk_before_malformed(self):
+        """A hunk with only deletions before a malformed @@ is still finalized."""
+        patch = (
+            "@@ -1,3 +1,2 @@ section\n"
+            " context\n"
+            "-removed_line\n"
+            "@@ NOT VALID @@\n"
+        )
+        result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile())
+        assert "-removed_line" in result
+        assert "__old hunk__" in result
+
+    def test_orphan_lines_after_malformed_not_joined_to_next_hunk(self):
+        """Orphan lines between a malformed @@ and the next valid @@ must be discarded.
+
+        Without clearing the buffers unconditionally on every @@ line, orphan
+        lines collected after a malformed @@ (where match=None) leak into the
+        next valid hunk because prev_match is None so the flush block is
+        skipped and the buffers are never reset.
+        """
+        patch = (
+            "@@ -1,3 +1,4 @@ first_section\n"
+            " ctx1\n"
+            "+add1\n"
+            " ctx2\n"
+            "@@ MALFORMED @@ not a real header\n"
+            "+orphan_line_should_be_discarded\n"
+            "-orphan_del_should_be_discarded\n"
+            "@@ -10,2 +11,3 @@ second_section\n"
+            " ctx3\n"
+            "+add2\n"
+        )
+        result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile())
+        # Valid hunk content must be present
+        assert "+add1" in result, "First hunk content was dropped"
+        assert "+add2" in result, "Second hunk content was dropped"
+        # Orphan lines must NOT appear in any hunk
+        assert "orphan_line_should_be_discarded" not in result, (
+            "Orphan line after malformed @@ leaked into the next hunk"
+        )
+        assert "orphan_del_should_be_discarded" not in result, (
+            "Orphan deletion after malformed @@ leaked into the next hunk"
+        )