diff --git a/python_multipart/multipart.py b/python_multipart/multipart.py index 6c84829..b5d5bb8 100644 --- a/python_multipart/multipart.py +++ b/python_multipart/multipart.py @@ -1108,6 +1108,11 @@ def data_callback(name: CallbackName, end_i: int, remaining: bool = False) -> No i += 1 continue + # Skip leading non-boundary characters + if c != boundary[2]: + i += 1 + continue + # index is used as in index into our boundary. Set to 0. index = 0 diff --git a/tests/test_data/http/single_field_with_leading_newlines_and_text.http b/tests/test_data/http/single_field_with_leading_newlines_and_text.http new file mode 100644 index 0000000..7ac1e48 --- /dev/null +++ b/tests/test_data/http/single_field_with_leading_newlines_and_text.http @@ -0,0 +1,8 @@ +ingore this line + + +------WebKitFormBoundaryTkr3kCBQlBe1nrhc +Content-Disposition: form-data; name="field" + +This is a test. +------WebKitFormBoundaryTkr3kCBQlBe1nrhc-- \ No newline at end of file diff --git a/tests/test_data/http/single_field_with_leading_newlines_and_text.yaml b/tests/test_data/http/single_field_with_leading_newlines_and_text.yaml new file mode 100644 index 0000000..7690f08 --- /dev/null +++ b/tests/test_data/http/single_field_with_leading_newlines_and_text.yaml @@ -0,0 +1,6 @@ +boundary: ----WebKitFormBoundaryTkr3kCBQlBe1nrhc +expected: + - name: field + type: field + data: !!binary | + VGhpcyBpcyBhIHRlc3Qu diff --git a/tests/test_multipart.py b/tests/test_multipart.py index ce92ff4..e0aa0cc 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -1250,6 +1250,24 @@ def on_file(f: FileProtocol) -> None: f = FormParser("multipart/form-data", on_field=Mock(), on_file=on_file, boundary="boundary") f.write(data.encode("latin-1")) + def test_multipart_parser_data_before_first_boundary(self) -> None: + """Check that the parser accepts junk text and blank lines before the first boundary without raising.""" + data = ( + "EXtra" + "\r\n" * 2 + "--boundary\r\n" + 'Content-Disposition: 
form-data; name="file"; filename="filename.txt"\r\n' + "Content-Type: text/plain\r\n\r\n" + "hello\r\n" + "--boundary--" + ) + + files: list[File] = [] + + def on_file(f: FileProtocol) -> None: + files.append(cast(File, f)) + + f = FormParser("multipart/form-data", on_field=Mock(), on_file=on_file, boundary="boundary") + f.write(data.encode("latin-1")) + @pytest.fixture(autouse=True) def inject_fixtures(self, caplog: pytest.LogCaptureFixture) -> None: self._caplog = caplog