Skip to content

Commit 1e3f184

Browse files
authored
Override method and/or body only for the first matching request (#297)
1 parent e26d3c1 commit 1e3f184

File tree

2 files changed

+14
-0
lines changed

2 files changed

+14
-0
lines changed

scrapy_playwright/handler.py

+11
Original file line number | Diff line number | Diff line change
@@ -364,6 +364,13 @@ async def _download_request(self, request: Request, spider: Spider) -> Response:
364364
page=page, request=request, spider=spider, context_name=context_name
365365
)
366366

367+
# We need to identify the Playwright request that matches the Scrapy request
368+
# in order to override method and body if necessary.
369+
# Checking the URL and Request.is_navigation_request() is not enough, e.g.
370+
# requests produced by submitting forms can produce false positives.
371+
# Let's track only the first request that matches the above conditions.
372+
initial_request_done = asyncio.Event()
373+
367374
await page.unroute("**")
368375
await page.route(
369376
"**",
@@ -375,6 +382,7 @@ async def _download_request(self, request: Request, spider: Spider) -> Response:
375382
body=request.body,
376383
encoding=request.encoding,
377384
spider=spider,
385+
initial_request_done=initial_request_done,
378386
),
379387
)
380388

@@ -652,6 +660,7 @@ def _make_request_handler(
652660
body: Optional[bytes],
653661
encoding: str,
654662
spider: Spider,
663+
initial_request_done: asyncio.Event,
655664
) -> Callable:
656665
async def _request_handler(route: Route, playwright_request: PlaywrightRequest) -> None:
657666
"""Override request headers, method and body."""
@@ -691,7 +700,9 @@ async def _request_handler(route: Route, playwright_request: PlaywrightRequest)
691700
if (
692701
playwright_request.url.rstrip("/") == url.rstrip("/")
693702
and playwright_request.is_navigation_request()
703+
and not initial_request_done.is_set()
694704
):
705+
initial_request_done.set()
695706
if method.upper() != playwright_request.method.upper():
696707
overrides["method"] = method
697708
if body:

tests/tests_asyncio/test_playwright_requests.py

+3
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import asyncio
12
import json
23
import logging
34
import platform
@@ -112,6 +113,7 @@ async def test_route_continue_exception(self, logger):
112113
async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler:
113114
scrapy_request = Request(url="https://example.org", method="GET")
114115
spider = Spider("foo")
116+
initial_request_done = asyncio.Event()
115117
req_handler = handler._make_request_handler(
116118
context_name=DEFAULT_CONTEXT_NAME,
117119
method=scrapy_request.method,
@@ -120,6 +122,7 @@ async def test_route_continue_exception(self, logger):
120122
body=None,
121123
encoding="utf-8",
122124
spider=spider,
125+
initial_request_done=initial_request_done,
123126
)
124127
route = MagicMock()
125128
playwright_request = AsyncMock()

0 commit comments

Comments
 (0)