@@ -364,6 +364,13 @@ async def _download_request(self, request: Request, spider: Spider) -> Response:
364
364
page = page , request = request , spider = spider , context_name = context_name
365
365
)
366
366
367
+ # We need to identify the Playwright request that matches the Scrapy request
368
+ # in order to override method and body if necessary.
369
+ # Checking the URL and PlaywrightRequest.is_navigation_request() is not enough, e.g.
370
+ # requests produced by submitting forms can produce false positives.
371
+ # Let's track only the first request that matches the above conditions.
372
+ initial_request_done = asyncio .Event ()
373
+
367
374
await page .unroute ("**" )
368
375
await page .route (
369
376
"**" ,
@@ -375,6 +382,7 @@ async def _download_request(self, request: Request, spider: Spider) -> Response:
375
382
body = request .body ,
376
383
encoding = request .encoding ,
377
384
spider = spider ,
385
+ initial_request_done = initial_request_done ,
378
386
),
379
387
)
380
388
@@ -652,6 +660,7 @@ def _make_request_handler(
652
660
body : Optional [bytes ],
653
661
encoding : str ,
654
662
spider : Spider ,
663
+ initial_request_done : asyncio .Event ,
655
664
) -> Callable :
656
665
async def _request_handler (route : Route , playwright_request : PlaywrightRequest ) -> None :
657
666
"""Override request headers, method and body."""
@@ -691,7 +700,9 @@ async def _request_handler(route: Route, playwright_request: PlaywrightRequest)
691
700
if (
692
701
playwright_request .url .rstrip ("/" ) == url .rstrip ("/" )
693
702
and playwright_request .is_navigation_request ()
703
+ and not initial_request_done .is_set ()
694
704
):
705
+ initial_request_done .set ()
695
706
if method .upper () != playwright_request .method .upper ():
696
707
overrides ["method" ] = method
697
708
if body :
0 commit comments