22import logging
33import platform
44from contextlib import suppress
5- from dataclasses import dataclass
5+ from dataclasses import dataclass , field as dataclass_field
66from ipaddress import ip_address
77from time import time
88from typing import Awaitable , Callable , Dict , Optional , Tuple , Type , TypeVar , Union
99
1010from playwright .async_api import (
1111 BrowserContext ,
1212 BrowserType ,
13- Download ,
13+ Download as PlaywrightDownload ,
1414 Error as PlaywrightError ,
1515 Page ,
1616 Playwright as AsyncPlaywright ,
@@ -66,6 +66,19 @@ class BrowserContextWrapper:
6666 persistent : bool
6767
6868
@dataclass
class Download:
    """Mutable record of a file download triggered while loading a page.

    An instance is created empty and then filled in place by the page event
    handlers: the download handler stores the payload (or the exception
    raised while retrieving it), and the response handler stores the HTTP
    status and headers.

    Truthiness reports whether anything was captured: an instance is truthy
    when it holds a non-empty body or a recorded exception, and falsy when it
    is still in its default (empty) state.
    """

    # Raw bytes read from the downloaded file; empty until a download finishes.
    body: bytes = b""
    # URL reported by the browser for the download.
    url: str = ""
    # File name suggested by the browser for the download.
    suggested_filename: str = ""
    # Exception captured while handling the download, if any.
    exception: Optional[Exception] = None
    # HTTP status taken from the observed response; defaults to 200.
    response_status: int = 200
    # Response headers; default_factory gives every instance its own dict.
    headers: dict = dataclass_field(default_factory=dict)

    def __bool__(self) -> bool:
        """Return True when a body or an exception has been recorded."""
        # Compare against None instead of relying on the exception's own
        # truthiness: an exception class may define __bool__/__len__ and
        # evaluate as falsy, which would make a failed download look like
        # "no download" and silently drop the error.
        return bool(self.body) or self.exception is not None
80+
81+
6982@dataclass
7083class Config :
7184 cdp_url : Optional [str ]
@@ -397,7 +410,7 @@ async def _download_request_with_page(
397410 await _set_redirect_meta (request = request , response = response )
398411 headers = Headers (await response .all_headers ())
399412 headers .pop ("Content-Encoding" , None )
400- elif not download . get ( "bytes" ) :
413+ elif not download :
401414 logger .warning (
402415 "Navigating to %s returned None, the response"
403416 " will have empty headers and status 200" ,
@@ -428,20 +441,21 @@ async def _download_request_with_page(
428441 server_addr = await response .server_addr ()
429442 server_ip_address = ip_address (server_addr ["ipAddress" ])
430443
431- if download . get ( " exception" ) :
432- raise download [ " exception" ]
444+ if download and download . exception :
445+ raise download . exception
433446
434447 if not request .meta .get ("playwright_include_page" ):
435448 await page .close ()
436449 self .stats .inc_value ("playwright/page_count/closed" )
437450
438- if download . get ( "bytes" ) :
439- request .meta ["playwright_suggested_filename" ] = download .get ( " suggested_filename" )
440- respcls = responsetypes .from_args (url = download [ " url" ] , body = download [ "bytes" ] )
451+ if download :
452+ request .meta ["playwright_suggested_filename" ] = download .suggested_filename
453+ respcls = responsetypes .from_args (url = download . url , body = download . body )
441454 return respcls (
442- url = download ["url" ],
443- status = 200 ,
444- body = download ["bytes" ],
455+ url = download .url ,
456+ status = download .response_status ,
457+ headers = Headers (download .headers ),
458+ body = download .body ,
445459 request = request ,
446460 flags = ["playwright" ],
447461 )
@@ -461,29 +475,29 @@ async def _download_request_with_page(
461475
462476 async def _get_response_and_download (
463477 self , request : Request , page : Page , spider : Spider
464- ) -> Tuple [Optional [PlaywrightResponse ], dict ]:
478+ ) -> Tuple [Optional [PlaywrightResponse ], Optional [ Download ] ]:
465479 response : Optional [PlaywrightResponse ] = None
466- download : dict = {} # updated in-place in _handle_download
480+ download : Download = Download () # updated in-place in _handle_download
467481 download_started = asyncio .Event ()
468482 download_ready = asyncio .Event ()
469483
470- async def _handle_download (dwnld : Download ) -> None :
484+ async def _handle_download (dwnld : PlaywrightDownload ) -> None :
471485 download_started .set ()
472486 self .stats .inc_value ("playwright/download_count" )
473487 try :
474488 if failure := await dwnld .failure ():
475489 raise RuntimeError (f"Failed to download { dwnld .url } : { failure } " )
476- download_path = await dwnld .path ()
477- download ["bytes" ] = download_path .read_bytes ()
478- download ["url" ] = dwnld .url
479- download ["suggested_filename" ] = dwnld .suggested_filename
490+ download .body = (await dwnld .path ()).read_bytes ()
491+ download .url = dwnld .url
492+ download .suggested_filename = dwnld .suggested_filename
480493 except Exception as ex :
481- download [ " exception" ] = ex
494+ download . exception = ex
482495 finally :
483496 download_ready .set ()
484497
485498 async def _handle_response (response : PlaywrightResponse ) -> None :
486- download ["response_status" ] = response .status
499+ download .response_status = response .status
500+ download .headers = await response .all_headers ()
487501 download_started .set ()
488502
489503 page_goto_kwargs = request .meta .get ("playwright_page_goto_kwargs" ) or {}
@@ -513,7 +527,7 @@ async def _handle_response(response: PlaywrightResponse) -> None:
513527 )
514528 await download_started .wait ()
515529
516- if download .get ( " response_status" ) == 204 :
530+ if download .response_status == 204 :
517531 raise err
518532
519533 logger .debug (
@@ -531,7 +545,7 @@ async def _handle_response(response: PlaywrightResponse) -> None:
531545 page .remove_listener ("download" , _handle_download )
532546 page .remove_listener ("response" , _handle_response )
533547
534- return response , download
548+ return response , download if download else None
535549
536550 async def _apply_page_methods (self , page : Page , request : Request , spider : Spider ) -> None :
537551 context_name = request .meta .get ("playwright_context" )
0 commit comments