From 0cb1ff3258f4a9fe103cbfc7258f1fa125cdb4f8 Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Wed, 19 Mar 2025 01:10:51 +0000 Subject: [PATCH 1/2] update docstring for `Router` --- src/crawlee/router.py | 35 ++++++++++++++++++++++++++++++++++- uv.lock | 2 +- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/src/crawlee/router.py b/src/crawlee/router.py index ec2cac3e00..02bb4e4322 100644 --- a/src/crawlee/router.py +++ b/src/crawlee/router.py @@ -14,7 +14,40 @@ @docs_group('Classes') class Router(Generic[TCrawlingContext]): - """Dispatches requests to registered handlers based on their labels.""" + """Dispatches requests to registered handlers based on their labels. + + Create a `Router` instance and decorate handlers with it, specifying the `label` parameter to correctly process + requests requiring different logic. Pass it to the crawler as the `request_handler` parameter. + + ```python + from crawlee.crawlers import HttpCrawler, HttpCrawlingContext + from crawlee.router import Router + + router = Router[HttpCrawlingContext]() + + + # Handler for requests without a matching label handler + @router.default_handler + async def basic_handler(context: HttpCrawlingContext) -> None: + context.log.info(f'Request without label {context.request.url} ...') + + + # Handler for category requests + @router.handler(label='category') + async def a_handler(context: HttpCrawlingContext) -> None: + context.log.info(f'Category request {context.request.url} ...') + + + # Handler for product requests + @router.handler(label='product') + async def b_handler(context: HttpCrawlingContext) -> None: + context.log.info(f'Product {context.request.url} ...') + + + crawler = HttpCrawler(request_handler=router) + + await crawler.run() + """ def __init__(self) -> None: self._default_handler: RequestHandler[TCrawlingContext] | None = None diff --git a/uv.lock b/uv.lock index 95707046d3..a5c7fc0cda 100644 --- a/uv.lock +++ b/uv.lock @@ -562,7 +562,7 @@ toml = [ [[package]] name = "crawlee" -version = "0.6.5" +version = "0.6.6" source = { editable = "." } dependencies = [ { name = "apify-fingerprint-datapoints" }, From 611af36726854ab80f629466e422c5b236eadcdd Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Mon, 24 Mar 2025 12:51:16 +0100 Subject: [PATCH 2/2] Update router.py --- src/crawlee/router.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/crawlee/router.py b/src/crawlee/router.py index 02bb4e4322..6ceb09d1cd 100644 --- a/src/crawlee/router.py +++ b/src/crawlee/router.py @@ -14,10 +14,13 @@ @docs_group('Classes') class Router(Generic[TCrawlingContext]): - """Dispatches requests to registered handlers based on their labels. + """A request dispatching system that routes requests to registered handlers based on their labels. - Create a `Router` instance and decorate handlers with it, specifying the `label` parameter to correctly process - requests requiring different logic. Pass it to the crawler as the `request_handler` parameter. + The `Router` allows you to define and register request handlers for specific labels. When a request is received, + the router invokes the corresponding `request_handler` based on the request's `label`. If no matching handler + is found, the default handler is used. + + ### Usage ```python from crawlee.crawlers import HttpCrawler, HttpCrawlingContext @@ -28,25 +31,25 @@ class Router(Generic[TCrawlingContext]): # Handler for requests without a matching label handler @router.default_handler - async def basic_handler(context: HttpCrawlingContext) -> None: + async def default_handler(context: HttpCrawlingContext) -> None: context.log.info(f'Request without label {context.request.url} ...') # Handler for category requests @router.handler(label='category') - async def a_handler(context: HttpCrawlingContext) -> None: + async def category_handler(context: HttpCrawlingContext) -> None: context.log.info(f'Category request {context.request.url} ...') # Handler for product requests @router.handler(label='product') - async def b_handler(context: HttpCrawlingContext) -> None: + async def product_handler(context: HttpCrawlingContext) -> None: context.log.info(f'Product {context.request.url} ...') - crawler = HttpCrawler(request_handler=router) - - await crawler.run() + async def main() -> None: + crawler = HttpCrawler(request_handler=router) + await crawler.run() """ def __init__(self) -> None: