diff --git a/src/crawlee/router.py b/src/crawlee/router.py
index ec2cac3e00..6ceb09d1cd 100644
--- a/src/crawlee/router.py
+++ b/src/crawlee/router.py
@@ -14,7 +14,43 @@
 
 @docs_group('Classes')
 class Router(Generic[TCrawlingContext]):
-    """Dispatches requests to registered handlers based on their labels."""
+    """A request dispatching system that routes requests to registered handlers based on their labels.
+
+    The `Router` allows you to define and register request handlers for specific labels. When a request is received,
+    the router invokes the corresponding `request_handler` based on the request's `label`. If no matching handler
+    is found, the default handler is used.
+
+    ### Usage
+
+    ```python
+    from crawlee.crawlers import HttpCrawler, HttpCrawlingContext
+    from crawlee.router import Router
+
+    router = Router[HttpCrawlingContext]()
+
+
+    # Handler for requests without a matching label handler
+    @router.default_handler
+    async def default_handler(context: HttpCrawlingContext) -> None:
+        context.log.info(f'Request without label {context.request.url} ...')
+
+
+    # Handler for category requests
+    @router.handler(label='category')
+    async def category_handler(context: HttpCrawlingContext) -> None:
+        context.log.info(f'Category request {context.request.url} ...')
+
+
+    # Handler for product requests
+    @router.handler(label='product')
+    async def product_handler(context: HttpCrawlingContext) -> None:
+        context.log.info(f'Product {context.request.url} ...')
+
+
+    async def main() -> None:
+        crawler = HttpCrawler(request_handler=router)
+        await crawler.run()
+    ```
+    """
 
     def __init__(self) -> None:
         self._default_handler: RequestHandler[TCrawlingContext] | None = None
diff --git a/uv.lock b/uv.lock
index 95707046d3..a5c7fc0cda 100644
--- a/uv.lock
+++ b/uv.lock
@@ -562,7 +562,7 @@ toml = [
 
 [[package]]
 name = "crawlee"
-version = "0.6.5"
+version = "0.6.6"
 source = { editable = "." }
 dependencies = [
     { name = "apify-fingerprint-datapoints" },
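
For context on the behavior the new docstring describes (invoke the handler registered for the request's `label`, falling back to the default handler when no label matches), here is a minimal, self-contained sketch of that dispatch pattern. It is illustrative only and not crawlee's actual implementation; `MiniRouter`, `FakeContext`, and the handler names below are hypothetical stand-ins.

```python
# Illustrative sketch of label-based dispatch; not crawlee's real Router.
from __future__ import annotations

import asyncio
from collections.abc import Awaitable, Callable
from dataclasses import dataclass


@dataclass
class FakeContext:
    """Stand-in for a crawling context carrying the request URL and label."""

    url: str
    label: str | None = None


Handler = Callable[[FakeContext], Awaitable[None]]


class MiniRouter:
    """Routes a context to the handler registered for its label, else to the default handler."""

    def __init__(self) -> None:
        self._default: Handler | None = None
        self._handlers: dict[str, Handler] = {}

    def default_handler(self, handler: Handler) -> Handler:
        # Register the fallback handler used when no label matches.
        self._default = handler
        return handler

    def handler(self, label: str) -> Callable[[Handler], Handler]:
        # Return a decorator that registers a handler under the given label.
        def register(handler: Handler) -> Handler:
            self._handlers[label] = handler
            return handler

        return register

    async def __call__(self, context: FakeContext) -> None:
        # Look up the handler by label; fall back to the default handler.
        handler = self._handlers.get(context.label or '', self._default)
        if handler is None:
            raise RuntimeError('No handler matched and no default handler is registered.')
        await handler(context)


router = MiniRouter()


@router.default_handler
async def default(context: FakeContext) -> None:
    print(f'default: {context.url}')


@router.handler('product')
async def product(context: FakeContext) -> None:
    print(f'product: {context.url}')


asyncio.run(router(FakeContext(url='https://example.com/p/1', label='product')))
```

The key design point mirrored here is that the router itself is callable, which is why the docstring's `main()` example can pass it directly as `request_handler=router`.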