@@ -131,6 +131,7 @@ def add_webpages(
             description: str,
             knowledge_base: str = None,
             crawl_depth: int = 1,
+            limit: int = None,
             filters: List[str] = None):
         """
         Add a crawled URL to the agent for retrieval.
@@ -139,16 +140,19 @@ def add_webpages(
         :param description: Description of the webpages. Used by agent to know when to do retrieval.
         :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given.
         :param crawl_depth: How deep to crawl from each base URL. 0 = scrape given URLs only, -1 = default max
+        :param limit: Maximum number of pages to crawl
         :param filters: Include only URLs that match these regex patterns
         """
-        self.collection.add_webpages(self.name, urls, description, knowledge_base=knowledge_base, crawl_depth=crawl_depth, filters=filters)
+        self.collection.add_webpages(self.name, urls, description, knowledge_base=knowledge_base,
+                                     crawl_depth=crawl_depth, limit=limit, filters=filters)

     def add_webpage(
             self,
             url: str,
             description: str,
             knowledge_base: str = None,
             crawl_depth: int = 1,
+            limit: int = None,
             filters: List[str] = None):
         """
         Add a crawled URL to the agent for retrieval.
@@ -157,9 +161,11 @@ def add_webpage(
         :param description: Description of the webpages. Used by agent to know when to do retrieval.
         :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given.
         :param crawl_depth: How deep to crawl from each base URL. 0 = scrape given URLs only, -1 = default max
+        :param limit: Maximum number of pages to crawl
         :param filters: Include only URLs that match these regex patterns
         """
-        self.collection.add_webpage(self.name, url, description, knowledge_base=knowledge_base, crawl_depth=crawl_depth, filters=filters)
+        self.collection.add_webpage(self.name, url, description, knowledge_base=knowledge_base,
+                                    crawl_depth=crawl_depth, limit=limit, filters=filters)

     def add_database(self, database: str, tables: List[str], description: str):
         """
@@ -368,6 +374,7 @@ def add_webpages(
             description: str,
             knowledge_base: str = None,
             crawl_depth: int = 1,
+            limit: int = None,
             filters: List[str] = None
     ):
         """
@@ -378,6 +385,7 @@ def add_webpages(
         :param description: Description of the webpages. Used by agent to know when to do retrieval.
         :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given.
         :param crawl_depth: How deep to crawl from each base URL. 0 = scrape given URLs only
+        :param limit: Maximum number of pages to crawl
         :param filters: Include only URLs that match these regex patterns
         """
         if not urls:
@@ -393,7 +401,7 @@ def add_webpages(
             kb = self._create_default_knowledge_base(agent, kb_name)

         # Insert crawled webpage.
-        kb.insert_webpages(urls, crawl_depth=crawl_depth, filters=filters)
+        kb.insert_webpages(urls, crawl_depth=crawl_depth, filters=filters, limit=limit)

         # Make sure skill name is unique.
         skill_name = f'web_retrieval_skill_{uuid4().hex}'
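Note: the collection-level method forwards both bounds to kb.insert_webpages before wiring the knowledge base to a new web_retrieval_skill. One way to picture how crawl_depth and limit interact is a bounded breadth-first crawl. The sketch below is illustrative only, not the SDK's actual crawler; extract_links is a hypothetical stub, and -1 ("default max" in the docstring) is treated as unbounded for simplicity:

    from collections import deque
    from typing import List, Optional

    def extract_links(url: str) -> List[str]:
        # Stub: a real crawler would fetch the page and parse its links.
        return []

    def bounded_crawl(urls: List[str], crawl_depth: int = 1,
                      limit: Optional[int] = None) -> List[str]:
        seen = set(urls)
        queue = deque((url, 0) for url in urls)
        pages = []
        while queue:
            url, depth = queue.popleft()
            if limit is not None and len(pages) >= limit:
                break  # limit caps total pages, regardless of remaining depth
            pages.append(url)
            if crawl_depth != -1 and depth >= crawl_depth:
                continue  # crawl_depth caps how far links are followed
            for link in extract_links(url):
                if link not in seen:
                    seen.add(link)
                    queue.append((link, depth + 1))
        return pages

Under this reading, crawl_depth=0 with no limit scrapes only the seed URLs, while limit=N ends the crawl after N pages no matter how much depth budget remains.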
@@ -412,6 +420,7 @@ def add_webpage(
             description: str,
             knowledge_base: str = None,
             crawl_depth: int = 1,
+            limit: int = None,
             filters: List[str] = None):
         """
         Add a webpage to the agent for retrieval.
@@ -421,9 +430,11 @@ def add_webpage(
         :param description: Description of the webpage. Used by agent to know when to do retrieval.
         :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given.
         :param crawl_depth: How deep to crawl from each base URL. 0 = scrape given URLs only
+        :param limit: Maximum number of pages to crawl
         :param filters: Include only URLs that match these regex patterns
         """
-        self.add_webpages(name, [url], description, knowledge_base=knowledge_base, crawl_depth=crawl_depth, filters=filters)
+        self.add_webpages(name, [url], description, knowledge_base=knowledge_base,
+                          crawl_depth=crawl_depth, limit=limit, filters=filters)

     def add_database(self, name: str, database: str, tables: List[str], description: str):
         """