
Commit b25f1de
1 parent f6bf8ce

added limit parameter to
- agent.add_webpages
- knowledge_base.insert_webpages
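As a usage sketch of the new parameter (the connection, agent name, and URL below are hypothetical; the signatures match this commit):

    import mindsdb_sdk

    con = mindsdb_sdk.connect()          # local MindsDB instance
    agent = con.agents.get('my_agent')   # hypothetical existing agent

    # Crawl at most 10 pages, starting from the given URL, two levels deep.
    agent.add_webpages(
        urls=['https://docs.example.com'],
        description='Example product documentation',
        crawl_depth=2,
        limit=10,
    )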

2 files changed (+19 −5)


mindsdb_sdk/agents.py (+15 −4)
@@ -131,6 +131,7 @@ def add_webpages(
             description: str,
             knowledge_base: str = None,
             crawl_depth: int = 1,
+            limit: int = None,
             filters: List[str] = None):
         """
         Add a crawled URL to the agent for retrieval.
@@ -139,16 +140,19 @@ def add_webpages(
         :param description: Description of the webpages. Used by agent to know when to do retrieval.
         :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given.
         :param crawl_depth: How deep to crawl from each base URL. 0 = scrape given URLs only, -1 = default max
+        :param limit: max count of pages to crawl
         :param filters: Include only URLs that match these regex patterns
         """
-        self.collection.add_webpages(self.name, urls, description, knowledge_base=knowledge_base, crawl_depth=crawl_depth, filters=filters)
+        self.collection.add_webpages(self.name, urls, description, knowledge_base=knowledge_base,
+                                     crawl_depth=crawl_depth, limit=limit, filters=filters)
 
     def add_webpage(
             self,
             url: str,
             description: str,
             knowledge_base: str = None,
             crawl_depth: int = 1,
+            limit: int = None,
             filters: List[str] = None):
         """
         Add a crawled URL to the agent for retrieval.
@@ -157,9 +161,11 @@ def add_webpage(
         :param description: Description of the webpages. Used by agent to know when to do retrieval.
         :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given.
         :param crawl_depth: How deep to crawl from each base URL. 0 = scrape given URLs only, -1 = default max
+        :param limit: max count of pages to crawl
         :param filters: Include only URLs that match these regex patterns
         """
-        self.collection.add_webpage(self.name, url, description, knowledge_base=knowledge_base, crawl_depth=crawl_depth, filters=filters)
+        self.collection.add_webpage(self.name, url, description, knowledge_base=knowledge_base,
+                                    crawl_depth=crawl_depth, limit=limit, filters=filters)
 
     def add_database(self, database: str, tables: List[str], description: str):
         """
@@ -368,6 +374,7 @@ def add_webpages(
             description: str,
             knowledge_base: str = None,
             crawl_depth: int = 1,
+            limit: int = None,
             filters: List[str] = None
             ):
         """
@@ -378,6 +385,7 @@ def add_webpages(
         :param description: Description of the webpages. Used by agent to know when to do retrieval.
         :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given.
         :param crawl_depth: How deep to crawl from each base URL. 0 = scrape given URLs only
+        :param limit: max count of pages to crawl
         :param filters: Include only URLs that match these regex patterns
         """
         if not urls:
@@ -393,7 +401,7 @@ def add_webpages(
             kb = self._create_default_knowledge_base(agent, kb_name)
 
         # Insert crawled webpage.
-        kb.insert_webpages(urls, crawl_depth=crawl_depth, filters=filters)
+        kb.insert_webpages(urls, crawl_depth=crawl_depth, filters=filters, limit=limit)
 
         # Make sure skill name is unique.
         skill_name = f'web_retrieval_skill_{uuid4().hex}'
@@ -412,6 +420,7 @@ def add_webpage(
             description: str,
             knowledge_base: str = None,
             crawl_depth: int = 1,
+            limit: int = None,
             filters: List[str] = None):
         """
         Add a webpage to the agent for retrieval.
@@ -421,9 +430,11 @@ def add_webpage(
         :param description: Description of the webpage. Used by agent to know when to do retrieval.
         :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given.
         :param crawl_depth: How deep to crawl from each base URL. 0 = scrape given URLs only
+        :param limit: max count of pages to crawl
         :param filters: Include only URLs that match these regex patterns
         """
-        self.add_webpages(name, [url], description, knowledge_base=knowledge_base, crawl_depth=crawl_depth, filters=filters)
+        self.add_webpages(name, [url], description, knowledge_base=knowledge_base,
+                          crawl_depth=crawl_depth, limit=limit, filters=filters)
 
     def add_database(self, name: str, database: str, tables: List[str], description: str):
         """

mindsdb_sdk/knowledge_bases.py (+4 −1)
@@ -127,18 +127,21 @@ def insert_files(self, file_paths: List[str], params: dict = None):
             data=data
         )
 
-    def insert_webpages(self, urls: List[str], crawl_depth: int = 1, filters: List[str] = None, params: dict = None):
+    def insert_webpages(self, urls: List[str], crawl_depth: int = 1,
+                        filters: List[str] = None, limit=None, params: dict = None):
         """
         Insert data from crawled URLs to knowledge base.
 
         :param urls: URLs to be crawled and inserted.
         :param crawl_depth: How deep to crawl from each base URL. 0 = scrape given URLs only
         :param filters: Include only URLs that match these regex patterns
+        :param limit: max count of pages to crawl
         :param params: Runtime parameters for KB
         """
         data={
             'urls': urls,
             'crawl_depth': crawl_depth,
+            'limit': limit,
             'filters': [] if filters is None else filters,
         }
         if params:
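At the knowledge-base level the new argument simply rides along in the request payload ('limit' in data). A direct call might look like this (the KB name and URL are hypothetical):

    kb = con.knowledge_bases.get('my_kb')   # hypothetical existing knowledge base
    kb.insert_webpages(
        ['https://docs.example.com'],
        crawl_depth=1,
        limit=5,   # stop after crawling 5 pages
    )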
