Skip to content

Commit b6bb4e0

Browse files
authored
[feature] support Brave Search API and refactor Google Serper API in BingBrowser (#233)
* support brave search api * Update brave_search.py * Update __init__.py * support brave search api and refactor google serper api * pre-commit * Delete lagent/actions/brave_search.py * Update __init__.py * add docstring
1 parent cefbc50 commit b6bb4e0

File tree

1 file changed

+209
-0
lines changed

1 file changed

+209
-0
lines changed

lagent/actions/bing_browser.py

+209
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,215 @@ def _parse_response(self, response: dict) -> dict:
164164
return self._filter_results(raw_results)
165165

166166

167+
class BraveSearch(BaseSearch):
    """Wrapper around the Brave Search API.

    To use, you should pass your Brave Search API key to the constructor.

    Args:
        api_key (str): API KEY to use Brave Search API.
            You can create a free API key at
            https://api.search.brave.com/app/keys.
        region (str): The country code string. Specifies the country where
            the search results come from. Sent as the ``country`` query
            parameter.
        language (str): The language code string. Specifies the preferred
            language for the search results. Sent as the ``search_lang``
            query parameter.
        extra_snippests (bool): Allows retrieving up to 5 additional
            snippets, which are alternative excerpts from the search
            results. Sent as the ``extra_snippets`` query parameter. The
            misspelled parameter name is kept on purpose so existing
            keyword callers keep working.
        topk (int): The number of search results returned in response from
            API search results. Sent as the ``count`` query parameter.
        black_list (List[str]): Forwarded to ``BaseSearch`` together with
            ``topk``. NOTE(review): presumably URL fragments to exclude
            from the results -- confirm against ``BaseSearch``.
        **kwargs: Any other parameters related to the Brave Search API.
            Find more details at
            https://api.search.brave.com/app/documentation/web-search/get-started.
            Entries whose value is ``None`` are not sent. Two keys are
            consumed by this wrapper instead of being sent to the API:
            ``proxy`` (handed to ``requests`` as ``proxies``) and
            ``search_type`` (Brave Search API supports
            ['web', 'news', 'images', 'videos'], currently only 'news' and
            'web' are supported here; defaults to 'web').
    """

    # Keyword arguments that configure this wrapper itself. They must never
    # be forwarded as HTTP query parameters (``proxy`` in particular may
    # carry credentials).
    _wrapper_only_kwargs = ('proxy', 'search_type')

    def __init__(self,
                 api_key: str,
                 region: str = 'ALL',
                 language: str = 'zh-hans',
                 extra_snippests: bool = True,
                 topk: int = 3,
                 black_list: List[str] = [
                     'enoN',
                     'youtube.com',
                     'bilibili.com',
                     'researchgate.net',
                 ],
                 **kwargs):
        self.api_key = api_key
        self.market = region
        self.proxy = kwargs.get('proxy')
        self.language = language
        self.extra_snippests = extra_snippests
        self.search_type = kwargs.get('search_type', 'web')
        # Kept unmodified for backward compatibility; wrapper-only keys are
        # filtered out when the request is built.
        self.kwargs = kwargs
        # Copy so instances never share (and mutate) the default list object.
        super().__init__(topk, list(black_list))

    @cached(cache=TTLCache(maxsize=100, ttl=600))
    def search(self, query: str, max_retry: int = 3) -> dict:
        """Query Brave Search, retrying on any error.

        Results are memoised by the ``cached`` decorator above, configured
        with ``TTLCache(maxsize=100, ttl=600)``.

        Args:
            query (str): The search query.
            max_retry (int): Maximum number of attempts.

        Returns:
            dict: The value produced by ``_parse_response``.

        Raises:
            Exception: If every attempt failed. The last underlying error is
                attached as ``__cause__``.
        """
        last_error = None
        for attempt in range(max_retry):
            try:
                response = self._call_brave_api(query)
                return self._parse_response(response)
            except Exception as e:
                last_error = e
                logging.exception(str(e))
                warnings.warn(
                    f'Retry {attempt + 1}/{max_retry} due to error: {e}')
                # Back off only when another attempt will actually follow.
                if attempt + 1 < max_retry:
                    time.sleep(random.randint(2, 5))
        raise Exception(
            'Failed to get search results from Brave Search after retries.'
        ) from last_error

    def _call_brave_api(self, query: str) -> dict:
        """Send one GET request to the Brave endpoint for ``search_type``.

        Args:
            query (str): The search query.

        Returns:
            dict: The decoded JSON body of the response.

        Raises:
            requests.HTTPError: Via ``raise_for_status`` on an error status.
        """
        endpoint = f'https://api.search.brave.com/res/v1/{self.search_type}/search'
        # User-supplied API parameters are spread last, so they can override
        # the defaults above them. ``None`` values and wrapper-only keys are
        # not sent.
        extra_params = {
            key: value
            for key, value in self.kwargs.items()
            if value is not None and key not in self._wrapper_only_kwargs
        }
        params = {
            'q': query,
            'country': self.market,
            'search_lang': self.language,
            'extra_snippets': self.extra_snippests,
            'count': self.topk,
            **extra_params,
        }
        headers = {
            'X-Subscription-Token': self.api_key or '',
            'Accept': 'application/json'
        }
        response = requests.get(
            endpoint, headers=headers, params=params, proxies=self.proxy)
        response.raise_for_status()
        return response.json()

    def _parse_response(self, response: dict) -> dict:
        """Turn a Brave JSON response into ``(url, snippet, title)`` tuples.

        For ``search_type == 'web'`` the items are read from
        ``response['web']['results']``, otherwise from
        ``response['results']``. The snippet is the item description
        followed by its extra snippets, joined with single spaces.

        Args:
            response (dict): Decoded JSON body returned by the API.

        Returns:
            dict: Whatever ``self._filter_results`` returns for the tuples.
        """
        # ``or`` fallbacks also cover keys that are present but null.
        if self.search_type == 'web':
            items = (response.get('web') or {}).get('results') or []
        else:
            items = response.get('results') or []

        raw_results = []
        for item in items:
            snippets = [
                item.get('description'),
                *(item.get('extra_snippets') or []),
            ]
            raw_results.append((
                item.get('url', ''),
                ' '.join(filter(None, snippets)),
                item.get('title', ''),
            ))
        return self._filter_results(raw_results)
262+
263+
264+
class GoogleSearch(BaseSearch):
    """Wrapper around the Serper.dev Google Search API.

    To use, you should pass your serper API key to the constructor.

    Args:
        api_key (str): API KEY to use serper google search API.
            You can create a free API key at https://serper.dev.
        topk (int): The number of search results returned in response from
            api search results. Sent as the ``num`` query parameter and also
            used to truncate the result list.
        black_list (List[str]): Forwarded to ``BaseSearch`` together with
            ``topk``. NOTE(review): presumably URL fragments to exclude
            from the results -- confirm against ``BaseSearch``.
        **kwargs: Any other parameters related to the Serper API. Find more
            details at https://serper.dev/playground
            Entries whose value is ``None`` are not sent. Two keys are
            consumed by this wrapper instead of being sent to the API:
            ``proxy`` (handed to ``requests`` as ``proxies``) and
            ``search_type`` (Serper API supports
            ['search', 'images', 'news', 'places'] types of search,
            currently we only support 'search' and 'news'; defaults to
            'search').
    """

    # Maps a search type to the response key that holds its result list.
    result_key_for_type = {
        'news': 'news',
        'places': 'places',
        'images': 'images',
        'search': 'organic',
    }

    # Keyword arguments that configure this wrapper itself. They must never
    # be forwarded as HTTP query parameters (``proxy`` in particular may
    # carry credentials).
    _wrapper_only_kwargs = ('proxy', 'search_type')

    def __init__(self,
                 api_key: str,
                 topk: int = 3,
                 black_list: List[str] = [
                     'enoN',
                     'youtube.com',
                     'bilibili.com',
                     'researchgate.net',
                 ],
                 **kwargs):
        self.api_key = api_key
        self.proxy = kwargs.get('proxy')
        self.search_type = kwargs.get('search_type', 'search')
        # Kept unmodified for backward compatibility; wrapper-only keys are
        # filtered out when the request is built.
        self.kwargs = kwargs
        # Copy so instances never share (and mutate) the default list object.
        super().__init__(topk, list(black_list))

    @cached(cache=TTLCache(maxsize=100, ttl=600))
    def search(self, query: str, max_retry: int = 3) -> dict:
        """Query Google through Serper, retrying on any error.

        Results are memoised by the ``cached`` decorator above, configured
        with ``TTLCache(maxsize=100, ttl=600)``.

        Args:
            query (str): The search query.
            max_retry (int): Maximum number of attempts.

        Returns:
            dict: The value produced by ``_parse_response``.

        Raises:
            Exception: If every attempt failed. The last underlying error is
                attached as ``__cause__``.
        """
        last_error = None
        for attempt in range(max_retry):
            try:
                response = self._call_serper_api(query)
                return self._parse_response(response)
            except Exception as e:
                last_error = e
                logging.exception(str(e))
                warnings.warn(
                    f'Retry {attempt + 1}/{max_retry} due to error: {e}')
                # Back off only when another attempt will actually follow.
                if attempt + 1 < max_retry:
                    time.sleep(random.randint(2, 5))
        raise Exception(
            'Failed to get search results from Google Serper Search after retries.'
        ) from last_error

    def _call_serper_api(self, query: str) -> dict:
        """Send one GET request to the Serper endpoint for ``search_type``.

        Args:
            query (str): The search query.

        Returns:
            dict: The decoded JSON body of the response.

        Raises:
            requests.HTTPError: Via ``raise_for_status`` on an error status.
        """
        endpoint = f'https://google.serper.dev/{self.search_type}'
        # User-supplied API parameters are spread last, so they can override
        # the defaults above them. ``None`` values and wrapper-only keys are
        # not sent.
        extra_params = {
            key: value
            for key, value in self.kwargs.items()
            if value is not None and key not in self._wrapper_only_kwargs
        }
        params = {
            'q': query,
            'num': self.topk,
            **extra_params,
        }
        headers = {
            'X-API-KEY': self.api_key or '',
            'Content-Type': 'application/json'
        }
        response = requests.get(
            endpoint, headers=headers, params=params, proxies=self.proxy)
        response.raise_for_status()
        return response.json()

    def _parse_response(self, response: dict) -> dict:
        """Turn a Serper JSON response into ``(url, snippet, title)`` tuples.

        Tuples are collected in this order: the answer box (if any), the
        knowledge graph (if any), then up to ``topk`` entries of the result
        list selected by ``result_key_for_type[search_type]``.

        Args:
            response (dict): Decoded JSON body returned by the API.

        Returns:
            dict: Whatever ``self._filter_results`` returns for the tuples.

        Raises:
            KeyError: If ``search_type`` is not in ``result_key_for_type``.
        """
        raw_results = []

        # Answer box: use the first non-empty field among answer / snippet /
        # snippetHighlighted.
        answer_box = response.get('answerBox') or {}
        if answer_box.get('answer'):
            raw_results.append(('', answer_box.get('answer'), ''))
        elif answer_box.get('snippet'):
            raw_results.append(
                ('', answer_box.get('snippet').replace('\n', ' '), ''))
        elif answer_box.get('snippetHighlighted'):
            highlighted = answer_box.get('snippetHighlighted')
            # NOTE(review): Serper appears to return the highlights as a
            # list of strings; flatten it so the snippet slot always holds
            # text like every other entry.
            if isinstance(highlighted, (list, tuple)):
                highlighted = ' '.join(str(part) for part in highlighted)
            raw_results.append(('', highlighted, ''))

        kg = response.get('knowledgeGraph') or {}
        if kg:
            description = kg.get('description', '')
            attributes = '. '.join(
                f'{attribute}: {value}'
                for attribute, value in (kg.get('attributes') or {}).items())
            raw_results.append(
                (kg.get('descriptionLink', ''),
                 f'{description}. {attributes}' if attributes else description,
                 f"{kg.get('title', '')}: {kg.get('type', '')}."))

        # A response without the result key (e.g. no hits) is an empty
        # result, not an error that should trigger retries.
        result_key = self.result_key_for_type[self.search_type]
        for result in (response.get(result_key) or [])[:self.topk]:
            description = result.get('snippet', '')
            attributes = '. '.join(
                f'{attribute}: {value}'
                for attribute, value in (result.get('attributes')
                                         or {}).items())
            raw_results.append(
                (result.get('link', ''),
                 f'{description}. {attributes}' if attributes else description,
                 result.get('title', '')))

        return self._filter_results(raw_results)
374+
375+
167376
class ContentFetcher:
168377

169378
def __init__(self, timeout: int = 5):

0 commit comments

Comments
 (0)