@@ -164,6 +164,215 @@ def _parse_response(self, response: dict) -> dict:
164
164
return self ._filter_results (raw_results )
165
165
166
166
167
class BraveSearch(BaseSearch):
    """
    Wrapper around the Brave Search API.

    To use, you should pass your Brave Search API key to the constructor.

    Args:
        api_key (str): API KEY to use Brave Search API. Sent in the
            ``X-Subscription-Token`` request header.
            You can create a free API key at https://api.search.brave.com/app/keys.
        region (str): The country code string. Specifies the country where
            the search results come from. Sent as the ``country`` parameter.
        language (str): The language code string. Specifies the preferred
            language for the search results. Sent as ``search_lang``.
        extra_snippests (bool): Sent as the ``extra_snippets`` parameter.
            Allows retrieving up to 5 additional snippets, which are
            alternative excerpts from the search results. (The parameter
            name keeps its historical spelling so existing keyword callers
            keep working.)
        topk (int): The number of search results requested from the API
            (sent as ``count``).
        black_list (List[str]): Forwarded unchanged to
            ``BaseSearch.__init__``. When omitted, a fresh list built from
            ``_DEFAULT_BLACK_LIST`` is used.
        **kwargs: Two keys configure this client and are never sent to the
            API: ``proxy`` (passed to ``requests.get`` as ``proxies``) and
            ``search_type`` (selects the endpoint, default ``'web'``; the
            Brave Search API supports ['web', 'news', 'images', 'videos'],
            currently only 'news' and 'web' are supported here). Every
            other entry whose value is not ``None`` is forwarded as a
            query parameter. Find more details at
            https://api.search.brave.com/app/documentation/web-search/get-started.
    """

    # Options carried in ``**kwargs`` that configure this client. They must
    # not be forwarded as query parameters (a proxy setting in particular
    # should never be sent to a third-party API).
    _CLIENT_ONLY_KWARGS = ('proxy', 'search_type')

    # Kept immutable so that instances can never share (and accidentally
    # mutate) one default list object.
    _DEFAULT_BLACK_LIST = (
        'enoN',
        'youtube.com',
        'bilibili.com',
        'researchgate.net',
    )

    def __init__(self,
                 api_key: str,
                 region: str = 'ALL',
                 language: str = 'zh-hans',
                 extra_snippests: bool = True,
                 topk: int = 3,
                 black_list: List[str] = None,
                 **kwargs):
        if black_list is None:
            black_list = list(self._DEFAULT_BLACK_LIST)
        self.api_key = api_key
        self.market = region
        self.proxy = kwargs.get('proxy')
        self.language = language
        self.extra_snippests = extra_snippests
        self.search_type = kwargs.get('search_type', 'web')
        self.kwargs = kwargs
        # NOTE(review): ``self.topk`` (read in ``_call_brave_api``) is
        # presumably set by ``BaseSearch.__init__`` -- confirm.
        super().__init__(topk, black_list)

    # NOTE(review): if ``cached`` is cachetools' decorator, the cache is
    # created once at class definition and keyed on ``(self, query,
    # max_retry)``, so it is shared by all instances -- confirm that this
    # is intended.
    @cached(cache=TTLCache(maxsize=100, ttl=600))
    def search(self, query: str, max_retry: int = 3) -> dict:
        """Query Brave Search, retrying on any error.

        Args:
            query (str): The search query.
            max_retry (int): Maximum number of attempts.

        Returns:
            dict: The value produced by ``_parse_response``.

        Raises:
            Exception: If every attempt failed. The last underlying error
                is attached as ``__cause__``.
        """
        last_error = None
        for attempt in range(max_retry):
            try:
                response = self._call_brave_api(query)
                return self._parse_response(response)
            except Exception as e:
                last_error = e
                logging.exception(str(e))
                warnings.warn(
                    f'Retry {attempt + 1}/{max_retry} due to error: {e}')
                # Back off only when another attempt will follow; sleeping
                # after the final failure just delays the error.
                if attempt + 1 < max_retry:
                    time.sleep(random.randint(2, 5))
        raise Exception(
            'Failed to get search results from Brave Search after retries.'
        ) from last_error

    def _call_brave_api(self, query: str) -> dict:
        """Send one GET request to the Brave endpoint for ``search_type``.

        Args:
            query (str): The search query.

        Returns:
            dict: The decoded JSON body of the response.

        Raises:
            requests.HTTPError: On a 4xx/5xx status (``raise_for_status``).
        """
        endpoint = (
            f'https://api.search.brave.com/res/v1/{self.search_type}/search')
        # User-supplied kwargs come last so they can override the defaults
        # above, exactly as before; client-only options are filtered out.
        extra_params = {
            key: value
            for key, value in self.kwargs.items()
            if value is not None and key not in self._CLIENT_ONLY_KWARGS
        }
        params = {
            'q': query,
            'country': self.market,
            'search_lang': self.language,
            'extra_snippets': self.extra_snippests,
            'count': self.topk,
            **extra_params,
        }
        headers = {
            'X-Subscription-Token': self.api_key or '',
            'Accept': 'application/json'
        }
        # NOTE(review): no ``timeout`` is passed, so a stalled connection
        # blocks indefinitely -- consider adding one.
        response = requests.get(
            endpoint, headers=headers, params=params, proxies=self.proxy)
        response.raise_for_status()
        return response.json()

    def _parse_response(self, response: dict) -> dict:
        """Convert a Brave JSON response into ``(url, text, title)`` tuples.

        For ``search_type == 'web'`` the items are read from
        ``response['web']['results']``; for any other type from
        ``response['results']``. The text of each tuple is the item's
        ``description`` followed by its ``extra_snippets``, joined with
        spaces (empty parts are skipped).

        Args:
            response (dict): Decoded JSON body from ``_call_brave_api``.

        Returns:
            dict: Whatever ``self._filter_results`` returns for the tuples.
        """
        # ``or`` fallbacks also cover keys that are present but null.
        if self.search_type == 'web':
            items = (response.get('web') or {}).get('results') or []
        else:
            items = response.get('results') or []

        raw_results = [(
            item.get('url', ''),
            ' '.join(
                filter(None, [
                    item.get('description'),
                    *(item.get('extra_snippets') or [])
                ])),
            item.get('title', ''),
        ) for item in items]
        return self._filter_results(raw_results)
262
+
263
+
264
class GoogleSearch(BaseSearch):
    """
    Wrapper around the Serper.dev Google Search API.

    To use, you should pass your serper API key to the constructor.

    Args:
        api_key (str): API KEY to use serper google search API. Sent in the
            ``X-API-KEY`` request header.
            You can create a free API key at https://serper.dev.
        topk (int): The number of search results requested from the API
            (sent as ``num``); also caps how many result entries are parsed.
        black_list (List[str]): Forwarded unchanged to
            ``BaseSearch.__init__``. When omitted, a fresh list built from
            ``_DEFAULT_BLACK_LIST`` is used.
        **kwargs: Two keys configure this client and are never sent to the
            API: ``proxy`` (passed to ``requests.get`` as ``proxies``) and
            ``search_type`` (selects the endpoint, default ``'search'``;
            the Serper API supports ['search', 'images', 'news', 'places']
            types of search, currently we only support 'search' and
            'news'). Every other entry whose value is not ``None`` is
            forwarded as a query parameter. Find more details at
            https://serper.dev/playground
    """

    # Maps ``search_type`` to the response key that holds the result list.
    result_key_for_type = {
        'news': 'news',
        'places': 'places',
        'images': 'images',
        'search': 'organic',
    }

    # Options carried in ``**kwargs`` that configure this client. They must
    # not be forwarded as query parameters (a proxy setting in particular
    # should never be sent to a third-party API).
    _CLIENT_ONLY_KWARGS = ('proxy', 'search_type')

    # Kept immutable so that instances can never share (and accidentally
    # mutate) one default list object.
    _DEFAULT_BLACK_LIST = (
        'enoN',
        'youtube.com',
        'bilibili.com',
        'researchgate.net',
    )

    def __init__(self,
                 api_key: str,
                 topk: int = 3,
                 black_list: List[str] = None,
                 **kwargs):
        if black_list is None:
            black_list = list(self._DEFAULT_BLACK_LIST)
        self.api_key = api_key
        self.proxy = kwargs.get('proxy')
        self.search_type = kwargs.get('search_type', 'search')
        self.kwargs = kwargs
        # NOTE(review): ``self.topk`` (read below) is presumably set by
        # ``BaseSearch.__init__`` -- confirm.
        super().__init__(topk, black_list)

    # NOTE(review): if ``cached`` is cachetools' decorator, the cache is
    # created once at class definition and keyed on ``(self, query,
    # max_retry)``, so it is shared by all instances -- confirm that this
    # is intended.
    @cached(cache=TTLCache(maxsize=100, ttl=600))
    def search(self, query: str, max_retry: int = 3) -> dict:
        """Query the Serper API, retrying on any error.

        Args:
            query (str): The search query.
            max_retry (int): Maximum number of attempts.

        Returns:
            dict: The value produced by ``_parse_response``.

        Raises:
            Exception: If every attempt failed. The last underlying error
                is attached as ``__cause__``.
        """
        last_error = None
        for attempt in range(max_retry):
            try:
                response = self._call_serper_api(query)
                return self._parse_response(response)
            except Exception as e:
                last_error = e
                logging.exception(str(e))
                warnings.warn(
                    f'Retry {attempt + 1}/{max_retry} due to error: {e}')
                # Back off only when another attempt will follow; sleeping
                # after the final failure just delays the error.
                if attempt + 1 < max_retry:
                    time.sleep(random.randint(2, 5))
        raise Exception(
            'Failed to get search results from Google Serper Search after retries.'
        ) from last_error

    def _call_serper_api(self, query: str) -> dict:
        """Send one GET request to the Serper endpoint for ``search_type``.

        Args:
            query (str): The search query.

        Returns:
            dict: The decoded JSON body of the response.

        Raises:
            requests.HTTPError: On a 4xx/5xx status (``raise_for_status``).
        """
        endpoint = f'https://google.serper.dev/{self.search_type}'
        # User-supplied kwargs come last so they can override the defaults
        # above, exactly as before; client-only options are filtered out.
        extra_params = {
            key: value
            for key, value in self.kwargs.items()
            if value is not None and key not in self._CLIENT_ONLY_KWARGS
        }
        params = {
            'q': query,
            'num': self.topk,
            **extra_params,
        }
        headers = {
            'X-API-KEY': self.api_key or '',
            'Content-Type': 'application/json'
        }
        # NOTE(review): no ``timeout`` is passed, so a stalled connection
        # blocks indefinitely -- consider adding one.
        response = requests.get(
            endpoint, headers=headers, params=params, proxies=self.proxy)
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _with_attributes(description: str, attributes: dict) -> str:
        """Append ``key: value`` attribute pairs to ``description``."""
        joined = '. '.join(f'{attribute}: {value}'
                           for attribute, value in attributes.items())
        return f'{description}. {joined}' if joined else description

    def _parse_response(self, response: dict) -> dict:
        """Convert a Serper JSON response into ``(url, text, title)`` tuples.

        Tuples are collected in this order:

        1. ``answerBox`` -- the first non-empty of ``answer``, ``snippet``
           (newlines replaced by spaces) or ``snippetHighlighted``, with an
           empty url and title.
        2. ``knowledgeGraph`` -- its description plus attributes, linked to
           ``descriptionLink`` and titled ``'<title>: <type>.'``.
        3. Up to ``self.topk`` entries of the result list selected by
           ``result_key_for_type[self.search_type]``.

        Args:
            response (dict): Decoded JSON body from ``_call_serper_api``.

        Returns:
            dict: Whatever ``self._filter_results`` returns for the tuples.

        Raises:
            KeyError: If ``self.search_type`` is not a key of
                ``result_key_for_type``.
        """
        raw_results = []

        answer_box = response.get('answerBox')
        if answer_box:
            if answer_box.get('answer'):
                raw_results.append(('', answer_box.get('answer'), ''))
            elif answer_box.get('snippet'):
                raw_results.append(
                    ('', answer_box.get('snippet').replace('\n', ' '), ''))
            elif answer_box.get('snippetHighlighted'):
                raw_results.append(
                    ('', answer_box.get('snippetHighlighted'), ''))

        kg = response.get('knowledgeGraph')
        if kg:
            raw_results.append(
                (kg.get('descriptionLink', ''),
                 self._with_attributes(
                     kg.get('description', ''),
                     kg.get('attributes') or {}),
                 f"{kg.get('title', '')}: {kg.get('type', '')}."))

        # An unknown search type is a configuration error and still raises
        # KeyError; a response that merely lacks the result list (e.g. only
        # an answer box came back) is treated as "no results".
        result_key = self.result_key_for_type[self.search_type]
        for result in (response.get(result_key) or [])[:self.topk]:
            raw_results.append(
                (result.get('link', ''),
                 self._with_attributes(
                     result.get('snippet', ''),
                     result.get('attributes') or {}),
                 result.get('title', '')))

        return self._filter_results(raw_results)
374
+
375
+
167
376
class ContentFetcher :
168
377
169
378
def __init__ (self , timeout : int = 5 ):
0 commit comments