diff --git a/LICENSE b/LICENSE index f3e2506..5551c6e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,7 +1,7 @@ BSD-style license ================= -Copyright (c) 2010, Michael Stephens +Copyright (c) 2013, Michael Stephens All rights reserved. diff --git a/README.rst b/README.rst index 99aa8c3..3f528d8 100644 --- a/README.rst +++ b/README.rst @@ -6,10 +6,11 @@ A Python library for querying the DuckDuckGo API. Copyright Michael Stephens , released under a BSD-style license. -Source: http://github.com/crazedpsyc/python-duckduckgo -Original source: http://github.com/mikejs/python-duckduckgo (outdated) +Source: http://github.com/djinn/python-duckduckgo +Original Source: http://github.com/crazedpsyc/python-duckduckgo +Original Original Source: http://github.com/mikejs/python-duckduckgo (outdated) -This version has been forked from the original to handle some new features of the API, and switch from XML to JSON. +This version has been forked from the original to provide a cleaner interface. It uses namedtuples extensively instead of data classes. 
This API only interfaces with JSON API Installation ============ @@ -24,16 +25,18 @@ Usage >>> import duckduckgo >>> r = duckduckgo.query('DuckDuckGo') >>> r.type - u'answer' - >>> r.results[0].text + 'answer' + >>> r.result[0] + Result(html=u'Official site', text=u'Official site', url=u'https://duckduckgo.com/', icon=Icon(url=u'https://i.duckduckgo.com/i/duckduckgo.com.ico', width=16, height=16)) + >>> r.result[0].text u'Official site' - >>> r.results[0].url - u'http://duckduckgo.com/' + >>> r.abstract + Abstract(primary=u'DuckDuckGo is an Internet search engine that uses information from many sources, such as crowdsourced websites like Wikipedia and from partnerships with other search engines like Yandex, Yahoo!, Bing and WolframAlpha to obtain its results.', url=u'https://en.wikipedia.org/wiki/DuckDuckGo', text=u'DuckDuckGo is an Internet search engine that uses information from many sources, such as crowdsourced websites like Wikipedia and from partnerships with other search engines like Yandex, Yahoo!, Bing and WolframAlpha to obtain its results.', source=u'Wikipedia') >>> r.abstract.url - u'http://en.wikipedia.org/wiki/Duck_Duck_Go' + u'https://en.wikipedia.org/wiki/DuckDuckGo' >>> r.abstract.source u'Wikipedia' - + >>> r = duckduckgo.query('Python') >>> r.type u'disambiguation' @@ -44,26 +47,19 @@ Usage >>> r.related[7].topics[0].text # weird, but this is how the DDG API is currently organized u'Armstrong Siddeley Python, an early turboprop engine' - >>> r = duckduckgo.query('1 + 1') >>> r.type - u'nothing' - >>> r.answer.text + 'exclusive' + >>> r.answer.primary u'1 + 1 = 2' >>> r.answer.type u'calc' - >>> print duckduckgo.query('19301', kad='es_ES').answer.text - 19301 es un código postal de Paoli, PA - >>> print duckduckgo.query('how to spell test', html=True).answer.text - Test appears to be spelled right!
Suggestions: test, testy, teat, tests, rest, yest. - -The easiest method of quickly grabbing the best (hopefully) API result is to use duckduckgo.get_zci:: - >>> print duckduckgo.get_zci('foo') - The terms foobar /ˈfʊːbɑːr/, fubar, or foo, bar, baz and qux are sometimes used as placeholder names in computer programming or computer-related documentation. (https://en.wikipedia.org/wiki/Foobar) - >>> print ddg.get_zci('foo fighters site') - http://www.foofighters.com/us/home - + + >>> print duckduckgo.query('how to spell test', html=True).answer.primary + Test appears to be spelled correctly!
Suggestions: test testy teat tests rest yest . + The get_zci helper from earlier versions is no longer provided; read the answer, abstract, related and redirect fields of the Response returned by duckduckgo.query() instead. + Special keyword args for query(): - useragent - string, The useragent used to make API calls. This is somewhat irrelevant, as they are not logged or used on DuckDuckGo, but it is retained for backwards compatibility. - safesearch - boolean, enable or disable safesearch. diff --git a/dot_example.py b/dot_example.py new file mode 100644 index 0000000..5c0c80e --- /dev/null +++ b/dot_example.py @@ -0,0 +1,41 @@ +from duckduckgo import query, Topic +from sys import argv +visited = [] +depth_color = { + 0: 'green', + 1: '#A52A2A', + 2: 'grey', + 3: 'blue' + } + +def build_web_tree(node, qr, depth=0): + cooked_qr = qr.replace('"', '\\"') + print '"%s" [label="%s", shape="hexagon", style="filled", color="%s"];' % (cooked_qr, cooked_qr, depth_color[depth]) + if node != None: + print '"%s" -> "%s";' % (node, cooked_qr) + ds = query(qr) + if depth == 3: + return + if ds.error_code != 0: + return + visited.append(qr) + if ds.related == []: + return + else: + for r in ds.related: + if isinstance(r, Topic) == True: + r_used = r.name.encode('ascii', 'ignore') + else: + r_used = r.text.encode('ascii', 'ignore').split('-')[0].strip() + try: + visited.index(r_used) + except: + build_web_tree(qr, r_used, depth=depth+1) + + +if __name__ == '__main__': + print """digraph G { + ranksep=3; + ratio=auto;""" + build_web_tree(None, ' '.join(argv[1:])) + print "}" diff --git a/duckduckgo.py b/duckduckgo.py index 04ed1a5..dd584eb 100755 --- a/duckduckgo.py +++ b/duckduckgo.py @@ -2,9 +2,31 @@ import urllib2 import json as j import sys +from collections import namedtuple __version__ = 0.242 +Response = namedtuple('Response', ['type', 'api_version', + 'heading', 'result', + 'related', 'definition', + 'abstract', 'redirect', + 'answer', 'error_code', + 'error_msg']) +Result = namedtuple('Result', ['html', + 'text', 'url', + 'icon']) +Related = 
namedtuple('Related', ['html', 'text', + 'url', 'icon']) +Definition = namedtuple('Definition', ['primary','url', 'source']) + +Abstract = namedtuple('Abstract', ['primary', 'url', + 'text', 'source']) +Redirect = namedtuple('Redirect', ['primary',]) +Icon = namedtuple('Icon', ['url', 'width', 'height']) +Topic = namedtuple('Topic',['name', 'results']) +Answer = namedtuple('Answer', ['primary', 'type']) + + def query(query, useragent='python-duckduckgo '+str(__version__), safesearch=True, html=False, meanings=True, **kwargs): """ @@ -42,138 +64,109 @@ def query(query, useragent='python-duckduckgo '+str(__version__), safesearch=Tru params.update(kwargs) encparams = urllib.urlencode(params) url = 'http://api.duckduckgo.com/?' + encparams - request = urllib2.Request(url, headers={'User-Agent': useragent}) - response = urllib2.urlopen(request) - json = j.loads(response.read()) - response.close() - - return Results(json) - - -class Results(object): - - def __init__(self, json): - self.type = {'A': 'answer', 'D': 'disambiguation', - 'C': 'category', 'N': 'name', - 'E': 'exclusive', '': 'nothing'}.get(json.get('Type',''), '') - - self.json = json - self.api_version = None # compat - - self.heading = json.get('Heading', '') - - self.results = [Result(elem) for elem in json.get('Results',[])] - self.related = [Result(elem) for elem in - json.get('RelatedTopics',[])] - - self.abstract = Abstract(json) - self.redirect = Redirect(json) - self.definition = Definition(json) - self.answer = Answer(json) - - self.image = Image({'Result':json.get('Image','')}) - - -class Abstract(object): - - def __init__(self, json): - self.html = json.get('Abstract', '') - self.text = json.get('AbstractText', '') - self.url = json.get('AbstractURL', '') - self.source = json.get('AbstractSource') - -class Redirect(object): - - def __init__(self, json): - self.url = json.get('Redirect', '') + try: + response = urllib2.urlopen(request) + except urllib2.URLError, e: + return Response(type='Error', 
api_version=__version__, + heading=None, redirect=None, + abstract=None, + definition=None, + answer=None, + related=None, + result=None, error_code=1, + error_msg=str(e)) + + try: + json = j.loads(response.read()) + except Exception, e: + return Response(type='Error', api_version=__version__, + heading=None, redirect=None, + abstract=None, + definition=None, + answer=None, + related=None, + result=None, error_code=2, + error_msg='Data from api malformed') -class Result(object): - - def __init__(self, json): - self.topics = json.get('Topics', []) - if self.topics: - self.topics = [Result(t) for t in self.topics] - return - self.html = json.get('Result') - self.text = json.get('Text') - self.url = json.get('FirstURL') - - icon_json = json.get('Icon') - if icon_json is not None: - self.icon = Image(icon_json) - else: - self.icon = None - - -class Image(object): - - def __init__(self, json): - self.url = json.get('Result') - self.height = json.get('Height', None) - self.width = json.get('Width', None) - - -class Answer(object): - - def __init__(self, json): - self.text = json.get('Answer') - self.type = json.get('AnswerType', '') - -class Definition(object): - def __init__(self, json): - self.text = json.get('Definition','') - self.url = json.get('DefinitionURL') - self.source = json.get('DefinitionSource') - - -def get_zci(q, web_fallback=True, priority=['answer', 'abstract', 'related.0', 'definition'], urls=True, **kwargs): - '''A helper method to get a single (and hopefully the best) ZCI result. - priority=list can be used to set the order in which fields will be checked for answers. - Use web_fallback=True to fall back to grabbing the first web result. - passed to query. This method will fall back to 'Sorry, no results.' 
- if it cannot find anything.''' - - ddg = query('\\'+q, **kwargs) - response = '' - - for p in priority: - ps = p.split('.') - type = ps[0] - index = int(ps[1]) if len(ps) > 1 else None - - result = getattr(ddg, type) - if index is not None: - if not hasattr(result, '__getitem__'): raise TypeError('%s field is not indexable' % type) - result = result[index] if len(result) > index else None - if not result: continue - - if result.text: response = result.text - if result.text and hasattr(result,'url') and urls: - if result.url: response += ' (%s)' % result.url - if response: break - - # if there still isn't anything, try to get the first web result - if not response and web_fallback: - if ddg.redirect.url: - response = ddg.redirect.url - - # final fallback - if not response: - response = 'Sorry, no results.' + response.close() - return response + return process_results(json) + + + + + + +def result_deserialize(dataset, obj_type): + d = dataset + topics = None + if 'Topics' in d: + results = [result_deserialize(t, Result) for t in d['Topics']] + return Topic(d['Name'], results=results) + text = d['Text'] + url = d['FirstURL'] + html = d['Result'] + i_url = d['Icon']['URL'] + i_width = d['Icon']['Width'] + i_height = d['Icon']['Height'] + icon = None + if i_url != '': + icon = Icon(url=i_url, width=i_width, + height=i_height) + dt = obj_type(text=text, url=url, html=html, + icon=icon) + return dt + + + +def search_deserialize(dataset, prefix, obj_type): + if dataset[prefix] == '': + return None + keys = dataset.keys() + required = filter(lambda x: x.startswith(prefix) and x != prefix, keys) + unq_required = [r.split(prefix)[1].lower() for r in required] + args = {ur: dataset[r] for ur, r in map(None, unq_required, required)} + if prefix in dataset: + args['primary'] = dataset[prefix] + return obj_type(**args) + + + +def process_results(json): + resp_type = {'A': 'answer', + 'D': 'disambiguation', + 'C': 'category', + 'N': 'name', + 'E': 'exclusive', + '': 
'nothing'}.get(json.get('Type',''), '') + if resp_type == 'Nothing': + return Response(type='nothing', api_version=0.242, heading=None, + result=None, related=None, definition=None, + abstract=None, redirect=None, answer=None, + error_code=0, error_msg=None) + + redirect = search_deserialize(json, 'Redirect', Redirect) + abstract = search_deserialize(json, 'Abstract', Abstract) + definition = search_deserialize(json, 'Definition', Definition) + js_results = json.get('Results', []) + results = [result_deserialize(jr, Result) for jr in js_results] + js_related = json.get('RelatedTopics', []) + related = [result_deserialize(jr, Related) for jr in js_related] + answer = search_deserialize(json, 'Answer', Answer) + return Response(type=resp_type, api_version=__version__, + heading='', redirect=redirect, + abstract=abstract, + definition=definition, + answer=answer, + related=related, + result=results, error_code=0, + error_msg=None) def main(): if len(sys.argv) > 1: q = query(' '.join(sys.argv[1:])) - keys = q.json.keys() - keys.sort() - for key in keys: - sys.stdout.write(key) - if type(q.json[key]) in [str,unicode]: print(':', q.json[key]) - else: - sys.stdout.write('\n') - for i in q.json[key]: print('\t',i) + print q else: print('Usage: %s [query]' % sys.argv[0]) + diff --git a/example1.py b/example1.py new file mode 100644 index 0000000..f726618 --- /dev/null +++ b/example1.py @@ -0,0 +1,28 @@ +from duckduckgo import query, Topic +from sys import argv +visited = [] + +def build_web_tree(qr, depth=0): + print ' '* depth * 4 + qr + ds = query(qr) + if depth == 2: + return + if ds.error_code != 0: + return + visited.append(qr) + if ds.related == []: + return + else: + for r in ds.related: + if isinstance(r, Topic) == True: + r_used = r.name.encode('ascii', 'ignore') + else: + r_used = r.text.encode('ascii', 'ignore').split('-')[0].strip() + try: + visited.index(r_used) + except: + build_web_tree(r_used, depth=depth+1) + + +if __name__ == '__main__': + build_web_tree(' 
'.join(argv[1:])) diff --git a/example2.py b/example2.py new file mode 100644 index 0000000..17c6347 --- /dev/null +++ b/example2.py @@ -0,0 +1,9 @@ +from duckduckgo import query + +def calculate(text): + """ There is bc but why not use web api to caculate""" + return query(text).answer.primary if query(text).type != 'nothing' else None + +if __name__ == '__main__': + import sys + print calculate(' '.join(sys.argv[1:])) diff --git a/example3.py b/example3.py new file mode 100644 index 0000000..6c64f3b --- /dev/null +++ b/example3.py @@ -0,0 +1,9 @@ +from duckduckgo import query + +def wikipedia_presence(text): + """Find if a query has wikipedia article""" + return query(text).abstract.url if query(text).abstract != None and query(text).abstract.source == 'Wikipedia' else None + +if __name__ == '__main__': + import sys + print wikipedia_presence(' '.join(sys.argv[1:])) diff --git a/tests.py b/tests.py new file mode 100644 index 0000000..8d5fc56 --- /dev/null +++ b/tests.py @@ -0,0 +1,125 @@ +from duckduckgo import query +import unittest + + +class GrandDuckDuckGoTestSuite(unittest.TestCase): + def testDuckDuckGo(self): + dataset = query('duckduckgo') + ds = dataset + self.assertEqual(ds.type, 'answer') + self.assertEqual(ds.result[0].url, 'https://duckduckgo.com/') + self.assertEqual(len(ds.related), 1) + self.assertEqual(ds.related[0].url, 'http://duckduckgo.com/c/Internet_search_engines?kp=1') + self.assertEqual(ds.answer, None) + self.assertEqual(ds.definition, None) + self.assertEqual(ds.abstract.url, 'https://en.wikipedia.org/wiki/Duck_Duck_Go') + self.assertEqual(ds.redirect, None) + + def test4_pow_10(self): + dataset = query('4 ^ 10') + ds = dataset + self.assertEqual(ds.type, 'exclusive') + self.assertEqual(ds.result, []) + self.assertEqual(len(ds.related), 0) + self.assertEqual(ds.answer.primary, '4 ^ 10 = 1,048,576') + self.assertEqual(ds.definition, None) + self.assertEqual(ds.abstract, None) + self.assertEqual(ds.redirect, None) + + def testYahoo(self): 
+ dataset = query('Yahoo!') + ds = dataset + self.assertEqual(ds.type, 'disambiguation') + self.assertEqual(ds.result, []) + self.assertEqual(len(ds.related), 6) + self.assertEqual(ds.related[0].url, 'http://duckduckgo.com/Yahoo!?kp=1') + self.assertEqual(ds.answer, None) + self.assertEqual(ds.definition, None) + self.assertEqual(ds.abstract, None) + self.assertEqual(ds.redirect, None) + + def test42(self): + dataset = query('42') + ds = dataset + self.assertEqual(ds.type, 'disambiguation') + self.assertEqual(ds.result, []) + self.assertEqual(len(ds.related), 7) + self.assertEqual(ds.related[0].url, 'http://duckduckgo.com/42?kp=1') + self.assertEqual(ds.answer, None) + self.assertEqual(ds.definition, None) + self.assertEqual(ds.abstract, None) + self.assertEqual(ds.redirect, None) + + def testGenomeProject(self): + dataset = query('Genome Project') + ds = dataset + self.assertEqual(ds.type, 'answer') + self.assertEqual(ds.result, []) + self.assertEqual(len(ds.related), 6) + self.assertEqual(ds.related[0].url, 'http://duckduckgo.com/Joint_Genome_Institute?kp=1') + self.assertEqual(ds.answer, None) + self.assertEqual(ds.definition, None) + self.assertEqual(ds.abstract.url, 'https://en.wikipedia.org/wiki/Genome_project') + self.assertEqual(ds.redirect, None) + + def testBeetle(self): + dataset = query('Beetle') + ds = dataset + self.assertEqual(ds.type, 'disambiguation') + self.assertEqual(ds.result, []) + self.assertEqual(len(ds.related), 20) + self.assertEqual(ds.related[0].url, 'http://duckduckgo.com/Beetle?kp=1') + self.assertEqual(ds.answer, None) + self.assertEqual(ds.definition.url, 'http://www.merriam-webster.com/dictionary/beetle') + self.assertEqual(ds.abstract, None) + self.assertEqual(ds.redirect, None) + + def testGoLang(self): + dataset = query('golang') + ds = dataset + self.assertEqual(ds.type, 'answer') + self.assertEqual(ds.result, []) + self.assertEqual(len(ds.related), 7) + self.assertEqual(ds.related[0].url, 
'http://duckduckgo.com/Go!_(programming_language)?kp=1') + self.assertEqual(ds.answer, None) + self.assertEqual(ds.definition, None) + self.assertEqual(ds.abstract.url, 'https://en.wikipedia.org/wiki/Go_(programming_language)') + self.assertEqual(ds.redirect, None) + + def testPythonDuckDuckGo(self): + dataset = query('python-duckduckgo') + ds = dataset + self.assertEqual(ds.type, 'answer') + self.assertEqual(ds.result, []) + self.assertEqual(len(ds.related), 0) + self.assertEqual(ds.answer, None) + self.assertEqual(ds.definition, None) + self.assertEqual(ds.abstract.url, 'https://github.com/mikejs/python-duckduckgo') + self.assertEqual(ds.redirect, None) + + def testPythonDjango(self): + dataset = query('python django') + ds = dataset + self.assertEqual(ds.type, 'nothing') + self.assertEqual(ds.result, []) + self.assertEqual(len(ds.related), 0) + self.assertEqual(ds.answer, None) + self.assertEqual(ds.definition, None) + self.assertEqual(ds.abstract, None) + self.assertEqual(ds.redirect, None) + + def testNFAK(self): + dataset = query('NFAK') + ds = dataset + self.assertEqual(ds.type, 'answer') + self.assertEqual(ds.result, []) + self.assertEqual(len(ds.related), 8) + self.assertEqual(ds.related[0].url, 'http://duckduckgo.com/c/Harmonium_players?kp=1') + self.assertEqual(ds.answer, None) + self.assertEqual(ds.definition, None) + self.assertEqual(ds.abstract.url, 'https://en.wikipedia.org/wiki/Nusrat_Fateh_Ali_Khan') + self.assertEqual(ds.redirect, None) + + +if __name__ == '__main__': + unittest.main()