2008-02-21 07:06:33 -05:00
|
|
|
#!/usr/bin/env python
|
|
|
|
"""
search.py - Phenny Web Search Module
Copyright 2008-9, Sean B. Palmer, inamidst.com
Licensed under the Eiffel Forum License 2.

http://inamidst.com/phenny/
"""
|
|
|
|
|
|
|
|
import re
|
|
|
|
import web
|
|
|
|
|
2011-09-22 18:15:42 -04:00
|
|
|
class Grab(web.urllib.request.URLopener):
    """URL opener that identifies itself as Phenny and does not raise on
    HTTP error responses.

    Instances send a ``Mozilla/5.0 (Phenny)`` version string and a
    ``Referer`` header pointing at the Phenny repository.
    """

    def __init__(self, *args):
        # Assigned before the base initialiser runs; presumably so that the
        # base class picks this value up for its User-Agent header.
        self.version = 'Mozilla/5.0 (Phenny)'
        web.urllib.request.URLopener.__init__(self, *args)
        self.addheader('Referer', 'https://github.com/sbp/phenny')

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Wrap the error response in a file-like object instead of raising.

        The returned object carries ``[headers, errcode]`` in its headers
        slot and ``"http:" + url`` as its URL.
        """
        # NOTE(review): on the Python 3 standard library ``addinfourl`` lives
        # in ``urllib.response``, not on the ``urllib`` package itself --
        # confirm that ``web.urllib`` really exposes this name.
        return web.urllib.addinfourl(fp, [headers, errcode], "http:" + url)
|
2011-06-17 11:49:37 -04:00
|
|
|
|
2011-09-05 12:46:17 -04:00
|
|
|
def google_ajax(query):
    """Search using AjaxSearch, and return its JSON.

    ``query`` is the search text; a ``str`` is encoded as UTF-8 before being
    URL-quoted.  The response body is passed through ``web.json`` and the
    result of that call is returned.
    """
    if isinstance(query, str):
        query = query.encode('utf-8')
    uri = 'http://ajax.googleapis.com/ajax/services/search/web'
    args = '?v=1.0&safe=off&q=' + web.urllib.quote(query)

    # Temporarily install a Grab opener for this one request.  The previous
    # opener is restored in ``finally`` so that a failing request cannot
    # leave the module-wide opener replaced.
    handler = web.urllib._urlopener
    web.urllib._urlopener = Grab()
    try:
        response = web.get(uri + args)
    finally:
        web.urllib._urlopener = handler
    return web.json(response)
|
2008-02-21 07:06:33 -05:00
|
|
|
|
2011-09-05 12:46:17 -04:00
|
|
|
def google_search(query):
    """Return the URL of the first Google result for ``query``.

    Returns:
        The ``unescapedUrl`` of the first result; ``None`` when the result
        list is empty; ``False`` when the response does not have the
        expected structure (for example ``responseData`` is ``None`` or a
        key is missing).
    """
    results = google_ajax(query)
    try:
        return results['responseData']['results'][0]['unescapedUrl']
    except IndexError:
        # Well-formed response with an empty result list.
        return None
    except (TypeError, KeyError):
        # Malformed response: dump it for diagnosis and signal an error to
        # the caller with False (distinct from "no results").
        print(results)
        return False
|
2008-02-21 07:06:33 -05:00
|
|
|
|
2011-09-05 12:46:17 -04:00
|
|
|
def google_count(query):
    """Return Google's estimated result count for ``query``.

    Returns the ``estimatedResultCount`` value from the response cursor, or
    the string ``'0'`` when any level of the expected structure is missing.
    """
    results = google_ajax(query)
    try:
        return results['responseData']['cursor']['estimatedResultCount']
    except (KeyError, TypeError):
        # KeyError: one of the keys is absent.  TypeError: an intermediate
        # value (e.g. responseData) is None or otherwise not subscriptable.
        return '0'
|
2008-02-21 07:06:33 -05:00
|
|
|
|
|
|
|
def formatnumber(n):
    """Format a number with beautiful commas.

    ``n`` may be a number or a numeric string.  Only the digits before a
    decimal point are grouped in threes; a leading sign and any fractional
    part are carried over unchanged.
    """
    text = str(n)

    # Peel off a leading sign so it is not counted as a digit when grouping
    # (otherwise -123 would come out as "-,123").
    sign = ''
    if text[:1] in ('-', '+'):
        sign, text = text[0], text[1:]

    whole, point, fraction = text.partition('.')
    digits = list(whole)
    # Walk from the right, inserting a comma before every group of three.
    for i in range(len(digits) - 3, 0, -3):
        digits.insert(i, ',')
    return sign + ''.join(digits) + point + fraction
|
2008-02-21 07:06:33 -05:00
|
|
|
|
|
|
|
def g(phenny, input):
    """Queries Google for the specified input."""
    query = input.group(2)
    if not query:
        return phenny.reply('.g what?')

    uri = google_search(query)
    if uri:
        phenny.reply(uri)
        # Record the URI per sender on the bot object, creating the mapping
        # on first use.
        if not hasattr(phenny.bot, 'last_seen_uri'):
            phenny.bot.last_seen_uri = {}
        phenny.bot.last_seen_uri[input.sender] = uri
    elif uri is False:
        # google_search signals a malformed response with False.
        phenny.reply("Problem getting data from Google.")
    else:
        phenny.reply("No results found for '%s'." % query)


g.commands = ['g']
g.priority = 'high'
g.example = '.g swhack'
|
2008-02-21 07:06:33 -05:00
|
|
|
|
|
|
|
def gc(phenny, input):
    """Returns the number of Google results for the specified input."""
    query = input.group(2)
    if not query:
        return phenny.reply('.gc what?')
    num = formatnumber(google_count(query))
    phenny.say(query + ': ' + num)


gc.commands = ['gc']
gc.priority = 'high'
gc.example = '.gc extrapolate'
|
2008-02-21 07:06:33 -05:00
|
|
|
|
|
|
|
# Tokenises a comparison request into terms.  A term is one of:
#   - a double-quoted phrase, optionally prefixed with "+" (backslash
#     escapes allowed inside),
#   - a [bracketed] phrase (backslash escapes allowed inside),
#   - any run of non-whitespace characters.
r_query = re.compile(
    r'\+?"[^"\\]*(?:\\.[^"\\]*)*"|\[[^]\\]*(?:\\.[^]\\]*)*\]|\S+'
)
|
|
|
|
|
2008-02-23 07:16:43 -05:00
|
|
|
def gcs(phenny, input):
    """Compare the Google result counts of up to six terms.

    The terms are taken from ``input.group(2)`` using ``r_query``.  The
    reply lists every term with its formatted count, highest count first.
    """
    import time

    if not input.group(2):
        return phenny.reply("Nothing to compare.")
    queries = r_query.findall(input.group(2))
    if len(queries) > 6:
        return phenny.reply('Sorry, can only compare up to six things.')

    results = []
    for i, query in enumerate(queries):
        query = query.strip('[]')
        # Normalise the count to an int; an empty value counts as zero and
        # any thousands separators are dropped before conversion.
        count = str(google_count(query) or '0').replace(',', '')
        results.append((int(count), query))
        # Pause between lookups once several requests have been made:
        # 0.25s from the third query on, 0.5s from the fifth on.
        if i >= 2:
            time.sleep(0.25)
        if i >= 4:
            time.sleep(0.25)

    ranked = sorted(results, reverse=True)
    reply = ', '.join(
        '%s (%s)' % (term, formatnumber(n)) for (n, term) in ranked
    )
    phenny.say(reply)


gcs.commands = ['gcs', 'comp']
|
2008-02-21 07:06:33 -05:00
|
|
|
|
2011-03-10 08:41:02 -05:00
|
|
|
# Captures the href of an <h3><a href="..."> link (group 1).
r_bing = re.compile(
    r'<h3><a href="([^"]+)"'
)
|
|
|
|
|
2011-09-05 12:46:17 -04:00
|
|
|
def bing_search(query, lang='en-GB'):
    """Return the first link matched by ``r_bing`` on Bing's results page.

    Args:
        query: search text; it is passed through ``web.quote`` before being
            appended to the URL.
        lang: value for Bing's ``mkt`` URL parameter.

    Returns:
        The captured href, or ``None`` when the page contains no match.
    """
    query = web.quote(query)
    base = 'http://www.bing.com/search?mkt=%s&q=' % lang
    page = web.get(base + query)
    m = r_bing.search(page)
    if m:
        return m.group(1)
    return None
|
2011-09-05 12:46:17 -04:00
|
|
|
|
2011-03-10 08:41:02 -05:00
|
|
|
def bing(phenny, input):
    """Queries Bing for the specified input.

    The query may start with ``:<lang> `` (for example ``:fr-FR term``) to
    select the market passed to ``bing_search``; the default is ``en-GB``.
    """
    query = input.group(2)
    # Check for a missing query first: group(2) may be None, and calling
    # startswith on it would raise.
    if not query:
        return phenny.reply('.bing what?')

    lang = 'en-GB'
    if query.startswith(':'):
        # partition never fails, so ":fr" with no search term yields an
        # empty query instead of an unpacking error.
        lang, _, query = query.partition(' ')
        lang = lang[1:]
    if not query:
        return phenny.reply('.bing what?')

    uri = bing_search(query, lang)
    if uri:
        phenny.reply(uri)
        # Record the URI per sender on the bot object, creating the mapping
        # on first use.
        if not hasattr(phenny.bot, 'last_seen_uri'):
            phenny.bot.last_seen_uri = {}
        phenny.bot.last_seen_uri[input.sender] = uri
    else:
        phenny.reply("No results found for '%s'." % query)


bing.commands = ['bing']
bing.example = '.bing swhack'
|
|
|
|
|
2011-09-05 12:46:17 -04:00
|
|
|
# Captures the href (group 1) of a link whose markup contains
# nofollow" class="..." href="...">.
r_duck = re.compile(
    r'nofollow" class="[^"]+" href="(.*?)">'
)
|
2011-08-04 10:53:55 -04:00
|
|
|
|
2011-09-05 12:46:17 -04:00
|
|
|
def duck_search(query):
    """Return the first link matched by ``r_duck`` on DuckDuckGo's HTML page.

    Returns the captured href passed through ``web.decode``, or ``None``
    when the page contains no match.
    """
    # Exclamation marks are removed from the query before it is sent --
    # presumably to keep DuckDuckGo "!bang" shortcuts from redirecting the
    # search; TODO confirm.
    query = query.replace('!', '')
    query = web.quote(query)
    uri = 'http://duckduckgo.com/html/?q=%s&kl=uk-en' % query
    page = web.get(uri)
    m = r_duck.search(page)
    if m:
        return web.decode(m.group(1))
    return None
|
2011-09-05 12:46:17 -04:00
|
|
|
|
|
|
|
def duck(phenny, input):
    """Queries DuckDuckGo for the specified input."""
    query = input.group(2)
    if not query:
        return phenny.reply('.ddg what?')

    uri = duck_search(query)
    if uri:
        phenny.reply(uri)
        # Record the URI per sender on the bot object, creating the mapping
        # on first use.
        if not hasattr(phenny.bot, 'last_seen_uri'):
            phenny.bot.last_seen_uri = {}
        phenny.bot.last_seen_uri[input.sender] = uri
    else:
        phenny.reply("No results found for '%s'." % query)


duck.commands = ['duck', 'ddg']
|
|
|
|
|
|
|
|
def search(phenny, input):
    """Run the query on Google, Bing and DuckDuckGo and report the top links.

    Engines that return the same link are grouped together in the reply,
    tagged ``g``, ``b`` and ``d``.  An engine with no result is shown as
    ``-``.
    """
    if not input.group(2):
        return phenny.reply('.search for what?')
    query = input.group(2)

    # A falsy result (None or False) from any engine is shown as '-'.
    gu = google_search(query) or '-'
    bu = bing_search(query) or '-'
    du = duck_search(query) or '-'

    if (gu == bu) and (bu == du):
        result = '%s (g, b, d)' % gu
    elif gu == bu:
        result = '%s (g, b), %s (d)' % (gu, du)
    elif bu == du:
        result = '%s (b, d), %s (g)' % (bu, gu)
    elif gu == du:
        result = '%s (g, d), %s (b)' % (gu, bu)
    else:
        # Three different links in one reply: replace very long ones.
        # NOTE(review): the Google limit is 250 while the others are 150 --
        # confirm whether the difference is intentional.
        if len(gu) > 250:
            gu = '(extremely long link)'
        if len(bu) > 150:
            bu = '(extremely long link)'
        if len(du) > 150:
            du = '(extremely long link)'
        result = '%s (g), %s (b), %s (d)' % (gu, bu, du)

    phenny.reply(result)


search.commands = ['search']
|
|
|
|
|
|
|
|
def suggest(phenny, input):
    """Fetch a suggestion for the query from the websitedev.de service.

    A non-empty response body is said to the channel as-is; otherwise an
    apology is sent as a reply.
    """
    if not input.group(2):
        return phenny.reply("No query term.")
    query = input.group(2)
    uri = 'http://websitedev.de/temp-bin/suggest.pl?q='
    # Any '+' remaining after quoting is sent as %2B.
    answer = web.get(uri + web.quote(query).replace('+', '%2B'))
    if answer:
        phenny.say(answer)
    else:
        phenny.reply('Sorry, no result.')


suggest.commands = ['suggest']
|
2011-08-04 10:53:55 -04:00
|
|
|
|
2008-02-21 07:06:33 -05:00
|
|
|
# When run as a script, print the module docstring.
if __name__ == '__main__':
    # __doc__ is None when docstrings are stripped (python -OO).
    print((__doc__ or '').strip())
|