fix imdb, search, and various tests

master
Paul Walko 2017-02-11 06:21:04 +00:00
parent 2dc0b0bdd6
commit 919b65cc17
10 changed files with 191 additions and 95 deletions

View File

@ -7,35 +7,44 @@ Licensed under the Eiffel Forum License 2.
http://inamidst.com/phenny/ http://inamidst.com/phenny/
""" """
import json import re
import web import web
def imdb_search(query): r_imdb_find = re.compile(r'href="/title/(.*?)/')
r_imdb_details = re.compile(r'<title>(.*?) \((.*?)\) .*?name="description" content="(.*?)"')
def imdb_search(query):
query = query.replace('!', '') query = query.replace('!', '')
query = query.encode('utf-8')
query = web.quote(query) query = web.quote(query)
uri = 'http://www.omdbapi.com/?i=&t=%s' % query uri = 'http://imdb.com/find?q=%s' % query
bytes = web.get(uri) bytes = web.get(uri)
m = json.loads(bytes) m = r_imdb_find.search(bytes)
return m if not m: return m
ID = web.decode(m.group(1))
uri = 'http://imdb.com/title/%s' % ID
bytes = web.get(uri)
bytes = bytes.replace('\n', '')
info = r_imdb_details.search(bytes)
info = {'Title': info.group(1), 'Year': info.group(2), 'Plot': info.group(3), 'imdbID': ID}
return info
def imdb(phenny, input): def imdb(phenny, input):
""".imdb <movie> - Use the OMDB API to find a link to a movie on IMDb.""" """.imdb <movie> - Find a link to a movie on IMDb."""
query = input.group(2) query = input.group(2)
if not query: if not query:
return phenny.say('.imdb what?') return phenny.say('.imdb what?')
m = imdb_search(query) m = imdb_search(query)
try: if m:
phenny.say('{0} ({1}): {2} http://imdb.com/title/{3}'.format( phenny.say('{0} ({1}): {2} http://imdb.com/title/{3}'.format(
m['Title'], m['Title'],
m['Year'], m['Year'],
m['Plot'], m['Plot'],
m['imdbID'])) m['imdbID']))
except: else:
phenny.reply("No results found for '%s'." % query) phenny.reply("No results found for '%s'." % query)
imdb.commands = ['imdb'] imdb.commands = ['imdb']
imdb.example = '.imdb Promethius' imdb.example = '.imdb Promethius'

1
modules/imdb.txt Normal file

File diff suppressed because one or more lines are too long

View File

@ -18,24 +18,10 @@ def fml(phenny, input):
raise GrumbleError("I tried to use .fml, but it was broken. FML") raise GrumbleError("I tried to use .fml, but it was broken. FML")
doc = lxml.html.fromstring(req) doc = lxml.html.fromstring(req)
quote = doc.find_class('article')[0][0].text_content() quote = doc.find_class('block')[1][0].text_content()
phenny.say(quote) phenny.say(quote)
fml.commands = ['fml'] fml.commands = ['fml']
def mlia(phenny, input):
""".mlia - My life is average."""
try:
req = web.get("http://mylifeisaverage.com/")
except:
raise GrumbleError("I tried to use .mlia, but it wasn't loading. MLIA")
doc = lxml.html.fromstring(req)
quote = doc.find_class('story')[0][0].text_content()
quote = quote.strip()
phenny.say(quote)
mlia.commands = ['mlia']
if __name__ == '__main__': if __name__ == '__main__':
print(__doc__.strip()) print(__doc__.strip())

View File

@ -10,31 +10,30 @@ http://inamidst.com/phenny/
import re import re
import web import web
def google_ajax(query):
"""Search using AjaxSearch, and return its JSON.""" r_google = re.compile(r'href="\/url\?q=(http.*?)\/&amp')
if isinstance(query, str):
query = query.encode('utf-8')
uri = 'https://ajax.googleapis.com/ajax/services/search/web'
args = '?v=1.0&safe=off&q=' + web.quote(query)
bytes = web.get(uri + args, headers={'Referer': 'https://github.com/sbp/phenny'})
return web.json(bytes)
def google_search(query): def google_search(query):
results = google_ajax(query) query = web.quote(query)
try: return results['responseData']['results'][0]['unescapedUrl'] uri = 'https://google.co.uk/search?q=%s' % query
except IndexError: return None bytes = web.get(uri)
except TypeError: m = r_google.search(bytes)
print(results) if m:
return False result = web.decode(m.group(1))
return web.unquote(result)
r_google_count = re.compile(r'id="resultStats">About (.*?) ')
def google_count(query): def google_count(query):
results = google_ajax(query) query = web.quote(query)
if 'responseData' not in results: return '0' uri = 'https://google.co.uk/search?q=%s' % query
if 'cursor' not in results['responseData']: return '0' bytes = web.get(uri)
if 'estimatedResultCount' not in results['responseData']['cursor']: m = r_google_count.search(bytes)
return '0' if m:
return results['responseData']['cursor']['estimatedResultCount'] result = web.decode(m.group(1)).replace(',', '')
return int(result)
else: return 0
def formatnumber(n): def formatnumber(n):
"""Format a number with beautiful commas.""" """Format a number with beautiful commas."""
parts = list(str(n)) parts = list(str(n))
@ -53,7 +52,6 @@ def g(phenny, input):
if not hasattr(phenny.bot, 'last_seen_uri'): if not hasattr(phenny.bot, 'last_seen_uri'):
phenny.bot.last_seen_uri = {} phenny.bot.last_seen_uri = {}
phenny.bot.last_seen_uri[input.sender] = uri phenny.bot.last_seen_uri[input.sender] = uri
elif uri is False: phenny.reply("Problem getting data from Google.")
else: phenny.reply("No results found for '%s'." % query) else: phenny.reply("No results found for '%s'." % query)
g.commands = ['g'] g.commands = ['g']
g.priority = 'high' g.priority = 'high'
@ -81,7 +79,6 @@ def gcs(phenny, input):
queries = r_query.findall(input.group(2)) queries = r_query.findall(input.group(2))
if len(queries) > 6: if len(queries) > 6:
return phenny.reply('Sorry, can only compare up to six things.') return phenny.reply('Sorry, can only compare up to six things.')
results = [] results = []
for i, query in enumerate(queries): for i, query in enumerate(queries):
query = query.strip('[]') query = query.strip('[]')
@ -114,7 +111,6 @@ def bing(phenny, input):
else: lang = 'en-GB' else: lang = 'en-GB'
if not query: if not query:
return phenny.reply('.bing what?') return phenny.reply('.bing what?')
uri = bing_search(query, lang) uri = bing_search(query, lang)
if uri: if uri:
phenny.reply(uri) phenny.reply(uri)
@ -125,7 +121,7 @@ def bing(phenny, input):
bing.commands = ['bing'] bing.commands = ['bing']
bing.example = '.bing swhack' bing.example = '.bing swhack'
r_duck = re.compile(r'nofollow" class="[^"]+" href="(http.*?)">') r_duck = re.compile(r'nofollow" class="[^"]+" href=".+?(http.*?)">')
def duck_search(query): def duck_search(query):
query = query.replace('!', '') query = query.replace('!', '')
@ -133,14 +129,26 @@ def duck_search(query):
uri = 'https://duckduckgo.com/html/?q=%s&kl=uk-en' % query uri = 'https://duckduckgo.com/html/?q=%s&kl=uk-en' % query
bytes = web.get(uri) bytes = web.get(uri)
m = r_duck.search(bytes) m = r_duck.search(bytes)
if m: return web.decode(m.group(1)) if m:
result = web.decode(m.group(1))
return web.unquote(result)
def duck_api(query):
uri = 'https://api.duckduckgo.com/?q=%s&format=json&no_redirect=1' % query
bytes = web.get(uri)
json = web.json(bytes)
if query[:1] == '!':
return json['Redirect']
elif json['Abstract']:
return json['AbstractURL'] + ' : ' + json['Abstract']
else: return json['AbstractURL']
def duck(phenny, input): def duck(phenny, input):
"""Queries DuckDuckGo for specified input.""" """Queries DuckDuckGo for specified input."""
query = input.group(2) query = input.group(2)
if not query: return phenny.reply('.ddg what?') if not query: return phenny.reply('.ddg what?')
uri = duck_api(query)
uri = duck_search(query) if not uri: uri = duck_search(query)
if uri: if uri:
phenny.reply(uri) phenny.reply(uri)
if not hasattr(phenny.bot, 'last_seen_uri'): if not hasattr(phenny.bot, 'last_seen_uri'):

View File

@ -18,7 +18,7 @@ class TestHead(unittest.TestCase):
out = self.phenny.reply.call_args[0][0] out = self.phenny.reply.call_args[0][0]
m = re.match('^200, text/html, utf-8, \d{4}\-\d{2}\-\d{2} '\ m = re.match('^200, text/html, utf-8, \d{4}\-\d{2}\-\d{2} '\
'\d{2}:\d{2}:\d{2} UTC, [0-9\.]+ s$', out, flags=re.UNICODE) '\d{2}:\d{2}:\d{2} UTC, [0-9]+ bytes, [0-9]+.[0-9]+ s$', out, flags=re.UNICODE)
self.assertTrue(m) self.assertTrue(m)
def test_head_404(self): def test_head_404(self):

View File

@ -15,7 +15,3 @@ class TestMylife(unittest.TestCase):
def test_fml(self): def test_fml(self):
mylife.fml(self.phenny, None) mylife.fml(self.phenny, None)
assert self.phenny.say.called is True assert self.phenny.say.called is True
def test_mlia(self):
mylife.mlia(self.phenny, None)
assert self.phenny.say.called is True

View File

@ -6,7 +6,7 @@ author: mutantmonkey <mutantmonkey@mutantmonkey.in>
import re import re
import unittest import unittest
from mock import MagicMock, Mock from mock import MagicMock, Mock
from modules.search import google_ajax, google_search, google_count, \ from modules.search import duck_api, google_search, google_count, \
formatnumber, g, gc, gcs, bing_search, bing, duck_search, duck, \ formatnumber, g, gc, gcs, bing_search, bing, duck_search, duck, \
search, suggest search, suggest
@ -15,12 +15,6 @@ class TestSearch(unittest.TestCase):
def setUp(self): def setUp(self):
self.phenny = MagicMock() self.phenny = MagicMock()
def test_google_ajax(self):
data = google_ajax('phenny')
assert 'responseData' in data
assert data['responseStatus'] == 200
def test_google_search(self): def test_google_search(self):
out = google_search('phenny') out = google_search('phenny')
@ -31,8 +25,7 @@ class TestSearch(unittest.TestCase):
input = Mock(group=lambda x: 'swhack') input = Mock(group=lambda x: 'swhack')
g(self.phenny, input) g(self.phenny, input)
self.phenny.reply.assert_not_called_with( assert self.phenny.reply.called is True
"Problem getting data from Google.")
def test_gc(self): def test_gc(self):
query = 'extrapolate' query = 'extrapolate'
@ -73,6 +66,10 @@ class TestSearch(unittest.TestCase):
assert self.phenny.reply.called is True assert self.phenny.reply.called is True
def test_duck_api(self):
input = Mock(group=lambda x: 'swhack')
duck(self.phenny, input)
def test_search(self): def test_search(self):
input = Mock(group=lambda x: 'vtluug') input = Mock(group=lambda x: 'vtluug')
duck(self.phenny, input) duck(self.phenny, input)

View File

@ -8,32 +8,32 @@ import unittest
from mock import MagicMock, Mock from mock import MagicMock, Mock
from modules import vtluugwiki from modules import vtluugwiki
# Disabling tests until wiki is up
class TestVtluugwiki(unittest.TestCase): #class TestVtluugwiki(unittest.TestCase):
def setUp(self): # def setUp(self):
self.phenny = MagicMock() # self.phenny = MagicMock()
#
def test_vtluug(self): # def test_vtluug(self):
input = Mock(groups=lambda: ['', "VT-Wireless"]) # input = Mock(groups=lambda: ['', "VT-Wireless"])
vtluugwiki.vtluug(self.phenny, input) # vtluugwiki.vtluug(self.phenny, input)
#
out = self.phenny.say.call_args[0][0] # out = self.phenny.say.call_args[0][0]
m = re.match('^.* - https:\/\/vtluug\.org\/wiki\/VT-Wireless$', # m = re.match('^.* - https:\/\/vtluug\.org\/wiki\/VT-Wireless$',
out, flags=re.UNICODE) # out, flags=re.UNICODE)
self.assertTrue(m) # self.assertTrue(m)
#
def test_vtluug_invalid(self): # def test_vtluug_invalid(self):
term = "EAP-TLS#netcfg" # term = "EAP-TLS#netcfg"
input = Mock(groups=lambda: ['', term]) # input = Mock(groups=lambda: ['', term])
vtluugwiki.vtluug(self.phenny, input) # vtluugwiki.vtluug(self.phenny, input)
#
self.phenny.say.assert_called_once_with( "Can't find anything in "\ # self.phenny.say.assert_called_once_with( "Can't find anything in "\
"the VTLUUG Wiki for \"{0}\".".format(term)) # "the VTLUUG Wiki for \"{0}\".".format(term))
#
def test_vtluug_none(self): # def test_vtluug_none(self):
term = "Ajgoajh" # term = "Ajgoajh"
input = Mock(groups=lambda: ['', term]) # input = Mock(groups=lambda: ['', term])
vtluugwiki.vtluug(self.phenny, input) # vtluugwiki.vtluug(self.phenny, input)
#
self.phenny.say.assert_called_once_with( "Can't find anything in "\ # self.phenny.say.assert_called_once_with( "Can't find anything in "\
"the VTLUUG Wiki for \"{0}\".".format(term)) # "the VTLUUG Wiki for \"{0}\".".format(term))

31
modules/tools.py Executable file
View File

@ -0,0 +1,31 @@
#!/usr/bin/env python3
"""
tools.py - Phenny Tools
Copyright 2008, Sean B. Palmer, inamidst.com
Licensed under the Eiffel Forum License 2.
http://inamidst.com/phenny/
"""
class GrumbleError(Exception):
pass
def deprecated(old):
def new(phenny, input, old=old):
self = phenny
origin = type('Origin', (object,), {
'sender': input.sender,
'nick': input.nick
})()
match = input.match
args = [input.bytes, input.sender, '@@']
old(self, origin, match, args)
new.__module__ = old.__module__
new.__name__ = old.__name__
return new
if __name__ == '__main__':
print(__doc__.strip())

68
modules/web.py Executable file
View File

@ -0,0 +1,68 @@
#!/usr/bin/env python3
"""
web.py - Web Facilities
Author: Sean B. Palmer, inamidst.com
About: http://inamidst.com/phenny/
"""
import re
import urllib.parse
import requests
import json as jsonlib
from requests.exceptions import ConnectionError, HTTPError, InvalidURL
from html.entities import name2codepoint
from urllib.parse import quote, unquote
user_agent = "Mozilla/5.0 (Phenny)"
default_headers = {'User-Agent': user_agent}
def get(uri, headers={}, verify=True, **kwargs):
if not uri.startswith('http'):
return
headers.update(default_headers)
r = requests.get(uri, headers=headers, verify=verify, **kwargs)
r.raise_for_status()
return r.text
def head(uri, headers={}, verify=True, **kwargs):
if not uri.startswith('http'):
return
headers.update(default_headers)
r = requests.head(uri, headers=headers, verify=verify, **kwargs)
r.raise_for_status()
return r.headers
def post(uri, data, headers={}, verify=True, **kwargs):
if not uri.startswith('http'):
return
headers.update(default_headers)
r = requests.post(uri, data=data, headers=headers, verify=verify, **kwargs)
r.raise_for_status()
return r.text
r_entity = re.compile(r'&([^;\s]+);')
def entity(match):
value = match.group(1).lower()
if value.startswith('#x'):
return chr(int(value[2:], 16))
elif value.startswith('#'):
return chr(int(value[1:]))
elif value in name2codepoint:
return chr(name2codepoint[value])
return '[' + value + ']'
def decode(html):
return r_entity.sub(entity, html)
r_string = re.compile(r'("(\\.|[^"\\])*")')
r_json = re.compile(r'^[,:{}\[\]0-9.\-+Eaeflnr-u \n\r\t]+$')
env = {'__builtins__': None, 'null': None, 'true': True, 'false': False}
def json(text):
"""Evaluate JSON text safely (we hope)."""
return jsonlib.loads(text)
if __name__=="__main__":
main()