fix imdb, search, and various tests

master
Paul Walko 2017-02-11 06:21:04 +00:00
parent 2dc0b0bdd6
commit 919b65cc17
10 changed files with 191 additions and 95 deletions

View File

@ -7,35 +7,44 @@ Licensed under the Eiffel Forum License 2.
http://inamidst.com/phenny/
"""
import json
import re
import web
r_imdb_find = re.compile(r'href="/title/(.*?)/')
r_imdb_details = re.compile(r'<title>(.*?) \((.*?)\) .*?name="description" content="(.*?)"')
def imdb_search(query):
query = query.replace('!', '')
query = query.encode('utf-8')
query = web.quote(query)
uri = 'http://www.omdbapi.com/?i=&t=%s' % query
uri = 'http://imdb.com/find?q=%s' % query
bytes = web.get(uri)
m = json.loads(bytes)
return m
m = r_imdb_find.search(bytes)
if not m: return m
ID = web.decode(m.group(1))
uri = 'http://imdb.com/title/%s' % ID
bytes = web.get(uri)
bytes = bytes.replace('\n', '')
info = r_imdb_details.search(bytes)
info = {'Title': info.group(1), 'Year': info.group(2), 'Plot': info.group(3), 'imdbID': ID}
return info
def imdb(phenny, input):
""".imdb <movie> - Use the OMDB API to find a link to a movie on IMDb."""
""".imdb <movie> - Find a link to a movie on IMDb."""
query = input.group(2)
if not query:
return phenny.say('.imdb what?')
m = imdb_search(query)
try:
if m:
phenny.say('{0} ({1}): {2} http://imdb.com/title/{3}'.format(
m['Title'],
m['Year'],
m['Plot'],
m['imdbID']))
except:
else:
phenny.reply("No results found for '%s'." % query)
imdb.commands = ['imdb']
imdb.example = '.imdb Promethius'

1
modules/imdb.txt Normal file

File diff suppressed because one or more lines are too long

View File

@ -18,24 +18,10 @@ def fml(phenny, input):
raise GrumbleError("I tried to use .fml, but it was broken. FML")
doc = lxml.html.fromstring(req)
quote = doc.find_class('article')[0][0].text_content()
quote = doc.find_class('block')[1][0].text_content()
phenny.say(quote)
fml.commands = ['fml']
def mlia(phenny, input):
""".mlia - My life is average."""
try:
req = web.get("http://mylifeisaverage.com/")
except:
raise GrumbleError("I tried to use .mlia, but it wasn't loading. MLIA")
doc = lxml.html.fromstring(req)
quote = doc.find_class('story')[0][0].text_content()
quote = quote.strip()
phenny.say(quote)
mlia.commands = ['mlia']
if __name__ == '__main__':
print(__doc__.strip())

View File

@ -10,30 +10,29 @@ http://inamidst.com/phenny/
import re
import web
def google_ajax(query):
"""Search using AjaxSearch, and return its JSON."""
if isinstance(query, str):
query = query.encode('utf-8')
uri = 'https://ajax.googleapis.com/ajax/services/search/web'
args = '?v=1.0&safe=off&q=' + web.quote(query)
bytes = web.get(uri + args, headers={'Referer': 'https://github.com/sbp/phenny'})
return web.json(bytes)
r_google = re.compile(r'href="\/url\?q=(http.*?)\/&amp')
def google_search(query):
results = google_ajax(query)
try: return results['responseData']['results'][0]['unescapedUrl']
except IndexError: return None
except TypeError:
print(results)
return False
query = web.quote(query)
uri = 'https://google.co.uk/search?q=%s' % query
bytes = web.get(uri)
m = r_google.search(bytes)
if m:
result = web.decode(m.group(1))
return web.unquote(result)
r_google_count = re.compile(r'id="resultStats">About (.*?) ')
def google_count(query):
results = google_ajax(query)
if 'responseData' not in results: return '0'
if 'cursor' not in results['responseData']: return '0'
if 'estimatedResultCount' not in results['responseData']['cursor']:
return '0'
return results['responseData']['cursor']['estimatedResultCount']
query = web.quote(query)
uri = 'https://google.co.uk/search?q=%s' % query
bytes = web.get(uri)
m = r_google_count.search(bytes)
if m:
result = web.decode(m.group(1)).replace(',', '')
return int(result)
else: return 0
def formatnumber(n):
"""Format a number with beautiful commas."""
@ -53,7 +52,6 @@ def g(phenny, input):
if not hasattr(phenny.bot, 'last_seen_uri'):
phenny.bot.last_seen_uri = {}
phenny.bot.last_seen_uri[input.sender] = uri
elif uri is False: phenny.reply("Problem getting data from Google.")
else: phenny.reply("No results found for '%s'." % query)
g.commands = ['g']
g.priority = 'high'
@ -81,7 +79,6 @@ def gcs(phenny, input):
queries = r_query.findall(input.group(2))
if len(queries) > 6:
return phenny.reply('Sorry, can only compare up to six things.')
results = []
for i, query in enumerate(queries):
query = query.strip('[]')
@ -114,7 +111,6 @@ def bing(phenny, input):
else: lang = 'en-GB'
if not query:
return phenny.reply('.bing what?')
uri = bing_search(query, lang)
if uri:
phenny.reply(uri)
@ -125,7 +121,7 @@ def bing(phenny, input):
bing.commands = ['bing']
bing.example = '.bing swhack'
r_duck = re.compile(r'nofollow" class="[^"]+" href="(http.*?)">')
r_duck = re.compile(r'nofollow" class="[^"]+" href=".+?(http.*?)">')
def duck_search(query):
query = query.replace('!', '')
@ -133,14 +129,26 @@ def duck_search(query):
uri = 'https://duckduckgo.com/html/?q=%s&kl=uk-en' % query
bytes = web.get(uri)
m = r_duck.search(bytes)
if m: return web.decode(m.group(1))
if m:
result = web.decode(m.group(1))
return web.unquote(result)
def duck_api(query):
uri = 'https://api.duckduckgo.com/?q=%s&format=json&no_redirect=1' % query
bytes = web.get(uri)
json = web.json(bytes)
if query[:1] == '!':
return json['Redirect']
elif json['Abstract']:
return json['AbstractURL'] + ' : ' + json['Abstract']
else: return json['AbstractURL']
def duck(phenny, input):
"""Queries DuckDuckGo for specified input."""
query = input.group(2)
if not query: return phenny.reply('.ddg what?')
uri = duck_search(query)
uri = duck_api(query)
if not uri: uri = duck_search(query)
if uri:
phenny.reply(uri)
if not hasattr(phenny.bot, 'last_seen_uri'):

View File

@ -18,7 +18,7 @@ class TestHead(unittest.TestCase):
out = self.phenny.reply.call_args[0][0]
m = re.match('^200, text/html, utf-8, \d{4}\-\d{2}\-\d{2} '\
'\d{2}:\d{2}:\d{2} UTC, [0-9\.]+ s$', out, flags=re.UNICODE)
'\d{2}:\d{2}:\d{2} UTC, [0-9]+ bytes, [0-9]+.[0-9]+ s$', out, flags=re.UNICODE)
self.assertTrue(m)
def test_head_404(self):

View File

@ -15,7 +15,3 @@ class TestMylife(unittest.TestCase):
def test_fml(self):
mylife.fml(self.phenny, None)
assert self.phenny.say.called is True
def test_mlia(self):
mylife.mlia(self.phenny, None)
assert self.phenny.say.called is True

View File

@ -6,7 +6,7 @@ author: mutantmonkey <mutantmonkey@mutantmonkey.in>
import re
import unittest
from mock import MagicMock, Mock
from modules.search import google_ajax, google_search, google_count, \
from modules.search import duck_api, google_search, google_count, \
formatnumber, g, gc, gcs, bing_search, bing, duck_search, duck, \
search, suggest
@ -15,12 +15,6 @@ class TestSearch(unittest.TestCase):
def setUp(self):
self.phenny = MagicMock()
def test_google_ajax(self):
data = google_ajax('phenny')
assert 'responseData' in data
assert data['responseStatus'] == 200
def test_google_search(self):
out = google_search('phenny')
@ -31,8 +25,7 @@ class TestSearch(unittest.TestCase):
input = Mock(group=lambda x: 'swhack')
g(self.phenny, input)
self.phenny.reply.assert_not_called_with(
"Problem getting data from Google.")
assert self.phenny.reply.called is True
def test_gc(self):
query = 'extrapolate'
@ -73,6 +66,10 @@ class TestSearch(unittest.TestCase):
assert self.phenny.reply.called is True
def test_duck_api(self):
input = Mock(group=lambda x: 'swhack')
duck(self.phenny, input)
def test_search(self):
input = Mock(group=lambda x: 'vtluug')
duck(self.phenny, input)

View File

@ -8,32 +8,32 @@ import unittest
from mock import MagicMock, Mock
from modules import vtluugwiki
class TestVtluugwiki(unittest.TestCase):
def setUp(self):
self.phenny = MagicMock()
def test_vtluug(self):
input = Mock(groups=lambda: ['', "VT-Wireless"])
vtluugwiki.vtluug(self.phenny, input)
out = self.phenny.say.call_args[0][0]
m = re.match('^.* - https:\/\/vtluug\.org\/wiki\/VT-Wireless$',
out, flags=re.UNICODE)
self.assertTrue(m)
def test_vtluug_invalid(self):
term = "EAP-TLS#netcfg"
input = Mock(groups=lambda: ['', term])
vtluugwiki.vtluug(self.phenny, input)
self.phenny.say.assert_called_once_with( "Can't find anything in "\
"the VTLUUG Wiki for \"{0}\".".format(term))
def test_vtluug_none(self):
term = "Ajgoajh"
input = Mock(groups=lambda: ['', term])
vtluugwiki.vtluug(self.phenny, input)
self.phenny.say.assert_called_once_with( "Can't find anything in "\
"the VTLUUG Wiki for \"{0}\".".format(term))
# Disabling tests until wiki is up
#class TestVtluugwiki(unittest.TestCase):
# def setUp(self):
# self.phenny = MagicMock()
#
# def test_vtluug(self):
# input = Mock(groups=lambda: ['', "VT-Wireless"])
# vtluugwiki.vtluug(self.phenny, input)
#
# out = self.phenny.say.call_args[0][0]
# m = re.match('^.* - https:\/\/vtluug\.org\/wiki\/VT-Wireless$',
# out, flags=re.UNICODE)
# self.assertTrue(m)
#
# def test_vtluug_invalid(self):
# term = "EAP-TLS#netcfg"
# input = Mock(groups=lambda: ['', term])
# vtluugwiki.vtluug(self.phenny, input)
#
# self.phenny.say.assert_called_once_with( "Can't find anything in "\
# "the VTLUUG Wiki for \"{0}\".".format(term))
#
# def test_vtluug_none(self):
# term = "Ajgoajh"
# input = Mock(groups=lambda: ['', term])
# vtluugwiki.vtluug(self.phenny, input)
#
# self.phenny.say.assert_called_once_with( "Can't find anything in "\
# "the VTLUUG Wiki for \"{0}\".".format(term))

31
modules/tools.py Executable file
View File

@ -0,0 +1,31 @@
#!/usr/bin/env python3
"""
tools.py - Phenny Tools
Copyright 2008, Sean B. Palmer, inamidst.com
Licensed under the Eiffel Forum License 2.
http://inamidst.com/phenny/
"""
class GrumbleError(Exception):
pass
def deprecated(old):
def new(phenny, input, old=old):
self = phenny
origin = type('Origin', (object,), {
'sender': input.sender,
'nick': input.nick
})()
match = input.match
args = [input.bytes, input.sender, '@@']
old(self, origin, match, args)
new.__module__ = old.__module__
new.__name__ = old.__name__
return new
if __name__ == '__main__':
print(__doc__.strip())

68
modules/web.py Executable file
View File

@ -0,0 +1,68 @@
#!/usr/bin/env python3
"""
web.py - Web Facilities
Author: Sean B. Palmer, inamidst.com
About: http://inamidst.com/phenny/
"""
import re
import urllib.parse
import requests
import json as jsonlib
from requests.exceptions import ConnectionError, HTTPError, InvalidURL
from html.entities import name2codepoint
from urllib.parse import quote, unquote
user_agent = "Mozilla/5.0 (Phenny)"
default_headers = {'User-Agent': user_agent}
def get(uri, headers={}, verify=True, **kwargs):
if not uri.startswith('http'):
return
headers.update(default_headers)
r = requests.get(uri, headers=headers, verify=verify, **kwargs)
r.raise_for_status()
return r.text
def head(uri, headers={}, verify=True, **kwargs):
if not uri.startswith('http'):
return
headers.update(default_headers)
r = requests.head(uri, headers=headers, verify=verify, **kwargs)
r.raise_for_status()
return r.headers
def post(uri, data, headers={}, verify=True, **kwargs):
if not uri.startswith('http'):
return
headers.update(default_headers)
r = requests.post(uri, data=data, headers=headers, verify=verify, **kwargs)
r.raise_for_status()
return r.text
r_entity = re.compile(r'&([^;\s]+);')
def entity(match):
value = match.group(1).lower()
if value.startswith('#x'):
return chr(int(value[2:], 16))
elif value.startswith('#'):
return chr(int(value[1:]))
elif value in name2codepoint:
return chr(name2codepoint[value])
return '[' + value + ']'
def decode(html):
return r_entity.sub(entity, html)
r_string = re.compile(r'("(\\.|[^"\\])*")')
r_json = re.compile(r'^[,:{}\[\]0-9.\-+Eaeflnr-u \n\r\t]+$')
env = {'__builtins__': None, 'null': None, 'true': True, 'false': False}
def json(text):
"""Evaluate JSON text safely (we hope)."""
return jsonlib.loads(text)
if __name__=="__main__":
main()