From e91f3bd16b92cf665cdcd73e0b2cc6289d9c8ba0 Mon Sep 17 00:00:00 2001 From: Robin Richtsfeld Date: Fri, 16 Mar 2018 14:27:18 +0100 Subject: [PATCH] Refactor Wikipedia modules --- modules/archwiki.py | 39 ++++++------ modules/vtluugwiki.py | 34 +++++----- modules/wikipedia.py | 34 +++++----- wiki.py | 142 +++++++++++++++++++++++++++++++++++++----- 4 files changed, 173 insertions(+), 76 deletions(-) diff --git a/modules/archwiki.py b/modules/archwiki.py index 53f09c6..909db21 100644 --- a/modules/archwiki.py +++ b/modules/archwiki.py @@ -10,36 +10,33 @@ modified from Wikipedia module author: mutantmonkey """ -import re -import web import wiki -wikiapi = 'https://wiki.archlinux.org/api.php?action=query&list=search&srsearch={0}&limit=1&prop=snippet&format=json' -wikiuri = 'https://wiki.archlinux.org/index.php/{0}' -wikisearch = 'https://wiki.archlinux.org/index.php/Special:Search?' \ - + 'search={0}&fulltext=Search' +endpoints = { + 'api': 'https://wiki.archlinux.org/api.php?action=query&list=search&srsearch={0}&limit=1&format=json', + 'url': 'https://wiki.archlinux.org/index.php/{0}', + 'search': 'https://wiki.archlinux.org/index.php/Special:Search?search={0}&fulltext=Search', +} def awik(phenny, input): - origterm = input.groups()[1] - if not origterm: + """.awik - Look up something on the ArchWiki.""" + + origterm = input.group(1) + if not origterm: return phenny.say('Perhaps you meant ".awik dwm"?') - term = web.unquote(origterm) - term = term[0].upper() + term[1:] - term = term.replace(' ', '_') + term, section = wiki.parse_term(origterm) - w = wiki.Wiki(wikiapi, wikiuri, wikisearch) + w = wiki.Wiki(endpoints) + match = w.search(term) - try: - result = w.search(term) - except web.ConnectionError: - error = "Can't connect to wiki.archlinux.org ({0})".format(wikiuri.format(term)) - return phenny.say(error) + if not match: + phenny.say('Can\'t find anything in the ArchWiki for "{0}".'.format(term)) + return - if result is not None: - phenny.say(result) - else: - phenny.say('Can\'t find anything in the ArchWiki for "{0}".'.format(origterm)) + snippet, url = wiki.extract_snippet(match, section) + + phenny.say('"{0}" - {1}'.format(snippet, url)) awik.commands = ['awik'] awik.priority = 'high' diff --git a/modules/vtluugwiki.py b/modules/vtluugwiki.py index 12a3d36..0e2f2a1 100644 --- a/modules/vtluugwiki.py +++ b/modules/vtluugwiki.py @@ -10,14 +10,13 @@ modified from Wikipedia module author: mutantmonkey """ -import re -import web import wiki -wikiapi = 'https://vtluug.org/w/api.php?action=query&list=search&srsearch={0}&limit=1&prop=snippet&format=json' -wikiuri = 'https://vtluug.org/wiki/{0}' -wikisearch = 'https://vtluug.org/wiki/Special:Search?' \ - + 'search={0}&fulltext=Search' +endpoints = { + 'api': 'https://vtluug.org/w/api.php?action=query&list=search&srsearch={0}&limit=1&prop=snippet&format=json', + 'url': 'https://vtluug.org/wiki/{0}', + 'search': 'https://vtluug.org/wiki/Special:Search?search={0}&fulltext=Search', +} def vtluug(phenny, input): """.vtluug - Look up something on the VTLUUG wiki.""" @@ -26,22 +25,19 @@ def vtluug(phenny, input): if not origterm: return phenny.say('Perhaps you meant ".vtluug VT-Wireless"?') - term = web.unquote(origterm) - term = term[0].upper() + term[1:] - term = term.replace(' ', '_') + term, section = wiki.parse_term(origterm) - w = wiki.Wiki(wikiapi, wikiuri, wikisearch) + w = wiki.Wiki(endpoints) + match = w.search(term) - try: - result = w.search(term) - except web.ConnectionError: - error = "Can't connect to vtluug.org ({0})".format(wikiuri.format(term)) - return phenny.say(error) + if not match: + phenny.say('Can\'t find anything in the VTLUUG Wiki for "{0}".'.format(term)) + return + + snippet, url = wiki.extract_snippet(match, section) + + phenny.say('"{0}" - {1}'.format(snippet, url)) - if result is not None: - phenny.say(result) - else: - phenny.say('Can\'t find anything in the VTLUUG Wiki for "{0}".'.format(origterm)) vtluug.commands = ['vtluug'] vtluug.priority = 'high' diff --git a/modules/wikipedia.py b/modules/wikipedia.py index 8dbe6f4..b37cfcf 100644 --- a/modules/wikipedia.py +++ b/modules/wikipedia.py @@ -7,14 +7,13 @@ Licensed under the Eiffel Forum License 2. http://inamidst.com/phenny/ """ -import re -import web import wiki -wikiapi = 'https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={0}&limit=1&prop=snippet&format=json' -wikiuri = 'https://en.wikipedia.org/wiki/{0}' -wikisearch = 'https://en.wikipedia.org/wiki/Special:Search?' \ - + 'search={0}&fulltext=Search' +endpoints = { + 'api': 'https://en.wikipedia.org/w/api.php?format=json&action=query&list=search&srsearch={0}&prop=snippet&limit=1', + 'url': 'https://en.wikipedia.org/wiki/{0}', + 'search': 'https://en.wikipedia.org/wiki/Special:Search?search={0}&fulltext=Search', +} def wik(phenny, input): """.wik - Look up something on Wikipedia.""" @@ -23,22 +22,19 @@ def wik(phenny, input): if not origterm: return phenny.say('Perhaps you meant ".wik Zen"?') - term = web.unquote(origterm) - term = term[0].upper() + term[1:] - term = term.replace(' ', '_') + origterm = origterm.strip() + term, section = wiki.parse_term(origterm) - w = wiki.Wiki(wikiapi, wikiuri, wikisearch) + w = wiki.Wiki(endpoints) + match = w.search(term) - try: - result = w.search(term) - except web.ConnectionError: - error = "Can't connect to en.wikipedia.org ({0})".format(wikiuri.format(term)) - return phenny.say(error) - - if result is not None: - phenny.say(result) - else: + if not match: phenny.say('Can\'t find anything in Wikipedia for "{0}".'.format(origterm)) + return + + snippet, url = wiki.extract_snippet(match, section) + + phenny.say('"{0}" - {1}'.format(snippet, url)) wik.commands = ['wik'] wik.priority = 'high' diff --git a/wiki.py b/wiki.py index 58dc9b4..1a92cfb 100644 --- a/wiki.py +++ b/wiki.py @@ -1,5 +1,8 @@ import json +import lxml.html import re +from requests.exceptions import HTTPError +from urllib.parse import quote, unquote import web @@ -16,15 +19,104 @@ abbrs = ['etc', 'ca', 'cf', 'Co', 'Ltd', 'Inc', 'Mt', 'Mr', 'Mrs', 'syn', 'transl', 'sess', 'fl', 'Op', 'Dec', 'Brig', 'Gen'] \ + list('ABCDEFGHIJKLMNOPQRSTUVWXYZ') \ + list('abcdefghijklmnopqrstuvwxyz') -t_sentence = r'^.{5,}?(?