From ec32741826e9cf039bf014f79fee0506f1ef6725 Mon Sep 17 00:00:00 2001 From: mutantmonkey Date: Wed, 13 Jun 2012 21:58:31 -0700 Subject: [PATCH] refactor mediawiki modules into unified library --- modules/archwiki.py | 62 +++------------ modules/uncyclopedia.py | 165 ---------------------------------------- modules/vtluugwiki.py | 63 +++------------ modules/wikipedia.py | 61 +++------------ wiki.py | 54 +++++++++++++ 5 files changed, 88 insertions(+), 317 deletions(-) delete mode 100644 modules/uncyclopedia.py create mode 100644 wiki.py diff --git a/modules/archwiki.py b/modules/archwiki.py index 3526d2b..d60161f 100644 --- a/modules/archwiki.py +++ b/modules/archwiki.py @@ -11,74 +11,34 @@ author: mutantmonkey """ import re, urllib.request, urllib.parse, urllib.error -import web -import json +import wiki -wikiapi = 'https://wiki.archlinux.org/api.php?action=query&list=search&srsearch=%s&limit=1&prop=snippet&format=json' -wikiuri = 'https://wiki.archlinux.org/index.php/%s' +wikiapi = 'https://wiki.archlinux.org/api.php?action=query&list=search&srsearch={0}&limit=1&prop=snippet&format=json' +wikiuri = 'https://wiki.archlinux.org/index.php/{0}' wikisearch = 'https://wiki.archlinux.org/index.php/Special:Search?' \ - + 'search=%s&fulltext=Search' - -r_tr = re.compile(r'(?ims)]*>.*?') -r_content = re.compile(r'(?ims)

\n.*?') -r_paragraph = re.compile(r'(?ims)]*>.*?

|]*>.*?') -r_tag = re.compile(r'<(?!!)[^>]+>') -r_whitespace = re.compile(r'[\t\r\n ]+') -r_redirect = re.compile( - r'(?ims)class=.redirectText.>\s*') - s = s.replace('<', '<') - s = s.replace('&', '&') - s = s.replace(' ', ' ') - return s - -def text(html): - html = r_tag.sub('', html) - html = r_whitespace.sub(' ', html) - return unescape(html).strip() - -def archwiki(term, last=False): - global wikiapi, wikiuri - url = wikiapi % term - bytes = web.get(url) - result = json.loads(bytes) - result = result['query']['search'] - if len(result) <= 0: - return None - term = result[0]['title'] - term = term.replace(' ', '_') - snippet = text(result[0]['snippet']) - return "%s - %s" % (snippet, wikiuri % term) + + 'search={0}&fulltext=Search' def awik(phenny, input): origterm = input.groups()[1] if not origterm: return phenny.say('Perhaps you meant ".awik dwm"?') - origterm = origterm term = urllib.parse.unquote(origterm) term = term[0].upper() + term[1:] term = term.replace(' ', '_') - try: result = archwiki(term) + w = wiki.Wiki(wikiapi, wikiuri, wikisearch) + + try: + result = w.search(term) except IOError: - error = "Can't connect to wiki.archlinux.org (%s)" % (wikiuri % term) + error = "Can't connect to wiki.archlinux.org ({0})".format(wikiuri.format(term)) return phenny.say(error) if result is not None: phenny.say(result) - else: phenny.say('Can\'t find anything in the ArchWiki for "%s".' % origterm) + else: + phenny.say('Can\'t find anything in the ArchWiki for "{0}".'.format(origterm)) awik.commands = ['awik'] awik.priority = 'high' diff --git a/modules/uncyclopedia.py b/modules/uncyclopedia.py deleted file mode 100644 index 3b8927d..0000000 --- a/modules/uncyclopedia.py +++ /dev/null @@ -1,165 +0,0 @@ -#!/usr/bin/env python -""" -uncyclopedia.py - Phenny Uncyclopedia Module -Copyright 2008-9, Sean B. Palmer, inamidst.com -Licensed under the Eiffel Forum License 2. - -http://inamidst.com/phenny/ - -modified from Wikipedia module -author: mutantmonkey -""" - -import re, urllib.request, urllib.parse, urllib.error -import web - -wikiuri = 'http://uncyclopedia.wikia.com/wiki/%s' -wikisearch = 'http://uncyclopedia.wikia.com/wiki/Special:Search?' \ - + 'search=%s&fulltext=Search' - -r_tr = re.compile(r'(?ims)]*>.*?') -r_paragraph = re.compile(r'(?ims)]*>.*?

|]*>.*?') -r_tag = re.compile(r'<(?!!)[^>]+>') -r_whitespace = re.compile(r'[\t\r\n ]+') -r_redirect = re.compile( - r'(?ims)class=.redirectText.>\s*') - s = s.replace('<', '<') - s = s.replace('&', '&') - s = s.replace(' ', ' ') - return s - -def text(html): - html = r_tag.sub('', html) - html = r_whitespace.sub(' ', html) - return unescape(html).strip() - -def search(term): - try: from . import search - except ImportError as e: - print(e) - return term - - if not isinstance(term, str): - term = term.decode('utf-8') - - term = term.replace('_', ' ') - try: uri = search.result('site:uncyclopedia.wikia.com %s' % term) - except IndexError: return term - if uri: - return uri[len('http://uncyclopedia.wikia.com/wiki/'):] - else: return term - -def uncyclopedia(term, last=False): - global wikiuri - if not '%' in term: - if isinstance(term, str): - t = term - else: t = term - q = urllib.parse.quote(t) - u = wikiuri % q - bytes = web.get(u) - else: bytes = web.get(wikiuri % term) - bytes = r_tr.sub('', bytes) - - if not last: - r = r_redirect.search(bytes[:4096]) - if r: - term = urllib.parse.unquote(r.group(1)) - return uncyclopedia(term, last=True) - - paragraphs = r_paragraph.findall(bytes) - - if not paragraphs: - if not last: - term = search(term) - return uncyclopedia(term, last=True) - return None - - # Pre-process - paragraphs = [para for para in paragraphs - if (para and 'technical limitations' not in para - and 'window.showTocToggle' not in para - and 'Deletion_policy' not in para - and 'Template:AfD_footer' not in para - and not (para.startswith('

') and - para.endswith('

')) - and not 'disambiguation)"' in para) - and not '(images and media)' in para - and not 'This article contains a' in para - and not 'id="coordinates"' in para - and not 'class="thumb' in para - and not 'There is currently no text in this page.' in para] - # and not 'style="display:none"' in para] - - for i, para in enumerate(paragraphs): - para = para.replace('', '|') - para = para.replace('', '|') - paragraphs[i] = text(para).strip() - - # Post-process - paragraphs = [para for para in paragraphs if - (para and not (para.endswith(':') and len(para) < 150))] - - para = text(paragraphs[0]) - m = r_sentence.match(para) - - if not m: - if not last: - term = search(term) - return uncyclopedia(term, last=True) - return None - sentence = m.group(0) - - maxlength = 275 - if len(sentence) > maxlength: - sentence = sentence[:maxlength] - words = sentence[:-5].split(' ') - words.pop() - sentence = ' '.join(words) + ' [...]' - - if (('using the Article Wizard if you wish' in sentence) - or ('or add a request for it' in sentence)): - if not last: - term = search(term) - return uncyclopedia(term, last=True) - return None - - sentence = '"' + sentence.replace('"', "'") + '"' - return sentence + ' - ' + (wikiuri % term) - -def uncyc(phenny, input): - origterm = input.groups()[1] - if not origterm: - return phenny.say('Perhaps you meant ".uncyc Zen"?') - origterm = origterm - - term = urllib.parse.unquote(origterm) - term = term[0].upper() + term[1:] - term = term.replace(' ', '_') - - try: result = uncyclopedia(term) - except IOError: - error = "Can't connect to uncyclopedia.wikia.com (%s)" % (wikiuri % term) - return phenny.say(error) - - if result is not None: - phenny.say(result) - else: phenny.say('Can\'t find anything in Uncyclopedia for "%s".' % origterm) - -uncyc.commands = ['uncyc'] -uncyc.priority = 'high' - -if __name__ == '__main__': - print(__doc__.strip()) diff --git a/modules/vtluugwiki.py b/modules/vtluugwiki.py index 2979113..3777b53 100644 --- a/modules/vtluugwiki.py +++ b/modules/vtluugwiki.py @@ -11,73 +11,34 @@ author: mutantmonkey """ import re, urllib.request, urllib.parse, urllib.error -import web -import json +import wiki -wikiapi = 'https://vtluug.org/w/api.php?action=query&list=search&srsearch=%s&limit=1&prop=snippet&format=json' -wikiuri = 'https://vtluug.org/wiki/%s' +wikiapi = 'https://vtluug.org/w/api.php?action=query&list=search&srsearch={0}&limit=1&prop=snippet&format=json' +wikiuri = 'https://vtluug.org/wiki/{0}' wikisearch = 'https://vtluug.org/wiki/Special:Search?' \ - + 'search=%s&fulltext=Search' - -r_tr = re.compile(r'(?ims)]*>.*?') -r_paragraph = re.compile(r'(?ims)]*>.*?

|]*>.*?') -r_tag = re.compile(r'<(?!!)[^>]+>') -r_whitespace = re.compile(r'[\t\r\n ]+') -r_redirect = re.compile( - r'(?ims)class=.redirectText.>\s*') - s = s.replace('<', '<') - s = s.replace('&', '&') - s = s.replace(' ', ' ') - return s - -def text(html): - html = r_tag.sub('', html) - html = r_whitespace.sub(' ', html) - return unescape(html).strip() - -def vtluugwiki(term, last=False): - global wikiapi, wikiuri - url = wikiapi % term - bytes = web.get(url) - result = json.loads(bytes) - result = result['query']['search'] - if len(result) <= 0: - return None - term = result[0]['title'] - term = term.replace(' ', '_') - snippet = text(result[0]['snippet']) - return "%s - %s" % (snippet, wikiuri % term) + + 'search={0}&fulltext=Search' def vtluug(phenny, input): origterm = input.groups()[1] if not origterm: - return phenny.say('Perhaps you meant ".vtluug Zen"?') - origterm = origterm + return phenny.say('Perhaps you meant ".vtluug VT-Wireless"?') term = urllib.parse.unquote(origterm) term = term[0].upper() + term[1:] term = term.replace(' ', '_') - try: result = vtluugwiki(term) + w = wiki.Wiki(wikiapi, wikiuri, wikisearch) + + try: + result = w.search(term) except IOError: - error = "Can't connect to vtluug.org (%s)" % (wikiuri % term) + error = "Can't connect to vtluug.org ({0})".format(wikiuri.format(term)) return phenny.say(error) if result is not None: phenny.say(result) - else: phenny.say('Can\'t find anything in the VTLUUG Wiki for "%s".' % origterm) + else: + phenny.say('Can\'t find anything in the VTLUUG Wiki for "{0}".'.format(origterm)) vtluug.commands = ['vtluug'] vtluug.priority = 'high' diff --git a/modules/wikipedia.py b/modules/wikipedia.py index 8b2d29d..fa1a6f2 100644 --- a/modules/wikipedia.py +++ b/modules/wikipedia.py @@ -8,73 +8,34 @@ http://inamidst.com/phenny/ """ import re, urllib.request, urllib.parse, urllib.error, gzip, io -import web -import json +import wiki -wikiapi = 'http://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=%s&limit=1&prop=snippet&format=json' -wikiuri = 'http://en.wikipedia.org/wiki/%s' +wikiapi = 'http://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={0}&limit=1&prop=snippet&format=json' +wikiuri = 'http://en.wikipedia.org/wiki/{0}' wikisearch = 'http://en.wikipedia.org/wiki/Special:Search?' \ - + 'search=%s&fulltext=Search' - -r_tr = re.compile(r'(?ims)]*>.*?') -r_paragraph = re.compile(r'(?ims)]*>.*?

|]*>.*?') -r_tag = re.compile(r'<(?!!)[^>]+>') -r_whitespace = re.compile(r'[\t\r\n ]+') -r_redirect = re.compile( - r'(?ims)class=.redirectText.>\s*') - s = s.replace('<', '<') - s = s.replace('&', '&') - s = s.replace(' ', ' ') - return s - -def text(html): - html = r_tag.sub('', html) - html = r_whitespace.sub(' ', html) - return unescape(html).strip() - -def wikipedia(term, last=False): - global wikiapi, wikiuri - url = wikiapi % term - bytes = web.get(url) - result = json.loads(bytes) - result = result['query']['search'] - if len(result) <= 0: - return None - term = result[0]['title'] - term = term.replace(' ', '_') - snippet = text(result[0]['snippet']) - return "%s - %s" % (snippet, wikiuri % term) + + 'search={0}&fulltext=Search' def wik(phenny, input): origterm = input.groups()[1] if not origterm: return phenny.say('Perhaps you meant ".wik Zen"?') - origterm = origterm term = urllib.parse.unquote(origterm) term = term[0].upper() + term[1:] term = term.replace(' ', '_') - try: result = wikipedia(term) + w = wiki.Wiki(wikiapi, wikiuri, wikisearch) + + try: + result = w.search(term) except IOError: - error = "Can't connect to en.wikipedia.org (%s)" % (wikiuri % term) + error = "Can't connect to en.wikipedia.org ({0})".format(wikiuri.format(term)) return phenny.say(error) if result is not None: phenny.say(result) - else: phenny.say('Can\'t find anything in Wikipedia for "%s".' % origterm) + else: + phenny.say('Can\'t find anything in Wikipedia for "{0}".'.format(origterm)) wik.commands = ['wik'] wik.priority = 'high' diff --git a/wiki.py b/wiki.py new file mode 100644 index 0000000..f86a7a9 --- /dev/null +++ b/wiki.py @@ -0,0 +1,54 @@ +import json +import re +import web + + +r_tr = re.compile(r'(?ims)]*>.*?') +r_paragraph = re.compile(r'(?ims)]*>.*?

|]*>.*?') +r_tag = re.compile(r'<(?!!)[^>]+>') +r_whitespace = re.compile(r'[\t\r\n ]+') +r_redirect = re.compile( + r'(?ims)class=.redirectText.>\s*') + s = s.replace('<', '<') + s = s.replace('&', '&') + s = s.replace(' ', ' ') + return s + + @staticmethod + def text(html): + html = r_tag.sub('', html) + html = r_whitespace.sub(' ', html) + return Wiki.unescape(html).strip() + + def search(self, term, last=False): + url = self.api.format(term) + bytes = web.get(url) + result = json.loads(bytes) + result = result['query']['search'] + if len(result) <= 0: + return None + term = result[0]['title'] + term = term.replace(' ', '_') + snippet = self.text(result[0]['snippet']) + return "{0} - {1}".format(snippet, self.url.format(term)) +