#!/usr/bin/env python """ wikipedia.py - Phenny Wikipedia Module Copyright 2008-9, Sean B. Palmer, inamidst.com Licensed under the Eiffel Forum License 2. http://inamidst.com/phenny/ """ import re, urllib import web wikiuri = 'http://%s.wikipedia.org/wiki/%s' # wikisearch = 'http://%s.wikipedia.org/wiki/Special:Search?' \ # + 'search=%s&fulltext=Search' r_tr = re.compile(r'(?ims)
]*>.*?
|') and para.endswith('
')) and not 'disambiguation)"' in para) and not '(images and media)' in para and not 'This article contains a' in para and not 'id="coordinates"' in para and not 'class="thumb' in para] # and not 'style="display:none"' in para] for i, para in enumerate(paragraphs): para = para.replace('', '|') para = para.replace('', '|') paragraphs[i] = text(para).strip() # Post-process paragraphs = [para for para in paragraphs if (para and not (para.endswith(':') and len(para) < 150))] para = text(paragraphs[0]) m = r_sentence.match(para) if not m: if not last: term = search(term) return wikipedia(term, language=language, last=True) return None sentence = m.group(0) maxlength = 275 if len(sentence) > maxlength: sentence = sentence[:maxlength] words = sentence[:-5].split(' ') words.pop() sentence = ' '.join(words) + ' [...]' if (('using the Article Wizard if you wish' in sentence) or ('or add a request for it' in sentence)): if not last: term = search(term) return wikipedia(term, language=language, last=True) return None sentence = '"' + sentence.replace('"', "'") + '"' sentence = sentence.decode('utf-8').encode('utf-8') wikiuri = wikiuri.decode('utf-8').encode('utf-8') term = term.decode('utf-8').encode('utf-8') return sentence + ' - ' + (wikiuri % (language, term)) def wik(phenny, input): origterm = input.groups()[1] if not origterm: return phenny.say('Perhaps you meant ".wik Zen"?') origterm = origterm.encode('utf-8') term = urllib.unquote(origterm) language = 'en' if term.startswith(':') and (' ' in term): a, b = term.split(' ', 1) a = a.lstrip(':') if a.isalpha(): language, term = a, b term = term[0].upper() + term[1:] term = term.replace(' ', '_') try: result = wikipedia(term, language) except IOError: args = (language, wikiuri % (language, term)) error = "Can't connect to %s.wikipedia.org (%s)" % args return phenny.say(error) if result is not None: phenny.say(result) else: phenny.say('Can\'t find anything in Wikipedia for "%s".' % origterm) wik.commands = ['wik'] wik.priority = 'high' if __name__ == '__main__': print __doc__.strip()