phenny/modules/wiktionary.py

117 lines
3.4 KiB
Python

#!/usr/bin/env python
"""
wiktionary.py - Phenny Wiktionary Module
Copyright 2009, Sean B. Palmer, inamidst.com
Licensed under the Eiffel Forum License 2.
http://inamidst.com/phenny/
"""
import re
import web
import json
uri = 'http://en.wiktionary.org/w/index.php?title=%s&printable=yes'
wikiapi = 'http://en.wiktionary.org/w/api.php?action=query&titles={0}&prop=revisions&rvprop=content&format=json'
#r_tag = re.compile(r'<[^>]+>')
r_ul = re.compile(r'(?ims)<ul>.*?</ul>')
r_li = re.compile(r'^# ')
r_img = re.compile(r'\[\[Image:.*\]\]')
r_link1 = re.compile(r'\[\[([A-Za-z0-9\-_ ]+?)\]\]')
r_link2 = re.compile(r'\[\[([A-Za-z0-9\-_ ]+?)\|(.+?)\]\]')
r_context = re.compile(r'{{context\|(.+?)}}')
r_template1 = re.compile(r'{{.+?\|(.+?)}}')
r_template2 = re.compile(r'{{(.+?)}}')
def text(html):
text = r_li.sub('', html).strip()
text = r_img.sub('', text)
text = r_link1.sub(r'\1', text)
text = r_link2.sub(r'\2', text)
text = r_context.sub(r'\1:', text)
text = r_template1.sub(r'\1:', text)
text = r_template2.sub(r'\1:', text)
return text
def wiktionary(word):
bytes = web.get(wikiapi.format(web.quote(word)))
pages = json.loads(bytes)
pages = pages['query']['pages']
pg = next(iter(pages))
try:
result = pages[pg]['revisions'][0]['*']
except KeyError:
return '', ''
mode = None
etymology = None
definitions = {}
for line in result.splitlines():
if 'Etymology' in line:
mode = 'etymology'
elif '==Noun==' in line:
mode = 'noun'
elif '==Verb==' in line:
mode = 'verb'
elif '==Adjective==' in line:
mode = 'adjective'
elif '==Adverb==' in line:
mode = 'adverb'
elif '==Interjection==' in line:
mode = 'interjection'
elif 'Particle' in line:
mode = 'particle'
elif '==Preposition==' in line:
mode = 'preposition'
elif mode == 'etymology':
etymology = text(line)
mode = None
elif mode is not None and '#' in line:
definitions.setdefault(mode, []).append(text(line))
mode = None
if '====Synonyms====' in line:
break
return etymology, definitions
parts = ('preposition', 'particle', 'noun', 'verb',
'adjective', 'adverb', 'interjection')
def format(word, definitions, number=2):
result = '%s' % word
for part in parts:
if part in definitions:
defs = definitions[part][:number]
result += ' \u2014 ' + ('%s: ' % part)
n = ['%s. %s' % (i + 1, e.strip(' .')) for i, e in enumerate(defs)]
result += ', '.join(n)
return result.strip(' .,')
def w(phenny, input):
""".w <word> - Get the definition of a word from wiktionary."""
if not input.group(2):
return phenny.reply("Nothing to define.")
word = input.group(2)
etymology, definitions = wiktionary(word)
if not definitions:
phenny.say("Couldn't get any definitions for %s." % word)
return
result = format(word, definitions)
if len(result) < 150:
result = format(word, definitions, 3)
if len(result) < 150:
result = format(word, definitions, 5)
if len(result) > 300:
result = result[:295] + '[...]'
phenny.say(result)
w.commands = ['w']
w.example = '.w bailiwick'
if __name__ == '__main__':
print(__doc__.strip())