phenny-1/modules/wiktionary.py

117 lines
3.4 KiB
Python
Raw Normal View History

2009-01-19 11:47:28 -05:00
#!/usr/bin/env python
"""
wiktionary.py - Phenny Wiktionary Module
Copyright 2009, Sean B. Palmer, inamidst.com
Licensed under the Eiffel Forum License 2.
http://inamidst.com/phenny/
"""
import re
import web
2012-06-02 00:01:56 -04:00
import json
2009-01-19 11:47:28 -05:00
# URL template for the printable HTML rendering of an entry; %s is the page
# title.  NOTE(review): not referenced by any code visible in this module.
uri = 'http://en.wiktionary.org/w/index.php?title=%s&printable=yes'
2012-06-02 00:01:56 -04:00
# MediaWiki API query template asking for the revision content of a page as
# JSON; {0} is filled in with the page title by str.format.
wikiapi = 'http://en.wiktionary.org/w/api.php?action=query&titles={0}&prop=revisions&rvprop=content&format=json'
# Disabled pattern that would match any HTML tag; kept for reference only.
#r_tag = re.compile(r'<[^>]+>')
2009-01-19 11:47:28 -05:00
# Matches a whole <ul>...</ul> element (case-insensitive, multi-line, dot
# matches newlines).  NOTE(review): not referenced by any visible code.
r_ul = re.compile(r'(?ims)<ul>.*?</ul>')
2012-06-02 00:01:56 -04:00
# Patterns used by text() to reduce one line of wikitext to plain text.
# Leading "# " list marker at the very start of the string.
r_li = re.compile(r'^# ')
# Image embed; the greedy .* runs to the last "]]" in the string.
r_img = re.compile(r'\[\[Image:.*\]\]')
# Plain wiki link [[target]]; group 1 is the target.
r_link1 = re.compile(r'\[\[([A-Za-z0-9\-_ ]+?)\]\]')
# Piped wiki link [[target|label]]; group 1 is the target, group 2 the label.
r_link2 = re.compile(r'\[\[([A-Za-z0-9\-_ ]+?)\|(.+?)\]\]')
# {{context|...}} template; group 1 is everything after the first pipe.
r_context = re.compile(r'{{context\|(.+?)}}')
# Any template with at least one pipe; group 1 is what follows the first pipe.
r_template1 = re.compile(r'{{.+?\|(.+?)}}')
# Any remaining template; group 1 is its full contents.
r_template2 = re.compile(r'{{(.+?)}}')
2009-01-19 11:47:28 -05:00
def text(html):
    """Reduce one line of wiki markup to plain text.

    Despite the parameter name, the caller in this module passes a single
    line of wikitext.  A leading "# " list marker is removed and the result
    is stripped of surrounding whitespace; then, in order, image embeds are
    dropped, wiki links are replaced by their target or label, and templates
    are replaced by part of their contents followed by a colon.
    """
    cleaned = r_li.sub('', html).strip()
    # (pattern, replacement) pairs; the order is significant because the
    # generic template patterns would otherwise also consume {{context|...}}.
    rewrites = (
        (r_img, ''),
        (r_link1, r'\1'),
        (r_link2, r'\2'),
        (r_context, r'\1:'),
        (r_template1, r'\1:'),
        (r_template2, r'\1:'),
    )
    for pattern, replacement in rewrites:
        cleaned = pattern.sub(replacement, cleaned)
    return cleaned
2009-01-19 11:47:28 -05:00
# Part-of-speech keywords looked for in section headings, paired with the
# parsing mode they select.  Order matters: the first keyword found wins.
_HEADING_MODES = (
    ('Noun', 'noun'),
    ('Verb', 'verb'),
    ('Adjective', 'adjective'),
    ('Adverb', 'adverb'),
    ('Interjection', 'interjection'),
    ('Particle', 'particle'),
    ('Preposition', 'preposition'),
)


def _heading_mode(line):
    """Return the mode selected by a wikitext heading line, or None.

    Only lines starting with '=' are treated as headings, so a definition or
    etymology line that merely mentions e.g. "Noun" no longer switches mode.
    """
    if not line.startswith('='):
        return None
    for keyword, mode in _HEADING_MODES:
        if keyword in line:
            return mode
    return None


def wiktionary(word):
    """Fetch the Wiktionary entry for *word* and extract its definitions.

    The page's wikitext is requested through the API URL in ``wikiapi`` and
    scanned line by line.  Section headings select the current part of
    speech; a blank line clears it; lines containing '#' seen while a part
    of speech is active are cleaned with text() and recorded.  Scanning
    stops at the first line containing '====Synonyms===='.

    Returns:
        A pair ``(etymology, definitions)``.  ``etymology`` is the cleaned
        text of the last line seen in '===Etymology===' mode, or None.
        ``definitions`` maps a part-of-speech name to a list of cleaned
        definition strings.  ``('', '')`` is returned when the response does
        not contain revision content (missing page, API error payload,
        empty page set).
    """
    response = web.get(wikiapi.format(web.quote(word)))
    data = json.loads(response)
    try:
        pages = data['query']['pages']
        page_id = next(iter(pages))
        result = pages[page_id]['revisions'][0]['*']
    except (KeyError, IndexError, TypeError, StopIteration):
        # The response has no usable revision content; report "nothing
        # found" in the same shape the caller already handles.
        return '', ''

    mode = None
    etymology = None
    definitions = {}
    for line in result.splitlines():
        heading = _heading_mode(line)
        if line == '===Etymology===':
            mode = 'etymology'
        elif heading is not None:
            mode = heading
        elif not line:
            # A blank line ends the current section's run of lines.
            mode = None
        elif mode == 'etymology':
            etymology = text(line)
        elif mode is not None and '#' in line:
            definitions.setdefault(mode, []).append(text(line))

        # Nothing after the first synonyms section is considered.
        if '====Synonyms====' in line:
            break
    return etymology, definitions
2009-01-19 11:47:28 -05:00
# Parts of speech in the order format() lists them in its output.
parts = (
    'preposition',
    'particle',
    'noun',
    'verb',
    'adjective',
    'adverb',
    'interjection',
)
2009-01-19 11:47:28 -05:00
def format(word, definitions, number=2):
    """Render *word* and its definitions as a single line of text.

    For every part of speech in ``parts`` that is present in *definitions*,
    at most *number* definitions are listed, numbered from 1 and separated
    by ', ', each section introduced by an em dash and the part name.
    Leading and trailing spaces, full stops and commas are stripped from
    the final string.
    """
    pieces = ['%s' % word]
    for part in parts:
        if part not in definitions:
            continue
        numbered = []
        for index, entry in enumerate(definitions[part][:number], 1):
            numbered.append('%s. %s' % (index, entry.strip(' .')))
        pieces.append(' \u2014 ' + ('%s: ' % part) + ', '.join(numbered))
    return ''.join(pieces).strip(' .,')
2009-01-19 11:47:28 -05:00
def w(phenny, input):
    """.w <word> - Get the definition of a word from wiktionary."""
    if not input.group(2):
        return phenny.reply("Nothing to define.")

    word = input.group(2)
    _etymology, definitions = wiktionary(word)
    if not definitions:
        phenny.say("Couldn't get any definitions for %s." % word)
        return

    # Start with two definitions per part of speech and include more (3,
    # then 5) for as long as the rendered line stays under 150 characters.
    result = format(word, definitions)
    for number in (3, 5):
        if len(result) >= 150:
            break
        result = format(word, definitions, number)

    # Cap the message length, marking the cut with '[...]'.
    if len(result) > 300:
        result = result[:295] + '[...]'
    phenny.say(result)
2009-01-19 11:47:28 -05:00
# Command metadata attached to the handler: the command name(s) and a sample
# invocation.  Presumably read by the phenny framework when it registers the
# module -- confirm against the bot core.
w.commands = ['w']
w.example = '.w bailiwick'
if __name__ == '__main__':
    # Run directly, the module only prints its own header docstring.
    print(__doc__.strip())