2009-01-19 11:47:28 -05:00
|
|
|
#!/usr/bin/env python
|
|
|
|
"""
|
|
|
|
wiktionary.py - Phenny Wiktionary Module
|
|
|
|
Copyright 2009, Sean B. Palmer, inamidst.com
|
|
|
|
Licensed under the Eiffel Forum License 2.
|
|
|
|
|
|
|
|
http://inamidst.com/phenny/
|
|
|
|
"""
|
|
|
|
|
|
|
|
import re
|
|
|
|
import web
|
2012-06-02 00:01:56 -04:00
|
|
|
import json
|
2009-01-19 11:47:28 -05:00
|
|
|
|
|
|
|
# --- Wiktionary endpoints -------------------------------------------------

# Printable-page URL template (%-style placeholder for the title).
# Not referenced by the code visible in this module.
uri = 'http://en.wiktionary.org/w/index.php?title=%s&printable=yes'

# MediaWiki API query returning the current revision content of a page as
# JSON; `{0}` is filled with the URL-quoted page title via str.format().
wikiapi = 'http://en.wiktionary.org/w/api.php?action=query&titles={0}&prop=revisions&rvprop=content&format=json'

# --- Markup patterns ------------------------------------------------------

#r_tag = re.compile(r'<[^>]+>')

# A whole <ul>...</ul> element (case-insensitive, may span several lines).
r_ul = re.compile(r'(?ims)<ul>.*?</ul>')

# Leading "# " list marker of a definition line.
r_li = re.compile(r'^# ')

# Embedded image markup: from "[[Image:" up to the LAST "]]" on the line
# (the match is greedy).
r_img = re.compile(r'\[\[Image:.*\]\]')

# Plain wiki link [[target]]; group 1 is the target.
r_link1 = re.compile(r'\[\[([A-Za-z0-9\-_ ]+?)\]\]')

# Piped wiki link [[target|label]]; group 1 is the target, group 2 the label.
r_link2 = re.compile(r'\[\[([A-Za-z0-9\-_ ]+?)\|(.+?)\]\]')

# {{context|...}} template; group 1 is everything after the first pipe.
r_context = re.compile(r'{{context\|(.+?)}}')

# Any template with at least one pipe; group 1 is the text after the first pipe.
r_template1 = re.compile(r'{{.+?\|(.+?)}}')

# Any remaining template; group 1 is its full contents.
r_template2 = re.compile(r'{{(.+?)}}')
|
2009-01-19 11:47:28 -05:00
|
|
|
|
|
|
|
def text(html):
    """Reduce one line of wiki markup to plain text.

    The leading "# " list marker is removed and surrounding whitespace is
    trimmed; the remaining markup patterns are then applied in a fixed
    order: images are dropped, links are replaced by their target or label,
    and templates are rewritten as ``<captured text>:``.

    Despite the parameter name, the callers in this module pass lines of
    wikitext rather than HTML.
    """
    cleaned = r_li.sub('', html).strip()

    # Order matters: the specific {{context|...}} rule must run before the
    # generic template rules, and piped templates before bare ones.
    substitutions = (
        (r_img, ''),
        (r_link1, r'\1'),
        (r_link2, r'\2'),
        (r_context, r'\1:'),
        (r_template1, r'\1:'),
        (r_template2, r'\1:'),
    )
    for pattern, replacement in substitutions:
        cleaned = pattern.sub(replacement, cleaned)
    return cleaned
|
2009-01-19 11:47:28 -05:00
|
|
|
|
|
|
|
def wiktionary(word):
    """Fetch *word* from English Wiktionary and pull out a short summary.

    The current wikitext of the page is requested through the ``wikiapi``
    URL and scanned line by line until a line containing
    ``====Synonyms====`` is reached.

    Returns:
        A pair ``(etymology, definitions)``:

        * ``etymology`` - the cleaned text of the first non-heading line
          following a line that contains ``Etymology`` (a later such
          section overwrites an earlier one), or ``None`` if there was none.
        * ``definitions`` - dict mapping a part-of-speech name (``'noun'``,
          ``'verb'``, ...) to a list of cleaned definition lines. One line
          is collected per heading occurrence: the first line containing
          ``#`` after the heading.

        ``('', '')`` is returned when the API response carries no page
        content (missing keys, no pages at all, or an empty revision list).
    """
    raw = web.get(wikiapi.format(web.quote(word)))
    data = json.loads(raw)

    # Any gap in the expected response structure means "no such entry";
    # report that uniformly instead of letting a lookup error escape.
    try:
        pages = data['query']['pages']
        page = pages[next(iter(pages))]
        result = page['revisions'][0]['*']
    except (KeyError, IndexError, StopIteration):
        return '', ''

    # (substring looked for in a line, mode it switches to), tested in
    # this order; the first match wins.
    headings = (
        ('Etymology', 'etymology'),
        ('==Noun==', 'noun'),
        ('==Verb==', 'verb'),
        ('==Adjective==', 'adjective'),
        ('==Adverb==', 'adverb'),
        ('==Interjection==', 'interjection'),
        ('Particle', 'particle'),
        ('==Preposition==', 'preposition'),
    )

    mode = None
    etymology = None
    definitions = {}
    for line in result.splitlines():
        for marker, section in headings:
            if marker in line:
                mode = section
                break
        else:
            # Not a heading line: consume it according to the current mode.
            if mode == 'etymology':
                etymology = text(line)
                mode = None
            elif mode is not None and '#' in line:
                definitions.setdefault(mode, []).append(text(line))
                # Only the first '#' line per heading is kept; presumably
                # this skips example/quotation sub-lines - confirm intent.
                mode = None

        if '====Synonyms====' in line:
            break
    return etymology, definitions
|
2009-01-19 11:47:28 -05:00
|
|
|
|
|
|
|
# Parts of speech, in the order format() lists them in its output.
parts = (
    'preposition',
    'particle',
    'noun',
    'verb',
    'adjective',
    'adverb',
    'interjection',
)
|
2009-01-19 11:47:28 -05:00
|
|
|
|
|
|
|
def format(word, definitions, number=2):
    """Render *word* and its definitions as a single line of text.

    For every part of speech in ``parts`` that is present in *definitions*,
    at most *number* entries are appended as
    `` \u2014 <part>: 1. <def>, 2. <def>``; each entry has surrounding spaces
    and full stops removed. Leading/trailing spaces, full stops and commas
    are stripped from the final string.
    """
    pieces = ['%s' % word]
    for part in parts:
        if part not in definitions:
            continue
        numbered = []
        for position, entry in enumerate(definitions[part][:number], 1):
            numbered.append('%s. %s' % (position, entry.strip(' .')))
        pieces.append(' \u2014 ' + ('%s: ' % part) + ', '.join(numbered))
    return ''.join(pieces).strip(' .,')
|
2009-01-19 11:47:28 -05:00
|
|
|
|
|
|
|
def w(phenny, input):
    """.w <word> - Get the definition of a word from wiktionary."""
    # NOTE: the docstring above is left untouched on purpose; it may be
    # surfaced to users as help text by the bot framework (not visible here).

    word = input.group(2)
    if not word:
        return phenny.reply("Nothing to define.")

    _etymology, definitions = wiktionary(word)
    if not definitions:
        phenny.say("Couldn't get any definitions for %s." % word)
        return

    # Start with two definitions per part of speech; while the line is
    # still short, retry with progressively more.
    message = format(word, definitions)
    for limit in (3, 5):
        if len(message) < 150:
            message = format(word, definitions, limit)

    # Cap the output length, marking the cut.
    if len(message) > 300:
        message = message[:295] + '[...]'
    phenny.say(message)


w.commands = ['w']
w.example = '.w bailiwick'
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Run directly: just print the module's docstring banner.
    banner = __doc__.strip()
    print(banner)
|