124 lines
3.7 KiB
Python
Executable File
124 lines
3.7 KiB
Python
Executable File
#!/usr/bin/env python
|
|
"""
|
|
dict.py - Phenny Dictionary Module
|
|
Copyright 2008, Sean B. Palmer, inamidst.com
|
|
Licensed under the Eiffel Forum License 2.
|
|
|
|
http://inamidst.com/phenny/
|
|
"""
|
|
|
|
import re, urllib
|
|
import web
|
|
from tools import deprecated
|
|
|
|
formuri = 'http://wordnet.princeton.edu/perl/webwn?s='
|
|
|
|
r_li = re.compile(r'(?ims)<li>.*?</li>')
|
|
r_tag = re.compile(r'<[^>]+>')
|
|
r_parens = re.compile(r'(?<=\()(?:[^()]+|\([^)]+\))*(?=\))')
|
|
r_word = re.compile(r'^[A-Za-z0-9\' -]+$')
|
|
|
|
@deprecated
|
|
def f_wordnet(self, origin, match, args):
|
|
"""Gives the definition of a word using Wordnet."""
|
|
command = 'w'
|
|
term = match.group(2)
|
|
term = term.encode('utf-8')
|
|
|
|
if origin.sender != '#inamidst':
|
|
if not r_word.match(term):
|
|
msg = "Words must match the regexp %s" % r'^[A-Za-z0-9\' -]+$'
|
|
return self.msg(origin.sender, origin.nick + ": " + msg)
|
|
if ('--' in term) or ("''" in term) or (' ' in term):
|
|
self.msg(origin.sender, origin.nick + ": That's not in WordNet.")
|
|
return
|
|
|
|
bytes = web.get(formuri + web.urllib.quote(term)) # @@ ugh!
|
|
items = r_li.findall(bytes)
|
|
|
|
nouns, verbs, adjectives = [], [], []
|
|
for item in items:
|
|
item = r_tag.sub('', item)
|
|
chunks = r_parens.findall(item)
|
|
# self.msg(origin.sender, item)
|
|
if len(chunks) < 2: continue
|
|
|
|
kind, defn = chunks[0], chunks[1]
|
|
if command != 'wordnet':
|
|
defn = defn.split(';')[0]
|
|
if not defn: continue
|
|
defn = defn[0].upper() + defn[1:]
|
|
|
|
if kind == 'n':
|
|
nouns.append(defn)
|
|
elif kind == 'v':
|
|
verbs.append(defn)
|
|
elif kind == 'adj':
|
|
adjectives.append(defn)
|
|
|
|
if not (nouns or verbs or adjectives):
|
|
self.msg(origin.sender, "I couldn't find '%s' in WordNet." % term)
|
|
return
|
|
|
|
while len(nouns + verbs + adjectives) > 3:
|
|
if len(nouns) >= len(verbs) and len(nouns) >= len(adjectives):
|
|
nouns.pop()
|
|
elif len(verbs) >= len(nouns) and len(verbs) >= len(adjectives):
|
|
verbs.pop()
|
|
elif len(adjectives) >= len(nouns) and len(adjectives) >= len(verbs):
|
|
adjectives.pop()
|
|
|
|
if adjectives:
|
|
adjectives[-1] = adjectives[-1] + '.'
|
|
elif verbs:
|
|
verbs[-1] = verbs[-1] + '.'
|
|
elif nouns:
|
|
nouns[-1] = nouns[-1] + '.'
|
|
|
|
for (i, defn) in enumerate(nouns):
|
|
self.msg(origin.sender, '%s n. %r: %s' % (term, i+1, defn))
|
|
for (i, defn) in enumerate(verbs):
|
|
self.msg(origin.sender, '%s v. %r: %s' % (term, i+1, defn))
|
|
for (i, defn) in enumerate(adjectives):
|
|
self.msg(origin.sender, '%s a. %r: %s' % (term, i+1, defn))
|
|
f_wordnet.commands = ['wordnet']
|
|
f_wordnet.priority = 'low'
|
|
|
|
uri = 'http://encarta.msn.com/dictionary_/%s.html'
|
|
r_info = re.compile(
|
|
r'(?:ResultBody"><br /><br />(.*?) )|(?:<b>(.*?)</b>)'
|
|
)
|
|
|
|
def dict(phenny, input):
|
|
word = input.group(2)
|
|
word = urllib.quote(word.encode('utf-8'))
|
|
|
|
def trim(thing):
|
|
if thing.endswith(' '):
|
|
thing = thing[:-6]
|
|
return thing.strip(' :.')
|
|
|
|
bytes = web.get(uri % word)
|
|
results = {}
|
|
wordkind = None
|
|
for kind, sense in r_info.findall(bytes):
|
|
kind, sense = trim(kind), trim(sense)
|
|
if kind: wordkind = kind
|
|
elif sense:
|
|
results.setdefault(wordkind, []).append(sense)
|
|
result = input.group(2).encode('utf-8') + ' - '
|
|
for key in sorted(results.keys()):
|
|
if results[key]:
|
|
result += (key or '') + ' 1. ' + results[key][0]
|
|
if len(results[key]) > 1:
|
|
result += ', 2. ' + results[key][1]
|
|
result += '; '
|
|
result = result.rstrip('; ')
|
|
if result.endswith('-') and (len(result) < 30):
|
|
phenny.reply('Sorry, no definition found.')
|
|
else: phenny.say(result)
|
|
dict.commands = ['dict']
|
|
|
|
if __name__ == '__main__':
|
|
print __doc__.strip()
|