#!/usr/bin/env python
"""
etymology.py - Phenny Etymology Module
Copyright 2007-9, Sean B. Palmer, inamidst.com
Licensed under the Eiffel Forum License 2.

http://inamidst.com/phenny/
"""

# NOTE(review): this text was recovered from a whitespace-collapsed deletion
# diff of modules/etymology.py (index 595dbd5..0000000). Pieces that the
# extraction visibly destroyed are reconstructed below and individually
# marked; confirm them against the upstream file before relying on them.

import re
import urllib.request
import urllib.response

import web
from tools import deprecated

etysite = 'http://www.etymonline.com/index.php?'
etyuri = etysite + 'allowed_in_frame=0&term=%s'
etysearch = etysite + 'allowed_in_frame=0&search=%s'

# NOTE(review): the recovered pattern read '(?ims)]*>.*?' -- the markup on
# both ends was stripped. Reconstructed as "one <dd> element"; confirm.
r_definition = re.compile(r'(?ims)<dd[^>]*>.*?</dd>')
r_tag = re.compile(r'<(?!!)[^>]+>')
r_whitespace = re.compile(r'[\t\r\n ]+')


class Grab(urllib.request.URLopener):
    """URL opener that sends a browser-like User-Agent.

    HTTP error responses are returned as response objects instead of being
    raised.

    NOTE(review): ``urllib.request.URLopener`` is a deprecated legacy class;
    check that it still exists on the Python version this runs on.
    """

    def __init__(self, *args):
        # Must be set before the base initialiser runs, which reads
        # self.version to build the default User-Agent header.
        self.version = 'Mozilla/5.0 (Phenny)'
        # Fixed: was urllib.URLopener, which does not exist in Python 3.
        super().__init__(*args)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Wrap an HTTP error response instead of raising for it."""
        # Fixed: was urllib.addinfourl; the class lives in urllib.response.
        return urllib.response.addinfourl(
            fp, [headers, errcode], "http:" + url)


# Abbreviations whose trailing full stop must not be taken as the end of a
# sentence.
abbrs = [
    'cf', 'lit', 'etc', 'Ger', 'Du', 'Skt', 'Rus', 'Eng', 'Amer.Eng', 'Sp',
    'Fr', 'N', 'E', 'S', 'W', 'L', 'Gen', 'J.C', 'dial', 'Gk',
    '19c', '18c', '17c', '16c', 'St', 'Capt', 'obs', 'Jan', 'Feb', 'Mar',
    'Apr', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'c', 'tr', 'e', 'g'
]
# NOTE(review): everything between "r'^.*?(?" and the end of the first
# replace() call in unescape() was lost in extraction. The sentence pattern,
# its compilation, and the '&gt;' replacement are reconstructed; confirm.
# Intended meaning: shortest prefix ending at a full stop that is not
# preceded by one of `abbrs` and is followed by " <capital or digit>" or the
# end of the text; otherwise the whole text.
t_sentence = r'^.*?(?<!%s)(?:\.(?= [A-Z0-9]|\Z)|\Z)'
r_sentence = re.compile(t_sentence % ')(?<!'.join(abbrs))


def unescape(s):
    """Replace the HTML entities for '>', '<' and '&' with the characters.

    NOTE(review): the recovered source had the entities already decoded
    (e.g. ``s.replace('<', '<')``), which is a no-op; the entity names are
    reconstructed.
    """
    s = s.replace('&gt;', '>')
    s = s.replace('&lt;', '<')
    # '&amp;' goes last so that '&amp;lt;' becomes '&lt;' rather than '<'.
    s = s.replace('&amp;', '&')
    return s


def text(html):
    """Strip tags from *html*, collapse whitespace runs, unescape, trim."""
    html = r_tag.sub('', html)
    html = r_whitespace.sub(' ', html)
    return unescape(html).strip()


def etymology(word):
    """Return the first sentence of the etymonline.com entry for *word*.

    The result is wrapped in double quotes and suffixed with
    ' - etymonline.com'. Returns None when the page has no definition
    element or no sentence can be extracted from it.

    Raises:
        ValueError: if *word* is longer than 25 items.
    """
    # @@ sbp, would it be possible to have a flag for .ety to get 2nd/etc
    # entries? - http://swhack.com/logs/2006-07-19#T15-05-29

    if len(word) > 25:
        raise ValueError("Word too long: %s[...]" % word[:10])
    # NOTE(review): f_etymology passes bytes here, so this str-keyed alias
    # can only match for callers that pass str (e.g. the __main__ hook).
    word = {'axe': 'ax/axe'}.get(word, word)

    # Temporarily install our opener as urllib's legacy module-level opener.
    # The try/finally guarantees the previous opener comes back even when the
    # fetch raises; the original left Grab installed on failure.
    saved_opener = urllib.request._urlopener
    urllib.request._urlopener = Grab()
    try:
        urllib.request._urlopener.addheader(
            "Referer", "http://www.etymonline.com/")
        page = web.get(etyuri % web.quote(word))
    finally:
        urllib.request._urlopener = saved_opener

    definitions = r_definition.findall(page)
    if not definitions:
        return None

    defn = text(definitions[0])
    m = r_sentence.match(defn)
    if not m:
        return None
    sentence = m.group(0)

    # The original re-encoded here via unicode(sentence, 'iso-8859-1') inside
    # a bare "except: pass". `unicode` does not exist on Python 3, so that
    # was a silently swallowed NameError on every call; it is dropped rather
    # than kept as dead code.
    sentence = web.decode(sentence)

    maxlength = 275
    if len(sentence) > maxlength:
        sentence = sentence[:maxlength]
        # Leave room for the marker, then drop the (probably cut) last word.
        words = sentence[:-5].split(' ')
        words.pop()
        sentence = ' '.join(words) + ' [...]'

    sentence = '"' + sentence.replace('"', "'") + '"'
    return sentence + ' - etymonline.com'


@deprecated
def f_etymology(self, origin, match, args):
    """Bot command handler: look up a word and message the result back.

    The word is taken from ``match.group(2)``. On IOError a "can't connect"
    message is sent; when no etymology is found a search link is sent.
    """
    word = match.group(2)

    try:
        result = etymology(word.encode('iso-8859-1'))
    except IOError:
        msg = "Can't connect to etymonline.com (%s)" % (etyuri % word)
        self.msg(origin.sender, msg)
        return
    except AttributeError:
        result = None

    if result is not None:
        self.msg(origin.sender, result)
    else:
        uri = etysearch % word
        msg = 'Can\'t find the etymology for "%s". Try %s' % (word, uri)
        self.msg(origin.sender, msg)
# @@ Cf. http://swhack.com/logs/2006-01-04#T01-50-22
f_etymology.rule = (['ety'], r"(.+?)$")
f_etymology.thread = True
f_etymology.priority = 'high'

if __name__ == "__main__":
    import sys
    print(etymology(sys.argv[1]))