Merge branch 'master' of https://github.com/sbp/phenny
commit
acbbd199c2
|
@ -8,16 +8,25 @@ http://inamidst.com/phenny/
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import urllib.request
|
||||||
import web
|
import web
|
||||||
from tools import deprecated
|
from tools import deprecated
|
||||||
|
|
||||||
etyuri = 'http://etymonline.com/?term=%s'
|
etysite = 'http://www.etymonline.com/index.php?'
|
||||||
etysearch = 'http://etymonline.com/?search=%s'
|
etyuri = etysite + 'allowed_in_frame=0&term=%s'
|
||||||
|
etysearch = etysite + 'allowed_in_frame=0&search=%s'
|
||||||
|
|
||||||
r_definition = re.compile(r'(?ims)<dd[^>]*>.*?</dd>')
|
r_definition = re.compile(r'(?ims)<dd[^>]*>.*?</dd>')
|
||||||
r_tag = re.compile(r'<(?!!)[^>]+>')
|
r_tag = re.compile(r'<(?!!)[^>]+>')
|
||||||
r_whitespace = re.compile(r'[\t\r\n ]+')
|
r_whitespace = re.compile(r'[\t\r\n ]+')
|
||||||
|
|
||||||
|
class Grab(urllib.request.URLopener):
    """Legacy URL opener with a browser-like User-Agent that does not raise
    on HTTP error statuses.

    The opener advertises itself as ``Mozilla/5.0 (Phenny)`` and overrides
    ``http_error_default`` so that an error response is handed back to the
    caller as a readable response object.

    NOTE(review): ``urllib.request.URLopener`` is a deprecated stdlib class;
    confirm it is still available on the interpreter this bot targets.
    """

    def __init__(self, *args):
        """Set the User-Agent string, then run the base initializer.

        Positional arguments are forwarded unchanged to
        ``urllib.request.URLopener.__init__``.
        """
        # The version must be assigned before the base constructor runs so
        # that it is used as the opener's User-Agent.
        self.version = 'Mozilla/5.0 (Phenny)'
        # Python 3: URLopener lives in urllib.request, not on the bare
        # urllib package (urllib.URLopener raises AttributeError).
        urllib.request.URLopener.__init__(self, *args)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Wrap an HTTP error response instead of raising.

        Returns an ``addinfourl`` object around ``fp`` whose URL is
        ``"http:" + url`` and whose headers slot holds the two-item list
        ``[headers, errcode]``. ``errmsg`` is not used.
        """
        # Python 3: addinfourl is defined in urllib.response; imported here
        # so the class does not depend on an extra module-level import.
        from urllib.response import addinfourl

        # The [headers, errcode] list is kept exactly as before; presumably
        # callers unpack it to recover the status code -- verify against
        # the code that consumes this response.
        return addinfourl(fp, [headers, errcode], "http:" + url)
|
||||||
|
|
||||||
abbrs = [
|
abbrs = [
|
||||||
'cf', 'lit', 'etc', 'Ger', 'Du', 'Skt', 'Rus', 'Eng', 'Amer.Eng', 'Sp',
|
'cf', 'lit', 'etc', 'Ger', 'Du', 'Skt', 'Rus', 'Eng', 'Amer.Eng', 'Sp',
|
||||||
'Fr', 'N', 'E', 'S', 'W', 'L', 'Gen', 'J.C', 'dial', 'Gk',
|
'Fr', 'N', 'E', 'S', 'W', 'L', 'Gen', 'J.C', 'dial', 'Gk',
|
||||||
|
@ -46,7 +55,11 @@ def etymology(word):
|
||||||
raise ValueError("Word too long: %s[...]" % word[:10])
|
raise ValueError("Word too long: %s[...]" % word[:10])
|
||||||
word = {'axe': 'ax/axe'}.get(word, word)
|
word = {'axe': 'ax/axe'}.get(word, word)
|
||||||
|
|
||||||
bytes = web.get(etyuri % web.urllib.quote(word))
|
grab = urllib.request._urlopener
|
||||||
|
urllib.request._urlopener = Grab()
|
||||||
|
urllib.request._urlopener.addheader("Referer", "http://www.etymonline.com/")
|
||||||
|
bytes = web.get(etyuri % web.quote(word))
|
||||||
|
urllib.request._urlopener = grab
|
||||||
definitions = r_definition.findall(bytes)
|
definitions = r_definition.findall(bytes)
|
||||||
|
|
||||||
if not definitions:
|
if not definitions:
|
||||||
|
@ -59,8 +72,10 @@ def etymology(word):
|
||||||
sentence = m.group(0)
|
sentence = m.group(0)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
sentence = str(sentence, 'iso-8859-1')
|
sentence = unicode(sentence, 'iso-8859-1')
|
||||||
|
sentence = sentence.encode('utf-8')
|
||||||
except: pass
|
except: pass
|
||||||
|
sentence = web.decode(sentence)
|
||||||
|
|
||||||
maxlength = 275
|
maxlength = 275
|
||||||
if len(sentence) > maxlength:
|
if len(sentence) > maxlength:
|
||||||
|
@ -70,7 +85,7 @@ def etymology(word):
|
||||||
sentence = ' '.join(words) + ' [...]'
|
sentence = ' '.join(words) + ' [...]'
|
||||||
|
|
||||||
sentence = '"' + sentence.replace('"', "'") + '"'
|
sentence = '"' + sentence.replace('"', "'") + '"'
|
||||||
return sentence + ' - ' + (etyuri % word)
|
return sentence + ' - etymonline.com'
|
||||||
|
|
||||||
@deprecated
|
@deprecated
|
||||||
def f_etymology(self, origin, match, args):
|
def f_etymology(self, origin, match, args):
|
||||||
|
|
Loading…
Reference in New Issue