New translation module, using the Google Ajax interface.
parent
d7f8faabe8
commit
87b9866c7c
|
@ -8,107 +8,67 @@ Licensed under the Eiffel Forum License 2.
|
||||||
http://inamidst.com/phenny/
|
http://inamidst.com/phenny/
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re, time
|
import re, urllib
|
||||||
import web
|
import web
|
||||||
|
|
||||||
r_translation = re.compile(r'<div style="padding:\S+?;">([^<]+)</div>')
|
r_json = re.compile(r'^[,:{}\[\]0-9.\-+Eaeflnr-u \n\r\t]+$')
|
||||||
|
r_string = re.compile(r'("(\\.|[^"\\])*")')
|
||||||
|
env = {'__builtins__': None, 'null': None,
|
||||||
|
'true': True, 'false': False}
|
||||||
|
|
||||||
def guess_language(phrase):
|
def json(text):
|
||||||
languages = {
|
"""Evaluate JSON text safely (we hope)."""
|
||||||
'english': 'en',
|
if r_json.match(r_string.sub('', text)):
|
||||||
'french': 'fr',
|
text = r_string.sub(lambda m: 'u' + m.group(1), text)
|
||||||
'spanish': 'es',
|
return eval(text.strip(' \t\r\n'), env, {})
|
||||||
'portuguese': 'pt',
|
raise ValueError('Input must be serialised JSON.')
|
||||||
'german': 'de',
|
|
||||||
'italian': 'it',
|
|
||||||
'korean': 'ko',
|
|
||||||
'japanese': 'ja',
|
|
||||||
'chinese': 'zh',
|
|
||||||
'dutch': 'nl',
|
|
||||||
'greek': 'el',
|
|
||||||
'russian': 'ru'
|
|
||||||
}
|
|
||||||
|
|
||||||
uri = 'http://www.xrce.xerox.com/cgi-bin/mltt/LanguageGuesser'
|
def detect(text):
|
||||||
form = {'Text': phrase}
|
uri = 'http://ajax.googleapis.com/ajax/services/language/detect'
|
||||||
bytes = web.post(uri, form)
|
q = urllib.quote(text)
|
||||||
for line in bytes.splitlines():
|
bytes = web.get(uri + '?q=' + q + '&v=1.0')
|
||||||
if '<listing><font size=+1>' in line:
|
result = json(bytes)
|
||||||
i = line.find('<listing><font size=+1>')
|
try: return result['responseData']['language']
|
||||||
lang = line[i+len('<listing><font size=+1>'):].strip()
|
except Exception: return None
|
||||||
lang = lang.lower()
|
|
||||||
if '_' in lang:
|
|
||||||
j = lang.find('_')
|
|
||||||
lang = lang[:j]
|
|
||||||
try: return languages[lang].lower()
|
|
||||||
except KeyError:
|
|
||||||
return lang.lower()
|
|
||||||
return 'Moon Language'
|
|
||||||
|
|
||||||
def translate(phrase, lang, target='en'):
|
def translate(text, input, output):
|
||||||
babelfish = 'http://uk.babelfish.yahoo.com/translate_txt'
|
uri = 'http://ajax.googleapis.com/ajax/services/language/translate'
|
||||||
form = {
|
q = urllib.quote(text)
|
||||||
'ei': 'UTF-8',
|
pair = input + '%7C' + output
|
||||||
'doit': 'done',
|
bytes = web.get(uri + '?q=' + q + '&v=1.0&langpair=' + pair)
|
||||||
'fr': 'bf-home',
|
result = json(bytes)
|
||||||
'intl': '1',
|
try: msg = result['responseData']['translatedText']
|
||||||
'tt': 'urltext',
|
except Exception:
|
||||||
'trtext': phrase,
|
msg = 'The %s to %s translation failed, sorry!' % (input, output)
|
||||||
'lp': lang + '_' + target
|
else:
|
||||||
}
|
msg = msg.encode('cp1252').replace('&#39;', "'")
|
||||||
|
msg = '"%s" (%s to %s, translate.google.com)' % (msg, input, output)
|
||||||
|
return msg
|
||||||
|
|
||||||
bytes = web.post(babelfish, form)
|
def tr(phenny, context):
|
||||||
m = r_translation.search(bytes)
|
|
||||||
if m:
|
|
||||||
translation = m.group(1)
|
|
||||||
translation = translation.replace('\r', ' ')
|
|
||||||
translation = translation.replace('\n', ' ')
|
|
||||||
while '  ' in translation:
|
|
||||||
translation = translation.replace('  ', ' ')
|
|
||||||
return translation.lower()
|
|
||||||
return None
|
|
||||||
|
|
||||||
def tr(phenny, input):
|
|
||||||
"""Translates a phrase, with an optional language hint."""
|
"""Translates a phrase, with an optional language hint."""
|
||||||
original_input = input
|
input, output, phrase = context.groups()
|
||||||
input, output, phrase = original_input.groups()
|
|
||||||
phrase = phrase.encode('utf-8')
|
phrase = phrase.encode('utf-8')
|
||||||
if (len(phrase) > 350) and (not original_input.admin):
|
|
||||||
|
if (len(phrase) > 350) and (not context.admin):
|
||||||
return phenny.reply('Phrase must be under 350 characters.')
|
return phenny.reply('Phrase must be under 350 characters.')
|
||||||
|
|
||||||
input = input or guess_language(phrase)
|
input = input or detect(phrase)
|
||||||
if not input:
|
if not input:
|
||||||
return phenny.reply('Unable to guess the language, sorry.')
|
err = 'Unable to guess your crazy moon language, sorry.'
|
||||||
|
return phenny.reply(err)
|
||||||
input = input.encode('utf-8')
|
input = input.encode('utf-8')
|
||||||
output = (output or 'en').encode('utf-8')
|
output = (output or 'en').encode('utf-8')
|
||||||
|
|
||||||
if not ((input == 'en') and (output == 'en')):
|
if input != output:
|
||||||
translation = translate(phrase, input, output)
|
msg = translate(phrase, input, output)
|
||||||
if translation is not None:
|
phenny.reply(msg)
|
||||||
translation = translation.decode('utf-8').encode('utf-8')
|
else: phenny.reply('Ehwhatnow?')
|
||||||
if output == 'en':
|
|
||||||
return phenny.reply('"%s" (%s)' % (translation, input))
|
|
||||||
else: return phenny.reply('"%s" (%s -> %s)' % \
|
|
||||||
(translation, input, output))
|
|
||||||
|
|
||||||
error = "I think it's %s, but I can't translate it currently."
|
|
||||||
return phenny.reply(error % input.title())
|
|
||||||
|
|
||||||
# Otherwise, it's English, so mangle it for fun
|
|
||||||
for other in ['de', 'ja', 'de', 'ja', 'de', 'ja', 'de', 'ja', 'de', 'ja']:
|
|
||||||
phrase = translate(phrase, 'en', other)
|
|
||||||
phrase = translate(phrase, other, 'en')
|
|
||||||
time.sleep(0.1)
|
|
||||||
|
|
||||||
if phrase is not None:
|
|
||||||
return phenny.reply(u'"%s" (en-unmangled)' % phrase)
|
|
||||||
return phenny.reply("I think it's English already.")
|
|
||||||
# @@ or 'Why but that be English, sire.'
|
|
||||||
tr.rule = ('$nick', ur'(?:([a-z]{2}) +)?(?:([a-z]{2}) +)?["“](.+?)["”]\? *$')
|
tr.rule = ('$nick', ur'(?:([a-z]{2}) +)?(?:([a-z]{2}) +)?["“](.+?)["”]\? *$')
|
||||||
tr.example = '$nickname: "mon chien"? or $nickname: fr "mon chien"?'
|
tr.example = '$nickname: "mon chien"? or $nickname: fr "mon chien"?'
|
||||||
tr.priority = 'low'
|
tr.priority = 'low'
|
||||||
|
|
||||||
# @@ mangle
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print __doc__.strip()
|
print __doc__.strip()
|
||||||
|
|
Loading…
Reference in New Issue