From 87b9866c7c2ebeaa3c1c010176fa3bc24e017300 Mon Sep 17 00:00:00 2001
From: "Sean B. Palmer"
Date: Fri, 23 May 2008 19:16:38 +0100
Subject: [PATCH] New translation module, using the Google Ajax interface.
---
modules/translate.py | 126 +++++++++++++++----------------------------
1 file changed, 43 insertions(+), 83 deletions(-)
diff --git a/modules/translate.py b/modules/translate.py
index 27bd094..cf960aa 100755
--- a/modules/translate.py
+++ b/modules/translate.py
@@ -8,107 +8,67 @@ Licensed under the Eiffel Forum License 2.
http://inamidst.com/phenny/
"""
-import re, time
+import re, urllib
import web
-r_translation = re.compile(r'
([^<]+)
')
+r_json = re.compile(r'^[,:{}\[\]0-9.\-+Eaeflnr-u \n\r\t]+$')
+r_string = re.compile(r'("(\\.|[^"\\])*")')
+env = {'__builtins__': None, 'null': None,
+ 'true': True, 'false': False}
-def guess_language(phrase):
- languages = {
- 'english': 'en',
- 'french': 'fr',
- 'spanish': 'es',
- 'portuguese': 'pt',
- 'german': 'de',
- 'italian': 'it',
- 'korean': 'ko',
- 'japanese': 'ja',
- 'chinese': 'zh',
- 'dutch': 'nl',
- 'greek': 'el',
- 'russian': 'ru'
- }
+def json(text):
+ """Evaluate JSON text safely (we hope)."""
+ if r_json.match(r_string.sub('', text)):
+ text = r_string.sub(lambda m: 'u' + m.group(1), text)
+ return eval(text.strip(' \t\r\n'), env, {})
+ raise ValueError('Input must be serialised JSON.')
- uri = 'http://www.xrce.xerox.com/cgi-bin/mltt/LanguageGuesser'
- form = {'Text': phrase}
- bytes = web.post(uri, form)
- for line in bytes.splitlines():
- if '' in line:
- i = line.find('')
- lang = line[i+len(''):].strip()
- lang = lang.lower()
- if '_' in lang:
- j = lang.find('_')
- lang = lang[:j]
- try: return languages[lang].lower()
- except KeyError:
- return lang.lower()
- return 'Moon Language'
+def detect(text):
+ uri = 'http://ajax.googleapis.com/ajax/services/language/detect'
+ q = urllib.quote(text)
+ bytes = web.get(uri + '?q=' + q + '&v=1.0')
+ result = json(bytes)
+ try: return result['responseData']['language']
+ except Exception: return None
-def translate(phrase, lang, target='en'):
- babelfish = 'http://uk.babelfish.yahoo.com/translate_txt'
- form = {
- 'ei': 'UTF-8',
- 'doit': 'done',
- 'fr': 'bf-home',
- 'intl': '1',
- 'tt': 'urltext',
- 'trtext': phrase,
- 'lp': lang + '_' + target
- }
+def translate(text, input, output):
+ uri = 'http://ajax.googleapis.com/ajax/services/language/translate'
+ q = urllib.quote(text)
+ pair = input + '%7C' + output
+ bytes = web.get(uri + '?q=' + q + '&v=1.0&langpair=' + pair)
+ result = json(bytes)
+ try: msg = result['responseData']['translatedText']
+ except Exception:
+ msg = 'The %s to %s translation failed, sorry!' % (input, output)
+ else:
+ msg = msg.encode('cp1252').replace('&#39;', "'")
+ msg = '"%s" (%s to %s, translate.google.com)' % (msg, input, output)
+ return msg
- bytes = web.post(babelfish, form)
- m = r_translation.search(bytes)
- if m:
- translation = m.group(1)
- translation = translation.replace('\r', ' ')
- translation = translation.replace('\n', ' ')
- while ' ' in translation:
- translation = translation.replace(' ', ' ')
- return translation.lower()
- return None
-
-def tr(phenny, input):
+def tr(phenny, context):
"""Translates a phrase, with an optional language hint."""
- original_input = input
- input, output, phrase = original_input.groups()
+ input, output, phrase = context.groups()
+
phrase = phrase.encode('utf-8')
- if (len(phrase) > 350) and (not original_input.admin):
+
+ if (len(phrase) > 350) and (not context.admin):
return phenny.reply('Phrase must be under 350 characters.')
- input = input or guess_language(phrase)
+ input = input or detect(phrase)
if not input:
- return phenny.reply('Unable to guess the language, sorry.')
+ err = 'Unable to guess your crazy moon language, sorry.'
+ return phenny.reply(err)
input = input.encode('utf-8')
output = (output or 'en').encode('utf-8')
- if not ((input == 'en') and (output == 'en')):
- translation = translate(phrase, input, output)
- if translation is not None:
- translation = translation.decode('utf-8').encode('utf-8')
- if output == 'en':
- return phenny.reply('"%s" (%s)' % (translation, input))
- else: return phenny.reply('"%s" (%s -> %s)' % \
- (translation, input, output))
+ if input != output:
+ msg = translate(phrase, input, output)
+ phenny.reply(msg)
+ else: phenny.reply('Ehwhatnow?')
- error = "I think it's %s, but I can't translate it currently."
- return phenny.reply(error % input.title())
-
- # Otherwise, it's English, so mangle it for fun
- for other in ['de', 'ja', 'de', 'ja', 'de', 'ja', 'de', 'ja', 'de', 'ja']:
- phrase = translate(phrase, 'en', other)
- phrase = translate(phrase, other, 'en')
- time.sleep(0.1)
-
- if phrase is not None:
- return phenny.reply(u'"%s" (en-unmangled)' % phrase)
- return phenny.reply("I think it's English already.")
- # @@ or 'Why but that be English, sire.'
tr.rule = ('$nick', ur'(?:([a-z]{2}) +)?(?:([a-z]{2}) +)?["“](.+?)["”]\? *$')
tr.example = '$nickname: "mon chien"? or $nickname: fr "mon chien"?'
tr.priority = 'low'
-# @@ mangle
-
if __name__ == '__main__':
print __doc__.strip()