phenny-1/modules/translate.py

#!/usr/bin/env python
# coding=utf-8
"""
translate.py - Phenny Translation Module
Copyright 2008, Sean B. Palmer, inamidst.com
Licensed under the Eiffel Forum License 2.

http://inamidst.com/phenny/
"""

import re, time
import web

r_translation = re.compile(r'<div style="padding:\S+?;">([^<]+)</div>')

def guess_language(phrase):
   languages = {
      'english': 'en',
      'french': 'fr',
      'spanish': 'es',
      'portuguese': 'pt',
      'german': 'de',
      'italian': 'it',
      'korean': 'ko',
      'japanese': 'ja',
      'chinese': 'zh',
      'dutch': 'nl',
      'greek': 'el',
      'russian': 'ru'
   }

   uri = 'http://www.xrce.xerox.com/cgi-bin/mltt/LanguageGuesser'
   form = {'Text': phrase}
   bytes = web.post(uri, form)
   for line in bytes.splitlines():
      if '<listing><font size=+1>' in line:
         i = line.find('<listing><font size=+1>')
         lang = line[i+len('<listing><font size=+1>'):].strip()
         lang = lang.lower()
         if '_' in lang:
            j = lang.find('_')
            lang = lang[:j]
         try: return languages[lang].lower()
         except KeyError:
            return lang.lower()
   return 'Moon Language'

def translate(phrase, lang, target='en'):
   babelfish = 'http://uk.babelfish.yahoo.com/translate_txt'
   form = {
      'ei': 'UTF-8',
      'doit': 'done',
      'fr': 'bf-home',
      'intl': '1',
      'tt': 'urltext',
      'trtext': phrase,
      'lp': lang + '_' + target
   }

   bytes = web.post(babelfish, form)
   m = r_translation.search(bytes)
   if m:
      translation = m.group(1)
      translation = translation.replace('\r', ' ')
      translation = translation.replace('\n', ' ')
      while '  ' in translation:
         translation = translation.replace('  ', ' ')
      return translation.lower()
   return None

def tr(phenny, input):
   """Translates a phrase, with an optional language hint."""
   original_input = input
   input, output, phrase = original_input.groups()
   phrase = phrase.encode('utf-8')
   if (len(phrase) > 350) and (not original_input.admin):
      return phenny.reply('Phrase must be under 350 characters.')

   input = input or guess_language(phrase)
   if not input:
      return phenny.reply('Unable to guess the language, sorry.')
   input = input.encode('utf-8')
   output = (output or 'en').encode('utf-8')

   if not ((input == 'en') and (output == 'en')):
      translation = translate(phrase, input, output)
      if translation is not None:
         translation = translation.decode('utf-8').encode('utf-8')
         if output == 'en':
            return phenny.reply('"%s" (%s)' % (translation, input))
         else: return phenny.reply('"%s" (%s -> %s)' % \
                                   (translation, input, output))

      error = "I think it's %s, but I can't translate it currently."
      return phenny.reply(error % input.title())

   # Otherwise, it's English, so mangle it for fun
   for other in ['de', 'ja', 'de', 'ja', 'de', 'ja', 'de', 'ja', 'de', 'ja']:
      phrase = translate(phrase, 'en', other)
      phrase = translate(phrase, other, 'en')
      time.sleep(0.1)

   if phrase is not None:
      return phenny.reply(u'"%s" (en-unmangled)' % phrase)
   return phenny.reply("I think it's English already.")
   # @@ or 'Why but that be English, sire.'
tr.rule = ('$nick', ur'(?:([a-z]{2}) +)?(?:([a-z]{2}) +)?["“](.+?)["”]\? *$')
tr.example = '$nickname: "mon chien"? or $nickname: fr "mon chien"?'
tr.priority = 'low'

# @@ mangle

if __name__ == '__main__':
   print __doc__.strip()