phenny/modules/translate.py

#!/usr/bin/env python
# coding=utf-8
"""
translate.py - Phenny Translation Module
Copyright 2008, Sean B. Palmer, inamidst.com
Licensed under the Eiffel Forum License 2.

http://inamidst.com/phenny/
"""

import re
import web

r_translation = re.compile(r'<div style=padding:10px;>([^<]+)</div>')

def guess_language(phrase): 
   languages = {
      'english': 'en', 
      'french': 'fr', 
      'spanish': 'es', 
      'portuguese': 'pt', 
      'german': 'de', 
      'italian': 'it', 
      'korean': 'ko', 
      'japanese': 'ja', 
      'chinese': 'zh', 
      'dutch': 'nl', 
      'greek': 'el', 
      'russian': 'ru'
   }

   uri = 'http://www.xrce.xerox.com/cgi-bin/mltt/LanguageGuesser'
   form = {'Text': phrase}
   bytes = web.post(uri, form)
   for line in bytes.splitlines(): 
      if '<listing><font size=+1>' in line: 
         i = line.find('<listing><font size=+1>')
         lang = line[i+len('<listing><font size=+1>'):].strip()
         lang = lang.lower()
         if '_' in lang: 
            j = lang.find('_')
            lang = lang[:j]
         try: return languages[lang]
         except KeyError: 
            return lang
   return 'unknown'

def translate(phrase, lang, target='en'): 
   babelfish = 'http://world.altavista.com/tr'
   form = {
      'doit': 'done', 
      'intl': '1', 
      'tt': 'urltext', 
      'trtext': phrase, 
      'lp': lang + '_' + target
   }

   bytes = web.post(babelfish, form)
   m = r_translation.search(bytes)
   if m: 
      translation = m.group(1)
      translation = translation.replace('\r', ' ')
      translation = translation.replace('\n', ' ')
      while '  ' in translation:
         translation = translation.replace('  ', ' ')
      return translation
   return None

def tr(phenny, input): 
   lang, phrase = input.groups()

   if (len(phrase) > 350) and (not phenny.admin(input.nick)): 
      return phenny.reply('Phrase must be under 350 characters.')

   language = guess_language(phrase)
   if language is None: 
      return phenny.reply('Unable to guess the language, sorry.')

   if language != 'en': 
      translation = translate(phrase, language)
      if translation is not None: 
         return phenny.reply(u'"%s" (%s)' % (translation, language))

      error = "I think it's %s, but I can't translate that language."
      return phenny.reply(error % language.title())

   # Otherwise, it's English, so mangle it for fun
   for other in ['de', 'ja']: 
      phrase = translate(phrase, 'en', other)
      phrase = translate(phrase, other, 'en')

   if phrase is not None: 
      return phenny.reply(u'"%s" (en-unmangled)' % phrase)
   return phenny.reply("I think it's English already.")
   # @@ or 'Why but that be English, sire.'
tr.doc = ('phenny: "<phrase>"? or phenny: <lang> "<phrase>"?', 
   'Translate <phrase>, optionally forcing the <lang> interpretation.')
tr.rule = ('$nick', ur'(?:([a-z]{2}) +)?["“](.+?)["”]\? *$')
tr.priority = 'low'

if __name__ == '__main__': 
   print __doc__.strip()
Phenny2, now being tested on Freenode as the main phenny. 2008-02-21 07:06:33 -05:00			`#!/usr/bin/env python`
			`# coding=utf-8`
			`"""`
			`translate.py - Phenny Translation Module`
			`Copyright 2008, Sean B. Palmer, inamidst.com`
			`Licensed under the Eiffel Forum License 2.`

			`http://inamidst.com/phenny/`
			`"""`

			`import re`
			`import web`

			`r_translation = re.compile(r'<div style=padding:10px;>([^<]+)</div>')`

			`def guess_language(phrase):`
			`languages = {`
			`'english': 'en',`
			`'french': 'fr',`
			`'spanish': 'es',`
			`'portuguese': 'pt',`
			`'german': 'de',`
			`'italian': 'it',`
			`'korean': 'ko',`
			`'japanese': 'ja',`
			`'chinese': 'zh',`
			`'dutch': 'nl',`
			`'greek': 'el',`
			`'russian': 'ru'`
			`}`

			`uri = 'http://www.xrce.xerox.com/cgi-bin/mltt/LanguageGuesser'`
			`form = {'Text': phrase}`
			`bytes = web.post(uri, form)`
			`for line in bytes.splitlines():`
			`if '<listing><font size=+1>' in line:`
			`i = line.find('<listing><font size=+1>')`
			`lang = line[i+len('<listing><font size=+1>'):].strip()`
			`lang = lang.lower()`
			`if '_' in lang:`
			`j = lang.find('_')`
			`lang = lang[:j]`
			`try: return languages[lang]`
			`except KeyError:`
			`return lang`
			`return 'unknown'`

			`def translate(phrase, lang, target='en'):`
			`babelfish = 'http://world.altavista.com/tr'`
			`form = {`
			`'doit': 'done',`
			`'intl': '1',`
			`'tt': 'urltext',`
			`'trtext': phrase,`
			`'lp': lang + '_' + target`
			`}`

			`bytes = web.post(babelfish, form)`
			`m = r_translation.search(bytes)`
			`if m:`
			`translation = m.group(1)`
			`translation = translation.replace('\r', ' ')`
			`translation = translation.replace('\n', ' ')`
			`while ' ' in translation:`
			`translation = translation.replace(' ', ' ')`
			`return translation`
			`return None`

			`def tr(phenny, input):`
			`lang, phrase = input.groups()`

			`if (len(phrase) > 350) and (not phenny.admin(input.nick)):`
			`return phenny.reply('Phrase must be under 350 characters.')`

			`language = guess_language(phrase)`
			`if language is None:`
			`return phenny.reply('Unable to guess the language, sorry.')`

			`if language != 'en':`
			`translation = translate(phrase, language)`
			`if translation is not None:`
			`return phenny.reply(u'"%s" (%s)' % (translation, language))`

			`error = "I think it's %s, but I can't translate that language."`
			`return phenny.reply(error % language.title())`

			`# Otherwise, it's English, so mangle it for fun`
			`for other in ['de', 'ja']:`
			`phrase = translate(phrase, 'en', other)`
			`phrase = translate(phrase, other, 'en')`

			`if phrase is not None:`
			`return phenny.reply(u'"%s" (en-unmangled)' % phrase)`
			`return phenny.reply("I think it's English already.")`
			`# @@ or 'Why but that be English, sire.'`
			`tr.doc = ('phenny: "<phrase>"? or phenny: <lang> "<phrase>"?',`
			`'Translate <phrase>, optionally forcing the <lang> interpretation.')`
			`tr.rule = ('$nick', ur'(?:([a-z]{2}) +)?["“](.+?)["”]\? *$')`
			`tr.priority = 'low'`

			`if __name__ == '__main__':`
			`print __doc__.strip()`