mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-11-23 11:09:23 +01:00
Add babelfish.py and the corresponding translate & babelize functions to Http.py
This commit is contained in:
parent
8c72168a30
commit
28a8071635
163
others/babelfish.py
Normal file
163
others/babelfish.py
Normal file
@ -0,0 +1,163 @@
|
||||
# babelizer.py - API for simple access to babelfish.altavista.com.
|
||||
# Requires python 2.0 or better.
|
||||
#
|
||||
# See it in use at http://babel.MrFeinberg.com/
|
||||
|
||||
"""API for simple access to babelfish.altavista.com.
|
||||
|
||||
Summary:
|
||||
|
||||
import babelizer
|
||||
|
||||
print ' '.join(babelizer.available_languages)
|
||||
|
||||
print babelizer.translate( 'How much is that doggie in the window?',
|
||||
'English', 'French' )
|
||||
|
||||
def babel_callback(phrase):
|
||||
print phrase
|
||||
sys.stdout.flush()
|
||||
|
||||
babelizer.babelize( 'I love a reigning knight.',
|
||||
'English', 'German',
|
||||
callback = babel_callback )
|
||||
|
||||
available_languages
|
||||
A list of languages available for use with babelfish.
|
||||
|
||||
translate( phrase, from_lang, to_lang )
|
||||
Uses babelfish to translate phrase from from_lang to to_lang.
|
||||
|
||||
babelize(phrase, from_lang, through_lang, limit = 12, callback = None)
|
||||
Uses babelfish to translate back and forth between from_lang and
|
||||
through_lang until either no more changes occur in translation or
|
||||
limit iterations have been reached, whichever comes first. Takes
|
||||
an optional callback function which should receive a single
|
||||
parameter, being the next translation. Without the callback
|
||||
returns a list of successive translations.
|
||||
|
||||
It's only guaranteed to work if 'english' is one of the two languages
|
||||
given to either of the translation methods.
|
||||
|
||||
Both translation methods throw exceptions which are all subclasses of
|
||||
BabelizerError. They include
|
||||
|
||||
LanguageNotAvailableError
|
||||
Thrown on an attempt to use an unknown language.
|
||||
|
||||
BabelfishChangedError
|
||||
Thrown when babelfish.altavista.com changes some detail of their
|
||||
layout, and babelizer can no longer parse the results or submit
|
||||
the correct form (a not infrequent occurrence).
|
||||
|
||||
BabelizerIOError
|
||||
Thrown for various networking and IO errors.
|
||||
|
||||
Version: $Id$
|
||||
Author: Jonathan Feinberg <jdf@pobox.com>
|
||||
"""
|
||||
import re, string, urllib
|
||||
|
||||
"""
|
||||
Various patterns I have encountered in looking for the babelfish result.
|
||||
We try each of them in turn, based on the relative number of times I've
|
||||
seen each of these patterns. $1.00 to anyone who can provide a heuristic
|
||||
for knowing which one to use. This includes AltaVista employees.
|
||||
"""
|
||||
__where = [ re.compile(r'lang=..>([^<]*)</div'),
|
||||
re.compile(r'name=\"q\" value=\"([^\"]*)\">'),
|
||||
]
|
||||
|
||||
__languages = { 'english' : 'en',
|
||||
'chinese' : 'zh',
|
||||
'french' : 'fr',
|
||||
'german' : 'de',
|
||||
'italian' : 'it',
|
||||
'japanese' : 'ja',
|
||||
'korean' : 'ko',
|
||||
'spanish' : 'es',
|
||||
'portugese' : 'pt',
|
||||
}
|
||||
|
||||
"""
|
||||
All of the available language names.
|
||||
"""
|
||||
available_languages = [ x.title() for x in __languages.keys() ]
|
||||
|
||||
"""
|
||||
Calling translate() or babelize() can raise a BabelizerError
|
||||
"""
|
||||
class BabelizerError(Exception):
|
||||
pass
|
||||
|
||||
class LanguageNotAvailableError(BabelizerError):
|
||||
pass
|
||||
class BabelfishChangedError(BabelizerError):
|
||||
pass
|
||||
class BabelizerIOError(BabelizerError):
|
||||
pass
|
||||
|
||||
def clean(text):
    """Return *text* with every run of whitespace collapsed to one space.

    Leading and trailing whitespace is dropped and embedded newlines are
    treated like any other whitespace, so the result is always a single
    line.
    """
    # split() with no separator already discards leading/trailing whitespace
    # and splits on newlines, so the explicit strip() and the deprecated
    # string.replace() call of the earlier version are unnecessary.
    return ' '.join(text.split())
|
||||
|
||||
def translate(phrase, from_lang, to_lang):
    """Translate *phrase* from *from_lang* to *to_lang* using babelfish.

    The language names are looked up case-insensitively in the module's
    language table.  The whitespace-normalised phrase is submitted to the
    babelfish form handler and the translated text is extracted from the
    returned page with the first known pattern that matches.

    Returns the translated phrase, whitespace-normalised.

    Raises:
        LanguageNotAvailableError: a language name is not in the table.
        BabelizerIOError: opening or reading the response raised IOError.
        BabelfishChangedError: no known pattern matched the result page.
    """
    phrase = clean(phrase)
    try:
        from_code = __languages[from_lang.lower()]
        to_code = __languages[to_lang.lower()]
    except KeyError as lang:
        raise LanguageNotAvailableError(lang)

    params = urllib.urlencode({
        'BabelFishFrontPage': 'yes',
        'doit': 'done',
        'tt': 'urltext',
        'intl': '1',
        'urltext': phrase,
        'lp': from_code + '_' + to_code,
    })

    # Both the connection and the read are covered, so that every network
    # failure surfaces as BabelizerIOError.  Any other exception is left to
    # propagate; the earlier catch-all printed via an unimported ``sys`` and
    # then fell through to an unbound ``response``.
    try:
        response = urllib.urlopen(
            'http://babelfish.altavista.com/babelfish/tr', params)
        try:
            html = response.read()
        finally:
            response.close()
    except IOError as what:
        raise BabelizerIOError("Couldn't talk to server: %s" % what)

    for regex in __where:
        match = regex.search(html)
        if match:
            return clean(match.group(1))
    raise BabelfishChangedError("Can't recognize translated string.")
|
||||
|
||||
def babelize(phrase, from_language, through_language, limit=12, callback=None):
    """Bounce *phrase* back and forth between two languages.

    The phrase is translated from *from_language* to *through_language* and
    back again until a translation repeats one already seen or *limit*
    translations have been made, whichever comes first.  The sequence always
    ends with a phrase in *from_language*.

    Each phrase (starting with the whitespace-normalised input) is passed to
    *callback* when one is given; otherwise the phrases are collected and
    returned as a list.  With a callback the function returns None.

    Any exception raised by translate() propagates unchanged.
    """
    results = []
    # A single reporting path for both modes.  The earlier version appended
    # to ``results`` after the loop even when a callback was supplied, in
    # which case ``results`` had never been created.
    if callback:
        report = callback
    else:
        report = results.append

    phrase = clean(phrase)
    seen = {phrase: 1}
    report(phrase)

    flip = {from_language: through_language, through_language: from_language}
    source = from_language  # language the last reported phrase is written in
    for _ in range(limit):
        target = flip[source]
        candidate = translate(phrase, source, target)
        if candidate in seen:
            # The chain has started repeating.  If the last reported phrase
            # was in the intermediate language, ``candidate`` is already its
            # translation back into from_language: report it as it is rather
            # than pushing it through the translator a second time with the
            # wrong source language.
            if source != from_language:
                report(candidate)
            break
        seen[candidate] = 1
        report(candidate)
        phrase, source = candidate, target
    else:
        # The limit ran out on an intermediate-language phrase (odd limit):
        # translate once more so the final phrase is in from_language.
        if source != from_language:
            report(translate(phrase, source, from_language))

    if not callback:
        return results
|
||||
|
||||
if __name__ == '__main__':
    import sys

    def printer(translation):
        # Show each translation as soon as it arrives.
        print(translation)
        sys.stdout.flush()

    babelize(
        "I won't take that sort of treatment from you, or from your doggie!",
        'english',
        'french',
        callback=printer,
    )
|
||||
|
@ -45,6 +45,7 @@ import urllib2
|
||||
import utils
|
||||
import debug
|
||||
import privmsgs
|
||||
import babelfish
|
||||
import callbacks
|
||||
import structures
|
||||
|
||||
@ -363,7 +364,56 @@ class Http(callbacks.Privmsg):
|
||||
(numberOfPackages, len(responses), ', '.join(responses))
|
||||
irc.reply(msg, s)
|
||||
|
||||
# Lookup table built by utils.abbrev from the lower-cased language names;
# the command methods use it to expand what the user typed into a full
# language name.  (Presumably keyed by unambiguous abbreviations -- confirm
# against utils.abbrev.)
_abbrevs = utils.abbrev(
    [language.lower() for language in babelfish.available_languages])
|
||||
def translate(self, irc, msg, args):
    """<from-language> <to-language> <phrase>

    Returns the phrase translated to the new language. One of the
    languages must be English.
    """
    # The docstring above is left untouched: it may be shown to users as the
    # command's help text (not visible from this block -- confirm).
    (flang, tlang, phrase) = privmsgs.getArgs(args, 3)
    flang = flang.lower()
    tlang = tlang.lower()
    # Expand an abbreviation to the full language name when one is known;
    # anything else is passed on as typed and rejected by babelfish below.
    # has_key() is kept because the container type returned by utils.abbrev
    # is not visible from this block.
    if self._abbrevs.has_key(flang):
        flang = self._abbrevs[flang]
    if self._abbrevs.has_key(tlang):
        tlang = self._abbrevs[tlang]
    try:
        trans = babelfish.translate(phrase, flang, tlang)
        irc.reply(msg, trans)
    except babelfish.LanguageNotAvailableError:
        irc.reply(msg, 'Valid languages: %s.' %
                  ', '.join(babelfish.available_languages))
    except babelfish.BabelizerIOError as e:
        irc.reply(msg, e.args[0])
    except babelfish.BabelfishChangedError:
        # Adjacent literals instead of a backslash inside the string: the
        # backslash form made the next source line's leading whitespace
        # part of the reply.
        irc.reply(msg, 'Babelfish has foiled our plans by changing their '
                       'format')
|
||||
|
||||
def babelize(self, irc, msg, args):
    """<from-language> <to-language> <phrase>

    Returns the phrase translated to the new language. This is done 12
    times, or until the output doesn't change anymore.
    """
    # The docstring above is left untouched: it may be shown to users as the
    # command's help text (not visible from this block -- confirm).
    (flang, tlang, phrase) = privmsgs.getArgs(args, 3)
    flang = flang.lower()
    tlang = tlang.lower()
    # Expand an abbreviation to the full language name when one is known;
    # anything else is passed on as typed and rejected by babelfish below.
    # has_key() is kept because the container type returned by utils.abbrev
    # is not visible from this block.
    if self._abbrevs.has_key(flang):
        flang = self._abbrevs[flang]
    if self._abbrevs.has_key(tlang):
        tlang = self._abbrevs[tlang]
    try:
        trans = babelfish.babelize(phrase, flang, tlang)
        # Only the last phrase of the chain is sent back.
        irc.reply(msg, trans[-1])
    except babelfish.LanguageNotAvailableError:
        irc.reply(msg, 'Valid languages: %s.' %
                  ', '.join(babelfish.available_languages))
    except babelfish.BabelizerIOError as e:
        irc.reply(msg, e.args[0])
    except babelfish.BabelfishChangedError:
        # Adjacent literals instead of a backslash inside the string: the
        # backslash form made the next source line's leading whitespace
        # part of the reply.
        irc.reply(msg, 'Babelfish has foiled our plans by changing their '
                       'format')
|
||||
|
||||
# Alias for the Http class under the generic name ``Class`` -- presumably the
# name the plugin loader looks up in this module; confirm against the loader.
Class = Http
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user