mirror of
https://github.com/Mikaela/Limnoria.git
synced 2025-02-17 06:00:42 +01:00
Add babelfish.py and the corresponding translate & babelize functions to Http.py
This commit is contained in:
parent
8c72168a30
commit
28a8071635
163
others/babelfish.py
Normal file
163
others/babelfish.py
Normal file
@ -0,0 +1,163 @@
|
|||||||
|
# babelizer.py - API for simple access to babelfish.altavista.com.
|
||||||
|
# Requires python 2.0 or better.
|
||||||
|
#
|
||||||
|
# See it in use at http://babel.MrFeinberg.com/
|
||||||
|
|
||||||
|
"""API for simple access to babelfish.altavista.com.
|
||||||
|
|
||||||
|
Summary:
|
||||||
|
|
||||||
|
import babelizer
|
||||||
|
|
||||||
|
print ' '.join(babelizer.available_languages)
|
||||||
|
|
||||||
|
print babelizer.translate( 'How much is that doggie in the window?',
|
||||||
|
'English', 'French' )
|
||||||
|
|
||||||
|
def babel_callback(phrase):
|
||||||
|
print phrase
|
||||||
|
sys.stdout.flush()
|
||||||
|
|
||||||
|
babelizer.babelize( 'I love a reigning knight.',
|
||||||
|
'English', 'German',
|
||||||
|
callback = babel_callback )
|
||||||
|
|
||||||
|
available_languages
|
||||||
|
A list of languages available for use with babelfish.
|
||||||
|
|
||||||
|
translate( phrase, from_lang, to_lang )
|
||||||
|
Uses babelfish to translate phrase from from_lang to to_lang.
|
||||||
|
|
||||||
|
babelize(phrase, from_lang, through_lang, limit = 12, callback = None)
|
||||||
|
Uses babelfish to translate back and forth between from_lang and
|
||||||
|
through_lang until either no more changes occur in translation or
|
||||||
|
limit iterations have been reached, whichever comes first. Takes
|
||||||
|
an optional callback function which should receive a single
|
||||||
|
parameter, being the next translation. Without the callback
|
||||||
|
returns a list of successive translations.
|
||||||
|
|
||||||
|
It's only guaranteed to work if 'english' is one of the two languages
|
||||||
|
given to either of the translation methods.
|
||||||
|
|
||||||
|
Both translation methods throw exceptions which are all subclasses of
|
||||||
|
BabelizerError. They include
|
||||||
|
|
||||||
|
LanguageNotAvailableError
|
||||||
|
Thrown on an attempt to use an unknown language.
|
||||||
|
|
||||||
|
BabelfishChangedError
|
||||||
|
Thrown when babelfish.altavista.com changes some detail of their
|
||||||
|
layout, and babelizer can no longer parse the results or submit
|
||||||
|
the correct form (a not infrequent occurrence).
|
||||||
|
|
||||||
|
BabelizerIOError
|
||||||
|
Thrown for various networking and IO errors.
|
||||||
|
|
||||||
|
Version: $Id$
|
||||||
|
Author: Jonathan Feinberg <jdf@pobox.com>
|
||||||
|
"""
|
||||||
|
import re, string, urllib
|
||||||
|
|
||||||
|
"""
|
||||||
|
Various patterns I have encountered in looking for the babelfish result.
|
||||||
|
We try each of them in turn, based on the relative number of times I've
|
||||||
|
seen each of these patterns. $1.00 to anyone who can provide a heuristic
|
||||||
|
for knowing which one to use. This includes AltaVista employees.
|
||||||
|
"""
|
||||||
|
# Compiled patterns that locate the translated text in the returned HTML;
# translate() tries them in this order and uses the first one that matches.
__where = [
    re.compile(r'lang=..>([^<]*)</div'),
    re.compile(r'name=\"q\" value=\"([^\"]*)\">'),
]

# Lower-case language name -> two-letter code used to build the 'lp'
# request parameter in translate().
# NOTE: 'portugese' is misspelled, but the key is the name callers pass in
# (and the one listed in available_languages), so it is kept as is.
__languages = {
    'english': 'en',
    'chinese': 'zh',
    'french': 'fr',
    'german': 'de',
    'italian': 'it',
    'japanese': 'ja',
    'korean': 'ko',
    'spanish': 'es',
    'portugese': 'pt',
}

# All of the available language names, title-cased (e.g. 'English').
available_languages = [name.title() for name in __languages]
|
||||||
|
|
||||||
|
"""
|
||||||
|
Calling translate() or babelize() can raise a BabelizerError
|
||||||
|
"""
|
||||||
|
class BabelizerError(Exception):
    """Base class of every error raised by translate() and babelize()."""


class LanguageNotAvailableError(BabelizerError):
    """Raised on an attempt to use an unknown language."""


class BabelfishChangedError(BabelizerError):
    """Raised when the translated string cannot be found in the response."""


class BabelizerIOError(BabelizerError):
    """Raised when the babelfish server cannot be reached."""
|
||||||
|
|
||||||
|
def clean(text):
    """Return text with all whitespace runs collapsed to single spaces.

    Leading and trailing whitespace is removed and newlines are treated
    like any other whitespace.
    """
    # split() with no argument already discards leading/trailing whitespace
    # and splits on newlines, so no separate strip()/replace() is needed.
    return ' '.join(text.split())
|
||||||
|
|
||||||
|
def translate(phrase, from_lang, to_lang):
    """Translate phrase from from_lang to to_lang using babelfish.

    phrase is whitespace-normalized with clean() before it is submitted.
    from_lang and to_lang are language names looked up case-insensitively
    in the module's language table.

    Returns the translated text, also normalized with clean().

    Raises:
        LanguageNotAvailableError: either language name is unknown.
        BabelizerIOError: the request to the server failed with an IOError.
        BabelfishChangedError: none of the known patterns matched the
            returned page.
    """
    phrase = clean(phrase)
    try:
        from_code = __languages[from_lang.lower()]
        to_code = __languages[to_lang.lower()]
    except KeyError as lang:
        raise LanguageNotAvailableError(lang)

    params = urllib.urlencode({'BabelFishFrontPage': 'yes',
                               'doit': 'done',
                               'tt': 'urltext',
                               'intl': '1',
                               'urltext': phrase,
                               'lp': from_code + '_' + to_code})
    # Only IOError is translated; any other exception propagates unchanged
    # instead of being printed and followed by a failure on an unbound
    # 'response'.
    try:
        response = urllib.urlopen(
            'http://babelfish.altavista.com/babelfish/tr', params)
        try:
            html = response.read()
        finally:
            # Always release the connection, even if read() fails.
            response.close()
    except IOError as what:
        raise BabelizerIOError("Couldn't talk to server: %s" % what)

    # The first pattern that matches wins.
    for regex in __where:
        match = regex.search(html)
        if match:
            return clean(match.group(1))
    raise BabelfishChangedError("Can't recognize translated string.")
|
||||||
|
|
||||||
|
def babelize(phrase, from_language, through_language, limit = 12, callback = None):
    """Translate phrase back and forth until it stops changing.

    The phrase is translated from from_language to through_language and
    back again, for at most limit translations, stopping early as soon as
    a translation repeats one that has already been produced.

    If callback is given it is called with the cleaned original phrase and
    then with each new translation, and None is returned.  Otherwise the
    same sequence is returned as a list.

    Any exception raised by translate() propagates to the caller.
    """
    phrase = clean(phrase)
    seen = {phrase: 1}
    results = []

    def emit(text):
        # Deliver one step to the callback if there is one, else collect it.
        if callback:
            callback(text)
        else:
            results.append(text)

    emit(phrase)
    flip = {from_language: through_language, through_language: from_language}
    current = from_language
    for _ in range(limit):
        phrase = translate(phrase, current, flip[current])
        if phrase in seen:
            break
        seen[phrase] = 1
        emit(phrase)
        current = flip[current]
    if current != from_language:
        # NOTE(review): after an early break, phrase already holds the
        # translation into flip[current]; confirm that translating it once
        # more here is intended.
        phrase = translate(phrase, current, flip[current])
        # Goes through emit() so callback mode no longer touches an
        # undefined 'results' list.
        emit(phrase)
    if not callback:
        return results
    return None
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
import sys
|
||||||
|
def printer(x):
|
||||||
|
print x
|
||||||
|
sys.stdout.flush();
|
||||||
|
|
||||||
|
|
||||||
|
babelize("I won't take that sort of treatment from you, or from your doggie!",
|
||||||
|
'english', 'french', callback = printer)
|
||||||
|
|
@ -45,6 +45,7 @@ import urllib2
|
|||||||
import utils
|
import utils
|
||||||
import debug
|
import debug
|
||||||
import privmsgs
|
import privmsgs
|
||||||
|
import babelfish
|
||||||
import callbacks
|
import callbacks
|
||||||
import structures
|
import structures
|
||||||
|
|
||||||
@ -363,7 +364,56 @@ class Http(callbacks.Privmsg):
|
|||||||
(numberOfPackages, len(responses), ', '.join(responses))
|
(numberOfPackages, len(responses), ', '.join(responses))
|
||||||
irc.reply(msg, s)
|
irc.reply(msg, s)
|
||||||
|
|
||||||
|
_abbrevs = utils.abbrev(map(str.lower, babelfish.available_languages))
|
||||||
|
def translate(self, irc, msg, args):
    """<from-language> <to-language> <phrase>

    Returns the phrase translated to the new language. One of the
    languages must be English.
    """
    (flang, tlang, phrase) = privmsgs.getArgs(args, 3)
    # Language names are matched in lower case; a name found in
    # self._abbrevs is replaced by the value stored for it there.
    flang = flang.lower()
    tlang = tlang.lower()
    if self._abbrevs.has_key(flang):
        flang = self._abbrevs[flang]
    if self._abbrevs.has_key(tlang):
        tlang = self._abbrevs[tlang]
    try:
        trans = babelfish.translate(phrase, flang, tlang)
        irc.reply(msg, trans)
    except babelfish.LanguageNotAvailableError:
        irc.reply(msg, 'Valid languages: %s.' %
                  ', '.join(babelfish.available_languages))
    except babelfish.BabelizerIOError as e:
        # Pass the error text on to the user.
        irc.reply(msg, e.args[0])
    except babelfish.BabelfishChangedError:
        # Adjacent literals instead of a backslash inside the string, so the
        # reply reads "... their format" with exactly one space.
        irc.reply(msg, 'Babelfish has foiled our plans by changing their '
                       'format')
|
||||||
|
|
||||||
|
def babelize(self, irc, msg, args):
    """<from-language> <to-language> <phrase>

    Returns the phrase translated to the new language. This is done 12
    times, or until the output doesn't change anymore.
    """
    (flang, tlang, phrase) = privmsgs.getArgs(args, 3)
    # Language names are matched in lower case; a name found in
    # self._abbrevs is replaced by the value stored for it there.
    flang = flang.lower()
    tlang = tlang.lower()
    if self._abbrevs.has_key(flang):
        flang = self._abbrevs[flang]
    if self._abbrevs.has_key(tlang):
        tlang = self._abbrevs[tlang]
    try:
        trans = babelfish.babelize(phrase, flang, tlang)
        # Only the last element of the returned sequence is sent.
        irc.reply(msg, trans[-1])
    except babelfish.LanguageNotAvailableError:
        irc.reply(msg, 'Valid languages: %s.' %
                  ', '.join(babelfish.available_languages))
    except babelfish.BabelizerIOError as e:
        # Pass the error text on to the user.
        irc.reply(msg, e.args[0])
    except babelfish.BabelfishChangedError:
        # Adjacent literals instead of a backslash inside the string, so the
        # reply reads "... their format" with exactly one space.
        irc.reply(msg, 'Babelfish has foiled our plans by changing their '
                       'format')
|
||||||
|
|
||||||
Class = Http
|
Class = Http
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user