From 28a80716357992088c4b489b0748976aaad3dcc1 Mon Sep 17 00:00:00 2001 From: James Vega Date: Wed, 20 Aug 2003 22:47:19 +0000 Subject: [PATCH] Add babelfish.py and the corresponding translate & babelize functions to Http.py --- others/babelfish.py | 163 ++++++++++++++++++++++++++++++++++++++++++++ plugins/Http.py | 50 ++++++++++++++ 2 files changed, 213 insertions(+) create mode 100644 others/babelfish.py diff --git a/others/babelfish.py b/others/babelfish.py new file mode 100644 index 000000000..9775e4115 --- /dev/null +++ b/others/babelfish.py @@ -0,0 +1,163 @@ +# babelizer.py - API for simple access to babelfish.altavista.com. +# Requires python 2.0 or better. +# +# See it in use at http://babel.MrFeinberg.com/ + +"""API for simple access to babelfish.altavista.com. + +Summary: + + import babelizer + + print ' '.join(babelizer.available_languages) + + print babelizer.translate( 'How much is that doggie in the window?', + 'English', 'French' ) + + def babel_callback(phrase): + print phrase + sys.stdout.flush() + + babelizer.babelize( 'I love a reigning knight.', + 'English', 'German', + callback = babel_callback ) + +available_languages + A list of languages available for use with babelfish. + +translate( phrase, from_lang, to_lang ) + Uses babelfish to translate phrase from from_lang to to_lang. + +babelize(phrase, from_lang, through_lang, limit = 12, callback = None) + Uses babelfish to translate back and forth between from_lang and + through_lang until either no more changes occur in translation or + limit iterations have been reached, whichever comes first. Takes + an optional callback function which should receive a single + parameter, being the next translation. Without the callback + returns a list of successive translations. + +It's only guaranteed to work if 'english' is one of the two languages +given to either of the translation methods. + +Both translation methods throw exceptions which are all subclasses of +BabelizerError. 
They include + +LanguageNotAvailableError + Thrown on an attempt to use an unknown language. + +BabelfishChangedError + Thrown when babelfish.altavista.com changes some detail of their + layout, and babelizer can no longer parse the results or submit + the correct form (a not infrequent occurrence). + +BabelizerIOError + Thrown for various networking and IO errors. + +Version: $Id$ +Author: Jonathan Feinberg +""" +import re, string, urllib + +""" +Various patterns I have encountered in looking for the babelfish result. +We try each of them in turn, based on the relative number of times I've +seen each of these patterns. $1.00 to anyone who can provide a heuristic +for knowing which one to use. This includes AltaVista employees. +""" +__where = [ re.compile(r'lang=..>([^<]*)'), + ] + +__languages = { 'english' : 'en', + 'chinese' : 'zh', + 'french' : 'fr', + 'german' : 'de', + 'italian' : 'it', + 'japanese' : 'ja', + 'korean' : 'ko', + 'spanish' : 'es', + 'portugese' : 'pt', + } + +""" + All of the available language names. 
def clean(text):
    """Return text as a single line with whitespace runs collapsed.

    Leading and trailing whitespace is dropped and every internal run of
    whitespace (newlines included) becomes one space.
    """
    # split() without a separator already splits on any whitespace run and
    # ignores leading/trailing whitespace, so no strip/replace is needed.
    return ' '.join(text.split())

def translate(phrase, from_lang, to_lang):
    """Translate phrase from from_lang to to_lang via babelfish.

    Language names are looked up case-insensitively in the module's
    language table.  Returns the cleaned translated string.

    Raises:
        LanguageNotAvailableError: a language name is not in the table.
        BabelizerIOError: talking to the server raised an IOError.
        BabelfishChangedError: no known pattern matched the returned page.

    Any other exception propagates unchanged; it is no longer printed and
    swallowed, which used to leave the response undefined.
    """
    # Local import: sys.exc_info() fetches the active exception without
    # relying on version-specific "except X, e" binding syntax.
    import sys

    phrase = clean(phrase)
    try:
        from_code = __languages[from_lang.lower()]
        to_code = __languages[to_lang.lower()]
    except KeyError:
        # Pass the KeyError along as the argument, as callers already see.
        raise LanguageNotAvailableError(sys.exc_info()[1])

    params = urllib.urlencode( { 'BabelFishFrontPage' : 'yes',
                                 'doit' : 'done',
                                 'tt' : 'urltext',
                                 'intl' : '1',
                                 'urltext' : phrase,
                                 'lp' : from_code + '_' + to_code } )
    try:
        response = urllib.urlopen('http://babelfish.altavista.com/babelfish/tr', params)
        # Nested try/finally so the connection is released even when the
        # read fails; a failing read is reported like a failing open.
        try:
            html = response.read()
        finally:
            response.close()
    except IOError:
        raise BabelizerIOError("Couldn't talk to server: %s" % (sys.exc_info()[1],))

    # Try each known result pattern in order; the first hit wins.
    for regex in __where:
        match = regex.search(html)
        if match:
            return clean(match.group(1))
    raise BabelfishChangedError("Can't recognize translated string.")

def babelize(phrase, from_language, through_language, limit = 12, callback = None):
    """Translate phrase back and forth until it stops changing.

    Alternates between from_language and through_language for at most
    limit translations, stopping early when a translation has been seen
    before.  If the loop ends on through_language, one more translation
    brings the phrase back to from_language.

    Every phrase, starting with the cleaned input, is handed to callback
    when one is given; otherwise the phrases are collected and returned as
    a list.  With a callback the function returns None.

    Raises whatever translate() raises.
    """
    phrase = clean(phrase)
    seen = { phrase: 1 }
    results = []

    def emit(text):
        # Single delivery point so the callback and list paths stay in sync.
        if callback:
            callback(text)
        else:
            results.append(text)

    emit(phrase)
    flip = { from_language: through_language, through_language: from_language }
    current = from_language
    for _ in range(limit):
        phrase = translate(phrase, current, flip[current])
        if phrase in seen:
            break
        seen[phrase] = 1
        emit(phrase)
        current = flip[current]
    if current != from_language:
        # Finish in the source language.  This step used to append to the
        # results list directly, which does not exist in callback mode.
        phrase = translate(phrase, current, flip[current])
        emit(phrase)
    if not callback:
        return results
print x + sys.stdout.flush(); + + + babelize("I won't take that sort of treatment from you, or from your doggie!", + 'english', 'french', callback = printer) + diff --git a/plugins/Http.py b/plugins/Http.py index 6827f0e5a..633639693 100644 --- a/plugins/Http.py +++ b/plugins/Http.py @@ -45,6 +45,7 @@ import urllib2 import utils import debug import privmsgs +import babelfish import callbacks import structures @@ -363,7 +364,56 @@ class Http(callbacks.Privmsg): (numberOfPackages, len(responses), ', '.join(responses)) irc.reply(msg, s) + _abbrevs = utils.abbrev(map(str.lower, babelfish.available_languages)) + def translate(self, irc, msg, args): + """ + Returns the phrase translated to the new language. One of the + languages must be English. + """ + (flang, tlang, phrase) = privmsgs.getArgs(args, 3) + flang = str.lower(flang) + tlang = str.lower(tlang) + if self._abbrevs.has_key(flang): + flang = self._abbrevs[flang] + if self._abbrevs.has_key(tlang): + tlang = self._abbrevs[tlang] + try: + trans = babelfish.translate(phrase, flang, tlang) + irc.reply(msg, trans) + except babelfish.LanguageNotAvailableError, e: + irc.reply(msg, 'Valid languages: %s.' %\ + ', '.join(babelfish.available_languages)) + except babelfish.BabelizerIOError, e: + irc.reply(msg, e.args[0]) + except babelfish.BabelfishChangedError, e: + irc.reply(msg, 'Babelfish has foiled our plans by changing their\ + format') + + def babelize(self, irc, msg, args): + """ + + Returns the phrase translated to the new language. This is done 12 + times, or until the output doesn't change anymore. + """ + (flang, tlang, phrase) = privmsgs.getArgs(args, 3) + flang = str.lower(flang) + tlang = str.lower(tlang) + if self._abbrevs.has_key(flang): + flang = self._abbrevs[flang] + if self._abbrevs.has_key(tlang): + tlang = self._abbrevs[tlang] + try: + trans = babelfish.babelize(phrase, flang, tlang) + irc.reply(msg, trans[-1]) + except babelfish.LanguageNotAvailableError, e: + irc.reply(msg, 'Valid languages: %s.' 
%\ + ', '.join(babelfish.available_languages)) + except babelfish.BabelizerIOError, e: + irc.reply(msg, e.args[0]) + except babelfish.BabelfishChangedError, e: + irc.reply(msg, 'Babelfish has foiled our plans by changing their\ + format') Class = Http