diff --git a/plugins/Babelfish/README.txt b/plugins/Babelfish/README.txt new file mode 100644 index 000000000..d60b47a97 --- /dev/null +++ b/plugins/Babelfish/README.txt @@ -0,0 +1 @@ +Insert a description of your plugin here, with any notes, etc. about using it. diff --git a/plugins/Babelfish/__init__.py b/plugins/Babelfish/__init__.py new file mode 100644 index 000000000..bf148c812 --- /dev/null +++ b/plugins/Babelfish/__init__.py @@ -0,0 +1,54 @@ +### +# Copyright (c) 2004, Jeremiah Fincher +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +### + +""" +Babelfish-related commands. +""" + +import supybot +import supybot.world as world + +__author__ = supybot.authors.jamessan + +# This is a dictionary mapping supybot.Author instances to lists of +# contributions. +__contributors__ = {} + +import config +import plugin +reload(plugin) # In case we're being reloaded. + +if world.testing: + import test + +Class = plugin.Class +configure = config.configure + + +# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: diff --git a/plugins/Babelfish/babelfish.py b/plugins/Babelfish/babelfish.py new file mode 100644 index 000000000..1e917a344 --- /dev/null +++ b/plugins/Babelfish/babelfish.py @@ -0,0 +1,175 @@ +# babelizer.py - API for simple access to babelfish.altavista.com. +# Requires python 2.0 or better. +# +# See it in use at http://babel.MrFeinberg.com/ + +"""API for simple access to babelfish.altavista.com. + +Summary: + + import babelizer + + print ' '.join(babelizer.available_languages) + + print babelizer.translate( 'How much is that doggie in the window?', + 'English', 'French' ) + + def babel_callback(phrase): + print phrase + sys.stdout.flush() + + babelizer.babelize( 'I love a reigning knight.', + 'English', 'German', + callback = babel_callback ) + +available_languages + A list of languages available for use with babelfish. + +translate( phrase, from_lang, to_lang ) + Uses babelfish to translate phrase from from_lang to to_lang. 
+ +babelize(phrase, from_lang, through_lang, limit = 12, callback = None) + Uses babelfish to translate back and forth between from_lang and + through_lang until either no more changes occur in translation or + limit iterations have been reached, whichever comes first. Takes + an optional callback function which should receive a single + parameter, being the next translation. Without the callback + returns a list of successive translations. + +It's only guaranteed to work if 'english' is one of the two languages +given to either of the translation methods. + +Both translation methods throw exceptions which are all subclasses of +BabelizerError. They include + +LanguageNotAvailableError + Thrown on an attempt to use an unknown language. + +BabelfishChangedError + Thrown when babelfish.altavista.com changes some detail of their + layout, and babelizer can no longer parse the results or submit + the correct form (a not infrequent occurrence). + +BabelizerIOError + Thrown for various networking and IO errors. + +Version: $Id: babelfish.py,v 1.8 2004/08/12 15:04:52 jamessan Exp $ +Author: Jonathan Feinberg +""" +import re, string, urllib + +""" +Various patterns I have encountered in looking for the babelfish result. +We try each of them in turn, based on the relative number of times I've +seen each of these patterns. $1.00 to anyone who can provide a heuristic +for knowing which one to use. This includes AltaVista employees. 
+""" +__where = [ re.compile(r'lang=..>([^<]*)'), + re.compile(r'div style=padding:10px;>([^<]+)') + end = html.index('') + html = html[begin:end] + except ValueError: + pass + for regex in __where: + match = regex.search(html) + if match: + break + if not match: + raise BabelfishChangedError("Can't recognize translated string.") + return clean(match.group(1)) + +def babelize(phrase, from_language, through_language, limit = 12, callback = None): + phrase = clean(phrase) + seen = { phrase: 1 } + results = [] + if callback: + def_callback = callback + else: + def_callback = results.append + def_callback(phrase) + flip = { from_language: through_language, through_language: from_language } + next = from_language + for i in range(limit): + phrase = translate(phrase, next, flip[next]) + if seen.has_key(phrase): + break + seen[phrase] = 1 + def_callback(phrase) + next = flip[next] + # next is set to the language of the last entry. this should be the same + # as the language we are translating to + if next != through_language: + phrase = translate(phrase, next, flip[next]) + def_callback(phrase) + if not callback: + return results + +if __name__ == '__main__': + import sys + def printer(x): + print x + sys.stdout.flush(); + + + babelize("I won't take that sort of treatment from you, or from your doggie!", + 'english', 'french', callback = printer) + diff --git a/plugins/Babelfish/config.py b/plugins/Babelfish/config.py new file mode 100644 index 000000000..b6eb1b0d5 --- /dev/null +++ b/plugins/Babelfish/config.py @@ -0,0 +1,60 @@ +### +# Copyright (c) 2004, Jeremiah Fincher +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +### + +import supybot.conf as conf +import supybot.registry as registry + +def configure(advanced): + # This will be called by supybot to configure this module. advanced is + # a bool that specifies whether the user identified himself as an advanced + # user or not. You should effect your configuration by manipulating the + # registry as appropriate. 
+ from supybot.questions import expect, anything, something, yn + conf.registerPlugin('Babelfish', True) + + +class Languages(registry.OnlySomeStrings): + validStrings = tuple(map(str.capitalize, babelfish.available_languages)) + normalize = staticmethod(str.capitalize) + +class SpaceSeparatedListOfLanguages(registry.SeparatedListOf): + List = sets.Set + Value = Languages + def splitter(self, s): + return s.split() + joiner = ' '.join + +Babelfish = conf.registerPlugin('Babelfish') +conf.registerChannelValue(Babelfish, 'languages', + SpaceSeparatedListOfLanguages(babelfish.available_languages, """Determines + which languages are available for translation; valid input is a list of + languages separated by spaces.""")) + + +# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78 diff --git a/plugins/Babelfish/plugin.py b/plugins/Babelfish/plugin.py new file mode 100644 index 000000000..fe5ddcc7a --- /dev/null +++ b/plugins/Babelfish/plugin.py @@ -0,0 +1,169 @@ +### +# Copyright (c) 2002-2004, James Vega +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +### + +import supybot + +import sets +import random +from itertools import imap + +import babelfish + +import supybot.conf as conf +import supybot.utils as utils +from supybot.commands import * +import supybot.registry as registry +import supybot.callbacks as callbacks + +class Babelfish(callbacks.Privmsg): + threaded = True + _abbrevs = utils.abbrev(imap(str.lower, babelfish.available_languages)) + _abbrevs['de'] = 'german' + _abbrevs['jp'] = 'japanese' + _abbrevs['kr'] = 'korean' + _abbrevs['es'] = 'spanish' + _abbrevs['pt'] = 'portuguese' + _abbrevs['it'] = 'italian' + _abbrevs['zh'] = 'chinese' + for language in babelfish.available_languages: + _abbrevs[language] = language + + def _getLang(self, fromLang, toLang, chan): + fromLang = self._abbrevs[fromLang.lower()] + toLang = self._abbrevs[toLang.lower()] + languages = map(str.lower, self.registryValue('languages',chan)) + if fromLang not in languages: + fromLang = None + if toLang not in languages: + toLang = None + return (fromLang, toLang) + + def languages(self, irc, msg, args): + """takes no arguments + + Returns the languages that Babelfish can translate to/from. 
+ """ + irc.reply(utils.commaAndify(babelfish.available_languages)) + + def translate(self, irc, msg, args, fromLang, toLang, text): + """ [to] + + Returns translated from into . + Beware that translating to or from languages that use multi-byte + characters may result in some very odd results. + """ + chan = msg.args[0] + try: + (fromLang, toLang) = self._getLang(fromLang, toLang, chan) + if not fromLang or not toLang: + langs = self.registryValue('languages', chan) + if not langs: + irc.error('I do not speak any other languages.') + return + else: + irc.error('I only speak %s.' % utils.commaAndify(langs)) + return + translation = babelfish.translate(text, fromLang, toLang) + irc.reply(utils.htmlToText(translation)) + except (KeyError, babelfish.LanguageNotAvailableError), e: + languages = self.registryValue('languages', chan) + if languages: + languages = 'Valid languages include %s' % \ + utils.commaAndify(sorted(languages)) + else: + languages = 'I do not speak any other languages.' + irc.errorInvalid('language', str(e), languages) + except babelfish.BabelizerIOError, e: + irc.error(str(e)) + except babelfish.BabelfishChangedError, e: + irc.error('Babelfish has foiled our plans by changing its ' + 'webpage format.') + translate = wrap(translate, ['something', 'to', 'something', 'text']) + + def babelize(self, irc, msg, args, fromLang, toLang, text): + """ + + Translates repeatedly between and + until it doesn't change anymore or 12 times, whichever is fewer. One + of the languages must be English. + """ + chan = msg.args[0] + try: + (fromLang, toLang) = self._getLang(fromLang, toLang, chan) + if fromLang != 'english' and toLang != 'english': + irc.error('One language in babelize must be English.') + return + if not fromLang or not toLang: + langs = self.registryValue('languages', chan) + if not langs: + irc.error('I do not speak any other languages.') + return + else: + irc.error('I only speak %s.' 
% utils.commaAndify(langs, + And='or')) + return + translations = babelfish.babelize(text, fromLang, toLang) + irc.reply(utils.htmlToText(translations[-1])) + except (KeyError, babelfish.LanguageNotAvailableError), e: + languages = self.registryValue('languages', chan) + if languages: + languages = 'Valid languages include %s' % \ + utils.commaAndify(sorted(languages)) + else: + languages = 'I do not speak any other languages.' + irc.errorInvalid('language', str(e), languages) + except babelfish.BabelizerIOError, e: + irc.reply(e) + except babelfish.BabelfishChangedError, e: + irc.reply('Babelfish has foiled our plans by changing its ' + 'webpage format.') + babelize = wrap(babelize, ['something', 'something', 'text']) + + def randomlanguage(self, irc, msg, args, optlist): + """[--allow-english] + + Returns a random language supported by babelfish. If --allow-english + is provided, will include English in the list of possible languages. + """ + allowEnglish = False + for (option, arg) in optlist: + if option == 'allow-english': + allowEnglish = True + languages = self.registryValue('languages', msg.args[0]) + if not languages: + irc.error('I can\'t speak any other languages.', Raise=True) + language = random.choice(languages) + while not allowEnglish and language == 'English': + language = random.choice(languages) + irc.reply(language) + randomlanguage = wrap(randomlanguage, [getopts({'allow-english': ''})]) + +Class = Babelfish + +# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: diff --git a/plugins/Babelfish/test.py b/plugins/Babelfish/test.py new file mode 100644 index 000000000..d88c6cdc2 --- /dev/null +++ b/plugins/Babelfish/test.py @@ -0,0 +1,80 @@ +### +# Copyright (c) 2002-2004, Jeremiah Fincher +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
###

from supybot.test import *

# Exercises the Babelfish plugin's translate, babelize and randomlanguage
# commands, including the per-channel 'languages' restriction.
class BabelFishTestCase(PluginTestCase, PluginDocumentation):
    plugins = ('Babelfish',)
    if network:
        def testTranslate(self):
            self.assertResponse('translate en sp food',
                                'alimento')
            self.assertResponse('translate en to sp food',
                                'alimento')
            self.assertError('translate foo en food')
            self.assertError('translate en foo food')

        def testBabelize(self):
            self.assertNotError('babelize en sp foo')
            self.assertError('babelize sp fr foo')
            self.assertResponse('babelize german english sprache', 'Language')

        def testRandomlanguage(self):
            self.assertNotError('randomlanguage')
            # Read the original value before entering the try block so that a
            # failure here is reported as itself rather than as a NameError
            # raised from the finally clause.
            orig = conf.supybot.plugins.Babelfish.languages()
            try:
                conf.supybot.plugins.Babelfish.languages.setValue([])
                self.assertError('randomlanguage')
            finally:
                conf.supybot.plugins.Babelfish.languages.setValue(orig)

        def testDisabledLanguages(self):
            langs = conf.supybot.plugins.Babelfish.languages
            orig = langs()
            try:
                langs.setValue(['Spanish', 'English'])
                self.assertResponse('translate sp en hola', 'hello')
                langs.setValue([])
                self.assertRegexp('translate sp en hola', 'do not speak')
                self.assertRegexp('translate en sp hola', 'do not speak')
                langs.setValue(['Spanish', 'Italian'])
                self.assertRegexp('translate sp en hola', 'only speak')
                self.assertRegexp('translate en it hello', 'only speak')
                langs.setValue(['English', 'Italian'])
                self.assertResponse('translate en it hello', 'ciao')
            finally:
                langs.setValue(orig)

        def testHtmlToText(self):
            # The replies must contain a plain apostrophe, not the HTML
            # entity the service would send for it.
            self.assertNotRegexp('translate fr en Qu\'y', '&#39;')
            self.assertNotRegexp('babelize fr en Qu\'y', '&#39;')


# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
