Limnoria/plugins/String/plugin.py
James Lu b1cfb87e71 String: allow multi-character inputs in "ord" and "unicodename"
This allows them to support emoji ZWJ sequences, which render like one character but are actually multiple.
2022-10-06 08:31:22 +02:00

268 lines
9.6 KiB
Python

###
# Copyright (c) 2003-2005, Jeremiah Fincher
# Copyright (c) 2008-2009, James McCoy
# Copyright (c) 2010-2021, Valentin Lorentz
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
import re
import sys
import types
import codecs
import base64
import binascii
import unicodedata
import supybot.utils as utils
from supybot.commands import *
import supybot.utils.minisix as minisix
import supybot.plugins as plugins
import supybot.commands as commands
import supybot.ircutils as ircutils
import supybot.callbacks as callbacks
from supybot.i18n import PluginInternationalization
_ = PluginInternationalization('String')
import multiprocessing
class String(callbacks.Plugin):
"""Provides useful commands for manipulating characters and strings."""
def ord(self, irc, msg, args, s):
"""<string>
Returns the unicode codepoint of characters in <string>.
"""
irc.replies([str(ord(char)) for char in s])
ord = wrap(ord, ['text'])
def chr(self, irc, msg, args, i):
"""<number>
Returns the unicode character associated with codepoint <number>
"""
try:
irc.reply(chr(i), stripCtcp=False)
except ValueError:
irc.error(_('That number doesn\'t map to a unicode character.'))
chr = wrap(chr, ['int'])
def unicodename(self, irc, msg, args, s):
"""<string>
Returns the name of characters in <string>.
This will error if any character is not a valid Unicode character."""
replies = []
for idx, char in enumerate(s):
try:
replies.append(unicodedata.name(char))
except ValueError:
irc.error(_('No name found for character %r at position %d.') %
(char, idx), Raise=True)
irc.replies(replies)
unicodename = wrap(unicodename, ['text'])
def unicodesearch(self, irc, msg, args, name):
"""<name>
Searches for a unicode character from its <name>."""
try:
irc.reply(unicodedata.lookup(name))
except KeyError:
irc.error(_('No character found with this name.'))
unicodesearch = wrap(unicodesearch, ['text'])
def encode(self, irc, msg, args, encoding, text):
"""<encoding> <text>
Returns an encoded form of the given text; the valid encodings are
available in the documentation of the Python codecs module:
<http://docs.python.org/library/codecs.html#standard-encodings>.
"""
# Binary codecs are prefixed with _codec in Python 3
if encoding in 'base64 bz2 hex quopri uu zlib':
encoding += '_codec'
if encoding.endswith('_codec'):
text = text.encode()
# Do the encoding
try:
encoder = codecs.getencoder(encoding)
except LookupError:
irc.errorInvalid(_('encoding'), encoding)
text = encoder(text)[0]
# If this is a binary codec, re-encode it with base64
if encoding.endswith('_codec') and encoding != 'base64_codec':
text = codecs.getencoder('base64_codec')(text)[0].decode()
# Change result into a string
if minisix.PY2 and isinstance(text, unicode):
text = text.encode('utf-8')
elif minisix.PY3 and isinstance(text, bytes):
text = text.decode()
if encoding in ('base64', 'base64_codec'):
text = text.replace('\n', '')
# Reply
irc.reply(text.rstrip('\n'))
encode = wrap(encode, ['something', 'text'])
def decode(self, irc, msg, args, encoding, text):
"""<encoding> <text>
Returns an un-encoded form of the given text; the valid encodings are
available in the documentation of the Python codecs module:
<http://docs.python.org/library/codecs.html#standard-encodings>.
"""
# Binary codecs are prefixed with _codec in Python 3
if encoding in 'base64 bz2 hex quopri uu zlib':
encoding += '_codec'
# If this is a binary codec, pre-decode it with base64
if encoding.endswith('_codec') and encoding != 'base64_codec':
text = codecs.getdecoder('base64_codec')(text.encode())[0]
# Do the decoding
try:
decoder = codecs.getdecoder(encoding)
except LookupError:
irc.errorInvalid(_('encoding'), encoding)
if minisix.PY3 and not isinstance(text, bytes):
text = text.encode()
try:
text = decoder(text)[0]
except binascii.Error:
irc.errorInvalid(_('base64 string'),
s=_('Base64 strings must be a multiple of 4 in '
'length, padded with \'=\' if necessary.'))
return
# Change result into a string
if minisix.PY2 and isinstance(text, unicode):
text = text.encode('utf-8')
elif minisix.PY3 and isinstance(text, bytes):
try:
text = text.decode()
except UnicodeDecodeError:
pass
# Reply
irc.reply(text)
decode = wrap(decode, ['something', 'text'])
def levenshtein(self, irc, msg, args, s1, s2):
"""<string1> <string2>
Returns the levenshtein distance (also known as the "edit distance"
between <string1> and <string2>)
"""
max = self.registryValue('levenshtein.max')
if len(s1) > max or len(s2) > max:
irc.error(_('Levenshtein distance is a complicated algorithm, try '
'it with some smaller inputs.'))
else:
irc.reply(str(utils.str.distance(s1, s2)))
levenshtein = thread(wrap(levenshtein, ['something', 'text']))
def soundex(self, irc, msg, args, text, length):
"""<string> [<length>]
Returns the Soundex hash to a given length. The length defaults to
4, since that's the standard length for a soundex hash. For unlimited
length, use 0. Maximum length 1024.
"""
if length > 1024:
irc.error("Maximum allowed length is 1024.")
return
irc.reply(utils.str.soundex(text, length))
soundex = wrap(soundex, ['somethingWithoutSpaces', additional('int', 4)])
def len(self, irc, msg, args, text):
"""<text>
Returns the length of <text>.
"""
irc.reply(str(len(text)))
len = wrap(len, ['text'])
def re(self, irc, msg, args, f, text):
"""<regexp> <text>
If <regexp> is of the form m/regexp/flags, returns the portion of
<text> that matches the regexp. If <regexp> is of the form
s/regexp/replacement/flags, returns the result of applying such a
regexp to <text>.
"""
if f('') and len(f(' ')) > len(f(''))+1: # Matches the empty string.
s = _('You probably don\'t want to match the empty string.')
irc.error(s)
else:
t = self.registryValue('re.timeout')
try:
v = process(f, text, timeout=t, pn=self.name(), cn='re')
if isinstance(v, list):
v = format('%L', v)
irc.reply(v)
except commands.ProcessTimeoutError as e:
irc.error("ProcessTimeoutError: %s" % (e,))
except re.error as e:
irc.error(e.args[0])
re = thread(wrap(re, [first('regexpMatcherMany', 'regexpReplacer'),
'text']))
def xor(self, irc, msg, args, password, text):
"""<password> <text>
Returns <text> XOR-encrypted with <password>.
"""
chars = utils.iter.cycle(password)
ret = [chr(ord(c) ^ ord(next(chars))) for c in text]
irc.reply(''.join(ret))
xor = wrap(xor, ['something', 'text'])
def md5(self, irc, msg, args, text):
"""<text>
Returns the md5 hash of a given string.
"""
irc.reply(utils.crypt.md5(text.encode('utf8')).hexdigest())
md5 = wrap(md5, ['text'])
def sha(self, irc, msg, args, text):
"""<text>
Returns the SHA1 hash of a given string.
"""
irc.reply(utils.crypt.sha(text.encode('utf8')).hexdigest())
sha = wrap(sha, ['text'])
Class = String
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: