String: allow multi-character inputs in "ord" and "unicodename"

This allows them to support emoji ZWJ sequences, which render as a single character but actually consist of multiple code points.
This commit is contained in:
James Lu 2022-10-05 18:40:03 -07:00 committed by Val Lorentz
parent 35bf599856
commit b1cfb87e71
2 changed files with 29 additions and 18 deletions

View File

@ -51,13 +51,13 @@ import multiprocessing
class String(callbacks.Plugin):
"""Provides useful commands for manipulating characters and strings."""
def ord(self, irc, msg, args, letter):
"""<letter>
def ord(self, irc, msg, args, s):
"""<string>
Returns the unicode codepoint of <letter>.
Returns the unicode codepoint of characters in <string>.
"""
irc.reply(str(ord(letter)))
ord = wrap(ord, ['letter'])
irc.replies([str(ord(char)) for char in s])
ord = wrap(ord, ['text'])
def chr(self, irc, msg, args, i):
"""<number>
@ -70,17 +70,20 @@ class String(callbacks.Plugin):
irc.error(_('That number doesn\'t map to a unicode character.'))
chr = wrap(chr, ['int'])
def unicodename(self, irc, msg, args, character):
"""<character>
def unicodename(self, irc, msg, args, s):
"""<string>
Returns the name of the given unicode <character>."""
if len(character) != 1:
irc.errorInvalid('character', character)
try:
irc.reply(unicodedata.name(character))
except ValueError:
irc.error(_('No name found for this character.'))
unicodename = wrap(unicodename, ['something'])
Returns the name of characters in <string>.
This will error if any character is not a valid Unicode character."""
replies = []
for idx, char in enumerate(s):
try:
replies.append(unicodedata.name(char))
except ValueError:
irc.error(_('No name found for character %r at position %d.') %
(char, idx), Raise=True)
irc.replies(replies)
unicodename = wrap(unicodename, ['text'])
def unicodesearch(self, irc, msg, args, name):
"""<name>

View File

@ -103,13 +103,21 @@ class StringTestCase(PluginTestCase):
for c in map(chr, range(256)):
i = ord(c)
self.assertResponse('ord %s' % utils.str.dqrepr(c), str(i))
self.assertResponse('ord é', '233')
self.assertResponse('ord 🆒', '127378')
self.assertResponse('ord 🇦🇶', '127462 and 127478')
def testUnicode(self):
self.assertResponse('unicodename ☃', 'SNOWMAN')
self.assertResponse('unicodesearch SNOWMAN', '')
#self.assertResponse('unicodename ?',
# 'No name found for this character.')
self.assertResponse('unicodename ?', 'QUESTION MARK')
# multi-char strings and ZWJ sequences
self.assertResponse('unicodename :O', 'COLON and LATIN CAPITAL LETTER O')
self.assertResponse('unicodename 🤷‍♂️', 'SHRUG, ZERO WIDTH JOINER, MALE SIGN, and VARIATION SELECTOR-16')
self.assertError('unicodename "\\uFFFF"')
self.assertError('unicodename "!@#\\uFFFF$"')
self.assertResponse('unicodesearch FOO',
'Error: No character found with this name.')