Do not break UTF-8 characters in long words. Closes GH-1333.

This commit is contained in:
Valentin Lorentz 2018-04-14 22:31:30 +02:00
parent cd479717b8
commit 0d627c05b7
4 changed files with 25 additions and 11 deletions

View File

@@ -941,8 +941,7 @@ class NestedCommandsIrcProxy(ReplyIrcProxy):
stripCtcp=stripCtcp)
sendMsg(m)
return m
msgs = ircutils.wrap(s, allowedLength,
break_long_words=True)
msgs = ircutils.wrap(s, allowedLength)
msgs.reverse()
instant = conf.get(conf.supybot.reply.mores.instant,target)
while instant > 1 and msgs:

View File

@@ -596,10 +596,9 @@ class FormatParser(object):
else:
self.ungetChar(c)
def wrap(s, length, break_on_hyphens = False, break_long_words = False):
def wrap(s, length, break_on_hyphens = False):
processed = []
chunks = utils.str.byteTextWrap(s, length,
break_long_words=break_long_words)
chunks = utils.str.byteTextWrap(s, length)
context = None
for chunk in chunks:
if context is not None:

View File

@@ -306,7 +306,21 @@ def perlVariableSubstitute(vars, text):
return '$' + unbraced
return _perlVarSubstituteRe.sub(replacer, text)
def byteTextWrap(text, size, break_on_hyphens=False, break_long_words=True):
def splitBytes(word, size):
    """Split `word` in two so that the first part is at most `size` long,
    without cutting a UTF-8 character in half.

    `word` is expected to be a UTF-8 encoded byte string. The split point
    is moved back from `size` by up to three bytes until it no longer falls
    on a UTF-8 continuation byte.

    Returns a `(before, after)` tuple such that `before + after == word`.

    If no character boundary is found within that window (the data is not
    valid UTF-8), or if honouring the boundary would leave `before` empty
    (`size` is smaller than the first character), the word is split at
    exactly `size` so that the caller always makes progress.
    """
    if not isinstance(word, bytes):
        # Text strings are indexed by character, so every offset is already
        # a character boundary.
        return (word[0:size], word[size:])
    if size <= 0 or size >= len(word):
        # Nothing to adjust: one of the two halves is the whole word.
        return (word[0:size], word[size:])
    # A character takes at most 4 bytes in UTF-8, so a boundary is at most
    # 3 bytes before `size`. Never go below 1: an empty `before` would make
    # the caller push the same word back forever.
    lowest = max(size - 3, 1)
    for cut in range(size, lowest - 1, -1):
        # Slicing (rather than indexing) yields a length-1 byte string on
        # both Python 2 and Python 3, which ord() accepts on both.
        # Continuation bytes have the form 0b10xxxxxx.
        if ord(word[cut:cut + 1]) & 0xC0 != 0x80:
            return (word[0:cut], word[cut:])
    # No boundary in range: fall back to a plain byte split.
    return (word[0:size], word[size:])
def byteTextWrap(text, size, break_on_hyphens=False):
"""Similar to textwrap.wrap(), but considers the size of strings (in bytes)
instead of their length (in characters)."""
try:
@@ -320,8 +334,9 @@ def byteTextWrap(text, size, break_on_hyphens=False, break_long_words=True):
while words:
word = words.pop(-1)
if len(word) > size:
words.append(word[size:])
word = word[0:size]
(before, after) = splitBytes(word, size)
words.append(after)
word = before
if len(lines[-1]) + len(word) <= size:
lines[-1] += word
else:

View File

@ -222,9 +222,10 @@ class FunctionsTestCase(SupyTestCase):
self.assertEqual(''.join(r), s)
s = uchr(233)*500
print(repr(ircutils.wrap(s, 500)))
import supybot.utils.str
print(repr(supybot.utils.str.byteTextWrap(s, 500)))
r = ircutils.wrap(s, 500)
self.assertTrue(max(map(pred, r)) <= 500)
r = ircutils.wrap(s, 139)
self.assertTrue(max(map(pred, r)) <= 139)
def testSafeArgument(self):