Do not break UTF-8 characters in long words. Closes GH-1333.

This commit is contained in:
Valentin Lorentz 2018-04-14 22:31:30 +02:00
parent cd479717b8
commit 0d627c05b7
4 changed files with 25 additions and 11 deletions

View File

@@ -941,8 +941,7 @@ class NestedCommandsIrcProxy(ReplyIrcProxy):
stripCtcp=stripCtcp) stripCtcp=stripCtcp)
sendMsg(m) sendMsg(m)
return m return m
msgs = ircutils.wrap(s, allowedLength, msgs = ircutils.wrap(s, allowedLength)
break_long_words=True)
msgs.reverse() msgs.reverse()
instant = conf.get(conf.supybot.reply.mores.instant,target) instant = conf.get(conf.supybot.reply.mores.instant,target)
while instant > 1 and msgs: while instant > 1 and msgs:

View File

@@ -596,10 +596,9 @@ class FormatParser(object):
else: else:
self.ungetChar(c) self.ungetChar(c)
def wrap(s, length, break_on_hyphens = False, break_long_words = False): def wrap(s, length, break_on_hyphens = False):
processed = [] processed = []
chunks = utils.str.byteTextWrap(s, length, chunks = utils.str.byteTextWrap(s, length)
break_long_words=break_long_words)
context = None context = None
for chunk in chunks: for chunk in chunks:
if context is not None: if context is not None:

View File

@@ -306,7 +306,21 @@ def perlVariableSubstitute(vars, text):
return '$' + unbraced return '$' + unbraced
return _perlVarSubstituteRe.sub(replacer, text) return _perlVarSubstituteRe.sub(replacer, text)
def byteTextWrap(text, size, break_on_hyphens=False, break_long_words=True): def splitBytes(word, size):
# I'm going to hell for this function
for i in range(4): # a character takes at most 4 bytes in UTF-8
try:
if sys.version_info[0] >= 3:
word[size-i:].decode()
else:
word[size-i:].encode('utf8')
except UnicodeDecodeError:
continue
else:
return (word[0:size-i], word[size-i:])
assert False, (word, size)
def byteTextWrap(text, size, break_on_hyphens=False):
"""Similar to textwrap.wrap(), but considers the size of strings (in bytes) """Similar to textwrap.wrap(), but considers the size of strings (in bytes)
instead of their length (in characters).""" instead of their length (in characters)."""
try: try:
@@ -320,8 +334,9 @@ def byteTextWrap(text, size, break_on_hyphens=False, break_long_words=True):
while words: while words:
word = words.pop(-1) word = words.pop(-1)
if len(word) > size: if len(word) > size:
words.append(word[size:]) (before, after) = splitBytes(word, size)
word = word[0:size] words.append(after)
word = before
if len(lines[-1]) + len(word) <= size: if len(lines[-1]) + len(word) <= size:
lines[-1] += word lines[-1] += word
else: else:

View File

@@ -222,9 +222,10 @@ class FunctionsTestCase(SupyTestCase):
self.assertEqual(''.join(r), s) self.assertEqual(''.join(r), s)
s = uchr(233)*500 s = uchr(233)*500
print(repr(ircutils.wrap(s, 500))) r = ircutils.wrap(s, 500)
import supybot.utils.str self.assertTrue(max(map(pred, r)) <= 500)
print(repr(supybot.utils.str.byteTextWrap(s, 500))) r = ircutils.wrap(s, 139)
self.assertTrue(max(map(pred, r)) <= 139)
def testSafeArgument(self): def testSafeArgument(self):