From cb7a39544440eb4ccbbf9fe9ee26c79ab6c19f88 Mon Sep 17 00:00:00 2001
From: Valentin Lorentz
Date: Thu, 11 Mar 2021 19:02:55 +0100
Subject: [PATCH] irclib: truncate outgoing messages to 512 bytes, not 512 chars

---
# Reviewer notes (commentary only: text between the "---" marker and the
# first "diff --git" line is not part of the commit message or the diff).
#
# * src/irclib.py: the over-length check and the truncation now operate on
#   msg_rest_str.encode() (bytes) instead of on the str itself, so the
#   limit is counted in bytes rather than characters.
# * Only the part of the line after the tags (msg_rest_str) is measured and
#   cut; msg_tags_str is prepended unchanged (see the "TODO: truncate tags"
#   context line).
# * The byte slice can end inside a multi-byte character; the partial
#   character is dropped by decode(errors="ignore"), so the payload may be
#   shorter than MAX_LINE_SIZE-2 bytes.
# * msg._len is still computed as len(str(msg)) (unchanged context line),
#   i.e. a character count -- NOTE(review): confirm no caller expects bytes.
# * test/test_irclib.py: assuming MAX_LINE_SIZE is 512 (as the added comment
#   states) and that 'é' encodes to 2 bytes, the '#test2' case leaves 494
#   payload bytes (cut falls between characters) and the '#test' case leaves
#   495 (cut falls in the middle of a character).
 src/irclib.py       | 17 +++++++++++++++--
 test/test_irclib.py | 23 +++++++++++++++++++++++
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/src/irclib.py b/src/irclib.py
index a5bcd2702..701dd1fb9 100644
--- a/src/irclib.py
+++ b/src/irclib.py
@@ -1306,7 +1306,8 @@ class Irc(IrcCommandDispatcher, log.Firewalled):
             else:
                 msg_tags_str = ''
                 msg_rest_str = msg_str
-            if len(msg_rest_str) > MAX_LINE_SIZE:
+            msg_rest_bytes = msg_rest_str.encode()
+            if len(msg_rest_bytes) > MAX_LINE_SIZE:
                 # Yes, this violates the contract, but at this point it doesn't
                 # matter. That's why we gotta go munging in private attributes
                 #
@@ -1315,7 +1316,19 @@ class Irc(IrcCommandDispatcher, log.Firewalled):
                 # this issue, there's no fundamental reason to make it a
                 # warning.
                 log.debug('Truncating %r, message is too long.', msg)
-                msg._str = msg_tags_str + msg_rest_str[:MAX_LINE_SIZE-2] + '\r\n'
+
+                # Truncate to 512 bytes (minus 2 for '\r\n')
+                msg_rest_bytes = msg_rest_bytes[:MAX_LINE_SIZE-2]
+
+                # The above truncation may have truncated in the middle of a
+                # multi-byte character.
+                # I was about to write a UTF-8 decoder here just to trim them
+                # properly, but fortunately there is a neat trick to trim it
+                # while decoding: just ignore invalid bytes!
+                # https://stackoverflow.com/a/1820949/539465
+                msg_rest_str = msg_rest_bytes.decode(errors="ignore")
+
+                msg._str = msg_tags_str + msg_rest_str + '\r\n'
                 msg._len = len(str(msg))
                 # TODO: truncate tags
 
diff --git a/test/test_irclib.py b/test/test_irclib.py
index 0e7851d6c..0d870e93b 100644
--- a/test/test_irclib.py
+++ b/test/test_irclib.py
@@ -1084,6 +1084,29 @@ class IrcTestCase(SupyTestCase):
         self.assertEqual(msg5.tagged('batch'), outer)
         self.assertEqual(self.irc.state.getParentBatches(msg5), [outer])
 
+    def testTruncate(self):
+        self.irc = irclib.Irc('test')
+
+        while self.irc.takeMsg():
+            pass
+
+        # over 512 bytes
+        msg = ircmsgs.IrcMsg(command='PRIVMSG', args=('#test2', 'é'*256))
+        self.irc.sendMsg(msg)
+        m = self.irc.takeMsg()
+        remaining_payload = 'é' * ((512 - len('PRIVMSG #test2 :\r\n'))//2)
+        self.assertEqual(
+            str(m), 'PRIVMSG #test2 :%s\r\n' % remaining_payload)
+
+        # over 512 bytes, make sure it doesn't truncate in the middle of
+        # a character
+        msg = ircmsgs.IrcMsg(command='PRIVMSG', args=('#test', 'é'*256))
+        self.irc.sendMsg(msg)
+        m = self.irc.takeMsg()
+        remaining_payload = 'é' * ((512 - len('PRIVMSG #test :\r\n'))//2)
+        self.assertEqual(
+            str(m), 'PRIVMSG #test :%s\r\n' % remaining_payload)
+
 
 class SaslTestCase(SupyTestCase, CapNegMixin):
     def setUp(self):