Split messages if necessary (discord) (#2124)

* Implement and test byte-splitting helper function

* Implement discord botuser message splitting

* Implement discord webhooks message splitting
This commit is contained in:
Ben Wiederhake 2024-05-24 00:14:45 +02:00 committed by GitHub
parent 0bb521512a
commit 4bf1c0450c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 264 additions and 83 deletions

View File

@ -121,6 +121,7 @@ type Protocol struct {
MessageLength int // IRC, max length of a message allowed
MessageQueue int // IRC, size of message queue for flood control
MessageSplit bool // IRC, split long messages with newlines on MessageLength instead of clipping
MessageSplitMaxCount int // discord, split long messages into at most this many messages instead of clipping (MessageLength=1950 cannot be configured)
Muc string // xmpp
MxID string // matrix
Name string // all protocols

View File

@ -316,6 +316,7 @@ func (b *Bdiscord) handleEventBotUser(msg *config.Message, channelID string) (st
// Upload a file if it exists
if msg.Extra != nil {
for _, rmsg := range helper.HandleExtra(msg, b.General) {
// TODO: Use ClipOrSplitMessage
rmsg.Text = helper.ClipMessage(rmsg.Text, MessageLength, b.GetString("MessageClipped"))
if _, err := b.c.ChannelMessageSend(channelID, rmsg.Username+rmsg.Text); err != nil {
b.Log.Errorf("Could not send message %#v: %s", rmsg, err)
@ -327,17 +328,32 @@ func (b *Bdiscord) handleEventBotUser(msg *config.Message, channelID string) (st
}
}
msg.Text = helper.ClipMessage(msg.Text, MessageLength, b.GetString("MessageClipped"))
msg.Text = b.replaceUserMentions(msg.Text)
// Edit message
if msg.ID != "" {
_, err := b.c.ChannelMessageEdit(channelID, msg.ID, msg.Username+msg.Text)
return msg.ID, err
// Exploit that a discord message ID is actually just a large number, and we encode a list of IDs by separating them with ";".
var msgIds = strings.Split(msg.ID, ";")
msgParts := helper.ClipOrSplitMessage(b.replaceUserMentions(msg.Text), MessageLength, b.GetString("MessageClipped"), len(msgIds))
for len(msgParts) < len(msgIds) {
msgParts = append(msgParts, "((obsoleted by edit))")
}
for i := range msgParts {
// In case of split-messages where some parts remain the same (i.e. only a typo-fix in a huge message), this causes some noop-updates.
// TODO: Optimize away noop-updates of un-edited messages
// TODO: Use RemoteNickFormat instead of this broken concatenation
_, err := b.c.ChannelMessageEdit(channelID, msgIds[i], msg.Username+msgParts[i])
if err != nil {
return "", err
}
}
return msg.ID, nil
}
msgParts := helper.ClipOrSplitMessage(b.replaceUserMentions(msg.Text), MessageLength, b.GetString("MessageClipped"), b.GetInt("MessageSplitMaxCount"))
var msgIds = []string{}
for _, msgPart := range msgParts {
m := discordgo.MessageSend{
Content: msg.Username + msg.Text,
Content: msg.Username + msgPart,
AllowedMentions: b.getAllowedMentions(),
}
@ -354,8 +370,11 @@ func (b *Bdiscord) handleEventBotUser(msg *config.Message, channelID string) (st
if err != nil {
return "", err
}
msgIds = append(msgIds, res.ID)
}
return res.ID, nil
// Exploit that a discord message ID is actually just a large number, so we encode a list of IDs by separating them with ";".
return strings.Join(msgIds, ";"), nil
}
// handleUploadFile handles native upload of files

View File

@ -2,6 +2,7 @@ package bdiscord
import (
"bytes"
"strings"
"github.com/42wim/matterbridge/bridge/config"
"github.com/42wim/matterbridge/bridge/helper"
@ -42,13 +43,65 @@ func (b *Bdiscord) maybeGetLocalAvatar(msg *config.Message) string {
return ""
}
func (b *Bdiscord) webhookSendTextOnly(msg *config.Message, channelID string) (string, error) {
msgParts := helper.ClipOrSplitMessage(msg.Text, MessageLength, b.GetString("MessageClipped"), b.GetInt("MessageSplitMaxCount"))
var msgIds = []string{}
for _, msgPart := range msgParts {
res, err := b.transmitter.Send(
channelID,
&discordgo.WebhookParams{
Content: msgPart,
Username: msg.Username,
AvatarURL: msg.Avatar,
AllowedMentions: b.getAllowedMentions(),
},
)
if err != nil {
return "", err
} else {
msgIds = append(msgIds, res.ID)
}
}
// Exploit that a discord message ID is actually just a large number, so we encode a list of IDs by separating them with ";".
return strings.Join(msgIds, ";"), nil
}
func (b *Bdiscord) webhookSendFilesOnly(msg *config.Message, channelID string) error {
for _, f := range msg.Extra["file"] {
fi := f.(config.FileInfo)
file := discordgo.File{
Name: fi.Name,
ContentType: "",
Reader: bytes.NewReader(*fi.Data),
}
content := fi.Comment
// Cannot use the resulting ID for any edits anyway, so throw it away.
// This has to be re-enabled when we implement message deletion.
_, err := b.transmitter.Send(
channelID,
&discordgo.WebhookParams{
Username: msg.Username,
AvatarURL: msg.Avatar,
Files: []*discordgo.File{&file},
Content: content,
AllowedMentions: b.getAllowedMentions(),
},
)
if err != nil {
b.Log.Errorf("Could not send file %#v for message %#v: %s", file, msg, err)
return err
}
}
return nil
}
// webhookSend send one or more message via webhook, taking care of file
// uploads (from slack, telegram or mattermost).
// Returns messageID and error.
func (b *Bdiscord) webhookSend(msg *config.Message, channelID string) (*discordgo.Message, error) {
func (b *Bdiscord) webhookSend(msg *config.Message, channelID string) (string, error) {
var (
res *discordgo.Message
res2 *discordgo.Message
res string
err error
)
@ -61,48 +114,11 @@ func (b *Bdiscord) webhookSend(msg *config.Message, channelID string) (*discordg
// We can't send empty messages.
if msg.Text != "" {
res, err = b.transmitter.Send(
channelID,
&discordgo.WebhookParams{
Content: msg.Text,
Username: msg.Username,
AvatarURL: msg.Avatar,
AllowedMentions: b.getAllowedMentions(),
},
)
if err != nil {
b.Log.Errorf("Could not send text (%s) for message %#v: %s", msg.Text, msg, err)
}
res, err = b.webhookSendTextOnly(msg, channelID)
}
if msg.Extra != nil {
for _, f := range msg.Extra["file"] {
fi := f.(config.FileInfo)
file := discordgo.File{
Name: fi.Name,
ContentType: "",
Reader: bytes.NewReader(*fi.Data),
}
content := fi.Comment
res2, err = b.transmitter.Send(
channelID,
&discordgo.WebhookParams{
Username: msg.Username,
AvatarURL: msg.Avatar,
Files: []*discordgo.File{&file},
Content: content,
AllowedMentions: b.getAllowedMentions(),
},
)
if err != nil {
b.Log.Errorf("Could not send file %#v for message %#v: %s", file, msg, err)
}
}
}
if msg.Text == "" {
res = res2
if err == nil && msg.Extra != nil {
err = b.webhookSendFilesOnly(msg, channelID)
}
return res, err
@ -120,35 +136,44 @@ func (b *Bdiscord) handleEventWebhook(msg *config.Message, channelID string) (st
return "", nil
}
msg.Text = helper.ClipMessage(msg.Text, MessageLength, b.GetString("MessageClipped"))
msg.Text = b.replaceUserMentions(msg.Text)
// discord username must be [0..32] max
if len(msg.Username) > 32 {
msg.Username = msg.Username[0:32]
}
if msg.ID != "" {
// Exploit that a discord message ID is actually just a large number, and we encode a list of IDs by separating them with ";".
var msgIds = strings.Split(msg.ID, ";")
msgParts := helper.ClipOrSplitMessage(b.replaceUserMentions(msg.Text), MessageLength, b.GetString("MessageClipped"), len(msgIds))
for len(msgParts) < len(msgIds) {
msgParts = append(msgParts, "((obsoleted by edit))")
}
b.Log.Debugf("Editing webhook message")
err := b.transmitter.Edit(channelID, msg.ID, &discordgo.WebhookParams{
Content: msg.Text,
var edit_err error = nil
for i := range msgParts {
// In case of split-messages where some parts remain the same (i.e. only a typo-fix in a huge message), this causes some noop-updates.
// TODO: Optimize away noop-updates of un-edited messages
edit_err = b.transmitter.Edit(channelID, msgIds[i], &discordgo.WebhookParams{
Content: msgParts[i],
Username: msg.Username,
AllowedMentions: b.getAllowedMentions(),
})
if err == nil {
if edit_err != nil {
break
}
}
if edit_err == nil {
return msg.ID, nil
}
b.Log.Errorf("Could not edit webhook message: %s", err)
b.Log.Errorf("Could not edit webhook message(s): %s; sending as new message(s) instead", edit_err)
}
b.Log.Debugf("Processing webhook sending for message %#v", msg)
discordMsg, err := b.webhookSend(msg, channelID)
msg.Text = b.replaceUserMentions(msg.Text)
msgId, err := b.webhookSend(msg, channelID)
if err != nil {
b.Log.Errorf("Could not broadcast via webhook for message %#v: %s", msg, err)
b.Log.Errorf("Could not broadcast via webhook for message %#v: %s", msgId, err)
return "", err
}
if discordMsg == nil {
return "", nil
}
return discordMsg.ID, nil
return msgId, nil
}

View File

@ -229,6 +229,33 @@ func ClipMessage(text string, length int, clippingMessage string) string {
return text
}
func ClipOrSplitMessage(text string, length int, clippingMessage string, splitMax int) []string {
var msgParts []string
var remainingText = text
// Invariant of this splitting loop: No text is lost (msgParts+remainingText is the original text),
// and all parts is guaranteed to satisfy the length requirement.
for len(msgParts) < splitMax - 1 && len(remainingText) > length {
// Decision: The text needs to be split (again).
var chunk string
var wasted = 0
// The longest UTF-8 encoding of a valid rune is 4 bytes (0xF4 0x8F 0xBF 0xBF, encoding U+10FFFF),
// so we should never need to waste 4 or more bytes at a time.
for wasted < 4 && wasted < length {
chunk = remainingText[:length - wasted]
if r, _ := utf8.DecodeLastRuneInString(chunk); r == utf8.RuneError {
wasted += 1
} else {
break
}
}
// Note: At this point, "chunk" might still be invalid, if "text" is very broken.
msgParts = append(msgParts, chunk)
remainingText = remainingText[len(chunk):]
}
msgParts = append(msgParts, ClipMessage(remainingText, length, clippingMessage))
return msgParts
}
// ParseMarkdown takes in an input string as markdown and parses it to html
func ParseMarkdown(input string) string {
extensions := parser.HardLineBreak | parser.NoIntraEmphasis | parser.FencedCode

View File

@ -134,3 +134,105 @@ func TestConvertWebPToPNG(t *testing.T) {
t.Fail()
}
}
var clippingOrSplittingTestCases = map[string]struct {
inputText string
clipSplitLength int
clippingMessage string
splitMax int
expectedOutput []string
}{
"Short single-line message, split 3": {
inputText: "short",
clipSplitLength: 20,
clippingMessage: "?!?!",
splitMax: 3,
expectedOutput: []string{"short"},
},
"Short single-line message, split 1": {
inputText: "short",
clipSplitLength: 20,
clippingMessage: "?!?!",
splitMax: 1,
expectedOutput: []string{"short"},
},
"Short single-line message, split 0": {
// Mainly check that we don't crash.
inputText: "short",
clipSplitLength: 20,
clippingMessage: "?!?!",
splitMax: 0,
expectedOutput: []string{"short"},
},
"Long single-line message, noclip": {
inputText: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
clipSplitLength: 50,
clippingMessage: "?!?!",
splitMax: 10,
expectedOutput: []string{
"Lorem ipsum dolor sit amet, consectetur adipiscing",
" elit, sed do eiusmod tempor incididunt ut labore ",
"et dolore magna aliqua.",
},
},
"Long single-line message, noclip tight": {
inputText: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
clipSplitLength: 50,
clippingMessage: "?!?!",
splitMax: 3,
expectedOutput: []string{
"Lorem ipsum dolor sit amet, consectetur adipiscing",
" elit, sed do eiusmod tempor incididunt ut labore ",
"et dolore magna aliqua.",
},
},
"Long single-line message, clip custom": {
inputText: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
clipSplitLength: 50,
clippingMessage: "?!?!",
splitMax: 2,
expectedOutput: []string{
"Lorem ipsum dolor sit amet, consectetur adipiscing",
" elit, sed do eiusmod tempor incididunt ut lab?!?!",
},
},
"Long single-line message, clip built-in": {
inputText: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
clipSplitLength: 50,
clippingMessage: "",
splitMax: 2,
expectedOutput: []string{
"Lorem ipsum dolor sit amet, consectetur adipiscing",
" elit, sed do eiusmod tempor inc <clipped message>",
},
},
"Short multi-line message": {
inputText: "I\ncan't\nget\nno\nsatisfaction!",
clipSplitLength: 50,
clippingMessage: "",
splitMax: 2,
expectedOutput: []string{"I\ncan't\nget\nno\nsatisfaction!"},
},
"Long message containing UTF-8 multi-byte runes": {
inputText: "人人生而自由,在尊嚴和權利上一律平等。 他們都具有理性和良知,應該以兄弟情誼的精神對待彼此。",
clipSplitLength: 50,
clippingMessage: "",
splitMax: 10,
expectedOutput: []string{
"人人生而自由,在尊嚴和權利上一律", // Note: only 48 bytes!
"平等。 他們都具有理性和良知,應該", // Note: only 49 bytes!
"以兄弟情誼的精神對待彼此。",
},
},
}
func TestClipOrSplitMessage(t *testing.T) {
for testname, testcase := range clippingOrSplittingTestCases {
actualOutput := ClipOrSplitMessage(testcase.inputText, testcase.clipSplitLength, testcase.clippingMessage, testcase.splitMax)
assert.Equalf(t, testcase.expectedOutput, actualOutput, "'%s' testcase should give expected lines with clipping+splitting.", testname)
for _, splitLine := range testcase.expectedOutput {
byteLength := len([]byte(splitLine))
assert.True(t, byteLength <= testcase.clipSplitLength, "Splitted line '%s' of testcase '%s' should not exceed the maximum byte-length (%d vs. %d).", splitLine, testname, testcase.clipSplitLength, byteLength)
}
}
}

View File

@ -929,6 +929,13 @@ SyncTopic=false
# Default "<clipped message>"
MessageClipped="<clipped message>"
# Before clipping, try to split messages into at most this many parts. 0 is treated like 1.
# Be careful with large numbers, as this might cause flooding.
# Example: A maximum telegram message of 4096 bytes is received. This requires 3 Discord
# messages (each capped at a hardcoded 1950 bytes).
# Default 1
MessageSplitMaxCount=3
###################################################################
#telegram section
###################################################################