Fix IRC line splitting. Closes #584 (#587)

This commit is contained in:
Duco van Amstel 2018-11-14 21:43:52 +00:00 committed by Wim
parent 09713d40ba
commit 85564a35fd
5 changed files with 158 additions and 91 deletions

View File

@ -2,9 +2,7 @@ package config
import (
"bytes"
"fmt"
"io/ioutil"
"os"
"strings"
"sync"
"time"
@ -316,7 +314,6 @@ type TestConfig struct {
func (c *TestConfig) GetBool(key string) (bool, bool) {
val, ok := c.Overrides[key]
fmt.Fprintln(os.Stderr, "DEBUG:", c.Overrides, key, ok, val)
if ok {
return val.(bool), true
}

View File

@ -40,16 +40,42 @@ func DownloadFileAuth(url string, auth string) (*[]byte, error) {
return &data, nil
}
func SplitStringLength(input string, length int) string {
a := []rune(input)
str := ""
for i, r := range a {
str += string(r)
if i > 0 && (i+1)%length == 0 {
str += "\n"
// GetSubLines splits messages in newline-delimited lines. If maxLineLength is
// specified as non-zero GetSubLines will and also clip long lines to the
// maximum length and insert a warning marker that the line was clipped.
//
// TODO: The current implementation has the inconvenient that it disregards
// word boundaries when splitting but this is hard to solve without potentially
// breaking formatting and other stylistic effects.
func GetSubLines(message string, maxLineLength int) []string {
const clippingMessage = " <clipped message>"
var lines []string
for _, line := range strings.Split(strings.TrimSpace(message), "\n") {
if maxLineLength == 0 || len([]byte(line)) <= maxLineLength {
lines = append(lines, line)
continue
}
// !!! WARNING !!!
// Before touching the splitting logic below please ensure that you PROPERLY
// understand how strings, runes and range loops over strings work in Go.
// A good place to start is to read https://blog.golang.org/strings. :-)
var splitStart int
var startOfPreviousRune int
for i := range line {
if i-splitStart > maxLineLength-len([]byte(clippingMessage)) {
lines = append(lines, line[splitStart:startOfPreviousRune]+clippingMessage)
splitStart = startOfPreviousRune
}
startOfPreviousRune = i
}
// This last append is safe to do without looking at the remaining byte-length
// as we assume that the byte-length of the last rune will never exceed that of
// the byte-length of the clipping message.
lines = append(lines, line[splitStart:])
}
return str
return lines
}
// handle all the stuff we put into extra

View File

@ -0,0 +1,105 @@
package helper
import (
"testing"
"github.com/stretchr/testify/assert"
)
const testLineLength = 64
var (
lineSplittingTestCases = map[string]struct {
input string
splitOutput []string
nonSplitOutput []string
}{
"Short single-line message": {
input: "short",
splitOutput: []string{"short"},
nonSplitOutput: []string{"short"},
},
"Long single-line message": {
input: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
splitOutput: []string{
"Lorem ipsum dolor sit amet, consectetur adipis <clipped message>",
"cing elit, sed do eiusmod tempor incididunt ut <clipped message>",
" labore et dolore magna aliqua.",
},
nonSplitOutput: []string{"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."},
},
"Short multi-line message": {
input: "I\ncan't\nget\nno\nsatisfaction!",
splitOutput: []string{
"I",
"can't",
"get",
"no",
"satisfaction!",
},
nonSplitOutput: []string{
"I",
"can't",
"get",
"no",
"satisfaction!",
},
},
"Long multi-line message": {
input: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.\n" +
"Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.\n" +
"Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.\n" +
"Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.",
splitOutput: []string{
"Lorem ipsum dolor sit amet, consectetur adipis <clipped message>",
"cing elit, sed do eiusmod tempor incididunt ut <clipped message>",
" labore et dolore magna aliqua.",
"Ut enim ad minim veniam, quis nostrud exercita <clipped message>",
"tion ullamco laboris nisi ut aliquip ex ea com <clipped message>",
"modo consequat.",
"Duis aute irure dolor in reprehenderit in volu <clipped message>",
"ptate velit esse cillum dolore eu fugiat nulla <clipped message>",
" pariatur.",
"Excepteur sint occaecat cupidatat non proident <clipped message>",
", sunt in culpa qui officia deserunt mollit an <clipped message>",
"im id est laborum.",
},
nonSplitOutput: []string{
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
"Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.",
"Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.",
"Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.",
},
},
"Message ending with new-line.": {
input: "Newline ending\n",
splitOutput: []string{"Newline ending"},
nonSplitOutput: []string{"Newline ending"},
},
"Long message containing UTF-8 multi-byte runes": {
input: "不布人個我此而及單石業喜資富下我河下日沒一我臺空達的常景便物沒為……子大我別名解成?生賣的全直黑,我自我結毛分洲了世當,是政福那是東;斯說",
splitOutput: []string{
"不布人個我此而及單石業喜資富下 <clipped message>",
"我河下日沒一我臺空達的常景便物 <clipped message>",
"沒為……子大我別名解成?生賣的 <clipped message>",
"全直黑,我自我結毛分洲了世當, <clipped message>",
"是政福那是東;斯說",
},
nonSplitOutput: []string{"不布人個我此而及單石業喜資富下我河下日沒一我臺空達的常景便物沒為……子大我別名解成?生賣的全直黑,我自我結毛分洲了世當,是政福那是東;斯說"},
},
}
)
func TestGetSubLines(t *testing.T) {
for testname, testcase := range lineSplittingTestCases {
splitLines := GetSubLines(testcase.input, testLineLength)
assert.Equalf(t, testcase.splitOutput, splitLines, "'%s' testcase should give expected lines with splitting.", testname)
for _, splitLine := range splitLines {
byteLength := len([]byte(splitLine))
assert.True(t, byteLength <= testLineLength, "Splitted line '%s' of testcase '%s' should not exceed the maximum byte-length (%d vs. %d).", splitLine, testcase, byteLength, testLineLength)
}
nonSplitLines := GetSubLines(testcase.input, 0)
assert.Equalf(t, testcase.nonSplitOutput, nonSplitLines, "'%s' testcase should give expected lines without splitting.", testname)
}
}

View File

@ -1,61 +0,0 @@
package birc
import (
"strings"
)
/*
func tableformatter(nicks []string, nicksPerRow int, continued bool) string {
result := "|IRC users"
if continued {
result = "|(continued)"
}
for i := 0; i < 2; i++ {
for j := 1; j <= nicksPerRow && j <= len(nicks); j++ {
if i == 0 {
result += "|"
} else {
result += ":-|"
}
}
result += "\r\n|"
}
result += nicks[0] + "|"
for i := 1; i < len(nicks); i++ {
if i%nicksPerRow == 0 {
result += "\r\n|" + nicks[i] + "|"
} else {
result += nicks[i] + "|"
}
}
return result
}
*/
func plainformatter(nicks []string) string {
return strings.Join(nicks, ", ") + " currently on IRC"
}
func IsMarkup(message string) bool {
switch message[0] {
case '|':
fallthrough
case '#':
fallthrough
case '_':
fallthrough
case '*':
fallthrough
case '~':
fallthrough
case '-':
fallthrough
case ':':
fallthrough
case '>':
fallthrough
case '=':
return true
}
return false
}

View File

@ -13,7 +13,6 @@ import (
"strconv"
"strings"
"time"
"unicode/utf8"
"github.com/42wim/matterbridge/bridge"
"github.com/42wim/matterbridge/bridge/config"
@ -21,8 +20,11 @@ import (
"github.com/dfordsoft/golib/ic"
"github.com/lrstanley/girc"
"github.com/paulrosania/go-charset/charset"
_ "github.com/paulrosania/go-charset/data"
"github.com/saintfish/chardet"
// We need to import the 'data' package as an implicit dependency.
// See: https://godoc.org/github.com/paulrosania/go-charset/charset
_ "github.com/paulrosania/go-charset/data"
)
type Birc struct {
@ -222,25 +224,23 @@ func (b *Birc) Send(msg config.Message) (string, error) {
}
}
// split long messages on messageLength, to avoid clipped messages #281
var msgLines []string
if b.GetBool("MessageSplit") {
msg.Text = helper.SplitStringLength(msg.Text, b.MessageLength)
msgLines = helper.GetSubLines(msg.Text, b.MessageLength)
} else {
msgLines = helper.GetSubLines(msg.Text, 0)
}
for _, text := range strings.Split(msg.Text, "\n") {
if len(text) > b.MessageLength {
text = text[:b.MessageLength-len(" <message clipped>")]
if r, size := utf8.DecodeLastRuneInString(text); r == utf8.RuneError {
text = text[:len(text)-size]
}
text += " <message clipped>"
}
if len(b.Local) < b.MessageQueue {
if len(b.Local) == b.MessageQueue-1 {
text += " <message clipped>"
}
b.Local <- config.Message{Text: text, Username: msg.Username, Channel: msg.Channel, Event: msg.Event}
} else {
for i := range msgLines {
if len(b.Local) >= b.MessageQueue {
b.Log.Debugf("flooding, dropping message (queue at %d)", len(b.Local))
return "", nil
}
b.Local <- config.Message{
Text: msgLines[i],
Username: msg.Username,
Channel: msg.Channel,
Event: msg.Event,
}
}
return "", nil
@ -462,5 +462,5 @@ func (b *Birc) storeNames(client *girc.Client, event girc.Event) {
}
func (b *Birc) formatnicks(nicks []string) string {
return plainformatter(nicks)
return strings.Join(nicks, ", ") + " currently on IRC"
}