mirror of
https://github.com/42wim/matterbridge.git
synced 2025-07-07 14:07:31 +02:00
Implement and test byte-splitting helper function
This commit is contained in:
parent
56e7bd01ca
commit
05dddc42ff
@ -219,6 +219,33 @@ func ClipMessage(text string, length int, clippingMessage string) string {
|
||||
return text
|
||||
}
|
||||
|
||||
func ClipOrSplitMessage(text string, length int, clippingMessage string, splitMax int) []string {
|
||||
var msgParts []string
|
||||
var remainingText = text
|
||||
// Invariant of this splitting loop: No text is lost (msgParts+remainingText is the original text),
|
||||
// and all parts is guaranteed to satisfy the length requirement.
|
||||
for len(msgParts) < splitMax - 1 && len(remainingText) > length {
|
||||
// Decision: The text needs to be split (again).
|
||||
var chunk string
|
||||
var wasted = 0
|
||||
// The longest UTF-8 encoding of a valid rune is 4 bytes (0xF4 0x8F 0xBF 0xBF, encoding U+10FFFF),
|
||||
// so we should never need to waste 4 or more bytes at a time.
|
||||
for wasted < 4 && wasted < length {
|
||||
chunk = remainingText[:length - wasted]
|
||||
if r, _ := utf8.DecodeLastRuneInString(chunk); r == utf8.RuneError {
|
||||
wasted += 1
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
// Note: At this point, "chunk" might still be invalid, if "text" is very broken.
|
||||
msgParts = append(msgParts, chunk)
|
||||
remainingText = remainingText[len(chunk):]
|
||||
}
|
||||
msgParts = append(msgParts, ClipMessage(remainingText, length, clippingMessage))
|
||||
return msgParts
|
||||
}
|
||||
|
||||
// ParseMarkdown takes in an input string as markdown and parses it to html
|
||||
func ParseMarkdown(input string) string {
|
||||
extensions := parser.HardLineBreak | parser.NoIntraEmphasis | parser.FencedCode
|
||||
|
@ -125,3 +125,105 @@ func TestConvertWebPToPNG(t *testing.T) {
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
|
||||
var clippingOrSplittingTestCases = map[string]struct {
|
||||
inputText string
|
||||
clipSplitLength int
|
||||
clippingMessage string
|
||||
splitMax int
|
||||
expectedOutput []string
|
||||
}{
|
||||
"Short single-line message, split 3": {
|
||||
inputText: "short",
|
||||
clipSplitLength: 20,
|
||||
clippingMessage: "?!?!",
|
||||
splitMax: 3,
|
||||
expectedOutput: []string{"short"},
|
||||
},
|
||||
"Short single-line message, split 1": {
|
||||
inputText: "short",
|
||||
clipSplitLength: 20,
|
||||
clippingMessage: "?!?!",
|
||||
splitMax: 1,
|
||||
expectedOutput: []string{"short"},
|
||||
},
|
||||
"Short single-line message, split 0": {
|
||||
// Mainly check that we don't crash.
|
||||
inputText: "short",
|
||||
clipSplitLength: 20,
|
||||
clippingMessage: "?!?!",
|
||||
splitMax: 0,
|
||||
expectedOutput: []string{"short"},
|
||||
},
|
||||
"Long single-line message, noclip": {
|
||||
inputText: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
|
||||
clipSplitLength: 50,
|
||||
clippingMessage: "?!?!",
|
||||
splitMax: 10,
|
||||
expectedOutput: []string{
|
||||
"Lorem ipsum dolor sit amet, consectetur adipiscing",
|
||||
" elit, sed do eiusmod tempor incididunt ut labore ",
|
||||
"et dolore magna aliqua.",
|
||||
},
|
||||
},
|
||||
"Long single-line message, noclip tight": {
|
||||
inputText: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
|
||||
clipSplitLength: 50,
|
||||
clippingMessage: "?!?!",
|
||||
splitMax: 3,
|
||||
expectedOutput: []string{
|
||||
"Lorem ipsum dolor sit amet, consectetur adipiscing",
|
||||
" elit, sed do eiusmod tempor incididunt ut labore ",
|
||||
"et dolore magna aliqua.",
|
||||
},
|
||||
},
|
||||
"Long single-line message, clip custom": {
|
||||
inputText: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
|
||||
clipSplitLength: 50,
|
||||
clippingMessage: "?!?!",
|
||||
splitMax: 2,
|
||||
expectedOutput: []string{
|
||||
"Lorem ipsum dolor sit amet, consectetur adipiscing",
|
||||
" elit, sed do eiusmod tempor incididunt ut lab?!?!",
|
||||
},
|
||||
},
|
||||
"Long single-line message, clip built-in": {
|
||||
inputText: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
|
||||
clipSplitLength: 50,
|
||||
clippingMessage: "",
|
||||
splitMax: 2,
|
||||
expectedOutput: []string{
|
||||
"Lorem ipsum dolor sit amet, consectetur adipiscing",
|
||||
" elit, sed do eiusmod tempor inc <clipped message>",
|
||||
},
|
||||
},
|
||||
"Short multi-line message": {
|
||||
inputText: "I\ncan't\nget\nno\nsatisfaction!",
|
||||
clipSplitLength: 50,
|
||||
clippingMessage: "",
|
||||
splitMax: 2,
|
||||
expectedOutput: []string{"I\ncan't\nget\nno\nsatisfaction!"},
|
||||
},
|
||||
"Long message containing UTF-8 multi-byte runes": {
|
||||
inputText: "人人生而自由,在尊嚴和權利上一律平等。 他們都具有理性和良知,應該以兄弟情誼的精神對待彼此。",
|
||||
clipSplitLength: 50,
|
||||
clippingMessage: "",
|
||||
splitMax: 10,
|
||||
expectedOutput: []string{
|
||||
"人人生而自由,在尊嚴和權利上一律", // Note: only 48 bytes!
|
||||
"平等。 他們都具有理性和良知,應該", // Note: only 49 bytes!
|
||||
"以兄弟情誼的精神對待彼此。",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
func TestClipOrSplitMessage(t *testing.T) {
|
||||
for testname, testcase := range clippingOrSplittingTestCases {
|
||||
actualOutput := ClipOrSplitMessage(testcase.inputText, testcase.clipSplitLength, testcase.clippingMessage, testcase.splitMax)
|
||||
assert.Equalf(t, testcase.expectedOutput, actualOutput, "'%s' testcase should give expected lines with clipping+splitting.", testname)
|
||||
for _, splitLine := range testcase.expectedOutput {
|
||||
byteLength := len([]byte(splitLine))
|
||||
assert.True(t, byteLength <= testcase.clipSplitLength, "Splitted line '%s' of testcase '%s' should not exceed the maximum byte-length (%d vs. %d).", splitLine, testname, testcase.clipSplitLength, byteLength)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user