diff --git a/bridge/helper/helper.go b/bridge/helper/helper.go index 0208dff1..db3b54a8 100644 --- a/bridge/helper/helper.go +++ b/bridge/helper/helper.go @@ -219,6 +219,33 @@ func ClipMessage(text string, length int, clippingMessage string) string { return text } +func ClipOrSplitMessage(text string, length int, clippingMessage string, splitMax int) []string { + var msgParts []string + var remainingText = text + // Invariant of this splitting loop: No text is lost (msgParts+remainingText is the original text), + // and all parts is guaranteed to satisfy the length requirement. + for len(msgParts) < splitMax - 1 && len(remainingText) > length { + // Decision: The text needs to be split (again). + var chunk string + var wasted = 0 + // The longest UTF-8 encoding of a valid rune is 4 bytes (0xF4 0x8F 0xBF 0xBF, encoding U+10FFFF), + // so we should never need to waste 4 or more bytes at a time. + for wasted < 4 && wasted < length { + chunk = remainingText[:length - wasted] + if r, _ := utf8.DecodeLastRuneInString(chunk); r == utf8.RuneError { + wasted += 1 + } else { + break + } + } + // Note: At this point, "chunk" might still be invalid, if "text" is very broken. + msgParts = append(msgParts, chunk) + remainingText = remainingText[len(chunk):] + } + msgParts = append(msgParts, ClipMessage(remainingText, length, clippingMessage)) + return msgParts +} + // ParseMarkdown takes in an input string as markdown and parses it to html func ParseMarkdown(input string) string { extensions := parser.HardLineBreak | parser.NoIntraEmphasis | parser.FencedCode diff --git a/bridge/helper/helper_test.go b/bridge/helper/helper_test.go index 76e548e4..d486d647 100644 --- a/bridge/helper/helper_test.go +++ b/bridge/helper/helper_test.go @@ -125,3 +125,105 @@ func TestConvertWebPToPNG(t *testing.T) { t.Fail() } } + +var clippingOrSplittingTestCases = map[string]struct { + inputText string + clipSplitLength int + clippingMessage string + splitMax int + expectedOutput []string +}{ + "Short single-line message, split 3": { + inputText: "short", + clipSplitLength: 20, + clippingMessage: "?!?!", + splitMax: 3, + expectedOutput: []string{"short"}, + }, + "Short single-line message, split 1": { + inputText: "short", + clipSplitLength: 20, + clippingMessage: "?!?!", + splitMax: 1, + expectedOutput: []string{"short"}, + }, + "Short single-line message, split 0": { + // Mainly check that we don't crash. + inputText: "short", + clipSplitLength: 20, + clippingMessage: "?!?!", + splitMax: 0, + expectedOutput: []string{"short"}, + }, + "Long single-line message, noclip": { + inputText: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.", + clipSplitLength: 50, + clippingMessage: "?!?!", + splitMax: 10, + expectedOutput: []string{ + "Lorem ipsum dolor sit amet, consectetur adipiscing", + " elit, sed do eiusmod tempor incididunt ut labore ", + "et dolore magna aliqua.", + }, + }, + "Long single-line message, noclip tight": { + inputText: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.", + clipSplitLength: 50, + clippingMessage: "?!?!", + splitMax: 3, + expectedOutput: []string{ + "Lorem ipsum dolor sit amet, consectetur adipiscing", + " elit, sed do eiusmod tempor incididunt ut labore ", + "et dolore magna aliqua.", + }, + }, + "Long single-line message, clip custom": { + inputText: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.", + clipSplitLength: 50, + clippingMessage: "?!?!", + splitMax: 2, + expectedOutput: []string{ + "Lorem ipsum dolor sit amet, consectetur adipiscing", + " elit, sed do eiusmod tempor incididunt ut lab?!?!", + }, + }, + "Long single-line message, clip built-in": { + inputText: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.", + clipSplitLength: 50, + clippingMessage: "", + splitMax: 2, + expectedOutput: []string{ + "Lorem ipsum dolor sit amet, consectetur adipiscing", + " elit, sed do eiusmod tempor inc ", + }, + }, + "Short multi-line message": { + inputText: "I\ncan't\nget\nno\nsatisfaction!", + clipSplitLength: 50, + clippingMessage: "", + splitMax: 2, + expectedOutput: []string{"I\ncan't\nget\nno\nsatisfaction!"}, + }, + "Long message containing UTF-8 multi-byte runes": { + inputText: "人人生而自由,在尊嚴和權利上一律平等。 他們都具有理性和良知,應該以兄弟情誼的精神對待彼此。", + clipSplitLength: 50, + clippingMessage: "", + splitMax: 10, + expectedOutput: []string{ + "人人生而自由,在尊嚴和權利上一律", // Note: only 48 bytes! + "平等。 他們都具有理性和良知,應該", // Note: only 49 bytes! + "以兄弟情誼的精神對待彼此。", + }, + }, +} + +func TestClipOrSplitMessage(t *testing.T) { + for testname, testcase := range clippingOrSplittingTestCases { + actualOutput := ClipOrSplitMessage(testcase.inputText, testcase.clipSplitLength, testcase.clippingMessage, testcase.splitMax) + assert.Equalf(t, testcase.expectedOutput, actualOutput, "'%s' testcase should give expected lines with clipping+splitting.", testname) + for _, splitLine := range testcase.expectedOutput { + byteLength := len([]byte(splitLine)) + assert.True(t, byteLength <= testcase.clipSplitLength, "Splitted line '%s' of testcase '%s' should not exceed the maximum byte-length (%d vs. %d).", splitLine, testname, testcase.clipSplitLength, byteLength) + } + } +}