From ad3ad97047cc8e52479fabea3c596dcf5bfefe2d Mon Sep 17 00:00:00 2001 From: Shivaram Lingamneni Date: Wed, 14 Jun 2023 02:46:14 -0400 Subject: [PATCH] upgrade to irc-go v0.4.0 --- go.mod | 2 +- go.sum | 4 +- irc/channel.go | 6 +- irc/handlers.go | 2 +- .../ergochat/irc-go/ircfmt/ircfmt.go | 194 +++++++++++++----- .../ergochat/irc-go/ircmsg/message.go | 2 +- .../ergochat/irc-go/ircmsg/unicode.go | 29 +++ .../ergochat/irc-go/ircutils/unicode.go | 19 +- vendor/modules.txt | 2 +- 9 files changed, 178 insertions(+), 82 deletions(-) create mode 100644 vendor/github.com/ergochat/irc-go/ircmsg/unicode.go diff --git a/go.mod b/go.mod index 557d3cb4..73fedb42 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815 github.com/ergochat/confusables v0.0.0-20201108231250-4ab98ab61fb1 github.com/ergochat/go-ident v0.0.0-20200511222032-830550b1d775 - github.com/ergochat/irc-go v0.2.0 + github.com/ergochat/irc-go v0.4.0 github.com/go-sql-driver/mysql v1.7.0 github.com/go-test/deep v1.0.6 // indirect github.com/gofrs/flock v0.8.1 diff --git a/go.sum b/go.sum index ea18fcc0..cd853995 100644 --- a/go.sum +++ b/go.sum @@ -10,8 +10,8 @@ github.com/ergochat/confusables v0.0.0-20201108231250-4ab98ab61fb1 h1:WLHTOodthV github.com/ergochat/confusables v0.0.0-20201108231250-4ab98ab61fb1/go.mod h1:mov+uh1DPWsltdQnOdzn08UO9GsJ3MEvhtu0Ci37fdk= github.com/ergochat/go-ident v0.0.0-20200511222032-830550b1d775 h1:QSJIdpr3HOzJDPwxT7hp7WbjoZcS+5GqVvsBscqChk0= github.com/ergochat/go-ident v0.0.0-20200511222032-830550b1d775/go.mod h1:d2qvgjD0TvGNSvUs+mZgX090RiJlrzUYW6vtANGOy3A= -github.com/ergochat/irc-go v0.2.0 h1:3vHdy4c56UTY6+/rTBrQc1fmt32N5G8PrEZacJDOr+E= -github.com/ergochat/irc-go v0.2.0/go.mod h1:2vi7KNpIPWnReB5hmLpl92eMywQvuIeIIGdt/FQCph0= +github.com/ergochat/irc-go v0.4.0 h1:0YibCKfAAtwxQdNjLQd9xpIEPisLcJ45f8FNsMHAuZc= +github.com/ergochat/irc-go v0.4.0/go.mod h1:2vi7KNpIPWnReB5hmLpl92eMywQvuIeIIGdt/FQCph0= 
github.com/ergochat/scram v1.0.2-ergo1 h1:2bYXiRFQH636pT0msOG39fmEYl4Eq+OuutcyDsCix/g= github.com/ergochat/scram v1.0.2-ergo1/go.mod h1:1WAq6h33pAW+iRreB34OORO2Nf7qel3VV3fjBj+hCSs= github.com/ergochat/websocket v1.4.2-oragono1 h1:plMUunFBM6UoSCIYCKKclTdy/TkkHfUslhOfJQzfueM= diff --git a/irc/channel.go b/irc/channel.go index 7836f79f..4680958c 100644 --- a/irc/channel.go +++ b/irc/channel.go @@ -13,7 +13,7 @@ import ( "sync" - "github.com/ergochat/irc-go/ircutils" + "github.com/ergochat/irc-go/ircmsg" "github.com/ergochat/ergo/irc/caps" "github.com/ergochat/ergo/irc/datastore" @@ -1191,7 +1191,7 @@ func (channel *Channel) SetTopic(client *Client, topic string, rb *ResponseBuffe return } - topic = ircutils.TruncateUTF8Safe(topic, client.server.Config().Limits.TopicLen) + topic = ircmsg.TruncateUTF8Safe(topic, client.server.Config().Limits.TopicLen) channel.stateMutex.Lock() chname := channel.name @@ -1450,7 +1450,7 @@ func (channel *Channel) Kick(client *Client, target *Client, comment string, rb return } - comment = ircutils.TruncateUTF8Safe(comment, channel.server.Config().Limits.KickLen) + comment = ircmsg.TruncateUTF8Safe(comment, channel.server.Config().Limits.KickLen) message := utils.MakeMessage(comment) details := client.Details() diff --git a/irc/handlers.go b/irc/handlers.go index 71c724ef..f3db4fe3 100644 --- a/irc/handlers.go +++ b/irc/handlers.go @@ -451,7 +451,7 @@ func awayHandler(server *Server, client *Client, msg ircmsg.Message, rb *Respons var awayMessage string if len(msg.Params) > 0 { awayMessage = msg.Params[0] - awayMessage = ircutils.TruncateUTF8Safe(awayMessage, server.Config().Limits.AwayLen) + awayMessage = ircmsg.TruncateUTF8Safe(awayMessage, server.Config().Limits.AwayLen) } wasAway, nowAway := rb.session.SetAway(awayMessage) diff --git a/vendor/github.com/ergochat/irc-go/ircfmt/ircfmt.go b/vendor/github.com/ergochat/irc-go/ircfmt/ircfmt.go index e5250568..8590a009 100644 --- a/vendor/github.com/ergochat/irc-go/ircfmt/ircfmt.go +++ 
b/vendor/github.com/ergochat/irc-go/ircfmt/ircfmt.go @@ -5,6 +5,7 @@ package ircfmt import ( "regexp" + "strconv" "strings" ) @@ -19,24 +20,126 @@ const ( underline string = "\x1f" reset string = "\x0f" - runecolour rune = '\x03' - runebold rune = '\x02' - runemonospace rune = '\x11' - runereverseColour rune = '\x16' - runeitalic rune = '\x1d' - runestrikethrough rune = '\x1e' - runereset rune = '\x0f' - runeunderline rune = '\x1f' - - // valid characters in a colour code character, for speed - colours1 string = "0123456789" + metacharacters = (bold + colour + monospace + reverseColour + italic + strikethrough + underline + reset) ) +// ColorCode is a normalized representation of an IRC color code, +// as per this de facto specification: https://modern.ircdocs.horse/formatting.html#color +// The zero value of the type represents a default or unset color, +// whereas ColorCode{true, 0} represents the color white. +type ColorCode struct { + IsSet bool + Value uint8 +} + +// ParseColor converts a string representation of an IRC color code, e.g. "04", +// into a normalized ColorCode, e.g. ColorCode{true, 4}. +func ParseColor(str string) (color ColorCode) { + // "99 - Default Foreground/Background - Not universally supported." + // normalize 99 to ColorCode{} meaning "unset": + if code, err := strconv.ParseUint(str, 10, 8); err == nil && code < 99 { + color.IsSet = true + color.Value = uint8(code) + } + return +} + +// FormattedSubstring represents a section of an IRC message with associated +// formatting data. +type FormattedSubstring struct { + Content string + ForegroundColor ColorCode + BackgroundColor ColorCode + Bold bool + Monospace bool + Strikethrough bool + Underline bool + Italic bool + ReverseColor bool +} + +// IsFormatted returns whether the section has any formatting flags switched on. 
+func (f *FormattedSubstring) IsFormatted() bool { + // could rely on value receiver but if this is to be a public API, + // let's make it a pointer receiver + g := *f + g.Content = "" + return g != FormattedSubstring{} +} + +var ( + // "If there are two ASCII digits available where a <COLOR> is allowed, + // then two characters MUST always be read for it and displayed as described below." + // we rely on greedy matching to implement this for both forms: + // (\x03)00,01 + colorForeBackRe = regexp.MustCompile(`^([0-9]{1,2}),([0-9]{1,2})`) + // (\x03)00 + colorForeRe = regexp.MustCompile(`^([0-9]{1,2})`) +) + +// Split takes an IRC message (typically a PRIVMSG or NOTICE final parameter) +// containing IRC formatting control codes, and splits it into substrings with +// associated formatting information. +func Split(raw string) (result []FormattedSubstring) { + var chunk FormattedSubstring + for { + // skip to the next metacharacter, or the end of the string + if idx := strings.IndexAny(raw, metacharacters); idx != 0 { + if idx == -1 { + idx = len(raw) + } + chunk.Content = raw[:idx] + if len(chunk.Content) != 0 { + result = append(result, chunk) + } + raw = raw[idx:] + } + + if len(raw) == 0 { + return + } + + // we're at a metacharacter. 
by default, all previous formatting carries over + metacharacter := raw[0] + raw = raw[1:] + switch metacharacter { + case bold[0]: + chunk.Bold = !chunk.Bold + case monospace[0]: + chunk.Monospace = !chunk.Monospace + case strikethrough[0]: + chunk.Strikethrough = !chunk.Strikethrough + case underline[0]: + chunk.Underline = !chunk.Underline + case italic[0]: + chunk.Italic = !chunk.Italic + case reverseColour[0]: + chunk.ReverseColor = !chunk.ReverseColor + case reset[0]: + chunk = FormattedSubstring{} + case colour[0]: + // preferentially match the "\x0399,01" form, then "\x0399"; + // if neither of those matches, then it's a reset + if matches := colorForeBackRe.FindStringSubmatch(raw); len(matches) != 0 { + chunk.ForegroundColor = ParseColor(matches[1]) + chunk.BackgroundColor = ParseColor(matches[2]) + raw = raw[len(matches[0]):] + } else if matches := colorForeRe.FindStringSubmatch(raw); len(matches) != 0 { + chunk.ForegroundColor = ParseColor(matches[1]) + raw = raw[len(matches[0]):] + } else { + chunk.ForegroundColor = ColorCode{} + chunk.BackgroundColor = ColorCode{} + } + default: + // should be impossible, but just ignore it + } + } +} + var ( // valtoescape replaces most of IRC characters with our escapes. valtoescape = strings.NewReplacer("$", "$$", colour, "$c", reverseColour, "$v", bold, "$b", italic, "$i", strikethrough, "$s", underline, "$u", monospace, "$m", reset, "$r") - // valToStrip replaces most of the IRC characters with nothing - valToStrip = strings.NewReplacer(colour, "$c", reverseColour, "", bold, "", italic, "", strikethrough, "", underline, "", monospace, "", reset, "") // escapetoval contains most of our escapes and how they map to real IRC characters. // intentionally skips colour, since that's handled elsewhere. 
@@ -98,7 +201,9 @@ var ( "light blue": "12", "pink": "13", "grey": "14", + "gray": "14", "light grey": "15", + "light gray": "15", "default": "99", } @@ -123,7 +228,7 @@ func Escape(in string) string { out.WriteString("$c") inRunes = inRunes[2:] // strip colour code chars - if len(inRunes) < 1 || !strings.Contains(colours1, string(inRunes[0])) { + if len(inRunes) < 1 || !isDigit(inRunes[0]) { out.WriteString("[]") continue } @@ -131,14 +236,14 @@ func Escape(in string) string { var foreBuffer, backBuffer string foreBuffer += string(inRunes[0]) inRunes = inRunes[1:] - if 0 < len(inRunes) && strings.Contains(colours1, string(inRunes[0])) { + if 0 < len(inRunes) && isDigit(inRunes[0]) { foreBuffer += string(inRunes[0]) inRunes = inRunes[1:] } - if 1 < len(inRunes) && inRunes[0] == ',' && strings.Contains(colours1, string(inRunes[1])) { + if 1 < len(inRunes) && inRunes[0] == ',' && isDigit(inRunes[1]) { backBuffer += string(inRunes[1]) inRunes = inRunes[2:] - if 0 < len(inRunes) && strings.Contains(colours1, string(inRunes[0])) { + if 0 < len(inRunes) && isDigit(inRunes[0]) { backBuffer += string(inRunes[0]) inRunes = inRunes[1:] } @@ -178,52 +283,27 @@ func Escape(in string) string { return out.String() } +func isDigit(r rune) bool { + return '0' <= r && r <= '9' // don't use unicode.IsDigit, it includes non-ASCII numerals +} + // Strip takes a raw IRC string and removes it with all formatting codes removed // IE, it turns this: "This is a \x02cool\x02, \x034red\x0f message!" // into: "This is a cool, red message!" 
func Strip(in string) string { - out := strings.Builder{} - runes := []rune(in) - if out.Len() < len(runes) { // Reduce allocations where needed - out.Grow(len(in) - out.Len()) - } - for len(runes) > 0 { - switch runes[0] { - case runebold, runemonospace, runereverseColour, runeitalic, runestrikethrough, runeunderline, runereset: - runes = runes[1:] - case runecolour: - runes = removeColour(runes) - default: - out.WriteRune(runes[0]) - runes = runes[1:] - } - } - return out.String() -} - -func removeNumber(runes []rune) []rune { - if len(runes) > 0 && runes[0] >= '0' && runes[0] <= '9' { - runes = runes[1:] - } - return runes -} - -func removeColour(runes []rune) []rune { - if runes[0] != runecolour { - return runes - } - - runes = runes[1:] - runes = removeNumber(runes) - runes = removeNumber(runes) - - if len(runes) > 1 && runes[0] == ',' && runes[1] >= '0' && runes[1] <= '9' { - runes = runes[2:] + splitChunks := Split(in) + if len(splitChunks) == 0 { + return "" + } else if len(splitChunks) == 1 { + return splitChunks[0].Content } else { - return runes // Nothing else because we dont have a comma + var buf strings.Builder + buf.Grow(len(in)) + for _, chunk := range splitChunks { + buf.WriteString(chunk.Content) + } + return buf.String() } - runes = removeNumber(runes) - return runes } // resolve "light blue" to "12", "12" to "12", "asdf" to "", etc. 
diff --git a/vendor/github.com/ergochat/irc-go/ircmsg/message.go b/vendor/github.com/ergochat/irc-go/ircmsg/message.go index 2d3cd112..7a7e1305 100644 --- a/vendor/github.com/ergochat/irc-go/ircmsg/message.go +++ b/vendor/github.com/ergochat/irc-go/ircmsg/message.go @@ -238,7 +238,7 @@ func parseLine(line string, maxTagDataLength int, truncateLen int) (ircmsg Messa // truncate if desired if truncateLen != 0 && truncateLen < len(line) { err = ErrorBodyTooLong - line = line[:truncateLen] + line = TruncateUTF8Safe(line, truncateLen) } // modern: "These message parts, and parameters themselves, are separated diff --git a/vendor/github.com/ergochat/irc-go/ircmsg/unicode.go b/vendor/github.com/ergochat/irc-go/ircmsg/unicode.go new file mode 100644 index 00000000..1e07addb --- /dev/null +++ b/vendor/github.com/ergochat/irc-go/ircmsg/unicode.go @@ -0,0 +1,29 @@ +// Copyright (c) 2021 Shivaram Lingamneni +// Released under the MIT License + +package ircmsg + +import ( + "unicode/utf8" +) + +// TruncateUTF8Safe truncates a message, respecting UTF8 boundaries. If a message +// was originally valid UTF8, TruncateUTF8Safe will not make it invalid; instead +// it will truncate additional bytes as needed, back to the last valid +// UTF8-encoded codepoint. If a message is not UTF8, TruncateUTF8Safe will truncate +// at most 3 additional bytes before giving up. 
+func TruncateUTF8Safe(message string, byteLimit int) (result string) { + if len(message) <= byteLimit { + return message + } + message = message[:byteLimit] + for i := 0; i < (utf8.UTFMax - 1); i++ { + r, n := utf8.DecodeLastRuneInString(message) + if r == utf8.RuneError && n <= 1 { + message = message[:len(message)-1] + } else { + break + } + } + return message +} diff --git a/vendor/github.com/ergochat/irc-go/ircutils/unicode.go b/vendor/github.com/ergochat/irc-go/ircutils/unicode.go index 6e1cf445..e05598a8 100644 --- a/vendor/github.com/ergochat/irc-go/ircutils/unicode.go +++ b/vendor/github.com/ergochat/irc-go/ircutils/unicode.go @@ -7,24 +7,11 @@ import ( "strings" "unicode" "unicode/utf8" + + "github.com/ergochat/irc-go/ircmsg" ) -// truncate a message, taking care not to make valid UTF8 into invalid UTF8 -func TruncateUTF8Safe(message string, byteLimit int) (result string) { - if len(message) <= byteLimit { - return message - } - message = message[:byteLimit] - for i := 0; i < (utf8.UTFMax - 1); i++ { - r, n := utf8.DecodeLastRuneInString(message) - if r == utf8.RuneError && n <= 1 { - message = message[:len(message)-1] - } else { - break - } - } - return message -} +var TruncateUTF8Safe = ircmsg.TruncateUTF8Safe // Sanitizes human-readable text to make it safe for IRC; // assumes UTF-8 and uses the replacement character where diff --git a/vendor/modules.txt b/vendor/modules.txt index 4c09b1cc..afcc30eb 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -16,7 +16,7 @@ github.com/ergochat/confusables # github.com/ergochat/go-ident v0.0.0-20200511222032-830550b1d775 ## explicit github.com/ergochat/go-ident -# github.com/ergochat/irc-go v0.2.0 +# github.com/ergochat/irc-go v0.4.0 ## explicit; go 1.15 github.com/ergochat/irc-go/ircfmt github.com/ergochat/irc-go/ircmsg