diff --git a/irc/client.go b/irc/client.go index 4c9ad019..c31e71cb 100644 --- a/irc/client.go +++ b/irc/client.go @@ -984,12 +984,7 @@ func (client *Client) updateNickMaskNoMutex() { return // pre-registration, don't bother generating the hostname } - cfhostname, err := Casefold(client.hostname) - if err != nil { - client.server.logger.Error("internal", "hostname couldn't be casefolded", client.hostname, err.Error()) - cfhostname = client.hostname // YOLO - } - + cfhostname := strings.ToLower(client.hostname) client.nickMaskString = fmt.Sprintf("%s!%s@%s", client.nick, client.username, client.hostname) client.nickMaskCasefolded = fmt.Sprintf("%s!%s@%s", client.nickCasefolded, strings.ToLower(client.username), cfhostname) } @@ -1006,18 +1001,14 @@ func (client *Client) AllNickmasks() (masks []string) { username = strings.ToLower(username) if len(vhost) > 0 { - cfvhost, err := Casefold(vhost) - if err == nil { - masks = append(masks, fmt.Sprintf("%s!%s@%s", nick, username, cfvhost)) - } + cfvhost := strings.ToLower(vhost) + masks = append(masks, fmt.Sprintf("%s!%s@%s", nick, username, cfvhost)) } var rawhostmask string - cfrawhost, err := Casefold(rawHostname) - if err == nil { - rawhostmask = fmt.Sprintf("%s!%s@%s", nick, username, cfrawhost) - masks = append(masks, rawhostmask) - } + cfrawhost := strings.ToLower(rawHostname) + rawhostmask = fmt.Sprintf("%s!%s@%s", nick, username, cfrawhost) + masks = append(masks, rawhostmask) if cloakedHostname != "" { masks = append(masks, fmt.Sprintf("%s!%s@%s", nick, username, cloakedHostname)) diff --git a/irc/config.go b/irc/config.go index b564a98d..06409aa7 100644 --- a/irc/config.go +++ b/irc/config.go @@ -182,6 +182,27 @@ func (nr *NickEnforcementMethod) UnmarshalYAML(unmarshal func(interface{}) error return err } +func (cm *Casemapping) UnmarshalYAML(unmarshal func(interface{}) error) (err error) { + var orig string + if err = unmarshal(&orig); err != nil { + return err + } + + var result Casemapping + switch strings.ToLower(orig) { + case "ascii": + result = CasemappingASCII + case "precis", "rfc7613", "rfc8265": + result = CasemappingPRECIS + case "permissive", "fun": + result = CasemappingPermissive + default: + return fmt.Errorf("invalid casemapping value: %s", orig) + } + *cm = result + return nil +} + type NickReservationConfig struct { Enabled bool AdditionalNickLimit int `yaml:"additional-nick-limit"` @@ -324,6 +345,7 @@ type Config struct { Cloaks cloaks.CloakConfig `yaml:"ip-cloaking"` supportedCaps *caps.Set capValues caps.Values + Casemapping Casemapping } Languages struct { @@ -612,6 +634,7 @@ func LoadConfig(filename string) (config *Config, err error) { if !utils.IsServerName(config.Server.Name) { return nil, ErrServerNameNotHostname } + config.Server.nameCasefolded = strings.ToLower(config.Server.Name) if config.Datastore.Path == "" { return nil, ErrDatastorePathMissing } @@ -810,12 +833,6 @@ func LoadConfig(filename string) (config *Config, err error) { config.Debug.recoverFromErrors = true } - // casefold/validate server name - config.Server.nameCasefolded, err = Casefold(config.Server.Name) - if err != nil { - return nil, fmt.Errorf("Server name isn't valid [%s]: %s", config.Server.Name, err.Error()) - } - // process operator definitions, store them to config.operators operclasses, err := config.OperatorClasses() if err != nil { diff --git a/irc/handlers.go b/irc/handlers.go index 86565deb..6c897ccc 100644 --- a/irc/handlers.go +++ b/irc/handlers.go @@ -2806,7 +2806,7 @@ func whoHandler(server *Server, client *Client, msg ircmsg.IrcMessage, rb *Respo } else if mask[0] == '#' { mask, err = CasefoldChannel(msg.Params[0]) } else { - mask, err = Casefold(mask) + mask, err = CanonicalizeMaskWildcard(mask) } if err != nil { diff --git a/irc/server.go b/irc/server.go index 7b376d49..f73d0d95 100644 --- a/irc/server.go +++ b/irc/server.go @@ -165,7 +165,9 @@ func (config *Config) generateISupport() (err error) { isupport.Add("STATUSMSG", "~&@%+") isupport.Add("TARGMAX", fmt.Sprintf("NAMES:1,LIST:1,KICK:1,WHOIS:1,USERHOST:10,PRIVMSG:%s,TAGMSG:%s,NOTICE:%s,MONITOR:", maxTargetsString, maxTargetsString, maxTargetsString)) isupport.Add("TOPICLEN", strconv.Itoa(config.Limits.TopicLen)) - isupport.Add("UTF8MAPPING", casemappingName) + if globalCasemappingSetting == CasemappingPRECIS { + isupport.Add("UTF8MAPPING", precisUTF8MappingToken) + } err = isupport.RegenerateCachedReply() return @@ -599,6 +601,7 @@ func (server *Server) applyConfig(config *Config, initial bool) (err error) { server.configFilename = config.Filename server.name = config.Server.Name server.nameCasefolded = config.Server.nameCasefolded + globalCasemappingSetting = config.Server.Casemapping } else { // enforce configs that can't be changed after launch: currentLimits := server.Config().Limits @@ -608,6 +611,8 @@ func (server *Server) applyConfig(config *Config, initial bool) (err error) { return fmt.Errorf("Server name cannot be changed after launching the server, rehash aborted") } else if server.Config().Datastore.Path != config.Datastore.Path { return fmt.Errorf("Datastore path cannot be changed after launching the server, rehash aborted") + } else if globalCasemappingSetting != config.Server.Casemapping { + return fmt.Errorf("Casemapping cannot be changed after launching the server, rehash aborted") } } diff --git a/irc/strings.go b/irc/strings.go index e7ec40ea..32f3f952 100644 --- a/irc/strings.go +++ b/irc/strings.go @@ -7,19 +7,49 @@ package irc import ( "fmt" + "regexp" "strings" "github.com/oragono/confusables" "golang.org/x/text/cases" - "golang.org/x/text/language" "golang.org/x/text/secure/precis" + "golang.org/x/text/unicode/norm" "golang.org/x/text/width" ) const ( - casemappingName = "rfc8265" + precisUTF8MappingToken = "rfc8265" ) +var ( + // reviving the old ergonomadic nickname regex: + // in permissive mode, allow arbitrary letters, numbers, punctuation, and symbols + permissiveCharsRegex = regexp.MustCompile(`^[\pL\pN\pP\pS]*$`) +) + +type Casemapping uint + +const ( + // "precis" is the default / zero value: + // casefolding/validation: PRECIS + ircd restrictions (like no *) + // confusables detection: standard skeleton algorithm + CasemappingPRECIS Casemapping = iota + // "ascii" is the traditional ircd behavior: + // casefolding/validation: must be pure ASCII and follow ircd restrictions, ASCII lowercasing + // confusables detection: none + CasemappingASCII + // "permissive" is an insecure mode: + // casefolding/validation: arbitrary unicodes that follow ircd restrictions, unicode casefolding + // confusables detection: standard skeleton algorithm (which may be ineffective + // over the larger set of permitted identifiers) + CasemappingPermissive +) + +// XXX this is a global variable without explicit synchronization. +// it gets set during the initial Server.applyConfig and cannot be changed by rehash: +// this happens-before all IRC connections and all casefolding operations. +var globalCasemappingSetting Casemapping = CasemappingPRECIS + // Each pass of PRECIS casefolding is a composition of idempotent operations, // but not idempotent itself. Therefore, the spec says "do it four times and hope // it converges" (lolwtf). Golang's PRECIS implementation has a "repeat" option, @@ -46,7 +76,14 @@ func iterateFolding(profile *precis.Profile, oldStr string) (str string, err err // Casefold returns a casefolded string, without doing any name or channel character checks. func Casefold(str string) (string, error) { - return iterateFolding(precis.UsernameCaseMapped, str) + switch globalCasemappingSetting { + default: + return iterateFolding(precis.UsernameCaseMapped, str) + case CasemappingASCII: + return foldASCII(str) + case CasemappingPermissive: + return foldPermissive(str) + } } // CasefoldChannel returns a casefolded version of a channel name. @@ -144,6 +181,16 @@ func isIdent(name string) bool { // from the original (unfolded) identifier and stored/tracked separately from the // casefolded identifier. func Skeleton(name string) (string, error) { + switch globalCasemappingSetting { + default: + return realSkeleton(name) + case CasemappingASCII: + // identity function is fine because we independently case-normalize in Casefold + return name, nil + } +} + +func realSkeleton(name string) (string, error) { // XXX the confusables table includes some, but not all, fullwidth->standard // mappings for latin characters. do a pass of explicit width folding, // same as PRECIS: @@ -156,7 +203,7 @@ func Skeleton(name string) (string, error) { // violate the bidi rule). We also don't care if they contain runes // that are disallowed by PRECIS, because every identifier must independently // pass PRECIS --- we are just further canonicalizing the skeleton. - return cases.Lower(language.Und).String(name), nil + return cases.Fold().String(name), nil } // maps a nickmask fragment to an expanded, casefolded wildcard: @@ -212,3 +259,33 @@ func CanonicalizeMaskWildcard(userhost string) (expanded string, err error) { } return fmt.Sprintf("%s!%s@%s", nick, user, host), nil } + +func foldASCII(str string) (result string, err error) { + if !IsPrintableASCII(str) { + return "", errInvalidCharacter + } + return strings.ToLower(str), nil +} + +func IsPrintableASCII(str string) bool { + for i := 0; i < len(str); i++ { + // allow space here because it's technically printable; + // it will be disallowed later by CasefoldName/CasefoldChannel + chr := str[i] + if chr < ' ' || chr > '~' { + return false + } + } + return true +} + +func foldPermissive(str string) (result string, err error) { + if !permissiveCharsRegex.MatchString(str) { + return "", errInvalidCharacter + } + // YOLO + str = norm.NFD.String(str) + str = cases.Fold().String(str) + str = norm.NFD.String(str) + return str, nil +} diff --git a/irc/strings_test.go b/irc/strings_test.go index fdf7ddde..9bb487f5 100644 --- a/irc/strings_test.go +++ b/irc/strings_test.go @@ -63,6 +63,7 @@ func TestCasefoldChannel(t *testing.T) { "", "#*starpower", "# NASA", "#interro?", "OOF#", "foo", // bidi violation mixing latin and hebrew characters: "#shalomעליכם", + "#tab\tcharacter", "#\t", "#carriage\rreturn", } { testCases = append(testCases, channelTest{channel: errCase, err: true}) } @@ -215,3 +216,61 @@ func TestCanonicalizeMaskWildcard(t *testing.T) { tester("Shivaram*", "shivaram*!*@*", nil) tester("*SHIVARAM*", "*shivaram*!*@*", nil) } + +func validFoldTester(first, second string, equal bool, folder func(string) (string, error), t *testing.T) { + firstFolded, err := folder(first) + if err != nil { + panic(err) + } + secondFolded, err := folder(second) + if err != nil { + panic(err) + } + foundEqual := firstFolded == secondFolded + if foundEqual != equal { + t.Errorf("%s and %s: expected equality %t, but got %t", first, second, equal, foundEqual) + } +} + +func TestFoldPermissive(t *testing.T) { + tester := func(first, second string, equal bool) { + validFoldTester(first, second, equal, foldPermissive, t) + } + tester("SHIVARAM", "shivaram", true) + tester("shIvaram", "shivaraM", true) + tester("shivaram", "DAN-", false) + tester("dolph🐬n", "DOLPH🐬n", true) + tester("dolph🐬n", "dolph💻n", false) + tester("9FRONT", "9front", true) +} + +func TestFoldPermissiveInvalid(t *testing.T) { + _, err := foldPermissive("a\tb") + if err == nil { + t.Errorf("whitespace should be invalid in identifiers") + } + _, err = foldPermissive("a\x00b") + if err == nil { + t.Errorf("the null byte should be invalid in identifiers") + } +} + +func TestFoldASCII(t *testing.T) { + tester := func(first, second string, equal bool) { + validFoldTester(first, second, equal, foldASCII, t) + } + tester("shivaram", "SHIVARAM", true) + tester("X|Y", "x|y", true) + tester("a != b", "A != B", true) +} + +func TestFoldASCIIInvalid(t *testing.T) { + _, err := foldASCII("\x01") + if err == nil { + t.Errorf("control characters should be invalid in identifiers") + } + _, err = foldASCII("\x7F") + if err == nil { + t.Errorf("control characters should be invalid in identifiers") + } +} diff --git a/oragono.yaml b/oragono.yaml index cb038310..bdee854f 100644 --- a/oragono.yaml +++ b/oragono.yaml @@ -85,6 +85,18 @@ server: # should clients include this STS policy when they ship their inbuilt preload lists? preload: false + # casemapping controls what kinds of strings are permitted as identifiers (nicknames, + # channel names, account names, etc.), and how they are normalized for case. + # with the recommended default of 'precis', utf-8 identifiers that are "sane" + # (according to RFC 8265) are allowed, and the server additionally tries to protect + # against confusable characters ("homoglyph attacks"). + # the other options are 'ascii' (traditional ASCII-only identifiers), and 'permissive', + # which allows identifiers to contain unusual characters like emoji, but makes users + # vulnerable to homoglyph attacks. unless you're really confident in your decision, + # we recommend leaving this value at its default (changing it once the network is + # already up and running is problematic). + casemapping: "precis" + # whether to look up user hostnames with reverse DNS # (to suppress this for privacy purposes, use the ip-cloaking options below) lookup-hostnames: true