diff --git a/irc/config.go b/irc/config.go
index 533c4582..1a187604 100644
--- a/irc/config.go
+++ b/irc/config.go
@@ -182,6 +182,29 @@ func (nr *NickEnforcementMethod) UnmarshalYAML(unmarshal func(interface{}) error
 	return err
 }
 
+// UnmarshalYAML decodes a casemapping name, matched case-insensitively:
+// "ascii", "precis" (or "rfc7613"/"rfc8265"), or "permissive" (or "fun").
+func (cm *Casemapping) UnmarshalYAML(unmarshal func(interface{}) error) (err error) {
+	var orig string
+	if err = unmarshal(&orig); err != nil {
+		return err
+	}
+
+	var result Casemapping
+	switch strings.ToLower(orig) {
+	case "ascii":
+		result = CasemappingASCII
+	case "precis", "rfc7613", "rfc8265":
+		result = CasemappingPRECIS
+	case "permissive", "fun":
+		result = CasemappingPermissive
+	default:
+		return fmt.Errorf("invalid casemapping value: %s", orig)
+	}
+	*cm = result
+	return nil
+}
+
 type NickReservationConfig struct {
 	Enabled bool
 	AdditionalNickLimit int `yaml:"additional-nick-limit"`
@@ -318,6 +341,7 @@ type Config struct {
 		Cloaks cloaks.CloakConfig `yaml:"ip-cloaking"`
 		supportedCaps *caps.Set
 		capValues caps.Values
+		Casemapping Casemapping
 	}
 
 	Languages struct {
diff --git a/irc/server.go b/irc/server.go
index 5c56ac4d..c1b9aece 100644
--- a/irc/server.go
+++ b/irc/server.go
@@ -165,7 +165,11 @@ func (config *Config) generateISupport() (err error) {
 	isupport.Add("STATUSMSG", "~&@%+")
 	isupport.Add("TARGMAX", fmt.Sprintf("NAMES:1,LIST:1,KICK:1,WHOIS:1,USERHOST:10,PRIVMSG:%s,TAGMSG:%s,NOTICE:%s,MONITOR:", maxTargetsString, maxTargetsString, maxTargetsString))
 	isupport.Add("TOPICLEN", strconv.Itoa(config.Limits.TopicLen))
-	isupport.Add("UTF8MAPPING", casemappingName)
+	// UTF8MAPPING is advertised only under PRECIS. read the setting from this config,
+	// not the package global, so the token doesn't depend on when the global is assigned
+	if config.Server.Casemapping == CasemappingPRECIS {
+		isupport.Add("UTF8MAPPING", precisUTF8MappingToken)
+	}
 
 	err = isupport.RegenerateCachedReply()
 	return
@@ -596,6 +600,7 @@ func (server *Server) applyConfig(config *Config, initial bool) (err error) {
 		server.configFilename = config.Filename
 		server.name = config.Server.Name
 		server.nameCasefolded = config.Server.nameCasefolded
+		globalCasemappingSetting = config.Server.Casemapping
 	} else {
 		// enforce configs that can't be changed after launch:
 		currentLimits := server.Config().Limits
@@ -605,6 +610,8 @@ func (server *Server) applyConfig(config *Config, initial bool) (err error) {
 			return fmt.Errorf("Server name cannot be changed after launching the server, rehash aborted")
 		} else if server.Config().Datastore.Path != config.Datastore.Path {
 			return fmt.Errorf("Datastore path cannot be changed after launching the server, rehash aborted")
+		} else if globalCasemappingSetting != config.Server.Casemapping {
+			return fmt.Errorf("Casemapping cannot be changed after launching the server, rehash aborted")
 		}
 	}
 
diff --git a/irc/strings.go b/irc/strings.go
index e7ec40ea..e03f15d2 100644
--- a/irc/strings.go
+++ b/irc/strings.go
@@ -8,18 +8,44 @@ package irc
 import (
 	"fmt"
 	"strings"
+	"unicode"
 
 	"github.com/oragono/confusables"
 	"golang.org/x/text/cases"
 	"golang.org/x/text/language"
 	"golang.org/x/text/secure/precis"
+	"golang.org/x/text/unicode/norm"
 	"golang.org/x/text/width"
 )
 
 const (
-	casemappingName = "rfc8265"
+	precisUTF8MappingToken = "rfc8265"
 )
 
+// Casemapping selects how identifiers are validated and casefolded.
+type Casemapping uint
+
+const (
+	// "precis" is the default / zero value:
+	// casefolding/validation: PRECIS + ircd restrictions (like no *)
+	// confusables detection: standard skeleton algorithm
+	CasemappingPRECIS Casemapping = iota
+	// "ascii" is the traditional ircd behavior:
+	// casefolding/validation: must be pure ASCII and follow ircd restrictions, ASCII lowercasing
+	// confusables detection: none
+	CasemappingASCII
+	// "permissive" is an insecure mode:
+	// casefolding/validation: arbitrary Unicode that follows ircd restrictions, Unicode casefolding
+	// confusables detection: standard skeleton algorithm (which may be ineffective
+	// over the larger set of permitted identifiers)
+	CasemappingPermissive
+)
+
+// XXX this is a global variable without explicit synchronization.
+// it gets set during the initial Server.applyConfig and cannot be changed by rehash:
+// this happens-before all IRC connections and all casefolding operations.
+var globalCasemappingSetting Casemapping = CasemappingPRECIS
+
 // Each pass of PRECIS casefolding is a composition of idempotent operations,
 // but not idempotent itself. Therefore, the spec says "do it four times and hope
 // it converges" (lolwtf). Golang's PRECIS implementation has a "repeat" option,
@@ -46,7 +72,14 @@ func iterateFolding(profile *precis.Profile, oldStr string) (str string, err err
 
 // Casefold returns a casefolded string, without doing any name or channel character checks.
 func Casefold(str string) (string, error) {
-	return iterateFolding(precis.UsernameCaseMapped, str)
+	switch globalCasemappingSetting {
+	default:
+		return iterateFolding(precis.UsernameCaseMapped, str)
+	case CasemappingASCII:
+		return foldASCII(str)
+	case CasemappingPermissive:
+		return foldPermissive(str)
+	}
 }
 
 // CasefoldChannel returns a casefolded version of a channel name.
@@ -144,6 +177,17 @@ func isIdent(name string) bool {
 // from the original (unfolded) identifier and stored/tracked separately from the
 // casefolded identifier.
 func Skeleton(name string) (string, error) {
+	switch globalCasemappingSetting {
+	default:
+		return realSkeleton(name)
+	case CasemappingASCII:
+		// identity function is fine because we independently case-normalize in Casefold
+		return name, nil
+	}
+}
+
+// realSkeleton is the confusables-aware implementation that Skeleton uses outside ASCII mode.
+func realSkeleton(name string) (string, error) {
 	// XXX the confusables table includes some, but not all, fullwidth->standard
 	// mappings for latin characters. do a pass of explicit width folding,
 	// same as PRECIS:
@@ -212,3 +256,30 @@ func CanonicalizeMaskWildcard(userhost string) (expanded string, err error) {
 	}
 	return fmt.Sprintf("%s!%s@%s", nick, user, host), nil
 }
+
+// foldASCII lowercases a pure-ASCII string; any non-ASCII byte yields errInvalidCharacter.
+func foldASCII(str string) (result string, err error) {
+	if !IsPureASCII(str) {
+		return "", errInvalidCharacter
+	}
+	return strings.ToLower(str), nil
+}
+
+// IsPureASCII reports whether every byte of str is at most unicode.MaxASCII (true for "").
+func IsPureASCII(str string) bool {
+	for i := 0; i < len(str); i++ {
+		if str[i] > unicode.MaxASCII {
+			return false
+		}
+	}
+	return true
+}
+
+// foldPermissive casefolds arbitrary Unicode with no PRECIS validation; it never returns an error.
+func foldPermissive(str string) (result string, err error) {
+	// NFD-normalize, apply Unicode case folding, then NFD-normalize the folded result
+	str = norm.NFD.String(str)
+	str = cases.Fold().String(str)
+	str = norm.NFD.String(str)
+	return str, nil
+}
diff --git a/irc/strings_test.go b/irc/strings_test.go
index fdf7ddde..c2515cb5 100644
--- a/irc/strings_test.go
+++ b/irc/strings_test.go
@@ -215,3 +215,25 @@ func TestCanonicalizeMaskWildcard(t *testing.T) {
 	tester("Shivaram*", "shivaram*!*@*", nil)
 	tester("*SHIVARAM*", "*shivaram*!*@*", nil)
 }
+
+func TestFoldPermissive(t *testing.T) {
+	tester := func(first, second string, equal bool) {
+		firstFolded, err := foldPermissive(first)
+		if err != nil {
+			t.Fatal(err)
+		}
+		secondFolded, err := foldPermissive(second)
+		if err != nil {
+			t.Fatal(err)
+		}
+		foundEqual := firstFolded == secondFolded
+		if foundEqual != equal {
+			t.Errorf("%s and %s: expected equality %t, but got %t", first, second, equal, foundEqual)
+		}
+	}
+	tester("SHIVARAM", "shivaram", true)
+	tester("shIvaram", "shivaraM", true)
+	tester("shivaram", "DAN-", false)
+	tester("dolph🐬n", "DOLPH🐬n", true)
+	tester("dolph🐬n", "dolph💻n", false)
+}
diff --git a/oragono.yaml b/oragono.yaml
index 1f2d9ef4..511c14e6 100644
--- a/oragono.yaml
+++ b/oragono.yaml
@@ -85,6 +85,18 @@ server:
         # should clients include this STS policy when they ship their inbuilt preload lists?
         preload: false
 
+    # casemapping controls what kinds of strings are permitted as identifiers (nicknames,
+    # channel names, account names, etc.), and how they are normalized for case.
+    # with the recommended default of 'precis', utf-8 identifiers that are "sane"
+    # (according to RFC 8265) are allowed, and the server additionally tries to protect
+    # against confusable characters ("homoglyph attacks").
+    # the other options are 'ascii' (traditional ASCII-only identifiers), and 'permissive',
+    # which allows identifiers to contain unusual characters like emoji, but makes users
+    # vulnerable to homoglyph attacks. unless you're really confident in your decision,
+    # we recommend leaving this value at its default (changing it once the network is
+    # already up and running is problematic).
+    casemapping: precis
+
     # whether to look up user hostnames with reverse DNS
     # (to suppress this for privacy purposes, use the ip-cloaking options below)
     lookup-hostnames: true