3
0
mirror of https://github.com/ergochat/ergo.git synced 2025-01-22 02:04:10 +01:00

Merge pull request #695 from slingamn/issue693_casemappings.4

#693: configurable casemappings
This commit is contained in:
Shivaram Lingamneni 2019-12-29 09:20:06 -05:00 committed by GitHub
commit 9de9fcf069
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 188 additions and 27 deletions

View File

@ -984,12 +984,7 @@ func (client *Client) updateNickMaskNoMutex() {
return // pre-registration, don't bother generating the hostname
}
cfhostname, err := Casefold(client.hostname)
if err != nil {
client.server.logger.Error("internal", "hostname couldn't be casefolded", client.hostname, err.Error())
cfhostname = client.hostname // YOLO
}
cfhostname := strings.ToLower(client.hostname)
client.nickMaskString = fmt.Sprintf("%s!%s@%s", client.nick, client.username, client.hostname)
client.nickMaskCasefolded = fmt.Sprintf("%s!%s@%s", client.nickCasefolded, strings.ToLower(client.username), cfhostname)
}
@ -1006,18 +1001,14 @@ func (client *Client) AllNickmasks() (masks []string) {
username = strings.ToLower(username)
if len(vhost) > 0 {
cfvhost, err := Casefold(vhost)
if err == nil {
masks = append(masks, fmt.Sprintf("%s!%s@%s", nick, username, cfvhost))
}
cfvhost := strings.ToLower(vhost)
masks = append(masks, fmt.Sprintf("%s!%s@%s", nick, username, cfvhost))
}
var rawhostmask string
cfrawhost, err := Casefold(rawHostname)
if err == nil {
rawhostmask = fmt.Sprintf("%s!%s@%s", nick, username, cfrawhost)
masks = append(masks, rawhostmask)
}
cfrawhost := strings.ToLower(rawHostname)
rawhostmask = fmt.Sprintf("%s!%s@%s", nick, username, cfrawhost)
masks = append(masks, rawhostmask)
if cloakedHostname != "" {
masks = append(masks, fmt.Sprintf("%s!%s@%s", nick, username, cloakedHostname))

View File

@ -182,6 +182,27 @@ func (nr *NickEnforcementMethod) UnmarshalYAML(unmarshal func(interface{}) error
return err
}
// UnmarshalYAML decodes a Casemapping from its string form in the config file.
// Matching is case-insensitive. Accepted spellings:
//   - "ascii"                        -> CasemappingASCII
//   - "precis", "rfc7613", "rfc8265" -> CasemappingPRECIS
//   - "permissive", "fun"            -> CasemappingPermissive
//
// Any other value yields an error and leaves *cm untouched.
func (cm *Casemapping) UnmarshalYAML(unmarshal func(interface{}) error) (err error) {
	var raw string
	if err = unmarshal(&raw); err != nil {
		return err
	}

	switch name := strings.ToLower(raw); name {
	case "ascii":
		*cm = CasemappingASCII
	case "precis", "rfc7613", "rfc8265":
		*cm = CasemappingPRECIS
	case "permissive", "fun":
		*cm = CasemappingPermissive
	default:
		// report the value exactly as the operator wrote it, not the lowercased form
		return fmt.Errorf("invalid casemapping value: %s", raw)
	}
	return nil
}
type NickReservationConfig struct {
Enabled bool
AdditionalNickLimit int `yaml:"additional-nick-limit"`
@ -324,6 +345,7 @@ type Config struct {
Cloaks cloaks.CloakConfig `yaml:"ip-cloaking"`
supportedCaps *caps.Set
capValues caps.Values
Casemapping Casemapping
}
Languages struct {
@ -612,6 +634,7 @@ func LoadConfig(filename string) (config *Config, err error) {
if !utils.IsServerName(config.Server.Name) {
return nil, ErrServerNameNotHostname
}
config.Server.nameCasefolded = strings.ToLower(config.Server.Name)
if config.Datastore.Path == "" {
return nil, ErrDatastorePathMissing
}
@ -810,12 +833,6 @@ func LoadConfig(filename string) (config *Config, err error) {
config.Debug.recoverFromErrors = true
}
// casefold/validate server name
config.Server.nameCasefolded, err = Casefold(config.Server.Name)
if err != nil {
return nil, fmt.Errorf("Server name isn't valid [%s]: %s", config.Server.Name, err.Error())
}
// process operator definitions, store them to config.operators
operclasses, err := config.OperatorClasses()
if err != nil {

View File

@ -2806,7 +2806,7 @@ func whoHandler(server *Server, client *Client, msg ircmsg.IrcMessage, rb *Respo
} else if mask[0] == '#' {
mask, err = CasefoldChannel(msg.Params[0])
} else {
mask, err = Casefold(mask)
mask, err = CanonicalizeMaskWildcard(mask)
}
if err != nil {

View File

@ -165,7 +165,9 @@ func (config *Config) generateISupport() (err error) {
isupport.Add("STATUSMSG", "~&@%+")
isupport.Add("TARGMAX", fmt.Sprintf("NAMES:1,LIST:1,KICK:1,WHOIS:1,USERHOST:10,PRIVMSG:%s,TAGMSG:%s,NOTICE:%s,MONITOR:", maxTargetsString, maxTargetsString, maxTargetsString))
isupport.Add("TOPICLEN", strconv.Itoa(config.Limits.TopicLen))
isupport.Add("UTF8MAPPING", casemappingName)
if globalCasemappingSetting == CasemappingPRECIS {
isupport.Add("UTF8MAPPING", precisUTF8MappingToken)
}
err = isupport.RegenerateCachedReply()
return
@ -599,6 +601,7 @@ func (server *Server) applyConfig(config *Config, initial bool) (err error) {
server.configFilename = config.Filename
server.name = config.Server.Name
server.nameCasefolded = config.Server.nameCasefolded
globalCasemappingSetting = config.Server.Casemapping
} else {
// enforce configs that can't be changed after launch:
currentLimits := server.Config().Limits
@ -608,6 +611,8 @@ func (server *Server) applyConfig(config *Config, initial bool) (err error) {
return fmt.Errorf("Server name cannot be changed after launching the server, rehash aborted")
} else if server.Config().Datastore.Path != config.Datastore.Path {
return fmt.Errorf("Datastore path cannot be changed after launching the server, rehash aborted")
} else if globalCasemappingSetting != config.Server.Casemapping {
return fmt.Errorf("Casemapping cannot be changed after launching the server, rehash aborted")
}
}

View File

@ -7,19 +7,49 @@ package irc
import (
"fmt"
"regexp"
"strings"
"github.com/oragono/confusables"
"golang.org/x/text/cases"
"golang.org/x/text/language"
"golang.org/x/text/secure/precis"
"golang.org/x/text/unicode/norm"
"golang.org/x/text/width"
)
const (
casemappingName = "rfc8265"
precisUTF8MappingToken = "rfc8265"
)
var (
// reviving the old ergonomadic nickname regex:
// in permissive mode, allow arbitrary letters, numbers, punctuation, and symbols
permissiveCharsRegex = regexp.MustCompile(`^[\pL\pN\pP\pS]*$`)
)
// Casemapping enumerates the supported policies for validating and casefolding
// identifiers. The zero value is CasemappingPRECIS.
type Casemapping uint
const (
// "precis" is the default / zero value:
// casefolding/validation: PRECIS + ircd restrictions (like no *)
// confusables detection: standard skeleton algorithm
CasemappingPRECIS Casemapping = iota
// "ascii" is the traditional ircd behavior:
// casefolding/validation: must be pure ASCII and follow ircd restrictions, ASCII lowercasing
// confusables detection: none
CasemappingASCII
// "permissive" is an insecure mode:
// casefolding/validation: arbitrary Unicode characters that follow ircd restrictions,
// Unicode casefolding
// confusables detection: standard skeleton algorithm (which may be ineffective
// over the larger set of permitted identifiers)
CasemappingPermissive
)
// XXX this is a global variable without explicit synchronization.
// it gets set during the initial Server.applyConfig and cannot be changed by rehash:
// this happens-before all IRC connections and all casefolding operations.
var globalCasemappingSetting Casemapping = CasemappingPRECIS
// Each pass of PRECIS casefolding is a composition of idempotent operations,
// but not idempotent itself. Therefore, the spec says "do it four times and hope
// it converges" (lolwtf). Golang's PRECIS implementation has a "repeat" option,
@ -46,7 +76,14 @@ func iterateFolding(profile *precis.Profile, oldStr string) (str string, err err
// Casefold returns a casefolded string, without doing any name or channel character checks.
func Casefold(str string) (string, error) {
// NOTE(review): as written, this unconditional return makes the switch below
// unreachable. It looks like the pre-change side of the diff this text was
// taken from, with the switch being its replacement -- confirm which is live.
return iterateFolding(precis.UsernameCaseMapped, str)
// Dispatch on the process-wide casemapping setting; the default branch covers
// CasemappingPRECIS (and any unrecognized value) with PRECIS username folding.
switch globalCasemappingSetting {
default:
return iterateFolding(precis.UsernameCaseMapped, str)
case CasemappingASCII:
return foldASCII(str)
case CasemappingPermissive:
return foldPermissive(str)
}
}
// CasefoldChannel returns a casefolded version of a channel name.
@ -144,6 +181,16 @@ func isIdent(name string) bool {
// from the original (unfolded) identifier and stored/tracked separately from the
// casefolded identifier.
func Skeleton(name string) (string, error) {
	// In ASCII mode there is no confusables detection: the name is returned
	// unchanged, since case normalization happens independently in Casefold.
	if globalCasemappingSetting == CasemappingASCII {
		return name, nil
	}
	// Every other casemapping uses the real skeleton algorithm.
	return realSkeleton(name)
}
func realSkeleton(name string) (string, error) {
// XXX the confusables table includes some, but not all, fullwidth->standard
// mappings for latin characters. do a pass of explicit width folding,
// same as PRECIS:
@ -156,7 +203,7 @@ func Skeleton(name string) (string, error) {
// violate the bidi rule). We also don't care if they contain runes
// that are disallowed by PRECIS, because every identifier must independently
// pass PRECIS --- we are just further canonicalizing the skeleton.
return cases.Lower(language.Und).String(name), nil
return cases.Fold().String(name), nil
}
// maps a nickmask fragment to an expanded, casefolded wildcard:
@ -212,3 +259,33 @@ func CanonicalizeMaskWildcard(userhost string) (expanded string, err error) {
}
return fmt.Sprintf("%s!%s@%s", nick, user, host), nil
}
// foldASCII lowercases str for the ASCII casemapping. Input containing any
// byte outside the printable ASCII range is rejected with errInvalidCharacter.
func foldASCII(str string) (result string, err error) {
	if IsPrintableASCII(str) {
		return strings.ToLower(str), nil
	}
	return "", errInvalidCharacter
}
// IsPrintableASCII reports whether every byte of str lies in the printable
// ASCII range, i.e. from ' ' through '~' inclusive. The empty string qualifies.
func IsPrintableASCII(str string) bool {
	for _, b := range []byte(str) {
		// space is accepted here because it's technically printable;
		// it will be disallowed later by CasefoldName/CasefoldChannel
		if !(' ' <= b && b <= '~') {
			return false
		}
	}
	return true
}
// foldPermissive casefolds str for the permissive casemapping. The input must
// match permissiveCharsRegex; otherwise errInvalidCharacter is returned.
// Accepted input is NFD-normalized, Unicode case-folded, and then
// NFD-normalized a second time.
func foldPermissive(str string) (result string, err error) {
	if permissiveCharsRegex.MatchString(str) {
		decomposed := norm.NFD.String(str)
		folded := cases.Fold().String(decomposed)
		// presumably the second pass is because folding can leave the string
		// denormalized -- best-effort canonicalization, not a proven fixed point
		return norm.NFD.String(folded), nil
	}
	return "", errInvalidCharacter
}

View File

@ -63,6 +63,7 @@ func TestCasefoldChannel(t *testing.T) {
"", "#*starpower", "# NASA", "#interro?", "OOF#", "foo",
// bidi violation mixing latin and hebrew characters:
"#shalomעליכם",
"#tab\tcharacter", "#\t", "#carriage\rreturn",
} {
testCases = append(testCases, channelTest{channel: errCase, err: true})
}
@ -215,3 +216,61 @@ func TestCanonicalizeMaskWildcard(t *testing.T) {
tester("Shivaram*", "shivaram*!*@*", nil)
tester("*SHIVARAM*", "*shivaram*!*@*", nil)
}
// validFoldTester folds first and second with folder and verifies that the two
// folded results are equal exactly when `equal` is true.
//
// Both inputs are expected to be valid for folder: a folding error fails the
// calling test immediately via t.Fatalf (rather than panicking, which would
// abort the whole test binary). A mismatch between the expected and observed
// equality is reported with t.Errorf so that remaining cases still run.
func validFoldTester(first, second string, equal bool, folder func(string) (string, error), t *testing.T) {
	// attribute failures to the calling test case, not to this helper
	t.Helper()

	firstFolded, err := folder(first)
	if err != nil {
		t.Fatalf("folding %q failed unexpectedly: %v", first, err)
	}
	secondFolded, err := folder(second)
	if err != nil {
		t.Fatalf("folding %q failed unexpectedly: %v", second, err)
	}

	if foundEqual := firstFolded == secondFolded; foundEqual != equal {
		t.Errorf("%s and %s: expected equality %t, but got %t", first, second, equal, foundEqual)
	}
}
// TestFoldPermissive checks which pairs of identifiers foldPermissive maps to
// the same folded form, including identifiers containing emoji and digits.
func TestFoldPermissive(t *testing.T) {
	pairs := []struct {
		first, second string
		equal         bool
	}{
		{"SHIVARAM", "shivaram", true},
		{"shIvaram", "shivaraM", true},
		{"shivaram", "DAN-", false},
		{"dolph🐬n", "DOLPH🐬n", true},
		{"dolph🐬n", "dolph💻n", false},
		{"9FRONT", "9front", true},
	}
	for _, p := range pairs {
		validFoldTester(p.first, p.second, p.equal, foldPermissive, t)
	}
}
// TestFoldPermissiveInvalid checks that foldPermissive rejects identifiers
// containing a tab or a null byte.
func TestFoldPermissiveInvalid(t *testing.T) {
	if _, err := foldPermissive("a\tb"); err == nil {
		t.Errorf("whitespace should be invalid in identifiers")
	}
	if _, err := foldPermissive("a\x00b"); err == nil {
		t.Errorf("the null byte should be invalid in identifiers")
	}
}
// TestFoldASCII checks that foldASCII treats identifiers differing only in
// letter case as equal, including ones with punctuation and spaces.
func TestFoldASCII(t *testing.T) {
	pairs := []struct {
		first, second string
		equal         bool
	}{
		{"shivaram", "SHIVARAM", true},
		{"X|Y", "x|y", true},
		{"a != b", "A != B", true},
	}
	for _, p := range pairs {
		validFoldTester(p.first, p.second, p.equal, foldASCII, t)
	}
}
// TestFoldASCIIInvalid checks that foldASCII rejects the bytes 0x01 and 0x7F
// (DEL), which lie on either side of the printable range.
func TestFoldASCIIInvalid(t *testing.T) {
	for _, input := range []string{"\x01", "\x7F"} {
		if _, err := foldASCII(input); err == nil {
			t.Errorf("control characters should be invalid in identifiers")
		}
	}
}

View File

@ -85,6 +85,18 @@ server:
# should clients include this STS policy when they ship their inbuilt preload lists?
preload: false
# casemapping controls what kinds of strings are permitted as identifiers (nicknames,
# channel names, account names, etc.), and how they are normalized for case.
# with the recommended default of 'precis', utf-8 identifiers that are "sane"
# (according to RFC 8265) are allowed, and the server additionally tries to protect
# against confusable characters ("homoglyph attacks").
# the other options are 'ascii' (traditional ASCII-only identifiers), and 'permissive',
# which allows identifiers to contain unusual characters like emoji, but makes users
# vulnerable to homoglyph attacks. unless you're really confident in your decision,
# we recommend leaving this value at its default (changing it once the network is
# already up and running is problematic).
casemapping: "precis"
# whether to look up user hostnames with reverse DNS
# (to suppress this for privacy purposes, use the ip-cloaking options below)
lookup-hostnames: true