From 9ccd6037a652056315f6a2f634d28e0a484fdea2 Mon Sep 17 00:00:00 2001 From: Daniel Oaks Date: Fri, 14 Oct 2016 00:25:37 +1000 Subject: [PATCH] strings: Make Nickname profile not actually case-sensitive --- irc/strings.go | 8 ++-- irc/strings_nickname.go | 89 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 5 deletions(-) create mode 100644 irc/strings_nickname.go diff --git a/irc/strings.go b/irc/strings.go index 94e24162..4e777a05 100644 --- a/irc/strings.go +++ b/irc/strings.go @@ -8,8 +8,6 @@ package irc import ( "errors" "strings" - - "golang.org/x/text/secure/precis" ) var ( @@ -18,12 +16,12 @@ var ( // Casefold returns a casefolded string, without doing any name or channel character checks. func Casefold(str string) (string, error) { - return precis.Nickname.String(str) + return NicknameProfile.String(str) } // CasefoldChannel returns a casefolded version of a channel name. func CasefoldChannel(name string) (string, error) { - lowered, err := precis.Nickname.String(name) + lowered, err := NicknameProfile.String(name) if err != nil { return "", err @@ -47,7 +45,7 @@ func CasefoldChannel(name string) (string, error) { // CasefoldName returns a casefolded version of a nick/user name. func CasefoldName(name string) (string, error) { - lowered, err := precis.Nickname.String(name) + lowered, err := NicknameProfile.String(name) if err != nil { return "", err diff --git a/irc/strings_nickname.go b/irc/strings_nickname.go new file mode 100644 index 00000000..a426fbe9 --- /dev/null +++ b/irc/strings_nickname.go @@ -0,0 +1,89 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//NOTE(dan): I need this because the default PRECIS API does not allow a way to retrieve the casefolded version of strings. 
+// See also: https://github.com/golang/go/issues/17386
+
+// the content of this file is taken wholesale from the proper PRECIS API:
+// https://github.com/golang/text/tree/master/secure/precis
+
+package irc
+
+import (
+	"unicode"
+	"unicode/utf8"
+
+	"golang.org/x/text/secure/precis"
+	"golang.org/x/text/transform"
+	"golang.org/x/text/unicode/norm"
+)
+
+// nickAdditionalMapping is a transform.Transformer that applies the RFC 7700
+// additional mapping rule for nicknames: runs of Unicode space separators are
+// collapsed to a single ASCII space, and leading and trailing runs are
+// removed. It keeps state between Transform calls, so one value must not be
+// shared by concurrent transformations.
+type nickAdditionalMapping struct {
+	// TODO: This transformer needs to be stateless somehow…
+
+	// notStart is true once at least one non-space rune has been written.
+	notStart bool
+	// prevSpace is true while a run of spaces has been read but not yet
+	// written; it is only written if a non-space rune follows.
+	prevSpace bool
+}
+
+// Reset returns the transformer to its initial state so that it can be used
+// for a new string.
+func (t *nickAdditionalMapping) Reset() {
+	t.prevSpace = false
+	t.notStart = false
+}
+
+// Transform implements transform.Transformer. It copies src to dst, replacing
+// every run of space separators (Unicode category Zs, which includes U+0020)
+// that sits between two non-space runes with a single ASCII space, and
+// dropping runs at the start or end of the input. Bytes that are not valid
+// UTF-8 are copied through unchanged.
+//
+// It returns transform.ErrShortSrc when src ends in the middle of a multi-byte
+// rune and atEOF is false, and transform.ErrShortDst when dst cannot hold the
+// next rune together with its pending space. In both cases nDst and nSrc count
+// only the bytes actually written and consumed, so the call can be resumed.
+func (t *nickAdditionalMapping) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	// RFC 7700 §2.1.  Rules
+	//
+	// 2.  Additional Mapping Rule: The additional mapping rule consists of
+	//     the following sub-rules.
+	//
+	//     1.  Any instances of non-ASCII space MUST be mapped to ASCII
+	//         space (U+0020); a non-ASCII space is any Unicode code point
+	//         having a general category of "Zs", naturally with the
+	//         exception of U+0020.
+	//
+	//     2.  Any instances of the ASCII space character at the beginning
+	//         or end of a nickname MUST be removed (e.g., "stpeter " is
+	//         mapped to "stpeter").
+	//
+	//     3.  Interior sequences of more than one ASCII space character
+	//         MUST be mapped to a single ASCII space character (e.g.,
+	//         "St  Peter" is mapped to "St Peter").
+
+	for nSrc < len(src) {
+		// utf8.DecodeRune reports a truncated encoding as (RuneError, 1), never
+		// with size 0, so a rune split across two calls has to be detected
+		// with FullRune before decoding.
+		if !atEOF && !utf8.FullRune(src[nSrc:]) {
+			return nDst, nSrc, transform.ErrShortSrc
+		}
+		r, size := utf8.DecodeRune(src[nSrc:])
+
+		if unicode.Is(unicode.Zs, r) {
+			// Defer the space: it is only emitted if a non-space rune follows.
+			t.prevSpace = true
+			nSrc += size
+			continue
+		}
+
+		// Make sure the pending space and the rune both fit before writing
+		// either one, so that a retry after ErrShortDst neither duplicates the
+		// space nor reports bytes that were never written.
+		emitSpace := t.prevSpace && t.notStart
+		need := size
+		if emitSpace {
+			need++
+		}
+		if len(dst)-nDst < need {
+			return nDst, nSrc, transform.ErrShortDst
+		}
+
+		if emitSpace {
+			dst[nDst] = ' '
+			nDst++
+		}
+		nDst += copy(dst[nDst:], src[nSrc:nSrc+size])
+		t.prevSpace = false
+		t.notStart = true
+		nSrc += size
+	}
+	return nDst, nSrc, nil
+}
+
+var (
+	// NicknameProfile is a PRECIS freeform profile configured with the
+	// nickname additional mapping defined in this file, lower-casing, NFKC
+	// normalization, and rejection of empty strings.
+	NicknameProfile = precis.NewFreeform(
+		precis.AdditionalMapping(func() transform.Transformer {
+			return &nickAdditionalMapping{}
+		}),
+		precis.LowerCase(),
+		precis.Norm(norm.NFKC),
+		precis.DisallowEmpty,
+	)
+)