mirror of
https://github.com/ergochat/ergo.git
synced 2024-11-22 11:59:40 +01:00
more lenient casefolding for skeletons
This commit is contained in:
parent
e7399ba2b5
commit
c34d9e0b72
@ -9,8 +9,10 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/oragono/confusables"
|
"github.com/oragono/confusables"
|
||||||
|
"golang.org/x/text/cases"
|
||||||
|
"golang.org/x/text/language"
|
||||||
"golang.org/x/text/secure/precis"
|
"golang.org/x/text/secure/precis"
|
||||||
"golang.org/x/text/unicode/norm"
|
"golang.org/x/text/width"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -126,14 +128,6 @@ func isBoring(name string) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
var skeletonCasefolder = precis.NewIdentifier(precis.FoldWidth, precis.LowerCase(), precis.Norm(norm.NFC))
|
|
||||||
|
|
||||||
// similar to Casefold, but exempt from the bidi rule, because skeletons may
|
|
||||||
// mix scripts strangely
|
|
||||||
func casefoldSkeleton(str string) (string, error) {
|
|
||||||
return iterateFolding(skeletonCasefolder, str)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skeleton produces a canonicalized identifier that tries to catch
|
// Skeleton produces a canonicalized identifier that tries to catch
|
||||||
// homoglyphic / confusable identifiers. It's a tweaked version of the TR39
|
// homoglyphic / confusable identifiers. It's a tweaked version of the TR39
|
||||||
// skeleton algorithm. We apply the skeleton algorithm first and only then casefold,
|
// skeleton algorithm. We apply the skeleton algorithm first and only then casefold,
|
||||||
@ -146,5 +140,16 @@ func Skeleton(name string) (string, error) {
|
|||||||
if !isBoring(name) {
|
if !isBoring(name) {
|
||||||
name = confusables.Skeleton(name)
|
name = confusables.Skeleton(name)
|
||||||
}
|
}
|
||||||
return casefoldSkeleton(name)
|
|
||||||
|
// XXX the confusables table includes some, but not all, fullwidth->standard
|
||||||
|
// mappings for Latin characters. Do a pass of explicit width folding,
|
||||||
|
// same as PRECIS:
|
||||||
|
name = width.Fold.String(name)
|
||||||
|
|
||||||
|
// internationalized lowercasing for skeletons; this is much more lenient than
|
||||||
|
// Casefold. In particular, skeletons are expected to mix scripts (which may
|
||||||
|
// violate the bidi rule). We also don't care if they contain runes
|
||||||
|
// that are disallowed by PRECIS, because every identifier must independently
|
||||||
|
// pass PRECIS --- we are just further canonicalizing the skeleton.
|
||||||
|
return cases.Lower(language.Und).String(name), nil
|
||||||
}
|
}
|
||||||
|
@ -173,4 +173,6 @@ func TestSkeleton(t *testing.T) {
|
|||||||
t.Errorf("we must protect against cyrillic homoglyph attacks")
|
t.Errorf("we must protect against cyrillic homoglyph attacks")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// should not raise an error:
|
||||||
|
skeleton("けらんぐ")
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user