// Path: ergo/vendor/github.com/oragono/confusables/confusables.go (83 lines, 2.0 KiB, Go)

//go:generate go run maketables.go > tables.go
package confusables
import (
"bytes"
"golang.org/x/text/unicode/norm"
)
// TODO: document casefolding approaches
// (suggest that callers casefold strings first; explain how to catch "paypal" vs. "pAypal")
// TODO: document that callers may want to store the Skeleton and check against it later
// TODO: implement the restricted characters from xidmodifications.txt
// lookupFunc maps a single rune to its replacement string. skeletonBase
// treats an empty result as "no replacement" and keeps the rune unchanged.
type lookupFunc func(r rune) string
// lookupReplacement returns the confusablesMap entry for r.
// Presumably this is "" for runes without an entry; skeletonBase treats
// an empty string as "leave the rune unchanged".
func lookupReplacement(r rune) string {
	replacement := confusablesMap[r]
	return replacement
}
// lookupReplacementTweaked returns the replacement for r, giving entries in
// tweaksMap priority over confusablesMap. A rune present in tweaksMap always
// yields the tweaksMap value, even when that value is the empty string.
func lookupReplacementTweaked(r rune) string {
	override, overridden := tweaksMap[r]
	if !overridden {
		// No custom override: fall back to the regular table.
		return confusablesMap[r]
	}
	return override
}
// skeletonBase computes the skeleton of s, using lookup to obtain the
// replacement string for each rune. A rune for which lookup returns ""
// is copied to the output unchanged.
//
// The steps are:
//  1. convert s to NFD,
//  2. map each rune through lookup,
//  3. reapply NFD to the result.
func skeletonBase(s string, lookup lookupFunc) string {
	// Step 1: normalize the input to NFD.
	s = norm.NFD.String(s)

	// Step 2: substitute runes according to the lookup table.
	var out bytes.Buffer
	replacedAny := false // fast path: while false, s itself is the result
	runStart := -1       // byte offset where the pending run of unreplaced runes begins; -1 if none
	for pos, r := range s {
		mapped := lookup(r)
		if mapped == "" {
			// Unreplaced rune: extend (or open) the pending run; it is
			// copied lazily, only once a replacement actually occurs.
			if runStart < 0 {
				runStart = pos
			}
			continue
		}
		if runStart >= 0 {
			// Flush the untouched text that precedes this replacement.
			out.WriteString(s[runStart:pos])
			runStart = -1
		}
		out.WriteString(mapped)
		replacedAny = true
	}
	if replacedAny {
		if runStart >= 0 {
			// Trailing unreplaced text after the last replacement.
			out.WriteString(s[runStart:])
		}
		s = out.String()
	}

	// Step 3: normalize again.
	return norm.NFD.String(s)
}
// Skeleton returns the "skeleton" form of s, as described in
// http://www.unicode.org/reports/tr39/#Confusable_Detection.
// Replacements are taken from the unmodified confusables table.
func Skeleton(s string) string {
	skeleton := skeletonBase(s, lookupReplacement)
	return skeleton
}
// SkeletonTweaked behaves like Skeleton, except that replacements are looked
// up with custom overrides applied on top of the confusables table
// (currently the overrides remove the m -> rn mapping).
func SkeletonTweaked(s string) string {
	skeleton := skeletonBase(s, lookupReplacementTweaked)
	return skeleton
}
// Confusable reports whether x and y have the same skeleton, as computed by
// Skeleton.
func Confusable(x, y string) bool {
	skeletonX := Skeleton(x)
	skeletonY := Skeleton(y)
	return skeletonX == skeletonY
}