mirror of
https://github.com/ergochat/ergo.git
synced 2025-01-03 16:42:38 +01:00
Merge pull request #564 from slingamn/confusables_again.1
more systematic fix for #562
This commit is contained in:
commit
c65c7de736
@ -108,26 +108,6 @@ func CasefoldName(name string) (string, error) {
|
||||
return lowered, err
|
||||
}
|
||||
|
||||
// "boring" names are exempt from skeletonization.
|
||||
// this is because confusables.txt considers various pure ASCII alphanumeric
|
||||
// strings confusable: 0 and O, 1 and l, m and rn. IMO this causes more problems
|
||||
// than it solves.
|
||||
// isBoring reports whether name consists solely of ASCII letters, ASCII
// digits, and a fixed set of benign printable ASCII punctuation. It returns
// false as soon as it encounters any other byte. The empty string is boring.
func isBoring(name string) bool {
	// Scan bytes rather than runes: any byte outside the ASCII sets below
	// (including every byte >= 0x80) reaches the default case and is rejected.
	for i := 0; i < len(name); i++ {
		chr := name[i]
		if (chr >= 'a' && chr <= 'z') || (chr >= 'A' && chr <= 'Z') || (chr >= '0' && chr <= '9') {
			continue // alphanumerics
		}
		switch chr {
		case '$', '%', '^', '&', '(', ')', '{', '}', '[', ']', '<', '>', '=':
			continue // benign printable ascii characters
		default:
			return false // potentially confusable ascii like | ' `, non-ascii
		}
	}
	return true
}
|
||||
|
||||
// returns true if the given name is a valid ident, using a mix of Insp and
|
||||
// Chary's ident restrictions.
|
||||
func isIdent(name string) bool {
|
||||
@ -168,9 +148,7 @@ func Skeleton(name string) (string, error) {
|
||||
// same as PRECIS:
|
||||
name = width.Fold.String(name)
|
||||
|
||||
if !isBoring(name) {
|
||||
name = confusables.Skeleton(name)
|
||||
}
|
||||
|
||||
// internationalized lowercasing for skeletons; this is much more lenient than
|
||||
// Casefold. In particular, skeletons are expected to mix scripts (which may
|
||||
|
@ -128,18 +128,6 @@ func TestCasefoldName(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsBoring(t *testing.T) {
|
||||
assertBoring := func(str string, expected bool) {
|
||||
if isBoring(str) != expected {
|
||||
t.Errorf("expected [%s] to have boringness [%t], but got [%t]", str, expected, !expected)
|
||||
}
|
||||
}
|
||||
|
||||
assertBoring("warning", true)
|
||||
assertBoring("phi|ip", false)
|
||||
assertBoring("Νικηφόρος", false)
|
||||
}
|
||||
|
||||
func TestIsIdent(t *testing.T) {
|
||||
assertIdent := func(str string, expected bool) {
|
||||
if isIdent(str) != expected {
|
||||
@ -165,15 +153,15 @@ func TestSkeleton(t *testing.T) {
|
||||
return skel
|
||||
}
|
||||
|
||||
if skeleton("warning") == skeleton("waming") {
|
||||
t.Errorf("Oragono shouldn't consider rn confusable with m")
|
||||
if skeleton("warning") != skeleton("waming") {
|
||||
t.Errorf("i give up, Oragono should consider rn confusable with m")
|
||||
}
|
||||
|
||||
if skeleton("Phi|ip") != "philip" {
|
||||
t.Errorf("but we still consider pipe confusable with l")
|
||||
}
|
||||
|
||||
if skeleton("smt") != "smt" {
|
||||
if skeleton("smt") != skeleton("smt") {
|
||||
t.Errorf("fullwidth characters should skeletonize to plain old ascii characters")
|
||||
}
|
||||
|
||||
@ -181,7 +169,7 @@ func TestSkeleton(t *testing.T) {
|
||||
t.Errorf("after skeletonizing, we should casefold")
|
||||
}
|
||||
|
||||
if skeleton("smt") != "smt" {
|
||||
if skeleton("smt") != skeleton("smt") {
|
||||
t.Errorf("our friend lover successfully tricked the skeleton algorithm!")
|
||||
}
|
||||
|
||||
@ -189,6 +177,10 @@ func TestSkeleton(t *testing.T) {
|
||||
t.Errorf("we must protect against cyrillic homoglyph attacks")
|
||||
}
|
||||
|
||||
if skeleton("еmily") != skeleton("emily") {
|
||||
t.Errorf("we must protect against cyrillic homoglyph attacks")
|
||||
}
|
||||
|
||||
if skeleton("РОТАТО") != "potato" {
|
||||
t.Errorf("we must protect against cyrillic homoglyph attacks")
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user