Merge pull request #564 from slingamn/confusables_again.1

more systematic fix for #562
This commit is contained in:
Daniel Oaks 2019-06-24 14:33:17 +10:00 committed by GitHub
commit c65c7de736
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 9 additions and 39 deletions

View File

@ -108,26 +108,6 @@ func CasefoldName(name string) (string, error) {
return lowered, err return lowered, err
} }
// "boring" names are exempt from skeletonization.
// this is because confusables.txt considers various pure ASCII alphanumeric
// strings confusable: 0 and O, 1 and l, m and rn. IMO this causes more problems
// than it solves.
func isBoring(name string) bool {
	// Inspect the raw bytes rather than runes: every byte of a multi-byte
	// UTF-8 sequence is >= 0x80, so it matches none of the ASCII cases below
	// and lands in the rejecting default branch.
	for _, b := range []byte(name) {
		switch {
		case 'a' <= b && b <= 'z', 'A' <= b && b <= 'Z', '0' <= b && b <= '9':
			// ASCII letters and digits never disqualify a name.
		case b == '$', b == '%', b == '^', b == '&',
			b == '(', b == ')', b == '{', b == '}',
			b == '[', b == ']', b == '<', b == '>', b == '=':
			// Printable ASCII punctuation treated as harmless.
		default:
			// Anything else (e.g. | ' ` or a non-ASCII byte) makes the
			// name not boring.
			return false
		}
	}
	// Every byte was on the allow-list; this includes the empty string.
	return true
}
// returns true if the given name is a valid ident, using a mix of Insp and // returns true if the given name is a valid ident, using a mix of Insp and
// Chary's ident restrictions. // Chary's ident restrictions.
func isIdent(name string) bool { func isIdent(name string) bool {
@ -168,9 +148,7 @@ func Skeleton(name string) (string, error) {
// same as PRECIS: // same as PRECIS:
name = width.Fold.String(name) name = width.Fold.String(name)
if !isBoring(name) {
name = confusables.Skeleton(name) name = confusables.Skeleton(name)
}
// internationalized lowercasing for skeletons; this is much more lenient than // internationalized lowercasing for skeletons; this is much more lenient than
// Casefold. In particular, skeletons are expected to mix scripts (which may // Casefold. In particular, skeletons are expected to mix scripts (which may

View File

@ -128,18 +128,6 @@ func TestCasefoldName(t *testing.T) {
} }
} }
// TestIsBoring checks isBoring against a plain ASCII word, an ASCII name
// containing a pipe, and a non-ASCII (Greek) name.
func TestIsBoring(t *testing.T) {
	cases := []struct {
		input string
		want  bool
	}{
		{"warning", true},
		{"phi|ip", false},
		{"Νικηφόρος", false},
	}

	for _, tc := range cases {
		got := isBoring(tc.input)
		if got == tc.want {
			continue
		}
		// On a mismatch the observed boolean is necessarily the negation of
		// the expected one, so reporting got is the same as reporting !want.
		t.Errorf("expected [%s] to have boringness [%t], but got [%t]", tc.input, tc.want, got)
	}
}
func TestIsIdent(t *testing.T) { func TestIsIdent(t *testing.T) {
assertIdent := func(str string, expected bool) { assertIdent := func(str string, expected bool) {
if isIdent(str) != expected { if isIdent(str) != expected {
@ -165,15 +153,15 @@ func TestSkeleton(t *testing.T) {
return skel return skel
} }
if skeleton("warning") == skeleton("waming") { if skeleton("warning") != skeleton("waming") {
t.Errorf("Oragono shouldn't consider rn confusable with m") t.Errorf("i give up, Oragono should consider rn confusable with m")
} }
if skeleton("Phi|ip") != "philip" { if skeleton("Phi|ip") != "philip" {
t.Errorf("but we still consider pipe confusable with l") t.Errorf("but we still consider pipe confusable with l")
} }
if skeleton("") != "smt" { if skeleton("") != skeleton("smt") {
t.Errorf("fullwidth characters should skeletonize to plain old ascii characters") t.Errorf("fullwidth characters should skeletonize to plain old ascii characters")
} }
@ -181,7 +169,7 @@ func TestSkeleton(t *testing.T) {
t.Errorf("after skeletonizing, we should casefold") t.Errorf("after skeletonizing, we should casefold")
} }
if skeleton("sm") != "smt" { if skeleton("sm") != skeleton("smt") {
t.Errorf("our friend lover successfully tricked the skeleton algorithm!") t.Errorf("our friend lover successfully tricked the skeleton algorithm!")
} }
@ -189,6 +177,10 @@ func TestSkeleton(t *testing.T) {
t.Errorf("we must protect against cyrillic homoglyph attacks") t.Errorf("we must protect against cyrillic homoglyph attacks")
} }
if skeleton("еmily") != skeleton("emily") {
t.Errorf("we must protect against cyrillic homoglyph attacks")
}
if skeleton("РОТАТО") != "potato" { if skeleton("РОТАТО") != "potato" {
t.Errorf("we must protect against cyrillic homoglyph attacks") t.Errorf("we must protect against cyrillic homoglyph attacks")
} }