mirror of
https://github.com/ergochat/ergo.git
synced 2024-11-10 22:19:31 +01:00
Merge pull request #564 from slingamn/confusables_again.1
more systematic fix for #562
This commit is contained in:
commit
c65c7de736
@ -108,26 +108,6 @@ func CasefoldName(name string) (string, error) {
|
|||||||
return lowered, err
|
return lowered, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// "boring" names are exempt from skeletonization.
|
|
||||||
// this is because confusables.txt considers various pure ASCII alphanumeric
|
|
||||||
// strings confusable: 0 and O, 1 and l, m and rn. IMO this causes more problems
|
|
||||||
// than it solves.
|
|
||||||
// isBoring reports whether name is made up solely of ASCII letters, ASCII
// digits, and a fixed set of benign ASCII punctuation. The empty string is
// boring. Any other byte makes the name non-boring.
func isBoring(name string) bool {
	// Scan byte-by-byte: every byte of a multi-byte UTF-8 sequence is >= 0x80,
	// so non-ASCII input always lands in the default branch.
	for _, b := range []byte(name) {
		switch {
		case 'a' <= b && b <= 'z', 'A' <= b && b <= 'Z', '0' <= b && b <= '9':
			// alphanumerics
		case b == '$', b == '%', b == '^', b == '&',
			b == '(', b == ')', b == '{', b == '}',
			b == '[', b == ']', b == '<', b == '>', b == '=':
			// benign printable ascii characters
		default:
			// potentially confusable ascii like | ' `, or non-ascii
			return false
		}
	}
	return true
}
|
|
||||||
|
|
||||||
// returns true if the given name is a valid ident, using a mix of Insp and
|
// returns true if the given name is a valid ident, using a mix of Insp and
|
||||||
// Chary's ident restrictions.
|
// Chary's ident restrictions.
|
||||||
func isIdent(name string) bool {
|
func isIdent(name string) bool {
|
||||||
@ -168,9 +148,7 @@ func Skeleton(name string) (string, error) {
|
|||||||
// same as PRECIS:
|
// same as PRECIS:
|
||||||
name = width.Fold.String(name)
|
name = width.Fold.String(name)
|
||||||
|
|
||||||
if !isBoring(name) {
|
name = confusables.Skeleton(name)
|
||||||
name = confusables.Skeleton(name)
|
|
||||||
}
|
|
||||||
|
|
||||||
// internationalized lowercasing for skeletons; this is much more lenient than
|
// internationalized lowercasing for skeletons; this is much more lenient than
|
||||||
// Casefold. In particular, skeletons are expected to mix scripts (which may
|
// Casefold. In particular, skeletons are expected to mix scripts (which may
|
||||||
|
@ -128,18 +128,6 @@ func TestCasefoldName(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestIsBoring(t *testing.T) {
|
|
||||||
assertBoring := func(str string, expected bool) {
|
|
||||||
if isBoring(str) != expected {
|
|
||||||
t.Errorf("expected [%s] to have boringness [%t], but got [%t]", str, expected, !expected)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
assertBoring("warning", true)
|
|
||||||
assertBoring("phi|ip", false)
|
|
||||||
assertBoring("Νικηφόρος", false)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestIsIdent(t *testing.T) {
|
func TestIsIdent(t *testing.T) {
|
||||||
assertIdent := func(str string, expected bool) {
|
assertIdent := func(str string, expected bool) {
|
||||||
if isIdent(str) != expected {
|
if isIdent(str) != expected {
|
||||||
@ -165,15 +153,15 @@ func TestSkeleton(t *testing.T) {
|
|||||||
return skel
|
return skel
|
||||||
}
|
}
|
||||||
|
|
||||||
if skeleton("warning") == skeleton("waming") {
|
if skeleton("warning") != skeleton("waming") {
|
||||||
t.Errorf("Oragono shouldn't consider rn confusable with m")
|
t.Errorf("i give up, Oragono should consider rn confusable with m")
|
||||||
}
|
}
|
||||||
|
|
||||||
if skeleton("Phi|ip") != "philip" {
|
if skeleton("Phi|ip") != "philip" {
|
||||||
t.Errorf("but we still consider pipe confusable with l")
|
t.Errorf("but we still consider pipe confusable with l")
|
||||||
}
|
}
|
||||||
|
|
||||||
if skeleton("ｓｍｔ") != "smt" {
|
if skeleton("ｓｍｔ") != skeleton("smt") {
|
||||||
t.Errorf("fullwidth characters should skeletonize to plain old ascii characters")
|
t.Errorf("fullwidth characters should skeletonize to plain old ascii characters")
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -181,7 +169,7 @@ func TestSkeleton(t *testing.T) {
|
|||||||
t.Errorf("after skeletonizing, we should casefold")
|
t.Errorf("after skeletonizing, we should casefold")
|
||||||
}
|
}
|
||||||
|
|
||||||
if skeleton("smt") != "smt" {
|
if skeleton("smt") != skeleton("smt") {
|
||||||
t.Errorf("our friend lover successfully tricked the skeleton algorithm!")
|
t.Errorf("our friend lover successfully tricked the skeleton algorithm!")
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -189,6 +177,10 @@ func TestSkeleton(t *testing.T) {
|
|||||||
t.Errorf("we must protect against cyrillic homoglyph attacks")
|
t.Errorf("we must protect against cyrillic homoglyph attacks")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if skeleton("еmily") != skeleton("emily") {
|
||||||
|
t.Errorf("we must protect against cyrillic homoglyph attacks")
|
||||||
|
}
|
||||||
|
|
||||||
if skeleton("РОТАТО") != "potato" {
|
if skeleton("РОТАТО") != "potato" {
|
||||||
t.Errorf("we must protect against cyrillic homoglyph attacks")
|
t.Errorf("we must protect against cyrillic homoglyph attacks")
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user