mirror of
https://github.com/ergochat/ergo.git
synced 2024-12-31 15:12:34 +01:00
fix an edge case in skeletonization
The ASCII 'm' skeletonizes to 'rn' (but is exempted from skeletonization by the isBoring check), whereas the fullwidth 'ｍ' (U+FF4D) has no entry in the skeleton mapping and so is left unchanged. The root cause is the (still unexplained) patchiness of the skeleton mapping from fullwidth to standard-width Latin characters; the fix is to perform width folding first, before both the isBoring check and skeletonization.
This commit is contained in:
parent
8991846fcf
commit
be4d098945
@ -163,15 +163,15 @@ func isIdent(name string) bool {
|
||||
// from the original (unfolded) identifier and stored/tracked separately from the
|
||||
// casefolded identifier.
|
||||
func Skeleton(name string) (string, error) {
|
||||
if !isBoring(name) {
|
||||
name = confusables.Skeleton(name)
|
||||
}
|
||||
|
||||
// XXX the confusables table includes some, but not all, fullwidth->standard
|
||||
// mappings for latin characters. do a pass of explicit width folding,
|
||||
// same as PRECIS:
|
||||
name = width.Fold.String(name)
|
||||
|
||||
if !isBoring(name) {
|
||||
name = confusables.Skeleton(name)
|
||||
}
|
||||
|
||||
// internationalized lowercasing for skeletons; this is much more lenient than
|
||||
// Casefold. In particular, skeletons are expected to mix scripts (which may
|
||||
// violate the bidi rule). We also don't care if they contain runes
|
||||
|
@ -181,6 +181,10 @@ func TestSkeleton(t *testing.T) {
|
||||
t.Errorf("after skeletonizing, we should casefold")
|
||||
}
|
||||
|
||||
if skeleton("smt") != "smt" {
|
||||
t.Errorf("our friend lover successfully tricked the skeleton algorithm!")
|
||||
}
|
||||
|
||||
if skeleton("еvan") != "evan" {
|
||||
t.Errorf("we must protect against cyrillic homoglyph attacks")
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user