fix an edge case in skeletonization

'm' skeletonizes to 'rn' (although it is exempted by the isBoring check), whereas the fullwidth 'ｍ' does not skeletonize to anything. The root cause is the (still unexplained) patchiness of the skeleton mapping for fullwidth -> standard-width Latin characters; the fix is to perform width mapping first, before either skeletonization or the isBoring check.
2026-06-21 16:17:39 +02:00 · 2019-06-18 02:34:16 -04:00 · 2019-06-18 02:34:16 -04:00 · be4d098945
commit be4d098945
parent 8991846fcf
2 changed files with 8 additions and 4 deletions
--- a/irc/strings.go
+++ b/irc/strings.go
@@ -163,15 +163,15 @@ func isIdent(name string) bool {
 // from the original (unfolded) identifier and stored/tracked separately from the
 // casefolded identifier.
 func Skeleton(name string) (string, error) {
-	if !isBoring(name) {
-		name = confusables.Skeleton(name)
-	}
-
 	// XXX the confusables table includes some, but not all, fullwidth->standard
 	// mappings for latin characters. do a pass of explicit width folding,
 	// same as PRECIS:
 	name = width.Fold.String(name)

+	if !isBoring(name) {
+		name = confusables.Skeleton(name)
+	}
+
 	// internationalized lowercasing for skeletons; this is much more lenient than
 	// Casefold. In particular, skeletons are expected to mix scripts (which may
 	// violate the bidi rule). We also don't care if they contain runes
--- a/irc/strings_test.go
+++ b/irc/strings_test.go
@@ -181,6 +181,10 @@ func TestSkeleton(t *testing.T) {
 		t.Errorf("after skeletonizing, we should casefold")
 	}

+	if skeleton("smｔ") != "smt" {
+		t.Errorf("our friend lover successfully tricked the skeleton algorithm!")
+	}
+
 	if skeleton("еvan") != "evan" {
 		t.Errorf("we must protect against cyrillic homoglyph attacks")
 	}