3
0
mirror of https://github.com/ergochat/ergo.git synced 2026-03-13 18:58:02 +01:00

build changes (#2356)

* default build includes everything
* allow compiling out PRECIS and Skeleton (still included by default)
* consistently use `postgresql` in identifiers
This commit is contained in:
Shivaram Lingamneni 2026-03-12 18:48:37 -07:00 committed by GitHub
parent 768c01c17b
commit e7558f292c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 523 additions and 389 deletions

View File

@ -22,8 +22,10 @@ jobs:
go-version: "1.26"
- name: "install python3-pytest"
run: "sudo apt install -y python3-pytest python3-websockets"
- name: "make build_full"
run: "make build_full"
- name: "make minimal"
run: "make minimal"
- name: "make build"
run: "make build"
- name: "make install"
run: "make install"
- name: "make test"

View File

@ -5,13 +5,13 @@ GIT_TAG := $(shell git tag --points-at HEAD 2> /dev/null | head -n 1)
# this can be overridden by passing CGO_ENABLED=1 to make
export CGO_ENABLED ?= 0
capdef_file = ./irc/caps/defs.go
# default build tags; override by passing, e.g. ERGO_BUILD_TAGS="mysql postgres"
ERGO_BUILD_TAGS ?= mysql
# build tags for the maximalist build with everything included
full_tags = "mysql postgres sqlite"
full_tags = i18n mysql postgresql sqlite
# build everything by default; override by passing, e.g. ERGO_BUILD_TAGS="mysql postgresql"
ERGO_BUILD_TAGS ?= $(full_tags)
capdef_file = ./irc/caps/defs.go
.PHONY: all
all: build
@ -20,22 +20,18 @@ all: build
build:
go build -v -tags "$(ERGO_BUILD_TAGS)" -ldflags "-X main.commit=$(GIT_COMMIT) -X main.version=$(GIT_TAG)"
.PHONY: build_full
build_full:
go build -v -tags $(full_tags) -ldflags "-X main.commit=$(GIT_COMMIT) -X main.version=$(GIT_TAG)"
.PHONY: install
install:
go install -v -tags $(ERGO_BUILD_TAGS) -ldflags "-X main.commit=$(GIT_COMMIT) -X main.version=$(GIT_TAG)"
.PHONY: install_full
install_full:
go install -v -tags $(full_tags) -ldflags "-X main.commit=$(GIT_COMMIT) -X main.version=$(GIT_TAG)"
go install -v -tags "$(ERGO_BUILD_TAGS)" -ldflags "-X main.commit=$(GIT_COMMIT) -X main.version=$(GIT_TAG)"
.PHONY: release
release:
goreleaser --skip=publish --clean
.PHONY: minimal
minimal:
go build -v -tags "" -ldflags "-X main.commit=$(GIT_COMMIT) -X main.version=$(GIT_TAG)"
.PHONY: capdefs
capdefs:
python3 ./gencapdefs.py > ${capdef_file}
@ -43,8 +39,9 @@ capdefs:
.PHONY: test
test:
python3 ./gencapdefs.py | diff - ${capdef_file}
go test -tags $(full_tags) ./...
go vet -tags $(full_tags) ./...
go test -tags "$(full_tags)" ./...
go vet -tags "$(full_tags)" ./...
go vet -tags "" ./...
./.check-gofmt.sh
.PHONY: smoke

View File

@ -81,18 +81,13 @@ You can also clone this repository and build from source. A quick start guide:
1. Obtain an [up-to-date distribution of the Go language for your OS and architecture](https://golang.org/dl/). Check the output of `go version` to ensure it was installed correctly.
1. Clone the repository.
1. `git checkout stable`
1. To build the default Ergo binary, `make`
1. To instead build an Ergo binary that includes support for all datastores (including PostgreSQL and SQLite), `make build_full`
1. `make`
1. You should now have a binary named `ergo` in the working directory.
For more information, including on build customization, see [docs/BUILD.md](https://github.com/ergochat/ergo/blob/master/docs/BUILD.md).
Ergo vendors all its dependencies, so you will not need to fetch any dependencies remotely. For more information, including on build customization, see [docs/BUILD.md](https://github.com/ergochat/ergo/blob/master/docs/BUILD.md).
For information on contributing to Ergo, see [DEVELOPING.md](https://github.com/ergochat/ergo/blob/master/DEVELOPING.md).
#### Building
You'll need an [up-to-date distribution of the Go language for your OS and architecture](https://golang.org/dl/). Once that's installed (check the output of `go version`), just check out your desired branch or tag and run `make`. This will produce an executable binary named `ergo` in the base directory of the project. (Ergo vendors all its dependencies, so you will not need to fetch any dependencies remotely.)
## Configuration
The default config file [`default.yaml`](default.yaml) helps walk you through what each option means and changes.

View File

@ -31,4 +31,11 @@ The `master` branch is not recommended for production use since it may contain b
By default, Ergo is built with cgo disabled, producing a fully statically linked binary. You can disable this with `export CGO_ENABLED=1` before running `make`.
The default Ergo binary (built with `make` or `make build`) includes support for an in-memory history backend, plus a MySQL history backend. `make build_full` will additionally compile in support for PostgreSQL and SQLite history backends. You can also customize which backends are included, with, e.g. `export ERGO_BUILD_TAGS="mysql sqlite"`.
The default Ergo binary (built with `make` or `make build`) includes support for all optional features. Each optional feature is controlled via a separate build tag; to override the build tags, pass the environment variable `ERGO_BUILD_TAGS` with a space-separated list of tags. (For example, for parity with v2.17.0 and earlier, you can run `ERGO_BUILD_TAGS="i18n mysql" make`. Passing the empty string disables all optional features.)
The supported build tags are:
* `i18n` enables support for non-ASCII casemappings (allowing Unicode in nicknames and channel names). (This was a default feature in Ergo v2.17.0 and earlier, but was not enabled by default at runtime. See the `server.casemapping` value of the config file.)
* `mysql` enables support for MySQL as a persistent history backend. (This was a default feature in v2.17.0 and earlier.)
* `postgresql` enables support for PostgreSQL as a persistent history backend.
* `sqlite` enables support for SQLite as a persistent history backend.

View File

@ -33,6 +33,7 @@ import (
"github.com/ergochat/ergo/irc/connection_limits"
"github.com/ergochat/ergo/irc/custime"
"github.com/ergochat/ergo/irc/email"
"github.com/ergochat/ergo/irc/i18n"
"github.com/ergochat/ergo/irc/isupport"
"github.com/ergochat/ergo/irc/jwt"
"github.com/ergochat/ergo/irc/languages"
@ -41,7 +42,7 @@ import (
"github.com/ergochat/ergo/irc/mysql"
"github.com/ergochat/ergo/irc/oauth2"
"github.com/ergochat/ergo/irc/passwd"
"github.com/ergochat/ergo/irc/postgres"
"github.com/ergochat/ergo/irc/postgresql"
"github.com/ergochat/ergo/irc/sqlite"
"github.com/ergochat/ergo/irc/utils"
"github.com/ergochat/ergo/irc/webpush"
@ -447,31 +448,6 @@ func (nr *NickEnforcementMethod) UnmarshalYAML(unmarshal func(interface{}) error
return err
}
func (cm *Casemapping) UnmarshalYAML(unmarshal func(interface{}) error) (err error) {
var orig string
if err = unmarshal(&orig); err != nil {
return err
}
var result Casemapping
switch strings.ToLower(orig) {
case "ascii":
result = CasemappingASCII
case "precis", "rfc7613", "rfc8265":
result = CasemappingPRECIS
case "permissive", "fun":
result = CasemappingPermissive
case "rfc1459":
result = CasemappingRFC1459
case "rfc1459-strict":
result = CasemappingRFC1459Strict
default:
return fmt.Errorf("invalid casemapping value: %s", orig)
}
*cm = result
return nil
}
// OperClassConfig defines a specific operator class.
type OperClassConfig struct {
Title string
@ -615,7 +591,7 @@ type Config struct {
supportedCaps *caps.Set
supportedCapsWithoutSTS *caps.Set
capValues caps.Values
Casemapping Casemapping
Casemapping i18n.Casemapping
EnforceUtf8 bool `yaml:"enforce-utf8"`
OutputPath string `yaml:"output-path"`
IPCheckScript IPCheckScriptConfig `yaml:"ip-check-script"`
@ -664,7 +640,7 @@ type Config struct {
Path string
AutoUpgrade bool
MySQL mysql.Config
PostgreSQL postgres.Config
PostgreSQL postgresql.Config
SQLite sqlite.Config
}
@ -1255,11 +1231,11 @@ func LoadConfig(filename string) (config *Config, err error) {
}
}
if config.Datastore.PostgreSQL.Enabled {
if !postgres.Enabled {
if !postgresql.Enabled {
// name the build tag (see ERGO_BUILD_TAGS in the Makefile) rather than a
// specific make target, so the hint stays valid however the binary is built
return nil, fmt.Errorf("PostgreSQL is enabled in the config, but this binary was not built with PostgreSQL support. Rebuild with the `postgresql` build tag to enable")
}
if config.Limits.NickLen > postgres.MaxTargetLength || config.Limits.ChannelLen > postgres.MaxTargetLength {
return nil, fmt.Errorf("to use PostgreSQL, nick and channel length limits must be %d or lower", postgres.MaxTargetLength)
if config.Limits.NickLen > postgresql.MaxTargetLength || config.Limits.ChannelLen > postgresql.MaxTargetLength {
return nil, fmt.Errorf("to use PostgreSQL, nick and channel length limits must be %d or lower", postgresql.MaxTargetLength)
}
}
if config.Datastore.SQLite.Enabled {
@ -1377,6 +1353,12 @@ func LoadConfig(filename string) (config *Config, err error) {
config.Server.capValues[caps.Multiline] = multilineCapValue
}
if !i18n.Enabled {
if config.Server.Casemapping != i18n.CasemappingASCII {
return nil, fmt.Errorf("i18n support was compiled out; set casemapping to 'ascii' or recompile")
}
}
// handle legacy name 'bouncer' for 'multiclient' section:
if config.Accounts.Bouncer != nil {
config.Accounts.Multiclient = *config.Accounts.Bouncer
@ -1711,7 +1693,7 @@ func LoadConfig(filename string) (config *Config, err error) {
// same machine:
config.Datastore.MySQL.MaxConns = runtime.NumCPU()
}
// do the same for postgres
// do the same for postgresql
config.Datastore.PostgreSQL.ExpireTime = time.Duration(config.History.Restrictions.ExpireTime)
config.Datastore.PostgreSQL.TrackAccountMessages = config.History.Retention.EnableAccountIndexing
if config.Datastore.PostgreSQL.MaxConns == 0 {
@ -1835,9 +1817,9 @@ func (config *Config) generateISupport() (err error) {
switch config.Server.Casemapping {
default:
casemappingToken = "ascii" // this is published for ascii, precis, or permissive
case CasemappingRFC1459:
case i18n.CasemappingRFC1459:
casemappingToken = "rfc1459"
case CasemappingRFC1459Strict:
case i18n.CasemappingRFC1459Strict:
casemappingToken = "rfc1459-strict"
}
isupport.Add("CASEMAPPING", casemappingToken)
@ -1876,7 +1858,7 @@ func (config *Config) generateISupport() (err error) {
isupport.Add("STATUSMSG", "~&@%+")
isupport.Add("TARGMAX", fmt.Sprintf("NAMES:1,LIST:1,KICK:,WHOIS:1,USERHOST:10,PRIVMSG:%s,TAGMSG:%s,NOTICE:%s,MONITOR:%d", maxTargetsString, maxTargetsString, maxTargetsString, config.Limits.MonitorEntries))
isupport.Add("TOPICLEN", strconv.Itoa(config.Limits.TopicLen))
if config.Server.Casemapping == CasemappingPRECIS {
if config.Server.Casemapping == i18n.CasemappingPRECIS {
isupport.Add("UTF8MAPPING", precisUTF8MappingToken)
}
if config.Server.EnforceUtf8 {
@ -1953,7 +1935,7 @@ func (config *Config) historyChangedFrom(oldConfig *Config) bool {
config.History.Persistent != oldConfig.History.Persistent
}
func compileGuestRegexp(guestFormat string, casemapping Casemapping) (standard, folded *regexp.Regexp, err error) {
func compileGuestRegexp(guestFormat string, casemapping i18n.Casemapping) (standard, folded *regexp.Regexp, err error) {
if strings.Count(guestFormat, "?") != 0 || strings.Count(guestFormat, "*") != 1 {
err = errors.New("guest format must contain 1 '*' and no '?'s")
return
@ -1967,11 +1949,11 @@ func compileGuestRegexp(guestFormat string, casemapping Casemapping) (standard,
starIndex := strings.IndexByte(guestFormat, '*')
initial := guestFormat[:starIndex]
final := guestFormat[starIndex+1:]
initialFolded, err := casefoldWithSetting(initial, casemapping)
initialFolded, err := i18n.CasefoldWithSetting(initial, casemapping)
if err != nil {
return
}
finalFolded, err := casefoldWithSetting(final, casemapping)
finalFolded, err := i18n.CasefoldWithSetting(final, casemapping)
if err != nil {
return
}

View File

@ -82,9 +82,8 @@ var (
// String Errors
var (
errCouldNotStabilize = errors.New("Could not stabilize string while casefolding")
errStringIsEmpty = errors.New("String is empty")
errInvalidCharacter = errors.New("Invalid character")
errStringIsEmpty = errors.New("String is empty")
errInvalidCharacter = errors.New("Invalid character")
)
type CertKeyError struct {

79
irc/i18n/common.go Normal file
View File

@ -0,0 +1,79 @@
package i18n
import (
"errors"
"fmt"
"strings"
)
// Casemapping represents a set of algorithms for case normalization
// and confusables prevention for IRC identifiers (nicknames and channel names).
type Casemapping uint
const (
// "precis" is the default / zero value:
// casefolding/validation: PRECIS + ircd restrictions (like no *)
// confusables detection: standard skeleton algorithm
CasemappingPRECIS Casemapping = iota
// "ascii" is the traditional ircd behavior:
// casefolding/validation: must be pure ASCII and follow ircd restrictions, ASCII lowercasing
// confusables detection: none
CasemappingASCII
// "permissive" is an insecure mode:
// casefolding/validation: arbitrary unicodes that follow ircd restrictions, unicode casefolding
// confusables detection: standard skeleton algorithm (which may be ineffective
// over the larger set of permitted identifiers)
CasemappingPermissive
// rfc1459 is a legacy mapping as defined here: https://modern.ircdocs.horse/#casemapping-parameter
CasemappingRFC1459
// rfc1459-strict is a legacy mapping as defined here: https://modern.ircdocs.horse/#casemapping-parameter
CasemappingRFC1459Strict
)
var (
// errInvalidCharacter is returned by foldASCII when its input contains
// a byte outside the printable ASCII range (' ' through '~')
errInvalidCharacter = errors.New("Invalid character")
)
// UnmarshalYAML decodes a Casemapping from a YAML string scalar. The name is
// matched case-insensitively against the accepted spellings; an unrecognized
// name yields an error and leaves *cm untouched.
func (cm *Casemapping) UnmarshalYAML(unmarshal func(interface{}) error) (err error) {
	var raw string
	if err = unmarshal(&raw); err != nil {
		return err
	}

	// every accepted spelling, keyed by its lowercased form
	spellings := map[string]Casemapping{
		"ascii":          CasemappingASCII,
		"precis":         CasemappingPRECIS,
		"rfc7613":        CasemappingPRECIS,
		"rfc8265":        CasemappingPRECIS,
		"permissive":     CasemappingPermissive,
		"fun":            CasemappingPermissive,
		"rfc1459":        CasemappingRFC1459,
		"rfc1459-strict": CasemappingRFC1459Strict,
	}

	parsed, known := spellings[strings.ToLower(raw)]
	if !known {
		return fmt.Errorf("invalid casemapping value: %s", raw)
	}
	*cm = parsed
	return nil
}
// isPrintableASCII reports whether every byte of str lies in the printable
// ASCII range, i.e. between ' ' (0x20) and '~' (0x7E) inclusive. The empty
// string is considered printable.
func isPrintableASCII(str string) bool {
	for _, b := range []byte(str) {
		// space is accepted here because it's technically printable;
		// it will be disallowed later by CasefoldName/CasefoldChannel
		if ' ' <= b && b <= '~' {
			continue
		}
		return false
	}
	return true
}
// foldASCII lowercases str after checking that it consists solely of
// printable ASCII; otherwise it returns errInvalidCharacter and an empty result.
func foldASCII(str string) (result string, err error) {
	if isPrintableASCII(str) {
		result = strings.ToLower(str)
		return result, nil
	}
	return "", errInvalidCharacter
}

132
irc/i18n/strings.go Normal file
View File

@ -0,0 +1,132 @@
//go:build i18n
package i18n
import (
"errors"
"regexp"
"strings"
"github.com/ergochat/confusables"
"golang.org/x/text/cases"
"golang.org/x/text/secure/precis"
"golang.org/x/text/unicode/norm"
"golang.org/x/text/width"
)
const (
// Enabled is true in this file, which is only compiled under the `i18n` build tag
Enabled = true
// 1.x configurations don't have a server.casemapping field, but
// expect PRECIS. however, technically it's not this value that
// causes them to get PRECIS, it's that PRECIS is the zero value of
// Casemapping (so that's how the YAML deserializes when the field
// is missing).
DefaultCasemapping = CasemappingPRECIS
)
var (
// reviving the old ergonomadic nickname regex:
// in permissive mode, allow arbitrary letters, numbers, punctuation, and symbols
permissiveCharsRegex = regexp.MustCompile(`^[\pL\pN\pP\pS]*$`)
)
// String Errors
var (
// errCouldNotStabilize is used by iterateFolding to signal that repeated
// PRECIS folding did not converge
errCouldNotStabilize = errors.New("Could not stabilize string while casefolding")
)
// Each pass of PRECIS casefolding is a composition of idempotent operations,
// but not idempotent itself. Therefore, the spec says "do it four times and hope
// it converges" (lolwtf). Golang's PRECIS implementation has a "repeat" option,
// which provides this functionality, but unfortunately it's not exposed publicly.
//
// iterateFolding applies profile.CompareKey to oldStr up to four times and
// returns the first output that equals its own input (a fixed point).
// It returns:
//   - the error from CompareKey, if any pass fails;
//   - errCouldNotStabilize, if no pass out of the four produced a fixed point.
func iterateFolding(profile *precis.Profile, oldStr string) (str string, err error) {
	// follow the stabilizing rules laid out here:
	// https://tools.ietf.org/html/draft-ietf-precis-7564bis-10.html#section-7
	for i := 0; i < 4; i++ {
		str, err = profile.CompareKey(oldStr)
		if err != nil {
			return "", err
		}
		if str == oldStr {
			// this pass changed nothing: the string is stable
			return str, nil
		}
		oldStr = str
	}
	// Reaching this point means the fourth pass still changed the string.
	// (Comparing oldStr to str here would always succeed, because the loop
	// assigns oldStr = str before exiting, so the failure must be reported
	// unconditionally.)
	return "", errCouldNotStabilize
}
// foldPRECIS casefolds str by passing the PRECIS UsernameCaseMapped profile
// to iterateFolding, and returns whatever iterateFolding returns.
func foldPRECIS(str string) (result string, err error) {
return iterateFolding(precis.UsernameCaseMapped, str)
}
// foldPermissive casefolds str under the "permissive" casemapping. Input that
// does not match permissiveCharsRegex is rejected with errInvalidCharacter;
// otherwise the string is NFD-normalized, Unicode case-folded, and
// NFD-normalized again.
func foldPermissive(str string) (result string, err error) {
	if permissiveCharsRegex.MatchString(str) {
		// YOLO
		decomposed := norm.NFD.String(str)
		folded := cases.Fold().String(decomposed)
		return norm.NFD.String(folded), nil
	}
	return "", errInvalidCharacter
}
var (
	rfc1459Replacer       = strings.NewReplacer("[", "{", "]", "}", "\\", "|", "~", "^")
	rfc1459StrictReplacer = strings.NewReplacer("[", "{", "]", "}", "\\", "|")
)

// foldRFC1459 applies ASCII folding to str and then the rfc1459 character
// equivalences: `[`→`{`, `]`→`}`, `\`→`|`, and, unless strict is set, `~`→`^`.
// Any error from foldASCII is returned unchanged with an empty result.
func foldRFC1459(str string, strict bool) (result string, err error) {
	lowered, err := foldASCII(str)
	if err != nil {
		return "", err
	}
	if strict {
		return rfc1459StrictReplacer.Replace(lowered), nil
	}
	return rfc1459Replacer.Replace(lowered), nil
}
// CasefoldWithSetting casefolds str using the algorithm selected by setting.
// CasemappingPRECIS, and any value not listed explicitly, uses PRECIS folding.
func CasefoldWithSetting(str string, setting Casemapping) (string, error) {
	switch setting {
	case CasemappingASCII:
		return foldASCII(str)
	case CasemappingPermissive:
		return foldPermissive(str)
	case CasemappingRFC1459, CasemappingRFC1459Strict:
		// the two legacy mappings differ only in the strict flag
		return foldRFC1459(str, setting == CasemappingRFC1459Strict)
	}
	return foldPRECIS(str)
}
// Skeleton produces a canonicalized identifier that tries to catch
// homoglyphic / confusable identifiers. It's a tweaked version of the TR39
// skeleton algorithm. The skeleton algorithm runs first and casefolding
// second, because casefolding first would lose some information about visual
// confusability. A weird consequence is that the skeleton is not a function
// of the casefolded identifier --- it must always be computed from the
// original (unfolded) identifier and stored/tracked separately from the
// casefolded identifier.
func Skeleton(name string) (string, error) {
	// XXX the confusables table includes some, but not all, fullwidth->standard
	// mappings for latin characters, so fold widths explicitly first,
	// same as PRECIS:
	widthFolded := width.Fold.String(name)
	skeleton := confusables.SkeletonTweaked(widthFolded)
	// internationalized lowercasing for skeletons; this is much more lenient than
	// Casefold. In particular, skeletons are expected to mix scripts (which may
	// violate the bidi rule). We also don't care if they contain runes
	// that are disallowed by PRECIS, because every identifier must independently
	// pass PRECIS --- we are just further canonicalizing the skeleton.
	return cases.Fold().String(skeleton), nil
}

156
irc/i18n/strings_test.go Normal file
View File

@ -0,0 +1,156 @@
//go:build i18n
package i18n
import "testing"
func validFoldTester(first, second string, equal bool, folder func(string) (string, error), t *testing.T) {
firstFolded, err := folder(first)
if err != nil {
panic(err)
}
secondFolded, err := folder(second)
if err != nil {
panic(err)
}
foundEqual := firstFolded == secondFolded
if foundEqual != equal {
t.Errorf("%s and %s: expected equality %t, but got %t", first, second, equal, foundEqual)
}
}
// TestFoldPermissive checks which pairs of identifiers foldPermissive
// treats as equal after folding.
func TestFoldPermissive(t *testing.T) {
	pairs := []struct {
		first, second string
		equal         bool
	}{
		{"SHIVARAM", "shivaram", true},
		{"shIvaram", "shivaraM", true},
		{"shivaram", "DAN-", false},
		{"dolph🐬n", "DOLPH🐬n", true},
		{"dolph🐬n", "dolph💻n", false},
		{"9FRONT", "9front", true},
	}
	for _, pair := range pairs {
		validFoldTester(pair.first, pair.second, pair.equal, foldPermissive, t)
	}
}
// TestFoldPermissiveInvalid checks that foldPermissive rejects inputs
// containing a tab, a null byte, or a space.
func TestFoldPermissiveInvalid(t *testing.T) {
	rejected := []struct {
		input     string
		complaint string
	}{
		{"a\tb", "whitespace should be invalid in identifiers"},
		{"a\x00b", "the null byte should be invalid in identifiers"},
		{"a b", "space should be invalid in identifiers"},
	}
	for _, tc := range rejected {
		if _, err := foldPermissive(tc.input); err == nil {
			t.Errorf("%s", tc.complaint)
		}
	}
}
// TestFoldPermissiveNormalization checks foldPermissive on non-ASCII input:
// letters with a case distinction, scripts without one, and precomposed
// accented characters.
func TestFoldPermissiveNormalization(t *testing.T) {
	pairs := []struct {
		first, second string
		equal         bool
	}{
		// case folding should work on non-ASCII letters
		{"Ω", "ω", true},       // Greek capital/small omega
		{"Ñoño", "ñoño", true}, // Spanish precomposed tilde-n, upper vs lower
		{"中文", "中文", true},     // CJK (no case distinction)
		{"中文", "English", false}, // different scripts, not equal
		// NFC-encoded input: "É" (U+00C9) and "é" (U+00E9) should fold equal
		// NFD normalization before case folding ensures composed chars are handled
		{"\u00c9l\u00e8ve", "\u00e9l\u00e8ve", true}, // Élève vs élève
	}
	for _, pair := range pairs {
		validFoldTester(pair.first, pair.second, pair.equal, foldPermissive, t)
	}
}
// TestFoldASCII checks that foldASCII treats identifiers differing only in
// ASCII letter case as equal.
func TestFoldASCII(t *testing.T) {
	pairs := [][2]string{
		{"shivaram", "SHIVARAM"},
		{"X|Y", "x|y"},
		{"a != b", "A != B"},
	}
	for _, pair := range pairs {
		validFoldTester(pair[0], pair[1], true, foldASCII, t)
	}
}
// TestFoldASCIIInvalid checks that foldASCII rejects bytes on either side of
// the printable range: \x01 below it and \x7F above it.
func TestFoldASCIIInvalid(t *testing.T) {
	for _, input := range []string{"\x01", "\x7F"} {
		if _, err := foldASCII(input); err == nil {
			t.Errorf("control characters should be invalid in identifiers")
		}
	}
}
// TestFoldRFC1459 checks the non-strict rfc1459 folding, under which
// `[`/`{`, `]`/`}`, `\`/`|` and `~`/`^` are each treated as equivalent.
func TestFoldRFC1459(t *testing.T) {
	nonStrict := func(input string) (string, error) {
		return foldRFC1459(input, false)
	}
	pairs := []struct {
		first, second string
		equal         bool
	}{
		{"shivaram", "SHIVARAM", true},
		{"shivaram[a]", "shivaram{a}", true},
		{"shivaram\\a]", "shivaram{a}", false},
		{"shivaram\\a]", "shivaram|a}", true},
		{"shivaram~a]", "shivaram^a}", true},
	}
	for _, pair := range pairs {
		validFoldTester(pair.first, pair.second, pair.equal, nonStrict, t)
	}
}
// TestFoldRFC1459Strict checks the strict rfc1459 folding, which keeps the
// bracket and backslash equivalences but does not equate `~` with `^`.
func TestFoldRFC1459Strict(t *testing.T) {
	strict := func(input string) (string, error) {
		return foldRFC1459(input, true)
	}
	pairs := []struct {
		first, second string
		equal         bool
	}{
		{"shivaram", "SHIVARAM", true},
		{"shivaram[a]", "shivaram{a}", true},
		{"shivaram\\a]", "shivaram{a}", false},
		{"shivaram\\a]", "shivaram|a}", true},
		{"shivaram~a]", "shivaram^a}", false},
	}
	for _, pair := range pairs {
		validFoldTester(pair.first, pair.second, pair.equal, strict, t)
	}
}
// TestSkeleton exercises Skeleton on confusable identifiers: ASCII
// lookalikes, fullwidth Latin letters, and Cyrillic homoglyphs.
//
// The fullwidth inputs are written as \u escapes so they cannot be silently
// dropped by tools that strip non-BMP-safe or wide characters; with the
// characters missing, the comparisons against skeleton("smt") always fail.
func TestSkeleton(t *testing.T) {
	skeleton := func(str string) string {
		skel, err := Skeleton(str)
		if err != nil {
			t.Error(err)
		}
		return skel
	}

	if skeleton("warning") == skeleton("waming") {
		t.Errorf("Oragono shouldn't consider rn confusable with m")
	}

	if skeleton("Phi|ip") != "philip" {
		t.Errorf("but we still consider pipe confusable with l")
	}

	// fullwidth lowercase "smt": U+FF53 U+FF4D U+FF54
	if skeleton("\uff53\uff4d\uff54") != skeleton("smt") {
		t.Errorf("fullwidth characters should skeletonize to plain old ascii characters")
	}

	// fullwidth uppercase "SMT": U+FF33 U+FF2D U+FF34
	if skeleton("\uff33\uff2d\uff34") != skeleton("smt") {
		t.Errorf("after skeletonizing, we should casefold")
	}

	// ASCII "sm" followed by a single fullwidth "t" (U+FF54)
	if skeleton("sm\uff54") != skeleton("smt") {
		t.Errorf("our friend lover successfully tricked the skeleton algorithm!")
	}

	if skeleton("еvan") != "evan" {
		t.Errorf("we must protect against cyrillic homoglyph attacks")
	}

	if skeleton("еmily") != skeleton("emily") {
		t.Errorf("we must protect against cyrillic homoglyph attacks")
	}

	if skeleton("РОТАТО") != "potato" {
		t.Errorf("we must protect against cyrillic homoglyph attacks")
	}

	// should not raise an error:
	skeleton("けらんぐ")
}

18
irc/i18n/stub.go Normal file
View File

@ -0,0 +1,18 @@
//go:build !i18n
package i18n
// These are the definitions used when the binary is built without the
// `i18n` build tag.
const (
// Enabled is false in this file, which is only compiled when the `i18n` tag is absent
Enabled = false
DefaultCasemapping = CasemappingASCII
)
// CasefoldWithSetting ignores setting in this build and always applies
// ASCII folding to str.
func CasefoldWithSetting(str string, setting Casemapping) (string, error) {
return foldASCII(str)
}
// Skeleton returns str unchanged and never returns an error in this build.
func Skeleton(str string) (string, error) {
// identity function is fine because we independently case-normalize in Casefold
return str, nil
}

View File

@ -1,7 +1,7 @@
// Copyright (c) 2020 Shivaram Lingamneni
// released under the MIT license
package postgres
package postgresql
import (
"time"

View File

@ -3,7 +3,7 @@
// Copyright (c) 2020 Shivaram Lingamneni
// released under the MIT license
package postgres
package postgresql
import (
"testing"

View File

@ -1,9 +1,9 @@
//go:build postgres
//go:build postgresql
// Copyright (c) 2020 Shivaram Lingamneni
// released under the MIT license
package postgres
package postgresql
import (
"context"

View File

@ -1,12 +1,9 @@
//go:build !postgres
//go:build !postgresql
// Copyright (c) 2020 Shivaram Lingamneni
// released under the MIT license
// Package postgres provides a stub implementation when PostgreSQL support is not enabled.
// To enable PostgreSQL support, build with: make build_full
// This stub prevents the binary from including the large pgx PostgreSQL driver dependencies.
package postgres
package postgresql
import (
"errors"

View File

@ -34,7 +34,7 @@ import (
"github.com/ergochat/ergo/irc/logger"
"github.com/ergochat/ergo/irc/modes"
"github.com/ergochat/ergo/irc/mysql"
"github.com/ergochat/ergo/irc/postgres"
"github.com/ergochat/ergo/irc/postgresql"
"github.com/ergochat/ergo/irc/sno"
"github.com/ergochat/ergo/irc/sqlite"
"github.com/ergochat/ergo/irc/utils"
@ -93,7 +93,7 @@ type Server struct {
store *buntdb.DB
dstore datastore.Datastore
mysqlHistoryDB *mysql.MySQL
postgresHistoryDB *postgres.PostgreSQL
postgresHistoryDB *postgresql.PostgreSQL
sqliteHistoryDB *sqlite.SQLite
historyDB history.Database
torLimiter connection_limits.TorLimiter
@ -1040,9 +1040,9 @@ func (server *Server) loadFromDatastore(config *Config) (err error) {
}
server.historyDB = server.mysqlHistoryDB
} else if config.Datastore.PostgreSQL.Enabled {
server.postgresHistoryDB, err = postgres.NewPostgreSQLDatabase(server.logger, config.Datastore.PostgreSQL)
server.postgresHistoryDB, err = postgresql.NewPostgreSQLDatabase(server.logger, config.Datastore.PostgreSQL)
if err != nil {
server.logger.Error("internal", "could not connect to postgres", err.Error())
server.logger.Error("internal", "could not connect to postgresql", err.Error())
return err
}
server.historyDB = server.postgresHistoryDB

View File

@ -7,15 +7,9 @@ package irc
import (
"fmt"
"regexp"
"strings"
"github.com/ergochat/confusables"
"golang.org/x/text/cases"
"golang.org/x/text/secure/precis"
"golang.org/x/text/unicode/norm"
"golang.org/x/text/width"
"github.com/ergochat/ergo/irc/i18n"
"github.com/ergochat/ergo/irc/utils"
)
@ -38,38 +32,10 @@ const (
disfavoredNameCharacters = `<>'";#`
)
var (
// reviving the old ergonomadic nickname regex:
// in permissive mode, allow arbitrary letters, numbers, punctuation, and symbols
permissiveCharsRegex = regexp.MustCompile(`^[\pL\pN\pP\pS]*$`)
)
type Casemapping uint
const (
// "precis" is the default / zero value:
// casefolding/validation: PRECIS + ircd restrictions (like no *)
// confusables detection: standard skeleton algorithm
CasemappingPRECIS Casemapping = iota
// "ascii" is the traditional ircd behavior:
// casefolding/validation: must be pure ASCII and follow ircd restrictions, ASCII lowercasing
// confusables detection: none
CasemappingASCII
// "permissive" is an insecure mode:
// casefolding/validation: arbitrary unicodes that follow ircd restrictions, unicode casefolding
// confusables detection: standard skeleton algorithm (which may be ineffective
// over the larger set of permitted identifiers)
CasemappingPermissive
// rfc1459 is a legacy mapping as defined here: https://modern.ircdocs.horse/#casemapping-parameter
CasemappingRFC1459
// rfc1459-strict is a legacy mapping as defined here: https://modern.ircdocs.horse/#casemapping-parameter
CasemappingRFC1459Strict
)
// XXX this is a global variable without explicit synchronization.
// it gets set during the initial Server.applyConfig and cannot be changed by rehash:
// this happens-before all IRC connections and all casefolding operations.
var globalCasemappingSetting Casemapping = CasemappingPRECIS
var globalCasemappingSetting i18n.Casemapping = i18n.DefaultCasemapping
// XXX analogous unsynchronized global variable controlling utf8 validation
// if this is off, you get the traditional IRC behavior (relaying any valid RFC1459
@ -77,48 +43,9 @@ var globalCasemappingSetting Casemapping = CasemappingPRECIS
// if this is on, invalid utf8 inputs get a FAIL reply.
var globalUtf8EnforcementSetting bool
// Each pass of PRECIS casefolding is a composition of idempotent operations,
// but not idempotent itself. Therefore, the spec says "do it four times and hope
// it converges" (lolwtf). Golang's PRECIS implementation has a "repeat" option,
// which provides this functionality, but unfortunately it's not exposed publicly.
func iterateFolding(profile *precis.Profile, oldStr string) (str string, err error) {
str = oldStr
// follow the stabilizing rules laid out here:
// https://tools.ietf.org/html/draft-ietf-precis-7564bis-10.html#section-7
for i := 0; i < 4; i++ {
str, err = profile.CompareKey(str)
if err != nil {
return "", err
}
if oldStr == str {
break
}
oldStr = str
}
if oldStr != str {
return "", errCouldNotStabilize
}
return str, nil
}
// Casefold returns a casefolded string, without doing any name or channel character checks.
func Casefold(str string) (string, error) {
return casefoldWithSetting(str, globalCasemappingSetting)
}
func casefoldWithSetting(str string, setting Casemapping) (string, error) {
switch setting {
default:
return iterateFolding(precis.UsernameCaseMapped, str)
case CasemappingASCII:
return foldASCII(str)
case CasemappingPermissive:
return foldPermissive(str)
case CasemappingRFC1459:
return foldRFC1459(str, false)
case CasemappingRFC1459Strict:
return foldRFC1459(str, true)
}
return i18n.CasefoldWithSetting(str, globalCasemappingSetting)
}
// CasefoldChannel returns a casefolded version of a channel name.
@ -211,39 +138,17 @@ func isIdent(name string) bool {
}
// Skeleton produces a canonicalized identifier that tries to catch
// homoglyphic / confusable identifiers. It's a tweaked version of the TR39
// skeleton algorithm. We apply the skeleton algorithm first and only then casefold,
// because casefolding first would lose some information about visual confusability.
// This has the weird consequence that the skeleton is not a function of the
// casefolded identifier --- therefore it must always be computed
// from the original (unfolded) identifier and stored/tracked separately from the
// casefolded identifier.
// homoglyphic / confusable identifiers.
func Skeleton(name string) (string, error) {
switch globalCasemappingSetting {
default:
return realSkeleton(name)
case CasemappingASCII, CasemappingRFC1459, CasemappingRFC1459Strict:
return i18n.Skeleton(name)
case i18n.CasemappingASCII, i18n.CasemappingRFC1459, i18n.CasemappingRFC1459Strict:
// identity function is fine because we independently case-normalize in Casefold
return name, nil
}
}
func realSkeleton(name string) (string, error) {
// XXX the confusables table includes some, but not all, fullwidth->standard
// mappings for latin characters. do a pass of explicit width folding,
// same as PRECIS:
name = width.Fold.String(name)
name = confusables.SkeletonTweaked(name)
// internationalized lowercasing for skeletons; this is much more lenient than
// Casefold. In particular, skeletons are expected to mix scripts (which may
// violate the bidi rule). We also don't care if they contain runes
// that are disallowed by PRECIS, because every identifier must independently
// pass PRECIS --- we are just further canonicalizing the skeleton.
return cases.Fold().String(name), nil
}
// maps a nickmask fragment to an expanded, casefolded wildcard:
// Shivaram@good-fortune -> *!shivaram@good-fortune
// EDMUND -> edmund!*@*
@ -303,30 +208,6 @@ func CanonicalizeMaskWildcard(userhost string) (expanded string, err error) {
return
}
func foldASCII(str string) (result string, err error) {
if !IsPrintableASCII(str) {
return "", errInvalidCharacter
}
return strings.ToLower(str), nil
}
var (
rfc1459Replacer = strings.NewReplacer("[", "{", "]", "}", "\\", "|", "~", "^")
rfc1459StrictReplacer = strings.NewReplacer("[", "{", "]", "}", "\\", "|")
)
func foldRFC1459(str string, strict bool) (result string, err error) {
asciiFold, err := foldASCII(str)
if err != nil {
return "", err
}
replacer := rfc1459Replacer
if strict {
replacer = rfc1459StrictReplacer
}
return replacer.Replace(asciiFold), nil
}
func IsPrintableASCII(str string) bool {
for i := 0; i < len(str); i++ {
// allow space here because it's technically printable;
@ -339,17 +220,6 @@ func IsPrintableASCII(str string) bool {
return true
}
func foldPermissive(str string) (result string, err error) {
if !permissiveCharsRegex.MatchString(str) {
return "", errInvalidCharacter
}
// YOLO
str = norm.NFD.String(str)
str = cases.Fold().String(str)
str = norm.NFD.String(str)
return str, nil
}
// Reduce, e.g., `alice!~u@host` to `alice`
func NUHToNick(nuh string) (nick string) {
if idx := strings.IndexByte(nuh, '!'); idx != -1 {

View File

@ -7,13 +7,23 @@ package irc
import (
"fmt"
"testing"
"github.com/ergochat/ergo/irc/i18n"
)
func TestCasefoldChannel(t *testing.T) {
func TestCasefoldChannelAllCasemappings(t *testing.T) {
oldGlobalCasemapping := globalCasemappingSetting
t.Cleanup(func() {
globalCasemappingSetting = oldGlobalCasemapping
})
globalCasemappingSetting = i18n.CasemappingPRECIS
type channelTest struct {
channel string
folded string
err bool
channel string
folded string
nonASCII bool
err bool
}
testCases := []channelTest{
{
@ -49,18 +59,20 @@ func TestCasefoldChannel(t *testing.T) {
folded: "##ubuntu",
},
{
channel: "#中文频道",
folded: "#中文频道",
channel: "#中文频道",
folded: "#中文频道",
nonASCII: true,
},
{
// Hebrew; it's up to the client to display this right-to-left, including the #
channel: "#שלום",
folded: "#שלום",
channel: "#שלום",
folded: "#שלום",
nonASCII: true,
},
}
for _, errCase := range []string{
"", "#*starpower", "# NASA", "#interro?", "OOF#", "foo",
"", "#*starpower", "# NASA", "#interro?", "OOF#", "foo", "a b", "#a b",
// bidi violation mixing latin and hebrew characters:
"#shalomעליכם",
"#tab\tcharacter", "#\t", "#carriage\rreturn",
@ -68,25 +80,38 @@ func TestCasefoldChannel(t *testing.T) {
testCases = append(testCases, channelTest{channel: errCase, err: true})
}
for i, tt := range testCases {
t.Run(fmt.Sprintf("case %d: %s", i, tt.channel), func(t *testing.T) {
res, err := CasefoldChannel(tt.channel)
if tt.err && err == nil {
t.Errorf("expected error when casefolding [%s], but did not receive one", tt.channel)
return
}
if !tt.err && err != nil {
t.Errorf("unexpected error while casefolding [%s]: %s", tt.channel, err.Error())
return
}
if tt.folded != res {
t.Errorf("expected [%v] to be [%v]", res, tt.folded)
}
})
// don't test permissive because it doesn't fail on bidi violations
casemappings := []i18n.Casemapping{i18n.CasemappingASCII, i18n.CasemappingPRECIS}
for _, casemapping := range casemappings {
globalCasemappingSetting = casemapping
for i, tt := range testCases {
t.Run(fmt.Sprintf("case %d: %s", i, tt.channel), func(t *testing.T) {
res, err := CasefoldChannel(tt.channel)
errExpected := tt.err || (tt.nonASCII && (casemapping == i18n.CasemappingASCII || casemapping == i18n.CasemappingRFC1459Strict))
if errExpected && err == nil {
t.Errorf("expected error when casefolding [%s] under casemapping %d, but did not receive one", tt.channel, casemapping)
return
}
if !errExpected && err != nil {
t.Errorf("unexpected error while casefolding [%s] under casemapping %d: %s", tt.channel, casemapping, err.Error())
return
}
if !errExpected && tt.folded != res {
t.Errorf("expected [%v] to be [%v] under casemapping %d", res, tt.folded, casemapping)
}
})
}
}
}
func TestCasefoldName(t *testing.T) {
func TestCasefoldNameAllCasemappings(t *testing.T) {
oldGlobalCasemapping := globalCasemappingSetting
t.Cleanup(func() {
globalCasemappingSetting = oldGlobalCasemapping
})
type nameTest struct {
name string
folded string
@ -104,28 +129,34 @@ func TestCasefoldName(t *testing.T) {
}
for _, errCase := range []string{
"", "#", "foo,bar", "star*man*junior", "lo7t?",
"", "#", "foo,bar", "star*man*junior", "lo7t?", "a b", "#a b",
"f.l", "excited!nick", "foo@bar", ":trail",
"~o", "&o", "@o", "%h", "+v", "-m", "\t", "a\tb",
} {
testCases = append(testCases, nameTest{name: errCase, err: true})
}
for i, tt := range testCases {
t.Run(fmt.Sprintf("case %d: %s", i, tt.name), func(t *testing.T) {
res, err := CasefoldName(tt.name)
if tt.err && err == nil {
t.Errorf("expected error when casefolding [%s], but did not receive one", tt.name)
return
}
if !tt.err && err != nil {
t.Errorf("unexpected error while casefolding [%s]: %s", tt.name, err.Error())
return
}
if tt.folded != res {
t.Errorf("expected [%v] to be [%v]", res, tt.folded)
}
})
casemappings := []i18n.Casemapping{i18n.CasemappingASCII, i18n.CasemappingPRECIS, i18n.CasemappingPermissive, i18n.CasemappingRFC1459Strict}
for _, casemapping := range casemappings {
globalCasemappingSetting = casemapping
for i, tt := range testCases {
t.Run(fmt.Sprintf("case %d: %s", i, tt.name), func(t *testing.T) {
res, err := CasefoldName(tt.name)
if tt.err && err == nil {
t.Errorf("expected error when casefolding [%s], but did not receive one", tt.name)
return
}
if !tt.err && err != nil {
t.Errorf("unexpected error while casefolding [%s]: %s", tt.name, err.Error())
return
}
if tt.folded != res {
t.Errorf("expected [%v] to be [%v]", res, tt.folded)
}
})
}
}
}
@ -145,51 +176,6 @@ func TestIsIdent(t *testing.T) {
assertIdent("-dan56", false)
}
// TestSkeleton checks that Skeleton maps visually confusable identifiers to
// the same skeleton (fullwidth forms, mixed case, cyrillic homoglyphs) while
// keeping "rn" and "m" distinct.
func TestSkeleton(t *testing.T) {
	// skeleton wraps Skeleton, reporting any error as a test failure and
	// returning whatever skeleton string was produced.
	skeleton := func(str string) string {
		skel, err := Skeleton(str)
		if err != nil {
			t.Error(err)
		}
		return skel
	}

	if skeleton("warning") == skeleton("waming") {
		t.Errorf("Oragono shouldn't consider rn confusable with m")
	}

	if skeleton("Phi|ip") != "philip" {
		t.Errorf("but we still consider pipe confusable with l")
	}

	// The fullwidth inputs below are spelled with \u escapes so that they
	// survive tooling that strips or mangles non-ASCII characters.

	// fullwidth lowercase "smt" (U+FF53 U+FF4D U+FF54)
	if skeleton("\uff53\uff4d\uff54") != skeleton("smt") {
		t.Errorf("fullwidth characters should skeletonize to plain old ascii characters")
	}

	// fullwidth uppercase "SMT" (U+FF33 U+FF2D U+FF34)
	if skeleton("\uff33\uff2d\uff34") != skeleton("smt") {
		t.Errorf("after skeletonizing, we should casefold")
	}

	// ASCII "sm" followed by fullwidth "t" (U+FF54)
	if skeleton("sm\uff54") != skeleton("smt") {
		t.Errorf("our friend lover successfully tricked the skeleton algorithm!")
	}

	if skeleton("еvan") != "evan" {
		t.Errorf("we must protect against cyrillic homoglyph attacks")
	}

	if skeleton("еmily") != skeleton("emily") {
		t.Errorf("we must protect against cyrillic homoglyph attacks")
	}

	if skeleton("РОТАТО") != "potato" {
		t.Errorf("we must protect against cyrillic homoglyph attacks")
	}

	// should not raise an error:
	skeleton("けらんぐ")
}
func TestCanonicalizeMaskWildcard(t *testing.T) {
tester := func(input, expected string, expectedErr error) {
out, err := CanonicalizeMaskWildcard(input)
@ -221,89 +207,3 @@ func TestCanonicalizeMaskWildcard(t *testing.T) {
tester("shivaram!us er@host", "", errInvalidCharacter)
tester("shivaram!user@ho st", "", errInvalidCharacter)
}
func validFoldTester(first, second string, equal bool, folder func(string) (string, error), t *testing.T) {
firstFolded, err := folder(first)
if err != nil {
panic(err)
}
secondFolded, err := folder(second)
if err != nil {
panic(err)
}
foundEqual := firstFolded == secondFolded
if foundEqual != equal {
t.Errorf("%s and %s: expected equality %t, but got %t", first, second, equal, foundEqual)
}
}
// TestFoldPermissive runs pairs of valid identifiers through foldPermissive
// and checks whether their folded forms compare equal.
func TestFoldPermissive(t *testing.T) {
	pairs := []struct {
		first, second string
		equal         bool
	}{
		{"SHIVARAM", "shivaram", true},
		{"shIvaram", "shivaraM", true},
		{"shivaram", "DAN-", false},
		{"dolph🐬n", "DOLPH🐬n", true},
		{"dolph🐬n", "dolph💻n", false},
		{"9FRONT", "9front", true},
	}
	for _, pair := range pairs {
		validFoldTester(pair.first, pair.second, pair.equal, foldPermissive, t)
	}
}
// TestFoldPermissiveInvalid checks that foldPermissive returns an error for
// inputs containing a tab or a null byte.
func TestFoldPermissiveInvalid(t *testing.T) {
	rejected := []struct {
		input     string
		complaint string
	}{
		{"a\tb", "whitespace should be invalid in identifiers"},
		{"a\x00b", "the null byte should be invalid in identifiers"},
	}
	for _, tc := range rejected {
		if _, err := foldPermissive(tc.input); err == nil {
			t.Errorf("%s", tc.complaint)
		}
	}
}
// TestFoldASCII checks that foldASCII maps pairs of printable-ASCII strings
// differing only in letter case to the same folded form.
func TestFoldASCII(t *testing.T) {
	pairs := []struct {
		first, second string
		equal         bool
	}{
		{"shivaram", "SHIVARAM", true},
		{"X|Y", "x|y", true},
		{"a != b", "A != B", true},
	}
	for _, pair := range pairs {
		validFoldTester(pair.first, pair.second, pair.equal, foldASCII, t)
	}
}
// TestFoldASCIIInvalid checks that foldASCII returns an error for the
// control characters 0x01 and 0x7F.
func TestFoldASCIIInvalid(t *testing.T) {
	for _, input := range []string{"\x01", "\x7F"} {
		if _, err := foldASCII(input); err == nil {
			t.Errorf("control characters should be invalid in identifiers")
		}
	}
}
// TestFoldRFC1459 exercises foldRFC1459 in non-strict mode, where "[", "]",
// "\" and "~" are expected to fold together with "{", "}", "|" and "^".
func TestFoldRFC1459(t *testing.T) {
	nonStrict := func(str string) (string, error) {
		return foldRFC1459(str, false)
	}
	pairs := []struct {
		first, second string
		equal         bool
	}{
		{"shivaram", "SHIVARAM", true},
		{"shivaram[a]", "shivaram{a}", true},
		{"shivaram\\a]", "shivaram{a}", false},
		{"shivaram\\a]", "shivaram|a}", true},
		{"shivaram~a]", "shivaram^a}", true},
	}
	for _, pair := range pairs {
		validFoldTester(pair.first, pair.second, pair.equal, nonStrict, t)
	}
}
// TestFoldRFC1459Strict exercises foldRFC1459 in strict mode, where "~" and
// "^" are expected to remain distinct while the bracket and backslash
// equivalences still hold.
func TestFoldRFC1459Strict(t *testing.T) {
	strictFold := func(str string) (string, error) {
		return foldRFC1459(str, true)
	}
	pairs := []struct {
		first, second string
		equal         bool
	}{
		{"shivaram", "SHIVARAM", true},
		{"shivaram[a]", "shivaram{a}", true},
		{"shivaram\\a]", "shivaram{a}", false},
		{"shivaram\\a]", "shivaram|a}", true},
		{"shivaram~a]", "shivaram^a}", false},
	}
	for _, pair := range pairs {
		validFoldTester(pair.first, pair.second, pair.equal, strictFold, t)
	}
}