3
0
mirror of https://github.com/ergochat/ergo.git synced 2024-12-22 18:52:41 +01:00

Merge pull request #1152 from slingamn/utf8.1

fix #1151
This commit is contained in:
Shivaram Lingamneni 2020-06-23 01:02:59 -07:00 committed by GitHub
commit ba68358c21
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 53 additions and 18 deletions

View File

@ -100,7 +100,7 @@ server:
# casemapping controls what kinds of strings are permitted as identifiers (nicknames,
# channel names, account names, etc.), and how they are normalized for case.
# with the recommended default of 'precis', utf-8 identifiers that are "sane"
# with the recommended default of 'precis', UTF8 identifiers that are "sane"
# (according to RFC 8265) are allowed, and the server additionally tries to protect
# against confusable characters ("homoglyph attacks").
# the other options are 'ascii' (traditional ASCII-only identifiers), and 'permissive',
@ -110,6 +110,11 @@ server:
# already up and running is problematic).
casemapping: "precis"
# enforce-utf8 controls whether the server allows non-UTF8 bytes in messages
# (as in traditional IRC) or preemptively discards non-UTF8 messages (since
# they cannot be relayed to websocket clients).
enforce-utf8: true
# whether to look up user hostnames with reverse DNS.
# (disabling this will expose user IPs instead of hostnames;
# to make IP/hostname information private, see the ip-cloaking section)

View File

@ -126,7 +126,7 @@ server:
# casemapping controls what kinds of strings are permitted as identifiers (nicknames,
# channel names, account names, etc.), and how they are normalized for case.
# with the recommended default of 'precis', utf-8 identifiers that are "sane"
# with the recommended default of 'precis', UTF8 identifiers that are "sane"
# (according to RFC 8265) are allowed, and the server additionally tries to protect
# against confusable characters ("homoglyph attacks").
# the other options are 'ascii' (traditional ASCII-only identifiers), and 'permissive',
@ -136,6 +136,11 @@ server:
# already up and running is problematic).
casemapping: "precis"
# enforce-utf8 controls whether the server allows non-UTF8 bytes in messages
# (as in traditional IRC) or preemptively discards non-UTF8 messages (since
# they cannot be relayed to websocket clients).
enforce-utf8: true
# whether to look up user hostnames with reverse DNS.
# (disabling this will expose user IPs instead of hostnames;
# to make IP/hostname information private, see the ip-cloaking section)

View File

@ -615,8 +615,11 @@ func (client *Client) run(session *Session) {
firstLine := !isReattach
for {
var invalidUtf8 bool
line, err := session.socket.Read()
if err != nil {
if err == errInvalidUtf8 {
invalidUtf8 = true // handle as normal, including labeling
} else if err != nil {
quitMessage := "connection closed"
if err == errReadQ {
quitMessage = "readQ exceeded"
@ -676,6 +679,8 @@ func (client *Client) run(session *Session) {
cmd, exists := Commands[msg.Command]
if !exists {
cmd = unknownCommand
} else if invalidUtf8 {
cmd = invalidUtf8Command
}
isExiting := cmd.Run(client.server, client, session, msg)

View File

@ -79,6 +79,11 @@ var unknownCommand = Command{
usablePreReg: true,
}
var invalidUtf8Command = Command{
handler: invalidUtf8Handler,
usablePreReg: true,
}
// Commands holds all commands executable by a client connected to us.
var Commands map[string]Command

View File

@ -519,6 +519,7 @@ type Config struct {
supportedCaps *caps.Set
capValues caps.Values
Casemapping Casemapping
EnforceUtf8 bool `yaml:"enforce-utf8"`
OutputPath string `yaml:"output-path"`
}

View File

@ -66,6 +66,7 @@ var (
errCredsExternallyManaged = errors.New("Credentials are externally managed and cannot be changed here")
errInvalidMultilineBatch = errors.New("Invalid multiline batch")
errTimedOut = errors.New("Operation timed out")
errInvalidUtf8 = errors.New("Message rejected for invalid utf8")
)
// Socket Errors

View File

@ -2918,3 +2918,9 @@ func unknownCommandHandler(server *Server, client *Client, msg ircmsg.IrcMessage
rb.Add(nil, server.name, ERR_UNKNOWNCOMMAND, client.Nick(), utils.SafeErrorParam(msg.Command), client.t("Unknown command"))
return false
}
// fake handler for invalid utf8
func invalidUtf8Handler(server *Server, client *Client, msg ircmsg.IrcMessage, rb *ResponseBuffer) bool {
rb.Add(nil, server.name, "FAIL", utils.SafeErrorParam(msg.Command), "INVALID_UTF8", client.t("Message rejected for containing invalid UTF-8"))
return false
}

View File

@ -76,7 +76,9 @@ func (cc *IRCStreamConn) ReadLine() (line []byte, err error) {
if isPrefix {
return nil, errReadQ
}
line = bytes.TrimSuffix(line, crlf)
if globalUtf8EnforcementSetting && !utf8.Valid(line) {
err = errInvalidUtf8
}
return
}
@ -101,9 +103,9 @@ func (wc IRCWSConn) UnderlyingConn() *utils.WrappedConn {
func (wc IRCWSConn) WriteLine(buf []byte) (err error) {
buf = bytes.TrimSuffix(buf, crlf)
// there's not much we can do about this;
// silently drop the message
if !utf8.Valid(buf) {
if !globalUtf8EnforcementSetting && !utf8.Valid(buf) {
// there's not much we can do about this;
// silently drop the message
return nil
}
return wc.conn.WriteMessage(websocket.TextMessage, buf)

View File

@ -487,6 +487,7 @@ func (server *Server) applyConfig(config *Config) (err error) {
server.name = config.Server.Name
server.nameCasefolded = config.Server.nameCasefolded
globalCasemappingSetting = config.Server.Casemapping
globalUtf8EnforcementSetting = config.Server.EnforceUtf8
} else {
// enforce configs that can't be changed after launch:
if server.name != config.Server.Name {
@ -495,6 +496,8 @@ func (server *Server) applyConfig(config *Config) (err error) {
return fmt.Errorf("Datastore path cannot be changed after launching the server, rehash aborted")
} else if globalCasemappingSetting != config.Server.Casemapping {
return fmt.Errorf("Casemapping cannot be changed after launching the server, rehash aborted")
} else if globalUtf8EnforcementSetting != config.Server.EnforceUtf8 {
return fmt.Errorf("UTF-8 enforcement cannot be changed after launching the server, rehash aborted")
} else if oldConfig.Accounts.Multiclient.AlwaysOn != config.Accounts.Multiclient.AlwaysOn {
return fmt.Errorf("Default always-on setting cannot be changed after launching the server, rehash aborted")
}

View File

@ -7,7 +7,6 @@ package irc
import (
"errors"
"io"
"strings"
"sync"
"github.com/oragono/oragono/irc/utils"
@ -59,27 +58,24 @@ func (socket *Socket) Close() {
// Read returns a single IRC line from a Socket.
func (socket *Socket) Read() (string, error) {
// immediately fail if Close() has been called, even if there's
// still data in a bufio.Reader or websocket buffer:
if socket.IsClosed() {
return "", io.EOF
}
lineBytes, err := socket.conn.ReadLine()
// convert bytes to string
line := string(lineBytes)
// read last message properly (such as ERROR/QUIT/etc), just fail next reads/writes
if err == io.EOF {
socket.Close()
// process last message properly (such as ERROR/QUIT/etc), just fail next reads/writes
if line != "" {
err = nil
}
}
if err == io.EOF && strings.TrimSpace(line) != "" {
// don't do anything
} else if err != nil {
return "", err
}
return line, nil
return line, err
}
// Write sends the given string out of Socket. Requirements:

View File

@ -50,6 +50,12 @@ const (
// this happens-before all IRC connections and all casefolding operations.
var globalCasemappingSetting Casemapping = CasemappingPRECIS
// XXX analogous unsynchronized global variable controlling utf8 validation
// if this is off, you get the traditional IRC behavior (relaying any valid RFC1459
// octets) and invalid utf8 messages are silently dropped for websocket clients only.
// if this is on, invalid utf8 inputs get a FAIL reply.
var globalUtf8EnforcementSetting bool
// Each pass of PRECIS casefolding is a composition of idempotent operations,
// but not idempotent itself. Therefore, the spec says "do it four times and hope
// it converges" (lolwtf). Golang's PRECIS implementation has a "repeat" option,