This commit is contained in:
Shivaram Lingamneni 2020-06-22 14:54:43 -04:00
parent 4cadb7ad58
commit a4f9e08a85
11 changed files with 45 additions and 6 deletions

View File

@ -100,7 +100,7 @@ server:
# casemapping controls what kinds of strings are permitted as identifiers (nicknames,
# channel names, account names, etc.), and how they are normalized for case.
# with the recommended default of 'precis', utf-8 identifiers that are "sane"
# with the recommended default of 'precis', UTF8 identifiers that are "sane"
# (according to RFC 8265) are allowed, and the server additionally tries to protect
# against confusable characters ("homoglyph attacks").
# the other options are 'ascii' (traditional ASCII-only identifiers), and 'permissive',
@ -110,6 +110,11 @@ server:
# already up and running is problematic).
casemapping: "precis"
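# enforce-utf8 controls how the server treats messages containing non-UTF8 bytes.
# if true, incoming non-UTF8 messages are rejected up front (since they
# cannot be relayed to websocket clients). if false, non-UTF8 bytes are allowed
# (as in traditional IRC), but such messages are silently dropped when the
# recipient is a websocket client.
enforce-utf8: true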
# whether to look up user hostnames with reverse DNS.
# (disabling this will expose user IPs instead of hostnames;
# to make IP/hostname information private, see the ip-cloaking section)

View File

@ -126,7 +126,7 @@ server:
# casemapping controls what kinds of strings are permitted as identifiers (nicknames,
# channel names, account names, etc.), and how they are normalized for case.
# with the recommended default of 'precis', utf-8 identifiers that are "sane"
# with the recommended default of 'precis', UTF8 identifiers that are "sane"
# (according to RFC 8265) are allowed, and the server additionally tries to protect
# against confusable characters ("homoglyph attacks").
# the other options are 'ascii' (traditional ASCII-only identifiers), and 'permissive',
@ -136,6 +136,11 @@ server:
# already up and running is problematic).
casemapping: "precis"
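# enforce-utf8 controls how the server treats messages containing non-UTF8 bytes.
# if true, incoming non-UTF8 messages are rejected up front (since they
# cannot be relayed to websocket clients). if false, non-UTF8 bytes are allowed
# (as in traditional IRC), but such messages are silently dropped when the
# recipient is a websocket client.
enforce-utf8: true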
# whether to look up user hostnames with reverse DNS.
# (disabling this will expose user IPs instead of hostnames;
# to make IP/hostname information private, see the ip-cloaking section)

View File

@ -615,8 +615,11 @@ func (client *Client) run(session *Session) {
firstLine := !isReattach
for {
var invalidUtf8 bool
line, err := session.socket.Read()
if err != nil {
if err == errInvalidUtf8 {
invalidUtf8 = true // handle as normal, including labeling
} else if err != nil {
quitMessage := "connection closed"
if err == errReadQ {
quitMessage = "readQ exceeded"
@ -676,6 +679,8 @@ func (client *Client) run(session *Session) {
cmd, exists := Commands[msg.Command]
if !exists {
cmd = unknownCommand
} else if invalidUtf8 {
cmd = invalidUtf8Command
}
isExiting := cmd.Run(client.server, client, session, msg)

View File

@ -79,6 +79,11 @@ var unknownCommand = Command{
usablePreReg: true,
}
// invalidUtf8Command is substituted for a recognized command when the line
// that carried it was flagged as invalid UTF-8; its handler only sends an
// error reply. Like unknownCommand, it is usable before registration.
var invalidUtf8Command = Command{
handler: invalidUtf8Handler,
usablePreReg: true,
}
// Commands holds all commands executable by a client connected to us.
var Commands map[string]Command

View File

@ -518,6 +518,7 @@ type Config struct {
supportedCaps *caps.Set
capValues caps.Values
Casemapping Casemapping
EnforceUtf8 bool `yaml:"enforce-utf8"`
OutputPath string `yaml:"output-path"`
}

View File

@ -66,6 +66,7 @@ var (
errCredsExternallyManaged = errors.New("Credentials are externally managed and cannot be changed here")
errInvalidMultilineBatch = errors.New("Invalid multiline batch")
errTimedOut = errors.New("Operation timed out")
errInvalidUtf8 = errors.New("Message rejected for invalid utf8")
)
// Socket Errors

View File

@ -2927,3 +2927,9 @@ func unknownCommandHandler(server *Server, client *Client, msg ircmsg.IrcMessage
rb.Add(nil, server.name, ERR_UNKNOWNCOMMAND, client.Nick(), utils.SafeErrorParam(msg.Command), client.t("Unknown command"))
return false
}
// invalidUtf8Handler is a stand-in handler for lines that were flagged as
// invalid UTF-8. It does not execute the requested command; it queues an
// ERR_UNKNOWNERROR reply that names the command, and, like
// unknownCommandHandler, always returns false.
func invalidUtf8Handler(server *Server, client *Client, msg ircmsg.IrcMessage, rb *ResponseBuffer) bool {
	nick := client.Nick()
	// the command text comes from the client, so sanitize it before echoing it back
	command := utils.SafeErrorParam(msg.Command)
	explanation := client.t("Message rejected for containing invalid UTF-8")
	rb.Add(nil, server.name, ERR_UNKNOWNERROR, nick, command, explanation)
	return false
}

View File

@ -77,6 +77,9 @@ func (cc *IRCStreamConn) ReadLine() (line []byte, err error) {
return nil, errReadQ
}
line = bytes.TrimSuffix(line, crlf)
if globalUtf8EnforcementSetting && !utf8.Valid(line) {
err = errInvalidUtf8
}
return
}
@ -101,9 +104,9 @@ func (wc IRCWSConn) UnderlyingConn() *utils.WrappedConn {
func (wc IRCWSConn) WriteLine(buf []byte) (err error) {
buf = bytes.TrimSuffix(buf, crlf)
// there's not much we can do about this;
// silently drop the message
if !utf8.Valid(buf) {
if !globalUtf8EnforcementSetting && !utf8.Valid(buf) {
// there's not much we can do about this;
// silently drop the message
return nil
}
return wc.conn.WriteMessage(websocket.TextMessage, buf)

View File

@ -487,6 +487,7 @@ func (server *Server) applyConfig(config *Config) (err error) {
server.name = config.Server.Name
server.nameCasefolded = config.Server.nameCasefolded
globalCasemappingSetting = config.Server.Casemapping
globalUtf8EnforcementSetting = config.Server.EnforceUtf8
} else {
// enforce configs that can't be changed after launch:
if server.name != config.Server.Name {
@ -495,6 +496,8 @@ func (server *Server) applyConfig(config *Config) (err error) {
return fmt.Errorf("Datastore path cannot be changed after launching the server, rehash aborted")
} else if globalCasemappingSetting != config.Server.Casemapping {
return fmt.Errorf("Casemapping cannot be changed after launching the server, rehash aborted")
} else if globalUtf8EnforcementSetting != config.Server.EnforceUtf8 {
return fmt.Errorf("UTF-8 enforcement cannot be changed after launching the server, rehash aborted")
} else if oldConfig.Accounts.Multiclient.AlwaysOn != config.Accounts.Multiclient.AlwaysOn {
return fmt.Errorf("Default always-on setting cannot be changed after launching the server, rehash aborted")
}

View File

@ -75,6 +75,9 @@ func (socket *Socket) Read() (string, error) {
if err == io.EOF && strings.TrimSpace(line) != "" {
// don't do anything
} else if err == errInvalidUtf8 {
// pass the data through so we can parse the command at least
return line, err
} else if err != nil {
return "", err
}

View File

@ -50,6 +50,8 @@ const (
// this happens-before all IRC connections and all casefolding operations.
var globalCasemappingSetting Casemapping = CasemappingPRECIS
// globalUtf8EnforcementSetting holds the server's enforce-utf8 config value.
// It is assigned in applyConfig alongside globalCasemappingSetting, and a
// rehash that would change it is rejected. When true, incoming lines that are
// not valid UTF-8 are flagged with errInvalidUtf8; when false, the websocket
// writer instead drops outgoing lines that are not valid UTF-8.
var globalUtf8EnforcementSetting bool
// Each pass of PRECIS casefolding is a composition of idempotent operations,
// but not idempotent itself. Therefore, the spec says "do it four times and hope
// it converges" (lolwtf). Golang's PRECIS implementation has a "repeat" option,