2018-05-21 16:36:45 +02:00
|
|
|
// Copyright 2018 Google LLC
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// https://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
2021-02-20 00:41:10 +01:00
|
|
|
"context"
|
2018-05-21 16:36:45 +02:00
|
|
|
"crypto/tls"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
2021-02-20 00:41:10 +01:00
|
|
|
"sync"
|
2018-05-21 16:36:45 +02:00
|
|
|
"time"
|
2019-12-27 14:07:33 +01:00
|
|
|
|
|
|
|
irc "github.com/fluffle/goirc/client"
|
2021-04-07 03:20:52 +02:00
|
|
|
"github.com/google/alertmanager-irc-relay/logging"
|
2020-03-05 12:58:39 +01:00
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
|
|
"github.com/prometheus/client_golang/prometheus/promauto"
|
2018-05-21 16:36:45 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
// Timing knobs for the IRC connection. All values are expressed in seconds
// and are multiplied by time.Second (or handed to a delayer together with a
// one-second unit) at their point of use.
const (
	// connectionTimeoutSecs becomes the goirc client Timeout.
	connectionTimeoutSecs = 30

	// nickservWaitSecs is the pause applied after talking to NickServ, or
	// while waiting for it to talk to us.
	nickservWaitSecs = 10

	// ircConnectMaxBackoffSecs and ircConnectBackoffResetSecs are the two
	// values passed to the reconnect delayer; going by their names they are
	// its maximum backoff and the interval after which the backoff resets.
	ircConnectMaxBackoffSecs   = 300
	ircConnectBackoffResetSecs = 1800
)
|
|
|
|
|
2020-03-05 12:58:39 +01:00
|
|
|
var (
|
|
|
|
ircConnectedGauge = promauto.NewGauge(prometheus.GaugeOpts{
|
|
|
|
Name: "irc_connected",
|
2021-02-20 11:17:54 +01:00
|
|
|
Help: "Whether the IRC connection is established",
|
2020-03-05 12:58:39 +01:00
|
|
|
})
|
|
|
|
ircSentMsgs = promauto.NewCounterVec(prometheus.CounterOpts{
|
|
|
|
Name: "irc_sent_msgs",
|
|
|
|
Help: "Number of IRC messages sent"},
|
|
|
|
[]string{"ircchannel"},
|
|
|
|
)
|
|
|
|
ircSendMsgErrors = promauto.NewCounterVec(prometheus.CounterOpts{
|
|
|
|
Name: "irc_send_msg_errors",
|
|
|
|
Help: "Errors while sending IRC messages"},
|
|
|
|
[]string{"ircchannel", "error"},
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
2018-05-21 16:36:45 +02:00
|
|
|
func loggerHandler(_ *irc.Conn, line *irc.Line) {
|
2021-04-07 03:20:52 +02:00
|
|
|
logging.Info("Received: '%s'", line.Raw)
|
2018-05-21 16:36:45 +02:00
|
|
|
}
|
|
|
|
|
2021-03-26 23:03:21 +01:00
|
|
|
func makeGOIRCConfig(config *Config) *irc.Config {
|
|
|
|
ircConfig := irc.NewConfig(config.IRCNick)
|
|
|
|
ircConfig.Me.Ident = config.IRCNick
|
|
|
|
ircConfig.Me.Name = config.IRCRealName
|
|
|
|
ircConfig.Server = strings.Join(
|
|
|
|
[]string{config.IRCHost, strconv.Itoa(config.IRCPort)}, ":")
|
|
|
|
ircConfig.Pass = config.IRCHostPass
|
|
|
|
ircConfig.SSL = config.IRCUseSSL
|
|
|
|
ircConfig.SSLConfig = &tls.Config{
|
|
|
|
ServerName: config.IRCHost,
|
|
|
|
InsecureSkipVerify: !config.IRCVerifySSL,
|
|
|
|
}
|
2022-10-15 13:00:43 +02:00
|
|
|
ircConfig.PingFreq = time.Duration(config.IRCPingSecs) * time.Second
|
2021-03-26 23:03:21 +01:00
|
|
|
ircConfig.Timeout = connectionTimeoutSecs * time.Second
|
|
|
|
ircConfig.NewNick = func(n string) string { return n + "^" }
|
|
|
|
|
|
|
|
return ircConfig
|
|
|
|
}
|
|
|
|
|
2018-05-21 16:36:45 +02:00
|
|
|
type IRCNotifier struct {
|
|
|
|
// Nick stores the nickname specified in the config, because irc.Client
|
|
|
|
// might change its copy.
|
2021-02-20 00:41:10 +01:00
|
|
|
Nick string
|
|
|
|
NickPassword string
|
2021-04-16 18:17:08 +02:00
|
|
|
|
2022-10-08 18:02:16 +02:00
|
|
|
NickservName string
|
2021-04-16 18:17:08 +02:00
|
|
|
NickservIdentifyPatterns []string
|
|
|
|
|
Restore IRC Ident on Reconnect
After a connection loss on an IRC session with a ngIRCd, the
alertmanager-irc-relay was unable to reconnect. After some debugging,
the error's origin was the state tracking within the used goirc library.
When using an unidentified session, ngIRCd prefixes the user's ident
with a `~`. The state tracking registers this and keeps `~${NICK}` as
the current and the new ident for future reconnects. However, `~` is not
a valid char for the `<user>` part in the `USER` command, at least not
for ngIRCd.
To clarify this behaviour, take a look at the following log. First, the
initial connection is begin established correctly. Keep an eye on the
`USER` command being sent to the server.
> http.go:132: INFO Starting HTTP server
> irc.go:308: INFO Connected to IRC server, waiting to establish session
> connection.go:543: DEBUG -> NICK alertbot
> connection.go:543: DEBUG -> USER alertbot 12 * :Alertmanager IRC Relay
> connection.go:474: DEBUG <- :__SERVER__ 001 alertbot :Welcome to the Internet Relay Network alertbot!~alertbot@__IP__
Now, there was a network incident and the session needs to be recreated.
> connection.go:466: ERROR irc.recv(): read tcp __REDACTED__: read: connection timed out
> connection.go:577: INFO irc.Close(): Disconnected from server.
> irc.go:150: INFO Disconnected from IRC
> reconciler.go:129: INFO Channel #alerts monitor: context canceled while monitoring
> irc.go:300: INFO Connecting to IRC __SERVER__
> backoff.go:111: INFO Backoff for 0s starts
> backoff.go:114: INFO Backoff for 0s ends
> connection.go:390: INFO irc.Connect(): Connecting to __SERVER__.
> irc.go:308: INFO Connected to IRC server, waiting to establish session
> connection.go:543: DEBUG -> NICK alertbot
> connection.go:543: DEBUG -> USER ~alertbot 12 * :Alertmanager IRC Relay
> connection.go:474: DEBUG <- ERROR :Invalid user name
> connection.go:577: INFO irc.Close(): Disconnected from server.
> irc.go:150: INFO Disconnected from IRC
> irc.go:319: WARN Receiving a session down before the session is up, this is odd
This time, the used `user` part of the `USER` command has the prefixed
`~` and fails. However, without using `-debug` and taking a very close
look, this error can be missed very easy.
As the new ident is invalid, the alertmanager-irc-relay is now stuck in
an endless reconnection loop.
This fix is kind of straight forward and just checks if the ident has
changed before trying to reconnect. It might not be the prettiest
solution, but recreating the whole *irc.Config resulted in other bugs as
it was still referenced - even after being `Close`d.
2022-10-15 11:53:26 +02:00
|
|
|
// As the goirc library might alter the irc.Config created by makeGOIRCConfig,
|
|
|
|
// we might also want to keep a reference to the original Config to restore
|
|
|
|
// the desired state.
|
|
|
|
Config *Config
|
|
|
|
IrcConfig *irc.Config
|
|
|
|
|
2021-04-16 18:17:08 +02:00
|
|
|
Client *irc.Conn
|
|
|
|
AlertMsgs chan AlertMsg
|
2021-02-20 00:41:10 +01:00
|
|
|
|
2018-05-21 16:36:45 +02:00
|
|
|
// irc.Conn has a Connected() method that can tell us wether the TCP
|
|
|
|
// connection is up, and thus if we should trigger connect/disconnect.
|
|
|
|
// We need to track the session establishment also at a higher level to
|
|
|
|
// understand when the server has accepted us and thus when we can join
|
|
|
|
// channels, send notices, etc.
|
|
|
|
sessionUp bool
|
|
|
|
sessionUpSignal chan bool
|
|
|
|
sessionDownSignal chan bool
|
2022-10-15 13:00:43 +02:00
|
|
|
sessionPongSignal chan bool
|
|
|
|
sessionPingOnce sync.Once
|
|
|
|
sessionLastPong time.Time
|
2021-03-27 17:29:54 +01:00
|
|
|
sessionWg sync.WaitGroup
|
2018-05-21 16:36:45 +02:00
|
|
|
|
2021-03-26 22:53:46 +01:00
|
|
|
channelReconciler *ChannelReconciler
|
2018-05-21 16:36:45 +02:00
|
|
|
|
2020-01-25 19:03:13 +01:00
|
|
|
UsePrivmsg bool
|
|
|
|
|
2018-05-21 16:36:45 +02:00
|
|
|
NickservDelayWait time.Duration
|
|
|
|
BackoffCounter Delayer
|
2021-03-29 16:06:36 +02:00
|
|
|
timeTeller TimeTeller
|
2018-05-21 16:36:45 +02:00
|
|
|
}
|
|
|
|
|
2021-03-29 16:06:36 +02:00
|
|
|
func NewIRCNotifier(config *Config, alertMsgs chan AlertMsg, delayerMaker DelayerMaker, timeTeller TimeTeller) (*IRCNotifier, error) {
|
2018-05-21 16:36:45 +02:00
|
|
|
|
2021-03-26 23:03:21 +01:00
|
|
|
ircConfig := makeGOIRCConfig(config)
|
2018-05-21 16:36:45 +02:00
|
|
|
|
2021-03-26 22:53:46 +01:00
|
|
|
client := irc.Client(ircConfig)
|
|
|
|
|
2021-03-25 23:58:27 +01:00
|
|
|
backoffCounter := delayerMaker.NewDelayer(
|
2018-05-21 16:36:45 +02:00
|
|
|
ircConnectMaxBackoffSecs, ircConnectBackoffResetSecs,
|
|
|
|
time.Second)
|
|
|
|
|
2021-03-29 16:06:36 +02:00
|
|
|
channelReconciler := NewChannelReconciler(config, client, delayerMaker, timeTeller)
|
2021-03-26 22:53:46 +01:00
|
|
|
|
2018-05-21 16:36:45 +02:00
|
|
|
notifier := &IRCNotifier{
|
2021-04-16 18:17:08 +02:00
|
|
|
Nick: config.IRCNick,
|
|
|
|
NickPassword: config.IRCNickPass,
|
2022-10-08 18:02:16 +02:00
|
|
|
NickservName: config.NickservName,
|
2021-04-16 18:17:08 +02:00
|
|
|
NickservIdentifyPatterns: config.NickservIdentifyPatterns,
|
Restore IRC Ident on Reconnect
After a connection loss on an IRC session with a ngIRCd, the
alertmanager-irc-relay was unable to reconnect. After some debugging,
the error's origin was the state tracking within the used goirc library.
When using an unidentified session, ngIRCd prefixes the user's ident
with a `~`. The state tracking registers this and keeps `~${NICK}` as
the current and the new ident for future reconnects. However, `~` is not
a valid char for the `<user>` part in the `USER` command, at least not
for ngIRCd.
To clarify this behaviour, take a look at the following log. First, the
initial connection is begin established correctly. Keep an eye on the
`USER` command being sent to the server.
> http.go:132: INFO Starting HTTP server
> irc.go:308: INFO Connected to IRC server, waiting to establish session
> connection.go:543: DEBUG -> NICK alertbot
> connection.go:543: DEBUG -> USER alertbot 12 * :Alertmanager IRC Relay
> connection.go:474: DEBUG <- :__SERVER__ 001 alertbot :Welcome to the Internet Relay Network alertbot!~alertbot@__IP__
Now, there was a network incident and the session needs to be recreated.
> connection.go:466: ERROR irc.recv(): read tcp __REDACTED__: read: connection timed out
> connection.go:577: INFO irc.Close(): Disconnected from server.
> irc.go:150: INFO Disconnected from IRC
> reconciler.go:129: INFO Channel #alerts monitor: context canceled while monitoring
> irc.go:300: INFO Connecting to IRC __SERVER__
> backoff.go:111: INFO Backoff for 0s starts
> backoff.go:114: INFO Backoff for 0s ends
> connection.go:390: INFO irc.Connect(): Connecting to __SERVER__.
> irc.go:308: INFO Connected to IRC server, waiting to establish session
> connection.go:543: DEBUG -> NICK alertbot
> connection.go:543: DEBUG -> USER ~alertbot 12 * :Alertmanager IRC Relay
> connection.go:474: DEBUG <- ERROR :Invalid user name
> connection.go:577: INFO irc.Close(): Disconnected from server.
> irc.go:150: INFO Disconnected from IRC
> irc.go:319: WARN Receiving a session down before the session is up, this is odd
This time, the used `user` part of the `USER` command has the prefixed
`~` and fails. However, without using `-debug` and taking a very close
look, this error can be missed very easy.
As the new ident is invalid, the alertmanager-irc-relay is now stuck in
an endless reconnection loop.
This fix is kind of straight forward and just checks if the ident has
changed before trying to reconnect. It might not be the prettiest
solution, but recreating the whole *irc.Config resulted in other bugs as
it was still referenced - even after being `Close`d.
2022-10-15 11:53:26 +02:00
|
|
|
Config: config,
|
|
|
|
IrcConfig: ircConfig,
|
2021-04-16 18:17:08 +02:00
|
|
|
Client: client,
|
|
|
|
AlertMsgs: alertMsgs,
|
|
|
|
sessionUpSignal: make(chan bool),
|
|
|
|
sessionDownSignal: make(chan bool),
|
2022-10-15 13:00:43 +02:00
|
|
|
sessionPongSignal: make(chan bool),
|
2021-04-16 18:17:08 +02:00
|
|
|
channelReconciler: channelReconciler,
|
|
|
|
UsePrivmsg: config.UsePrivmsg,
|
|
|
|
NickservDelayWait: nickservWaitSecs * time.Second,
|
|
|
|
BackoffCounter: backoffCounter,
|
|
|
|
timeTeller: timeTeller,
|
2018-05-21 16:36:45 +02:00
|
|
|
}
|
|
|
|
|
2021-03-26 12:40:44 +01:00
|
|
|
notifier.registerHandlers()
|
|
|
|
|
|
|
|
return notifier, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (n *IRCNotifier) registerHandlers() {
|
|
|
|
n.Client.HandleFunc(irc.CONNECTED,
|
2018-05-21 16:36:45 +02:00
|
|
|
func(*irc.Conn, *irc.Line) {
|
2021-04-07 03:20:52 +02:00
|
|
|
logging.Info("Session established")
|
2021-03-26 12:40:44 +01:00
|
|
|
n.sessionUpSignal <- true
|
2018-05-21 16:36:45 +02:00
|
|
|
})
|
|
|
|
|
2021-03-26 12:40:44 +01:00
|
|
|
n.Client.HandleFunc(irc.DISCONNECTED,
|
2018-05-21 16:36:45 +02:00
|
|
|
func(*irc.Conn, *irc.Line) {
|
2021-04-07 03:20:52 +02:00
|
|
|
logging.Info("Disconnected from IRC")
|
2021-03-26 12:40:44 +01:00
|
|
|
n.sessionDownSignal <- false
|
2018-05-21 16:36:45 +02:00
|
|
|
})
|
|
|
|
|
2021-04-16 18:17:08 +02:00
|
|
|
n.Client.HandleFunc(irc.NOTICE,
|
|
|
|
func(_ *irc.Conn, line *irc.Line) {
|
|
|
|
n.HandleNotice(line.Nick, line.Text())
|
|
|
|
})
|
|
|
|
|
2022-10-15 13:00:43 +02:00
|
|
|
n.Client.HandleFunc(irc.PONG,
|
|
|
|
func(_ *irc.Conn, line *irc.Line) {
|
|
|
|
n.sessionPongSignal <- true
|
|
|
|
})
|
|
|
|
|
2021-04-16 18:17:08 +02:00
|
|
|
for _, event := range []string{"433"} {
|
2021-03-26 12:40:44 +01:00
|
|
|
n.Client.HandleFunc(event, loggerHandler)
|
2018-05-21 16:36:45 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-16 18:17:08 +02:00
|
|
|
func (n *IRCNotifier) HandleNotice(nick string, msg string) {
|
|
|
|
logging.Info("Received NOTICE from %s: %s", nick, msg)
|
|
|
|
if strings.ToLower(nick) == "nickserv" {
|
|
|
|
n.HandleNickservMsg(msg)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (n *IRCNotifier) HandleNickservMsg(msg string) {
|
2021-03-21 19:48:11 +01:00
|
|
|
if n.NickPassword == "" {
|
2021-04-16 18:17:08 +02:00
|
|
|
logging.Debug("Skip processing NickServ request, no password configured")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Remove most common formatting options from NickServ messages
|
|
|
|
cleaner := strings.NewReplacer(
|
|
|
|
"\001", "", // bold
|
|
|
|
"\002", "", // faint
|
|
|
|
"\004", "", // underline
|
2021-04-17 23:18:00 +02:00
|
|
|
"\037", "", // underline
|
2021-04-16 18:17:08 +02:00
|
|
|
)
|
|
|
|
cleanedMsg := cleaner.Replace(msg)
|
|
|
|
|
|
|
|
for _, identifyPattern := range n.NickservIdentifyPatterns {
|
|
|
|
logging.Debug("Checking if NickServ message matches identify request '%s'", identifyPattern)
|
|
|
|
if strings.Contains(cleanedMsg, identifyPattern) {
|
|
|
|
logging.Info("Handling NickServ request to IDENTIFY")
|
2022-10-08 18:02:16 +02:00
|
|
|
n.Client.Privmsgf(n.NickservName, "IDENTIFY %s", n.NickPassword)
|
2021-04-16 18:17:08 +02:00
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (n *IRCNotifier) MaybeGhostNick() {
|
|
|
|
if n.NickPassword == "" {
|
|
|
|
logging.Debug("Skip GHOST check, no password configured")
|
2018-05-21 16:36:45 +02:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2021-03-21 19:48:11 +01:00
|
|
|
currentNick := n.Client.Me().Nick
|
|
|
|
if currentNick != n.Nick {
|
2021-04-07 03:20:52 +02:00
|
|
|
logging.Info("My nick is '%s', sending GHOST to NickServ to get '%s'",
|
2021-03-21 19:48:11 +01:00
|
|
|
currentNick, n.Nick)
|
2022-10-08 18:02:16 +02:00
|
|
|
n.Client.Privmsgf(n.NickservName, "GHOST %s %s", n.Nick,
|
2021-03-21 19:48:11 +01:00
|
|
|
n.NickPassword)
|
|
|
|
time.Sleep(n.NickservDelayWait)
|
2018-05-21 16:36:45 +02:00
|
|
|
|
2021-04-07 03:20:52 +02:00
|
|
|
logging.Info("Changing nick to '%s'", n.Nick)
|
2021-03-21 19:48:11 +01:00
|
|
|
n.Client.Nick(n.Nick)
|
2021-04-16 18:17:08 +02:00
|
|
|
time.Sleep(n.NickservDelayWait)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (n *IRCNotifier) MaybeWaitForNickserv() {
|
|
|
|
if n.NickPassword == "" {
|
|
|
|
logging.Debug("Skip NickServ wait, no password configured")
|
|
|
|
return
|
2018-05-21 16:36:45 +02:00
|
|
|
}
|
2021-04-16 18:17:08 +02:00
|
|
|
|
|
|
|
// Very lazy/optimistic, but this is good enough for my irssi config,
|
|
|
|
// so it should work here as well.
|
|
|
|
logging.Info("Waiting for NickServ to notice us and issue an identify request")
|
2021-03-21 19:48:11 +01:00
|
|
|
time.Sleep(n.NickservDelayWait)
|
2018-05-21 16:36:45 +02:00
|
|
|
}
|
|
|
|
|
2021-03-27 12:35:38 +01:00
|
|
|
func (n *IRCNotifier) ChannelJoined(ctx context.Context, channel string) bool {
|
2021-03-27 00:49:16 +01:00
|
|
|
|
|
|
|
isJoined, waitJoined := n.channelReconciler.JoinChannel(channel)
|
|
|
|
if isJoined {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
select {
|
|
|
|
case <-waitJoined:
|
|
|
|
return true
|
2021-03-29 16:06:36 +02:00
|
|
|
case <-n.timeTeller.After(ircJoinWaitSecs * time.Second):
|
2021-04-07 03:20:52 +02:00
|
|
|
logging.Warn("Channel %s not joined after %d seconds, giving bad news to caller", channel, ircJoinWaitSecs)
|
2021-03-27 00:49:16 +01:00
|
|
|
return false
|
2021-03-27 12:35:38 +01:00
|
|
|
case <-ctx.Done():
|
2021-04-07 03:20:52 +02:00
|
|
|
logging.Info("Context canceled while waiting for join on channel %s", channel)
|
2021-03-27 00:49:16 +01:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-27 12:35:38 +01:00
|
|
|
func (n *IRCNotifier) SendAlertMsg(ctx context.Context, alertMsg *AlertMsg) {
|
2021-03-21 19:48:11 +01:00
|
|
|
if !n.sessionUp {
|
2021-04-07 03:20:52 +02:00
|
|
|
logging.Error("Cannot send alert to %s : IRC not connected", alertMsg.Channel)
|
2020-03-05 12:58:39 +01:00
|
|
|
ircSendMsgErrors.WithLabelValues(alertMsg.Channel, "not_connected").Inc()
|
2018-05-21 16:36:45 +02:00
|
|
|
return
|
|
|
|
}
|
2021-03-27 12:35:38 +01:00
|
|
|
if !n.ChannelJoined(ctx, alertMsg.Channel) {
|
2021-04-07 03:20:52 +02:00
|
|
|
logging.Error("Cannot send alert to %s : cannot join channel", alertMsg.Channel)
|
2021-03-27 00:49:16 +01:00
|
|
|
ircSendMsgErrors.WithLabelValues(alertMsg.Channel, "not_joined").Inc()
|
|
|
|
return
|
|
|
|
}
|
2020-01-25 19:03:13 +01:00
|
|
|
|
2021-03-21 19:48:11 +01:00
|
|
|
if n.UsePrivmsg {
|
|
|
|
n.Client.Privmsg(alertMsg.Channel, alertMsg.Alert)
|
2020-01-25 19:03:13 +01:00
|
|
|
} else {
|
2021-03-21 19:48:11 +01:00
|
|
|
n.Client.Notice(alertMsg.Channel, alertMsg.Alert)
|
2020-01-25 19:03:13 +01:00
|
|
|
}
|
2020-03-05 12:58:39 +01:00
|
|
|
ircSentMsgs.WithLabelValues(alertMsg.Channel).Inc()
|
2018-05-21 16:36:45 +02:00
|
|
|
}
|
|
|
|
|
2021-03-21 19:48:11 +01:00
|
|
|
func (n *IRCNotifier) ShutdownPhase() {
|
2021-03-27 17:29:54 +01:00
|
|
|
if n.sessionUp {
|
2021-04-07 03:20:52 +02:00
|
|
|
logging.Info("IRC client connected, quitting")
|
2021-03-21 19:48:11 +01:00
|
|
|
n.Client.Quit("see ya")
|
2018-05-21 16:36:45 +02:00
|
|
|
|
2021-04-07 03:20:52 +02:00
|
|
|
logging.Info("Wait for IRC disconnect to complete")
|
2021-03-27 17:29:54 +01:00
|
|
|
select {
|
|
|
|
case <-n.sessionDownSignal:
|
2022-10-15 13:00:43 +02:00
|
|
|
case <-n.sessionPongSignal:
|
2021-03-29 16:06:36 +02:00
|
|
|
case <-n.timeTeller.After(n.Client.Config().Timeout):
|
2021-04-07 03:20:52 +02:00
|
|
|
logging.Warn("Timeout while waiting for IRC disconnect to complete, stopping anyway")
|
2018-05-21 16:36:45 +02:00
|
|
|
}
|
2021-03-27 17:29:54 +01:00
|
|
|
n.sessionWg.Done()
|
2018-05-21 16:36:45 +02:00
|
|
|
}
|
2021-04-07 03:20:52 +02:00
|
|
|
logging.Info("IRC shutdown complete")
|
2018-05-21 16:36:45 +02:00
|
|
|
}
|
2021-02-20 11:17:32 +01:00
|
|
|
|
2021-03-27 12:35:38 +01:00
|
|
|
func (n *IRCNotifier) ConnectedPhase(ctx context.Context) {
|
2021-02-20 11:17:32 +01:00
|
|
|
select {
|
2021-03-21 19:48:11 +01:00
|
|
|
case alertMsg := <-n.AlertMsgs:
|
2021-03-27 12:35:38 +01:00
|
|
|
n.SendAlertMsg(ctx, &alertMsg)
|
2022-10-15 13:00:43 +02:00
|
|
|
case <-n.sessionPongSignal:
|
|
|
|
logging.Debug("Received a PONG message; prev PONG was at %v", n.sessionLastPong)
|
|
|
|
n.sessionLastPong = time.Now()
|
|
|
|
case <-time.After(2*n.IrcConfig.PingFreq - time.Since(n.sessionLastPong)):
|
|
|
|
// Calling n.Client.Close() will trigger n.sessionDownSignal. However, as
|
|
|
|
// this also dispatches a hook, which we will catch as sessionDownSignal,
|
|
|
|
// this needs to be done in a concurrent fashion if we don't want to
|
|
|
|
// deadlock ourself.
|
|
|
|
//
|
|
|
|
// Furthermore, as this time.After(...) interval is now zero, it will also
|
|
|
|
// trigger when visiting this select the next time. To mitigate multiple
|
|
|
|
// Close() calls, it is wrapped within an sync.Once which will be reset
|
|
|
|
// during SetupPhase's sessionUpSignal.
|
|
|
|
n.sessionPingOnce.Do(func() {
|
|
|
|
logging.Error("Haven't received a PONG after twice the PING period")
|
|
|
|
go n.Client.Close()
|
|
|
|
})
|
2021-03-21 19:48:11 +01:00
|
|
|
case <-n.sessionDownSignal:
|
|
|
|
n.sessionUp = false
|
2021-03-27 17:29:54 +01:00
|
|
|
n.sessionWg.Done()
|
2021-03-27 00:49:16 +01:00
|
|
|
n.channelReconciler.Stop()
|
2021-03-21 19:48:11 +01:00
|
|
|
n.Client.Quit("see ya")
|
2021-02-20 11:17:32 +01:00
|
|
|
ircConnectedGauge.Set(0)
|
2021-03-27 12:35:38 +01:00
|
|
|
case <-ctx.Done():
|
2021-04-07 03:20:52 +02:00
|
|
|
logging.Info("IRC routine asked to terminate")
|
2021-02-20 11:17:32 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-27 12:35:38 +01:00
|
|
|
func (n *IRCNotifier) SetupPhase(ctx context.Context) {
|
2021-03-21 19:48:11 +01:00
|
|
|
if !n.Client.Connected() {
|
Restore IRC Ident on Reconnect
After a connection loss on an IRC session with a ngIRCd, the
alertmanager-irc-relay was unable to reconnect. After some debugging,
the error's origin was the state tracking within the used goirc library.
When using an unidentified session, ngIRCd prefixes the user's ident
with a `~`. The state tracking registers this and keeps `~${NICK}` as
the current and the new ident for future reconnects. However, `~` is not
a valid char for the `<user>` part in the `USER` command, at least not
for ngIRCd.
To clarify this behaviour, take a look at the following log. First, the
initial connection is begin established correctly. Keep an eye on the
`USER` command being sent to the server.
> http.go:132: INFO Starting HTTP server
> irc.go:308: INFO Connected to IRC server, waiting to establish session
> connection.go:543: DEBUG -> NICK alertbot
> connection.go:543: DEBUG -> USER alertbot 12 * :Alertmanager IRC Relay
> connection.go:474: DEBUG <- :__SERVER__ 001 alertbot :Welcome to the Internet Relay Network alertbot!~alertbot@__IP__
Now, there was a network incident and the session needs to be recreated.
> connection.go:466: ERROR irc.recv(): read tcp __REDACTED__: read: connection timed out
> connection.go:577: INFO irc.Close(): Disconnected from server.
> irc.go:150: INFO Disconnected from IRC
> reconciler.go:129: INFO Channel #alerts monitor: context canceled while monitoring
> irc.go:300: INFO Connecting to IRC __SERVER__
> backoff.go:111: INFO Backoff for 0s starts
> backoff.go:114: INFO Backoff for 0s ends
> connection.go:390: INFO irc.Connect(): Connecting to __SERVER__.
> irc.go:308: INFO Connected to IRC server, waiting to establish session
> connection.go:543: DEBUG -> NICK alertbot
> connection.go:543: DEBUG -> USER ~alertbot 12 * :Alertmanager IRC Relay
> connection.go:474: DEBUG <- ERROR :Invalid user name
> connection.go:577: INFO irc.Close(): Disconnected from server.
> irc.go:150: INFO Disconnected from IRC
> irc.go:319: WARN Receiving a session down before the session is up, this is odd
This time, the used `user` part of the `USER` command has the prefixed
`~` and fails. However, without using `-debug` and taking a very close
look, this error can be missed very easy.
As the new ident is invalid, the alertmanager-irc-relay is now stuck in
an endless reconnection loop.
This fix is kind of straight forward and just checks if the ident has
changed before trying to reconnect. It might not be the prettiest
solution, but recreating the whole *irc.Config resulted in other bugs as
it was still referenced - even after being `Close`d.
2022-10-15 11:53:26 +02:00
|
|
|
if n.IrcConfig.Me.Ident != n.Config.IRCNick {
|
|
|
|
logging.Debug("Restoring IRC nick from %s to %s", n.IrcConfig.Me.Ident, n.Config.IRCNick)
|
|
|
|
n.IrcConfig.Me.Ident = n.Config.IRCNick
|
|
|
|
}
|
|
|
|
|
2021-04-07 03:20:52 +02:00
|
|
|
logging.Info("Connecting to IRC %s", n.Client.Config().Server)
|
2021-03-27 12:35:38 +01:00
|
|
|
if ok := n.BackoffCounter.DelayContext(ctx); !ok {
|
2021-02-20 11:17:32 +01:00
|
|
|
return
|
|
|
|
}
|
2021-03-27 17:29:54 +01:00
|
|
|
if err := n.Client.ConnectContext(WithWaitGroup(ctx, &n.sessionWg)); err != nil {
|
2021-04-07 03:20:52 +02:00
|
|
|
logging.Error("Could not connect to IRC: %s", err)
|
2021-02-20 11:17:32 +01:00
|
|
|
return
|
|
|
|
}
|
2021-04-07 03:20:52 +02:00
|
|
|
logging.Info("Connected to IRC server, waiting to establish session")
|
2021-02-20 11:17:32 +01:00
|
|
|
}
|
|
|
|
select {
|
2021-03-21 19:48:11 +01:00
|
|
|
case <-n.sessionUpSignal:
|
|
|
|
n.sessionUp = true
|
2022-10-15 13:00:43 +02:00
|
|
|
n.sessionPingOnce = sync.Once{}
|
|
|
|
n.sessionLastPong = time.Now()
|
2021-03-27 17:29:54 +01:00
|
|
|
n.sessionWg.Add(1)
|
2021-04-16 18:17:08 +02:00
|
|
|
n.MaybeGhostNick()
|
|
|
|
n.MaybeWaitForNickserv()
|
2021-03-27 12:35:38 +01:00
|
|
|
n.channelReconciler.Start(ctx)
|
2021-02-20 11:17:32 +01:00
|
|
|
ircConnectedGauge.Set(1)
|
2021-03-21 19:48:11 +01:00
|
|
|
case <-n.sessionDownSignal:
|
2021-04-07 03:20:52 +02:00
|
|
|
logging.Warn("Receiving a session down before the session is up, this is odd")
|
2022-10-15 13:00:43 +02:00
|
|
|
case <-n.sessionPongSignal:
|
|
|
|
logging.Warn("Receiving a PONG before the session is up, this is odd")
|
2021-03-27 12:35:38 +01:00
|
|
|
case <-ctx.Done():
|
2021-04-07 03:20:52 +02:00
|
|
|
logging.Info("IRC routine asked to terminate")
|
2021-02-20 11:17:32 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-27 12:35:38 +01:00
|
|
|
func (n *IRCNotifier) Run(ctx context.Context, stopWg *sync.WaitGroup) {
|
|
|
|
defer stopWg.Done()
|
2021-02-20 11:17:32 +01:00
|
|
|
|
2021-03-27 12:35:38 +01:00
|
|
|
for ctx.Err() != context.Canceled {
|
2021-03-21 19:48:11 +01:00
|
|
|
if !n.sessionUp {
|
2021-03-27 12:35:38 +01:00
|
|
|
n.SetupPhase(ctx)
|
2021-02-20 11:17:32 +01:00
|
|
|
} else {
|
2021-03-27 12:35:38 +01:00
|
|
|
n.ConnectedPhase(ctx)
|
2021-02-20 11:17:32 +01:00
|
|
|
}
|
|
|
|
}
|
2021-03-21 19:48:11 +01:00
|
|
|
n.ShutdownPhase()
|
2021-02-20 11:17:32 +01:00
|
|
|
}
|