Add a charset option (irc). Closes #247

This commit is contained in:
Wim 2017-08-29 21:30:59 +02:00
parent 5a85258f74
commit e7fcb25107
3 changed files with 35 additions and 12 deletions

View File

@ -43,6 +43,7 @@ type Protocol struct {
AuthCode string // steam AuthCode string // steam
BindAddress string // mattermost, slack // DEPRECATED BindAddress string // mattermost, slack // DEPRECATED
Buffer int // api Buffer int // api
Charset string // irc
EditSuffix string // mattermost, slack, discord, telegram, gitter EditSuffix string // mattermost, slack, discord, telegram, gitter
EditDisable bool // mattermost, slack, discord, telegram, gitter EditDisable bool // mattermost, slack, discord, telegram, gitter
IconURL string // mattermost, slack IconURL string // mattermost, slack

View File

@ -265,6 +265,10 @@ func (b *Birc) handlePrivMsg(event *irc.Event) {
re := regexp.MustCompile(`[[:cntrl:]](\d+,|)\d+`) re := regexp.MustCompile(`[[:cntrl:]](\d+,|)\d+`)
msg = re.ReplaceAllString(msg, "") msg = re.ReplaceAllString(msg, "")
var r io.Reader
var err error
mycharset := b.Config.Charset
if mycharset == "" {
// detect what were sending so that we convert it to utf-8 // detect what were sending so that we convert it to utf-8
detector := chardet.NewTextDetector() detector := chardet.NewTextDetector()
result, err := detector.DetectBest([]byte(msg)) result, err := detector.DetectBest([]byte(msg))
@ -273,12 +277,13 @@ func (b *Birc) handlePrivMsg(event *irc.Event) {
return return
} }
flog.Debugf("detected %s confidence %#v", result.Charset, result.Confidence) flog.Debugf("detected %s confidence %#v", result.Charset, result.Confidence)
var r io.Reader
r, err = charset.NewReader(result.Charset, strings.NewReader(msg)) r, err = charset.NewReader(result.Charset, strings.NewReader(msg))
// if we're not sure, just pick ISO-8859-1 // if we're not sure, just pick ISO-8859-1
if result.Confidence < 80 { if result.Confidence < 80 {
r, err = charset.NewReader("ISO-8859-1", strings.NewReader(msg)) mycharset = "ISO-8859-1"
} }
}
r, err = charset.NewReader(mycharset, strings.NewReader(msg))
if err != nil { if err != nil {
flog.Errorf("charset to utf-8 conversion failed: %s", err) flog.Errorf("charset to utf-8 conversion failed: %s", err)
return return

View File

@ -32,6 +32,23 @@ UseSASL=false
#OPTIONAL (default false) #OPTIONAL (default false)
SkipTLSVerify=true SkipTLSVerify=true
#If you know your charset, you can specify it manually.
#Otherwise the bridge tries to detect it automatically. Select one of the values below:
# "iso-8859-2:1987", "iso-8859-9:1989", "866", "latin9", "iso-8859-10:1992", "iso-ir-109", "hebrew",
# "cp932", "iso-8859-15", "cp437", "utf-16be", "iso-8859-3:1988", "windows-1251", "utf16", "latin6",
# "latin3", "iso-8859-1:1987", "iso-8859-9", "utf-16le", "big5", "cp819", "asmo-708", "utf-8",
# "ibm437", "iso-ir-157", "iso-ir-144", "latin4", "850", "iso-8859-5", "iso-8859-5:1988", "l3",
# "windows-31j", "utf8", "iso-8859-3", "437", "greek", "iso-8859-8", "l6", "l9-iso-8859-15",
# "iso-8859-2", "latin2", "iso-ir-100", "iso-8859-6", "arabic", "iso-ir-148", "us-ascii", "x-sjis",
# "utf16be", "iso-8859-8:1988", "utf16le", "l4", "utf-16", "iso-ir-138", "iso-8859-7", "iso-8859-7:1987",
# "windows-1252", "l2", "koi8-r", "iso8859-1", "latin1", "ecma-114", "iso-ir-110", "elot-928",
# "iso-ir-126", "iso-8859-1", "iso-ir-127", "cp850", "cyrillic", "greek8", "windows-1250", "iso-latin-1",
# "l5", "ibm866", "cp866", "ms-kanji", "ibm850", "ecma-118", "iso-ir-101", "ibm819", "l1", "iso-8859-6:1987",
# "latin5", "ascii", "sjis", "iso-8859-10", "iso-8859-4", "iso-8859-4:1988", "shift-jis"
# The selected charset will be converted to utf-8 when sent to other bridges.
#OPTIONAL (default "")
Charset=""
#Your nick on irc. #Your nick on irc.
#REQUIRED #REQUIRED
Nick="matterbot" Nick="matterbot"