3
0
mirror of https://github.com/jlu5/PyLink.git synced 2025-02-25 01:40:38 +01:00

relay: rework nick normalization with optional unidecode support

This will attempt to translate UTF-8 nicks to ASCII ones instead of doing the ugly '||||' replace.
Also, the fallback character for disallowed nick characters is now "-" instead of "|".

TODO: document relay::use_unidecode

Closes #561.
This commit is contained in:
James Lu 2019-02-12 00:58:07 -08:00
parent cfbadb4539
commit 11b65ee809

View File

@ -12,6 +12,14 @@ from pylinkirc.coremods import permissions
CHANNEL_DELINKED_MSG = "Channel delinked." CHANNEL_DELINKED_MSG = "Channel delinked."
RELAY_UNLOADED_MSG = "Relay plugin unloaded." RELAY_UNLOADED_MSG = "Relay plugin unloaded."
try:
import unidecode
except ImportError:
log.info('relay: unidecode not found; disabling unicode nicks support')
USE_UNIDECODE = False
else:
USE_UNIDECODE = conf.conf.get('relay', {}).get('use_unidecode', True)
### GLOBAL (statekeeping) VARIABLES ### GLOBAL (statekeeping) VARIABLES
relayusers = defaultdict(dict) relayusers = defaultdict(dict)
relayservers = defaultdict(dict) relayservers = defaultdict(dict)
@ -111,8 +119,9 @@ def die(irc=None):
except KeyError: except KeyError:
log.debug('relay.die: failed to clear persistent channels:', exc_info=True) log.debug('relay.die: failed to clear persistent channels:', exc_info=True)
allowed_chars = string.digits + string.ascii_letters + '/^|\\-_[]{}`' IRC_ASCII_ALLOWED_CHARS = string.digits + string.ascii_letters + '/^|\\-_[]{}`'
fallback_separator = '|' FALLBACK_SEPARATOR = '|'
FALLBACK_CHARACTER = '-'
def normalize_nick(irc, netname, nick, times_tagged=0, uid=''): def normalize_nick(irc, netname, nick, times_tagged=0, uid=''):
""" """
Creates a normalized nickname for the given nick suitable for introduction to a remote network Creates a normalized nickname for the given nick suitable for introduction to a remote network
@ -121,6 +130,14 @@ def normalize_nick(irc, netname, nick, times_tagged=0, uid=''):
UID is optional for checking regular nick changes, to make sure that the sender doesn't get UID is optional for checking regular nick changes, to make sure that the sender doesn't get
marked as nick-colliding with itself. marked as nick-colliding with itself.
""" """
is_unicode_capable = irc.casemapping in ('utf8', 'utf-8', 'rfc7700')
if USE_UNIDECODE and not is_unicode_capable:
nick = unidecode.unidecode(nick)
netname = unidecode.unidecode(netname)
# Normalize spaces to hyphens
nick = nick.replace(' ', FALLBACK_CHARACTER)
netname = netname.replace(' ', FALLBACK_CHARACTER)
# Get the nick/net separator # Get the nick/net separator
separator = irc.serverdata.get('separator') or \ separator = irc.serverdata.get('separator') or \
@ -154,8 +171,8 @@ def normalize_nick(irc, netname, nick, times_tagged=0, uid=''):
irc.serverdata.get('relay_force_slashes') irc.serverdata.get('relay_force_slashes')
if '/' not in separator or not protocol_allows_slashes: if '/' not in separator or not protocol_allows_slashes:
separator = separator.replace('/', fallback_separator) separator = separator.replace('/', FALLBACK_SEPARATOR)
nick = nick.replace('/', fallback_separator) nick = nick.replace('/', FALLBACK_SEPARATOR)
if nick.startswith(tuple(string.digits+'-')): if nick.startswith(tuple(string.digits+'-')):
# On TS6 IRCds, nicks that start with 0-9 are only allowed if # On TS6 IRCds, nicks that start with 0-9 are only allowed if
@ -182,9 +199,15 @@ def normalize_nick(irc, netname, nick, times_tagged=0, uid=''):
# Loop over every character in the nick, making sure that it only contains valid # Loop over every character in the nick, making sure that it only contains valid
# characters. # characters.
if not is_unicode_capable:
for char in nick: for char in nick:
if char not in allowed_chars: if char not in IRC_ASCII_ALLOWED_CHARS:
nick = nick.replace(char, fallback_separator) nick = nick.replace(char, FALLBACK_CHARACTER)
else:
# UnrealIRCd 4's forbidden nick chars, from
# https://github.com/unrealircd/unrealircd/blob/02d69e7d8/src/modules/charsys.c#L152-L163
for char in """!+%@&~#$:'\"?*,.""":
nick = nick.replace(char, FALLBACK_CHARACTER)
while irc.nick_to_uid(nick) not in (None, uid): while irc.nick_to_uid(nick) not in (None, uid):
# The nick we want exists: Increase the separator length by 1 if the user was already # The nick we want exists: Increase the separator length by 1 if the user was already