3
0
mirror of https://github.com/jlu5/PyLink.git synced 2025-01-25 19:54:25 +01:00

antispam: filter away Unicode lookalike characters when processing

Based off 56b48e4e51
This commit is contained in:
James Lu 2018-11-07 16:14:48 -08:00
parent 6462ae3a3c
commit 32e9cc689e

View File

@ -11,6 +11,76 @@ sbot = utils.register_service("antispam", default_nick="AntiSpam", desc=mydesc)
def die(irc=None):
utils.unregister_service("antispam")
_UNICODE_CHARMAP = {
'A': 'AΑА𝐀𝐴𝑨𝒜𝓐𝔄𝔸𝕬𝖠𝗔𝘈𝘼𝙰𝚨𝛢𝜜𝝖𝞐',
'B': 'ΒВв𐌁𝐁𝐵𝑩𝓑𝔅𝔹𝕭𝖡𝗕𝘉𝘽𝙱𝚩𝛣𝜝𝝗𝞑',
'C': 'CϹС𐌂𝐂𝐶𝑪𝒞𝓒𝕮𝖢𝗖𝘊𝘾𝙲',
'D': 'D𝐃𝐷𝑫𝒟𝓓𝔇𝔻𝕯𝖣𝗗𝘋𝘿𝙳',
'E': 'EΕЕ𝐄𝐸𝑬𝓔𝔈𝔼𝕰𝖤𝗘𝘌𝙀𝙴𝚬𝛦𝜠𝝚𝞔',
'F': 'FϜ𝐅𝐹𝑭𝓕𝔉𝔽𝕱𝖥𝗙𝘍𝙁𝙵𝟊',
'G': 'Ԍԍ𝐆𝐺𝑮𝒢𝓖𝔊𝔾𝕲𝖦𝗚𝘎𝙂𝙶',
'H': 'ΗНн𝐇𝐻𝑯𝓗𝕳𝖧𝗛𝘏𝙃𝙷𝚮𝛨𝜢𝝜𝞖',
'J': 'JЈ𝐉𝐽𝑱𝒥𝓙𝔍𝕁𝕵𝖩𝗝𝘑𝙅𝙹',
'K': 'KΚК𝐊𝐾𝑲𝒦𝓚𝔎𝕂𝕶𝖪𝗞𝘒𝙆𝙺𝚱𝛫𝜥𝝟𝞙',
'L': '𝐋𝐿𝑳𝓛𝔏𝕃𝕷𝖫𝗟𝘓𝙇𝙻',
'M': 'MΜϺМ𐌑𝐌𝑀𝑴𝓜𝔐𝕄𝕸𝖬𝗠𝘔𝙈𝙼𝚳𝛭𝜧𝝡𝞛',
'N': 'Ν𝐍𝑁𝑵𝒩𝓝𝔑𝕹𝖭𝗡𝘕𝙉𝙽𝚴𝛮𝜨𝝢𝞜',
'P': 'PΡРᴘᴩ𝐏𝑃𝑷𝒫𝓟𝔓𝕻𝖯𝗣𝘗𝙋𝙿𝚸𝛲𝜬𝝦𝞠',
'Q': 'Q𝐐𝑄𝑸𝒬𝓠𝔔𝕼𝖰𝗤𝘘𝙌𝚀',
'R': 'RƦʀ𝐑𝑅𝑹𝓡𝕽𝖱𝗥𝘙𝙍𝚁',
'S': 'SЅՏ𝐒𝑆𝑺𝒮𝓢𝔖𝕊𝕾𝖲𝗦𝘚𝙎𝚂',
'T': 'TΤτТт𐌕𝐓𝑇𝑻𝒯𝓣𝔗𝕋𝕿𝖳𝗧𝘛𝙏𝚃𝚻𝛕𝛵𝜏𝜯𝝉𝝩𝞃𝞣𝞽',
'U': 'UՍ𝐔𝑈𝑼𝒰𝓤𝔘𝕌𝖀𝖴𝗨𝘜𝙐𝚄',
'V': 'VѴ٧۷𝐕𝑉𝑽𝒱𝓥𝔙𝕍𝖁𝖵𝗩𝘝𝙑𝚅',
'W': 'WԜ𝐖𝑊𝑾𝒲𝓦𝔚𝕎𝖂𝖶𝗪𝘞𝙒𝚆',
'X': 'XΧХ𐌗𐌢𝐗𝑋𝑿𝒳𝓧𝔛𝕏𝖃𝖷𝗫𝘟𝙓𝚇𝚾𝛸𝜲𝝬𝞦',
'Y': 'YΥϒУҮ𝐘𝑌𝒀𝒴𝓨𝔜𝕐𝖄𝖸𝗬𝘠𝙔𝚈𝚼𝛶𝜰𝝪𝞤',
'Z': 'ZΖ𝐙𝑍𝒁𝒵𝓩𝖅𝖹𝗭𝘡𝙕𝚉𝚭𝛧𝜡𝝛𝞕',
'a': 'aɑαа𝐚𝑎𝒂𝒶𝓪𝔞𝕒𝖆𝖺𝗮𝘢𝙖𝚊𝛂𝛼𝜶𝝰𝞪',
'b': 'bƄЬ𝐛𝑏𝒃𝒷𝓫𝔟𝕓𝖇𝖻𝗯𝘣𝙗𝚋',
'c': 'cϲс𝐜𝑐𝒄𝒸𝓬𝔠𝕔𝖈𝖼𝗰𝘤𝙘𝚌',
'd': 'dԁ𝐝𝑑𝒅𝒹𝓭𝔡𝕕𝖉𝖽𝗱𝘥𝙙𝚍',
'e': 'eеҽ𝐞𝑒𝒆𝓮𝔢𝕖𝖊𝖾𝗲𝘦𝙚𝚎',
'f': 'fſϝք𝐟𝑓𝒇𝒻𝓯𝔣𝕗𝖋𝖿𝗳𝘧𝙛𝚏𝟋',
'g': 'gƍɡց𝐠𝑔𝒈𝓰𝔤𝕘𝖌𝗀𝗴𝘨𝙜𝚐',
'h': 'hһհ𝐡𝒉𝒽𝓱𝔥𝕙𝖍𝗁𝗵𝘩𝙝𝚑',
'i': 'iıɩɪιіӏ𝐢𝑖𝒊𝒾𝓲𝔦𝕚𝖎𝗂𝗶𝘪𝙞𝚒𝚤𝛊𝜄𝜾𝝸𝞲',
'j': 'jϳј𝐣𝑗𝒋𝒿𝓳𝔧𝕛𝖏𝗃𝗷𝘫𝙟𝚓',
'k': 'k𝐤𝑘𝒌𝓀𝓴𝔨𝕜𝖐𝗄𝗸𝘬𝙠𝚔',
'm': 'ⅿm',
'n': 'nոռ𝐧𝑛𝒏𝓃𝓷𝔫𝕟𝖓𝗇𝗻𝘯𝙣𝚗',
'o': '',
'p': 'pρϱр𝐩𝑝𝒑𝓅𝓹𝔭𝕡𝖕𝗉𝗽𝘱𝙥𝚙𝛒𝛠𝜌𝜚𝝆𝝔𝞀𝞎𝞺𝟈',
'q': 'qԛգզ𝐪𝑞𝒒𝓆𝓺𝔮𝕢𝖖𝗊𝗾𝘲𝙦𝚚',
'r': 'rг𝐫𝑟𝒓𝓇𝓻𝔯𝕣𝖗𝗋𝗿𝘳𝙧𝚛',
's': 'sƽѕ𝐬𝑠𝒔𝓈𝓼𝔰𝕤𝖘𝗌𝘀𝘴𝙨𝚜',
't': 't𝐭𝑡𝒕𝓉𝓽𝔱𝕥𝖙𝗍𝘁𝘵𝙩𝚝',
'u': 'uʋυս𝐮𝑢𝒖𝓊𝓾𝔲𝕦𝖚𝗎𝘂𝘶𝙪𝚞𝛖𝜐𝝊𝞄𝞾',
'v': 'vνѵט𝐯𝑣𝒗𝓋𝓿𝔳𝕧𝖛𝗏𝘃𝘷𝙫𝚟𝛎𝜈𝝂𝝼𝞶',
'w': 'wɯѡԝա𝐰𝑤𝒘𝓌𝔀𝔴𝕨𝖜𝗐𝘄𝘸𝙬𝚠',
'x': 'x×х𝐱𝑥𝒙𝓍𝔁𝔵𝕩𝖝𝗑𝘅𝘹𝙭𝚡',
'y': 'yɣʏγуүỿ𝐲𝑦𝒚𝓎𝔂𝔶𝕪𝖞𝗒𝘆𝘺𝙮𝚢𝛄𝛾𝜸𝝲𝞬',
'z': 'z𝐳𝑧𝒛𝓏𝔃𝔷𝕫𝖟𝗓𝘇𝘻𝙯𝚣',
'/': '',
'\\': '',
' ': '\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\xa0\u202f\u205f',
'.': '',
'-': '˗╴﹣-−⎼',
'!': '﹗!ǃⵑ︕',
':': ':˸։፡᛬⁚∶⠆︓﹕',
}
def _prep_maketrans(data):
from_s = ''
to_s = ''
for target, chars in data.items():
from_s += chars
to_s += target * len(chars)
return str.maketrans(from_s, to_s)
UNICODE_CHARMAP = _prep_maketrans(_UNICODE_CHARMAP)
PUNISH_OPTIONS = ['kill', 'ban', 'quiet', 'kick', 'block']
EXEMPT_OPTIONS = ['voice', 'halfop', 'op']
DEFAULT_EXEMPT_OPTION = 'halfop'
@ -215,8 +285,9 @@ utils.add_hook(handle_masshighlight, 'NOTICE', priority=1000)
TEXTFILTER_DEFAULTS = {
'reason': "Spam is prohibited",
'punishment': 'kick+ban+block',
'watch_pms': 'false',
'enabled': False
'watch_pms': False,
'enabled': False,
'munge_unicode': True,
}
def handle_textfilter(irc, source, command, args):
"""Antispam text filter handler."""
@ -277,6 +348,8 @@ def handle_textfilter(irc, source, command, args):
if irc.get_service_option('antispam', 'strip_formatting', True):
text = utils.strip_irc_formatting(text)
if txf_settings.get('munge_unicode', TEXTFILTER_DEFAULTS['munge_unicode']):
text = str.translate(text, UNICODE_CHARMAP)
punished = False
for filterglob in txf_globs: