Updated the TinyURL regexp to match the site's new layout. Added more checks
for nonSnarfingRegexp. Lowered the minimum length we check for tiny URLs,
since they can be as short as 20 characters.
This commit is contained in:
James Vega 2004-06-07 19:45:49 +00:00
parent 2bf216626d
commit 5633b9d46b
2 changed files with 39 additions and 6 deletions

View File

@ -147,8 +147,11 @@ class URL(callbacks.PrivmsgCommandAndRegexp,
text = msg.args[1] text = msg.args[1]
for url in webutils.urlRe.findall(text): for url in webutils.urlRe.findall(text):
r = self.registryValue('nonSnarfingRegexp', channel) r = self.registryValue('nonSnarfingRegexp', channel)
#self.log.warning(repr(r))
if r and r.search(url): if r and r.search(url):
#self.log.warning('Skipping addition of URL to db.')
continue continue
#self.log.warning('Adding URL to db.')
(protocol, site, filename, _, _, _) = urlparse.urlparse(url) (protocol, site, filename, _, _, _) = urlparse.urlparse(url)
previousMsg = '' previousMsg = ''
for oldMsg in reversed(irc.state.history): for oldMsg in reversed(irc.state.history):
@ -171,8 +174,11 @@ class URL(callbacks.PrivmsgCommandAndRegexp,
if not ircutils.isChannel(msg.args[0]): if not ircutils.isChannel(msg.args[0]):
return return
channel = msg.args[0] channel = msg.args[0]
r = self.registryValue('nonSnarfingRegexp', channel)
if self.registryValue('tinyurlSnarfer', channel): if self.registryValue('tinyurlSnarfer', channel):
url = match.group(0) url = match.group(0)
if r and r.search(url):
return
minlen = self.registryValue('tinyurlSnarfer.minimumLength',channel) minlen = self.registryValue('tinyurlSnarfer.minimumLength',channel)
if len(url) >= minlen: if len(url) >= minlen:
db = self.getDb(channel) db = self.getDb(channel)
@ -195,8 +201,12 @@ class URL(callbacks.PrivmsgCommandAndRegexp,
if callbacks.addressed(irc.nick, msg): if callbacks.addressed(irc.nick, msg):
return return
channel = msg.args[0] channel = msg.args[0]
r = self.registryValue('nonSnarfingRegexp', channel)
#self.log.warning('Title: %r' % r)
if self.registryValue('titleSnarfer', channel): if self.registryValue('titleSnarfer', channel):
url = match.group(0) url = match.group(0)
if r and r.search(url):
return
try: try:
size = conf.supybot.protocols.http.peekSize() size = conf.supybot.protocols.http.peekSize()
text = webutils.getUrl(url, size=size) text = webutils.getUrl(url, size=size)
@ -222,7 +232,7 @@ class URL(callbacks.PrivmsgCommandAndRegexp,
WHERE tinyurl=%s""", id, tinyurl) WHERE tinyurl=%s""", id, tinyurl)
db.commit() db.commit()
_tinyRe = re.compile(r'(http://tinyurl\.com/\w+)</blockquote>') _tinyRe = re.compile(r'<blockquote><b>(http://tinyurl\.com/\w+)</b>')
def _getTinyUrl(self, url, channel, cmd=False): def _getTinyUrl(self, url, channel, cmd=False):
db = self.getDb(channel) db = self.getDb(channel)
cursor = db.cursor() cursor = db.cursor()
@ -280,14 +290,14 @@ class URL(callbacks.PrivmsgCommandAndRegexp,
Returns a TinyURL.com version of <url> Returns a TinyURL.com version of <url>
""" """
url = privmsgs.getArgs(args) url = privmsgs.getArgs(args)
if len(url) < 24: if len(url) < 20:
irc.error( irc.error('Stop being a lazy-biotch and type the URL yourself.')
'Stop being a lazy-biotch and type the URL yourself.')
return return
channel = msg.args[0] channel = msg.args[0]
snarf = self.registryValue('tinyurlSnarfer', channel) snarf = self.registryValue('tinyurlSnarfer', channel)
minlen = self.registryValue('tinyurlSnarfer.minimumLength', channel) minlen = self.registryValue('tinyurlSnarfer.minimumLength', channel)
if snarf and len(url) >= minlen: r = self.registryValue('nonSnarfingRegexp', channel)
if snarf and len(url) >= minlen and not r.search(url):
return return
(tinyurl, updateDb) = self._getTinyUrl(url, channel, cmd=True) (tinyurl, updateDb) = self._getTinyUrl(url, channel, cmd=True)
if tinyurl: if tinyurl:

View File

@ -149,6 +149,29 @@ if sqlite is not None:
finally: finally:
conf.supybot.plugins.URL.titleSnarfer.setValue(False) conf.supybot.plugins.URL.titleSnarfer.setValue(False)
def testNonSnarfing(self):
    """Check that both snarfers ignore URLs matching nonSnarfingRegexp.

    With nonSnarfingRegexp set to 'm/sf/', a URL containing "sf" must
    produce no response from either the tinyurl snarfer or the title
    snarfer, while a URL that does not match is still snarfed.  Every
    configuration value touched here is restored afterwards, even when
    an assertion fails.
    """
    urlConf = conf.supybot.plugins.URL
    tinySetting = urlConf.tinyurlSnarfer
    regexpSetting = urlConf.nonSnarfingRegexp
    titleSetting = urlConf.titleSnarfer
    # Remember the current values so the finally clauses can put them back.
    savedTiny = tinySetting()
    savedRegexp = regexpSetting()
    savedTitle = titleSetting()
    try:
        regexpSetting.set('m/sf/')
        # First pass: only the tinyurl snarfer is switched on.
        try:
            tinySetting.setValue(True)
            self.assertNoResponse('http://sf.net/', 2)
            self.assertResponse('http://www.sourceforge.net/',
                                'http://tinyurl.com/2cnkf')
        finally:
            tinySetting.setValue(savedTiny)
        # Second pass: same URLs, this time against the title snarfer.
        try:
            titleSetting.setValue(True)
            self.assertNoResponse('http://sf.net/', 2)
            self.assertRegexp('http://www.sourceforge.net/',
                              r'Sourceforge\.net')
        finally:
            titleSetting.setValue(savedTitle)
    finally:
        regexpSetting.setValue(savedRegexp)
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: # vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: