Updated the tiny regexp to match the site's new layout. Added more checks for
nonSnarfingRegexp. Lowered the minimum length we check for tiny URLs, since
they can be as short as 20 characters.
This commit is contained in:
James Vega 2004-06-07 19:45:49 +00:00
parent 2bf216626d
commit 5633b9d46b
2 changed files with 39 additions and 6 deletions

View File

@ -147,8 +147,11 @@ class URL(callbacks.PrivmsgCommandAndRegexp,
text = msg.args[1]
for url in webutils.urlRe.findall(text):
r = self.registryValue('nonSnarfingRegexp', channel)
#self.log.warning(repr(r))
if r and r.search(url):
#self.log.warning('Skipping addition of URL to db.')
continue
#self.log.warning('Adding URL to db.')
(protocol, site, filename, _, _, _) = urlparse.urlparse(url)
previousMsg = ''
for oldMsg in reversed(irc.state.history):
@ -171,8 +174,11 @@ class URL(callbacks.PrivmsgCommandAndRegexp,
if not ircutils.isChannel(msg.args[0]):
return
channel = msg.args[0]
r = self.registryValue('nonSnarfingRegexp', channel)
if self.registryValue('tinyurlSnarfer', channel):
url = match.group(0)
if r and r.search(url):
return
minlen = self.registryValue('tinyurlSnarfer.minimumLength',channel)
if len(url) >= minlen:
db = self.getDb(channel)
@ -195,8 +201,12 @@ class URL(callbacks.PrivmsgCommandAndRegexp,
if callbacks.addressed(irc.nick, msg):
return
channel = msg.args[0]
r = self.registryValue('nonSnarfingRegexp', channel)
#self.log.warning('Title: %r' % r)
if self.registryValue('titleSnarfer', channel):
url = match.group(0)
if r and r.search(url):
return
try:
size = conf.supybot.protocols.http.peekSize()
text = webutils.getUrl(url, size=size)
@ -222,7 +232,7 @@ class URL(callbacks.PrivmsgCommandAndRegexp,
WHERE tinyurl=%s""", id, tinyurl)
db.commit()
_tinyRe = re.compile(r'(http://tinyurl\.com/\w+)</blockquote>')
_tinyRe = re.compile(r'<blockquote><b>(http://tinyurl\.com/\w+)</b>')
def _getTinyUrl(self, url, channel, cmd=False):
db = self.getDb(channel)
cursor = db.cursor()
@ -280,14 +290,14 @@ class URL(callbacks.PrivmsgCommandAndRegexp,
Returns a TinyURL.com version of <url>
"""
url = privmsgs.getArgs(args)
if len(url) < 24:
irc.error(
'Stop being a lazy-biotch and type the URL yourself.')
if len(url) < 20:
irc.error('Stop being a lazy-biotch and type the URL yourself.')
return
channel = msg.args[0]
snarf = self.registryValue('tinyurlSnarfer', channel)
minlen = self.registryValue('tinyurlSnarfer.minimumLength', channel)
if snarf and len(url) >= minlen:
r = self.registryValue('nonSnarfingRegexp', channel)
if snarf and len(url) >= minlen and not r.search(url):
return
(tinyurl, updateDb) = self._getTinyUrl(url, channel, cmd=True)
if tinyurl:

View File

@ -148,7 +148,30 @@ if sqlite is not None:
' (at microsoft.com)')
finally:
conf.supybot.plugins.URL.titleSnarfer.setValue(False)
def testNonSnarfing(self):
    # Exercises nonSnarfingRegexp against both snarfers: with the regexp
    # set to 'm/sf/', 'http://sf.net/' must produce no response while
    # 'http://www.sourceforge.net/' must still be handled.
    urlConf = conf.supybot.plugins.URL
    # Remember the current settings so they can be put back afterwards.
    savedTiny = urlConf.tinyurlSnarfer()
    savedRegexp = urlConf.nonSnarfingRegexp()
    savedTitle = urlConf.titleSnarfer()
    try:
        urlConf.nonSnarfingRegexp.set('m/sf/')
        # First pass: the tinyurl snarfer.
        try:
            urlConf.tinyurlSnarfer.setValue(True)
            self.assertNoResponse('http://sf.net/', 2)
            self.assertResponse('http://www.sourceforge.net/',
                                'http://tinyurl.com/2cnkf')
        finally:
            urlConf.tinyurlSnarfer.setValue(savedTiny)
        # Second pass: the title snarfer.
        try:
            urlConf.titleSnarfer.setValue(True)
            self.assertNoResponse('http://sf.net/', 2)
            self.assertRegexp('http://www.sourceforge.net/',
                              r'Sourceforge\.net')
        finally:
            urlConf.titleSnarfer.setValue(savedTitle)
    finally:
        # Always restore the regexp, even if an assertion above failed.
        urlConf.nonSnarfingRegexp.setValue(savedRegexp)
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: