Web: Factorize the code of the title snarfer and the title command.

2025-12-29 06:27:54 +01:00 · 2015-08-29 21:04:38 +02:00 · 2015-08-29 21:04:38 +02:00 · 576a96fb71
commit 576a96fb71
parent 8033e6ae14
1 changed files with 22 additions and 36 deletions
--- a/plugins/Web/plugin.py
+++ b/plugins/Web/plugin.py
@ -130,6 +130,24 @@ class Web(callbacks.PluginRegexp):
    def noIgnore(self, irc, msg):
        return not self.registryValue('checkIgnored', msg.args[0])

+    def getTitle(self, url):
+        size = conf.supybot.protocols.http.peekSize()
+        text = utils.web.getUrl(url, size=size)
+        try:
+            text = text.decode(utils.web.getEncoding(text) or 'utf8',
+                    'replace')
+        except UnicodeDecodeError:
+            pass
+        parser = Title()
+        if minisix.PY3 and isinstance(text, bytes):
+            irc.error(_('Could not guess the page\'s encoding. (Try '
+                    'installing python-charade.)'), Raise=True)
+        parser.feed(text)
+        title = parser.title
+        if title:
+            title = utils.web.htmlToText(parser.title.strip())
+        return parser.title
+
    @fetch_sandbox
    def titleSnarfer(self, irc, msg, match):
        channel = msg.args[0]
@ -145,31 +163,11 @@ class Web(callbacks.PluginRegexp):
            if r and r.search(url):
                self.log.debug('Not titleSnarfing %q.', url)
                return
-            try:
-                size = conf.supybot.protocols.http.peekSize()
-                fd = utils.web.getUrlFd(url)
-                text = fd.read(size)
-                fd.close()
-            except socket.timeout as e:
-                self.log.info('Couldn\'t snarf title of %u: %s.', url, e)
-                if self.registryValue('snarferReportIOExceptions', channel):
-                     irc.reply(url+" : "+utils.web.TIMED_OUT, prefixNick=False)
-                return
-            try:
-                text = text.decode(utils.web.getEncoding(text) or 'utf8',
-                        'replace')
-            except:
-                pass
-            parser = Title()
-            parser.feed(text)
-            if parser.title:
+            title = self.getTitle(url)
+            if title:
                domain = utils.web.getDomain(fd.geturl()
                        if self.registryValue('snarferShowTargetDomain', channel)
                        else url)
-                title = utils.web.htmlToText(parser.title.strip())
-                if minisix.PY2:
-                    if isinstance(title, unicode):
-                        title = title.encode('utf8', 'replace')
                s = format(_('Title: %s (at %s)'), title, domain)
                irc.reply(s, prefixNick=False)
    titleSnarfer = urlSnarfer(titleSnarfer)
@ -276,20 +274,8 @@ class Web(callbacks.PluginRegexp):
        if not self._checkURLWhitelist(url):
            irc.error("This url is not on the whitelist.")
            return
-        size = conf.supybot.protocols.http.peekSize()
-        text = utils.web.getUrl(url, size=size)
-        try:
-            text = text.decode(utils.web.getEncoding(text) or 'utf8',
-                    'replace')
-        except UnicodeDecodeError:
-            pass
-        parser = Title()
-        if minisix.PY3 and isinstance(text, bytes):
-            irc.error(_('Could not guess the page\'s encoding. (Try '
-                    'installing python-charade.)'), Raise=True)
-        parser.feed(text)
-        if parser.title:
-            title = utils.web.htmlToText(parser.title.strip())
+        title = self.getTitle(url)
+        if title:
            if not [y for x,y in optlist if x == 'no-filter']:
                for i in range(1, 4):
                    title = title.replace(chr(i), '')