From f512efd6cc3a634e5175c84dd521e0f6eff9fe80 Mon Sep 17 00:00:00 2001 From: Pragmatic Software Date: Mon, 3 Jun 2013 17:02:58 +0000 Subject: [PATCH] Update get_title.pl to use Levenshtein matching on URLs/titles in order to avoid displaying titles that are similar to URLs --- modules/get_title.pl | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/modules/get_title.pl b/modules/get_title.pl index f4e64f82..b127feae 100755 --- a/modules/get_title.pl +++ b/modules/get_title.pl @@ -4,12 +4,13 @@ use LWP::UserAgent; use HTML::Entities; +use Text::Levenshtein qw(fastdistance); my ($text); if ($#ARGV <= 0) { - print "Usage: !title nick URL\n"; + print "Usage: title nick URL\n"; exit; } @@ -18,6 +19,7 @@ $arguments = join("%20", @ARGV); exit if($arguments =~ m/stackoverflow.com/i); exit if($arguments =~ m/scratch.mit.edu/i); +exit if($arguments =~ m/imgur.com/i); exit if($arguments =~ m/sprunge.us/i); exit if($arguments =~ m/hastebin.com/i); exit if($arguments =~ m/lmgtfy.com/i); @@ -35,8 +37,8 @@ exit if($arguments =~ m/pastie/i); exit if($arguments =~ m/ideone.com/i); exit if($arguments =~ m/codepad.org/i); exit if($arguments =~ m/^http\:\/\/past(e|ing)\./i); -exit if($arguments =~ m/paste.*\.(?:com|org|net|ch|ca|uk|info)/i); -exit if($arguments =~ m/pasting.*\.(?:com|org|net|ca|uk|info|ch)/i); +exit if($arguments =~ m/paste.*\.(?:com|org|net|ch|ca|de|uk|info)/i); +exit if($arguments =~ m/pasting.*\.(?:com|org|net|ca|de|uk|info|ch)/i); my $ua = LWP::UserAgent->new; $ua->agent("Mozilla/5.0"); @@ -99,4 +101,14 @@ $t = decode_entities($t); $t =~ s/^\s+//; $t =~ s/\s+$//; +my ($file) = $arguments =~ m/.*\/(.*)$/; +$file =~ s/[_-]/ /g; + +my $distance = fastdistance(lc $file, lc $t); +my $length = (length $file > length $t) ? length $file : length $t; + +if($distance / $length < 0.75) { + exit; +} + print "Title of $nick\'s link: $t\n" if length $t;