From d1bb30ef94f864e5748a25a637817a3ae1abac01 Mon Sep 17 00:00:00 2001 From: Pragmatic Software Date: Fri, 27 Aug 2021 19:41:51 -0700 Subject: [PATCH] Replace Text::Levenshtein with Text::Levenshtein::XS Text::Levenshtein::XS is much, much, much, much faster and more efficient. Existing PBot users: Please run `cpanm Text::Levenshtein::XS` to install this module. --- cpanfile | 2 +- lib/PBot/Core/Commands/Factoids.pm | 5 +++++ lib/PBot/Core/Factoids/Interpreter.pm | 2 +- lib/PBot/Core/Storage/DualIndexHashObject.pm | 14 +++++++++----- lib/PBot/Core/Storage/DualIndexSQLiteObject.pm | 11 ++++++++--- lib/PBot/Core/Storage/HashObject.pm | 5 +++-- lib/PBot/VERSION.pm | 4 ++-- updates/4371_SEE_COMMIT_MESSAGE_FOR_DETAILS.pl | 11 +++++++++++ 8 files changed, 40 insertions(+), 14 deletions(-) create mode 100755 updates/4371_SEE_COMMIT_MESSAGE_FOR_DETAILS.pl diff --git a/cpanfile b/cpanfile index 58462630..ad0596c4 100644 --- a/cpanfile +++ b/cpanfile @@ -48,7 +48,7 @@ requires 'Socket'; requires 'Storable'; requires 'Symbol'; requires 'Text::CSV'; -requires 'Text::Levenshtein'; +requires 'Text::Levenshtein::XS'; requires 'Text::ParseWords'; requires 'Time::Duration'; requires 'Time::HiRes'; diff --git a/lib/PBot/Core/Commands/Factoids.pm b/lib/PBot/Core/Commands/Factoids.pm index 1766e113..c43a42dd 100644 --- a/lib/PBot/Core/Commands/Factoids.pm +++ b/lib/PBot/Core/Commands/Factoids.pm @@ -1093,6 +1093,7 @@ sub cmd_factfind { $arguments =~ s/\s+/ /g; $arguments = substr($arguments, 0, 30); + my $argtype = undef; $argtype = "owned by $owner" if defined $owner and $owner ne '.*'; @@ -1117,6 +1118,10 @@ sub cmd_factfind { if (not defined $argtype) { return $usage; } + if ($channel eq 'global') { + $channel = '\.\*'; + } + my ($text, $last_trigger, $last_chan, $i); $last_chan = ""; $i = 0; diff --git a/lib/PBot/Core/Factoids/Interpreter.pm b/lib/PBot/Core/Factoids/Interpreter.pm index a25ddf4d..1de0bede 100644 --- a/lib/PBot/Core/Factoids/Interpreter.pm +++ b/lib/PBot/Core/Factoids/Interpreter.pm @@ -115,7 +115,7 @@ sub interpreter { # otherwise keyword hasn't been found, display similiar matches for all channels else { - my $namespace = $strictnamespace ? $context->{from} : '.*'; + my $namespace = $context->{from}; $namespace = '.*' if $namespace !~ /^#/; my $namespace_regex = $namespace; diff --git a/lib/PBot/Core/Storage/DualIndexHashObject.pm b/lib/PBot/Core/Storage/DualIndexHashObject.pm index 85e7a97b..a7945d0b 100644 --- a/lib/PBot/Core/Storage/DualIndexHashObject.pm +++ b/lib/PBot/Core/Storage/DualIndexHashObject.pm @@ -19,7 +19,7 @@ package PBot::Core::Storage::DualIndexHashObject; use PBot::Imports; -use Text::Levenshtein qw(fastdistance); +use Text::Levenshtein::XS qw(distance); use JSON; sub new { @@ -153,8 +153,10 @@ sub levenshtein_matches { if (not $secondary_index) { foreach my $index (sort keys %{$self->{hash}}) { - my $distance_result = fastdistance($primary_index, $index); - my $length = (length $primary_index > length $index) ? length $primary_index : length $index; + my $distance_result = distance($primary_index, $index, 20); + next if not defined $distance_result; + + my $length = (length $primary_index > length $index) ? length $primary_index : length $index; if ($distance_result / $length < $distance) { my $name = $self->get_key_name($index); @@ -180,8 +182,10 @@ sub levenshtein_matches { } foreach my $index2 (sort keys %{$self->{hash}->{$index1}}) { - my $distance_result = fastdistance($secondary_index, $index2); - my $length = (length $secondary_index > length $index2) ? length $secondary_index : length $index2; + my $distance_result = distance($secondary_index, $index2, 20); + next if not defined $distance_result; + + my $length = (length $secondary_index > length $index2) ? length $secondary_index : length $index2; if ($distance_result / $length < $distance) { my $name = $self->get_key_name($index1, $index2); diff --git a/lib/PBot/Core/Storage/DualIndexSQLiteObject.pm b/lib/PBot/Core/Storage/DualIndexSQLiteObject.pm index 575815c2..fc2de02a 100644 --- a/lib/PBot/Core/Storage/DualIndexSQLiteObject.pm +++ b/lib/PBot/Core/Storage/DualIndexSQLiteObject.pm @@ -24,7 +24,7 @@ use PBot::Core::Utils::SQLiteLogger; use PBot::Core::Utils::SQLiteLoggerLayer; use DBI; -use Text::Levenshtein qw(fastdistance); +use Text::Levenshtein::XS qw(distance); sub new { my ($class, %args) = @_; @@ -238,7 +238,8 @@ sub levenshtein_matches { my $length_a = length $index1; foreach my $index (sort $self->get_keys) { - my $distance_result = fastdistance($index1, $index); + my $distance_result = distance($index1, $index, 20); + next if not defined $distance_result; my $length_b = length $index; @@ -274,7 +275,8 @@ sub levenshtein_matches { } foreach my $i2 (sort $self->get_keys($i1)) { - my $distance_result = fastdistance($index2, $i2); + my $distance_result = distance($index2, $i2, 20); + next if not defined $distance_result; my $length_b = length $i2; @@ -304,6 +306,9 @@ sub levenshtein_matches { } $output =~ s/(.*), /$1 or /; + + $output = 'none' if not length $output; + return $output; } diff --git a/lib/PBot/Core/Storage/HashObject.pm b/lib/PBot/Core/Storage/HashObject.pm index 7af19166..02a5aeac 100644 --- a/lib/PBot/Core/Storage/HashObject.pm +++ b/lib/PBot/Core/Storage/HashObject.pm @@ -15,7 +15,7 @@ package PBot::Core::Storage::HashObject; use PBot::Imports; -use Text::Levenshtein qw(fastdistance); +use Text::Levenshtein::XS qw(distance); use JSON; sub new { @@ -143,7 +143,8 @@ sub levenshtein_matches { my @matches; foreach my $index (sort keys %{$self->{hash}}) { - my $distance = fastdistance($keyword, $index); + my $distance = distance($keyword, $index, 20); + next if not defined $distance; my $length_a = length $keyword; my $length_b = length $index; diff --git a/lib/PBot/VERSION.pm b/lib/PBot/VERSION.pm index 1e98bdf1..3034eeb6 100644 --- a/lib/PBot/VERSION.pm +++ b/lib/PBot/VERSION.pm @@ -25,8 +25,8 @@ use PBot::Imports; # These are set by the /misc/update_version script use constant { BUILD_NAME => "PBot", - BUILD_REVISION => 4370, - BUILD_DATE => "2021-08-26", + BUILD_REVISION => 4371, + BUILD_DATE => "2021-08-27", }; sub initialize {} diff --git a/updates/4371_SEE_COMMIT_MESSAGE_FOR_DETAILS.pl b/updates/4371_SEE_COMMIT_MESSAGE_FOR_DETAILS.pl new file mode 100755 index 00000000..6f71b5ee --- /dev/null +++ b/updates/4371_SEE_COMMIT_MESSAGE_FOR_DETAILS.pl @@ -0,0 +1,11 @@ +#!/usr/bin/env perl + +# Recent updates require a bot restart +# +# Replaced Text::Levenshtein with the much, much faster Text::Levenshtein::XS. +# +# If you do not have Text::Levenshtein::XS installed, you must install it: +# +# cpanm Text::Levenshtein::XS + +exit 0;