Replace Text::Levenshtein with Text::Levenshtein::XS

Text::Levenshtein::XS is much, much, much, much faster and more
efficient.

Existing PBot users: Please run `cpanm Text::Levenshtein::XS` to install this
module.
This commit is contained in:
Pragmatic Software 2021-08-27 19:41:51 -07:00
parent 731d795c03
commit d1bb30ef94
8 changed files with 40 additions and 14 deletions

View File

@ -48,7 +48,7 @@ requires 'Socket';
requires 'Storable';
requires 'Symbol';
requires 'Text::CSV';
requires 'Text::Levenshtein';
requires 'Text::Levenshtein::XS';
requires 'Text::ParseWords';
requires 'Time::Duration';
requires 'Time::HiRes';

View File

@ -1093,6 +1093,7 @@ sub cmd_factfind {
$arguments =~ s/\s+/ /g;
$arguments = substr($arguments, 0, 30);
my $argtype = undef;
$argtype = "owned by $owner" if defined $owner and $owner ne '.*';
@ -1117,6 +1118,10 @@ sub cmd_factfind {
if (not defined $argtype) { return $usage; }
if ($channel eq 'global') {
$channel = '\.\*';
}
my ($text, $last_trigger, $last_chan, $i);
$last_chan = "";
$i = 0;

View File

@ -115,7 +115,7 @@ sub interpreter {
# otherwise keyword hasn't been found, display similar matches for all channels
else {
my $namespace = $strictnamespace ? $context->{from} : '.*';
my $namespace = $context->{from};
$namespace = '.*' if $namespace !~ /^#/;
my $namespace_regex = $namespace;

View File

@ -19,7 +19,7 @@ package PBot::Core::Storage::DualIndexHashObject;
use PBot::Imports;
use Text::Levenshtein qw(fastdistance);
use Text::Levenshtein::XS qw(distance);
use JSON;
sub new {
@ -153,8 +153,10 @@ sub levenshtein_matches {
if (not $secondary_index) {
foreach my $index (sort keys %{$self->{hash}}) {
my $distance_result = fastdistance($primary_index, $index);
my $length = (length $primary_index > length $index) ? length $primary_index : length $index;
my $distance_result = distance($primary_index, $index, 20);
next if not defined $distance_result;
my $length = (length $primary_index > length $index) ? length $primary_index : length $index;
if ($distance_result / $length < $distance) {
my $name = $self->get_key_name($index);
@ -180,8 +182,10 @@ sub levenshtein_matches {
}
foreach my $index2 (sort keys %{$self->{hash}->{$index1}}) {
my $distance_result = fastdistance($secondary_index, $index2);
my $length = (length $secondary_index > length $index2) ? length $secondary_index : length $index2;
my $distance_result = distance($secondary_index, $index2, 20);
next if not defined $distance_result;
my $length = (length $secondary_index > length $index2) ? length $secondary_index : length $index2;
if ($distance_result / $length < $distance) {
my $name = $self->get_key_name($index1, $index2);

View File

@ -24,7 +24,7 @@ use PBot::Core::Utils::SQLiteLogger;
use PBot::Core::Utils::SQLiteLoggerLayer;
use DBI;
use Text::Levenshtein qw(fastdistance);
use Text::Levenshtein::XS qw(distance);
sub new {
my ($class, %args) = @_;
@ -238,7 +238,8 @@ sub levenshtein_matches {
my $length_a = length $index1;
foreach my $index (sort $self->get_keys) {
my $distance_result = fastdistance($index1, $index);
my $distance_result = distance($index1, $index, 20);
next if not defined $distance_result;
my $length_b = length $index;
@ -274,7 +275,8 @@ sub levenshtein_matches {
}
foreach my $i2 (sort $self->get_keys($i1)) {
my $distance_result = fastdistance($index2, $i2);
my $distance_result = distance($index2, $i2, 20);
next if not defined $distance_result;
my $length_b = length $i2;
@ -304,6 +306,9 @@ sub levenshtein_matches {
}
$output =~ s/(.*), /$1 or /;
$output = 'none' if not length $output;
return $output;
}

View File

@ -15,7 +15,7 @@ package PBot::Core::Storage::HashObject;
use PBot::Imports;
use Text::Levenshtein qw(fastdistance);
use Text::Levenshtein::XS qw(distance);
use JSON;
sub new {
@ -143,7 +143,8 @@ sub levenshtein_matches {
my @matches;
foreach my $index (sort keys %{$self->{hash}}) {
my $distance = fastdistance($keyword, $index);
my $distance = distance($keyword, $index, 20);
next if not defined $distance;
my $length_a = length $keyword;
my $length_b = length $index;

View File

@ -25,8 +25,8 @@ use PBot::Imports;
# These are set by the /misc/update_version script
use constant {
BUILD_NAME => "PBot",
BUILD_REVISION => 4370,
BUILD_DATE => "2021-08-26",
BUILD_REVISION => 4371,
BUILD_DATE => "2021-08-27",
};
sub initialize {}

View File

@ -0,0 +1,11 @@
#!/usr/bin/env perl
# Update notice: recent updates require a bot restart.
#
# Text::Levenshtein has been replaced with the much, much faster
# Text::Levenshtein::XS.
#
# If you do not have Text::Levenshtein::XS installed, you must install it:
#
# cpanm Text::Levenshtein::XS
#
# This script does no work of its own: it only carries the notice above and
# exits immediately with status 0.
exit 0;