mirror of
https://github.com/pragma-/pbot.git
synced 2024-12-12 13:59:28 +01:00
d1bb30ef94
Text::Levenshtein::XS is much, much, much, much faster and more efficient. Existing PBot users: Please run `cpanm Text::Levenshtein::XS` to install this module.
456 lines
17 KiB
Perl
456 lines
17 KiB
Perl
# File: DualIndexHashObject.pm
#
# Purpose: Provides a hash-table object with an abstracted API that includes
# setting and deleting values, saving to and loading from files, etc.
#
# DualIndexHashObject extends the HashObject with an additional index key.
# It provides case-insensitive access to both index keys, while preserving
# the original case when displaying the keys.
#
# Data is stored in working memory for lightning-fast performance. If you have
# a huge amount of data, consider using DualIndexSQLiteObject instead.
#
# If a filename is provided, data is written to the file after any modifications.

# SPDX-FileCopyrightText: 2021 Pragmatic Software <pragma78@gmail.com>
# SPDX-License-Identifier: MIT
|
|
|
|
package PBot::Core::Storage::DualIndexHashObject;
|
|
|
|
use PBot::Imports;
|
|
|
|
use Text::Levenshtein::XS qw(distance);
|
|
use JSON;
|
|
|
|
# Constructor.
#
# Takes named arguments. `pbot` is mandatory and is stored on the object;
# every remaining argument is forwarded to initialize().
# Croaks when no `pbot` argument is supplied.
sub new {
    my $class   = shift;
    my %options = @_;

    my $object = bless {}, $class;

    if (not exists $options{pbot}) {
        Carp::croak("Missing pbot reference to " . __FILE__);
    }

    # remove pbot from the options so initialize() only sees configuration
    $object->{pbot} = delete $options{pbot};
    $object->initialize(%options);

    return $object;
}
|
|
|
|
# Sets up the object's configuration and empties the in-memory hash.
#
# Named arguments:
#   name               - label used in log and result messages (default 'unnamed')
#   filename           - file to load from / save to; a warning is emitted
#                        when it is missing
#   save_queue_timeout - when true, save() is enqueued on the event queue
#                        with this timeout instead of running immediately
#                        (default 0)
sub initialize {
    my ($self, %conf) = @_;

    $self->{name} = $conf{name} // 'unnamed';

    # Do not use carp() as the right-hand side of `//`: its (true) return
    # value would be stored as the filename, defeating the
    # "no filename specified" checks in load() and save().
    $self->{filename} = $conf{filename};

    if (not defined $self->{filename}) {
        Carp::carp("Missing filename to DualIndexHashObject, will not be able to save to or load from file.");
    }

    $self->{save_queue_timeout} = $conf{save_queue_timeout} // 0;

    $self->{hash} = {};
}
|
|
|
|
# Loads the hash from a JSON file, replacing the in-memory data.
#
# Parameters:
#   $filename - optional; defaults to the filename given at construction.
#
# Returns early (after warning/logging) when no filename is known or the file
# cannot be opened. An empty file leaves the current hash untouched.
#
# After loading, index keys that are not lowercase are re-keyed under their
# lowercased form and the original spelling is kept in `_name`. Croaks if
# the lowercased key already exists.
sub load {
    my ($self, $filename) = @_;

    $filename = $self->{filename} if not defined $filename;

    if (not defined $filename) {
        Carp::carp("No $self->{name} filename specified -- skipping loading from file");
        return;
    }

    $self->{pbot}->{logger}->log("Loading $self->{name} from $filename\n");

    # Three-argument open with a lexical handle: the filename is taken
    # literally (no mode characters or whitespace stripping) and the handle
    # cannot clash with a global FILE handle used elsewhere.
    my $fh;

    if (not open($fh, '<', $filename)) {
        $self->{pbot}->{logger}->log("Skipping loading from file: Couldn't open $filename: $!\n");
        return;
    }

    my $contents = do {
        local $/;    # slurp mode
        <$fh>;
    };

    # close before decoding so the handle is released even if decoding dies
    close $fh;

    $self->{hash} = decode_json($contents) if length $contents;

    # update existing entries to use _name to preserve case
    # and lowercase any non-lowercased entries
    foreach my $primary_index (keys %{$self->{hash}}) {
        if (not exists $self->{hash}->{$primary_index}->{_name}) {
            if ($primary_index ne lc $primary_index) {
                if (exists $self->{hash}->{lc $primary_index}) {
                    Carp::croak("Cannot update $self->{name} primary index $primary_index; duplicate object found");
                }

                my $data = delete $self->{hash}->{$primary_index};
                $data->{_name} = $primary_index;
                $primary_index = lc $primary_index;    # the inner loop must use the new key
                $self->{hash}->{$primary_index} = $data;
            }
        }

        foreach my $secondary_index (grep { $_ ne '_name' } keys %{$self->{hash}->{$primary_index}}) {
            if (not exists $self->{hash}->{$primary_index}->{$secondary_index}->{_name}) {
                if ($secondary_index ne lc $secondary_index) {
                    if (exists $self->{hash}->{$primary_index}->{lc $secondary_index}) {
                        Carp::croak("Cannot update $self->{name} $primary_index sub-object $secondary_index; duplicate object found");
                    }

                    my $data = delete $self->{hash}->{$primary_index}->{$secondary_index};
                    $data->{_name} = $secondary_index;
                    $secondary_index = lc $secondary_index;
                    $self->{hash}->{$primary_index}->{$secondary_index} = $data;
                }
            }
        }
    }
}
|
|
|
|
# Writes the hash to a file as pretty-printed, canonical, UTF-8 JSON.
#
# Parameters:
#   $filename - optional; defaults to the filename given at construction.
#
# Before writing, the '$metadata$' entry is given an `update_version` (if it
# has none) and the object's name.
#
# When save_queue_timeout is set, the write is handed to the pbot event queue
# (replacing any pending save for this object) instead of running immediately.
#
# Dies if the file cannot be opened or the write cannot be completed.
sub save {
    my $self     = shift;
    my $filename = @_ ? shift : $self->{filename};

    if (not defined $filename) {
        Carp::carp "No $self->{name} filename specified -- skipping saving to file.\n";
        return;
    }

    my $subref = sub {
        $self->{pbot}->{logger}->log("Saving $self->{name} to $filename\n");

        if (not $self->get_data('$metadata$', '$metadata$', 'update_version')) {
            # Pass dont_save: we are already saving. Without it add() would
            # call save() again from inside this save, writing (or enqueueing)
            # a second time.
            $self->add('$metadata$', '$metadata$', { update_version => PBot::VERSION::BUILD_REVISION }, 1);
        }

        $self->set('$metadata$', '$metadata$', 'name', $self->{name}, 1);

        my $json      = JSON->new;
        my $json_text = $json->pretty->canonical->utf8->encode($self->{hash});

        # Three-argument open with a lexical handle; the filename is never
        # interpreted as a mode and the handle is private to this closure.
        open(my $fh, '>', $filename) or die "Couldn't open $filename: $!\n";
        print {$fh} "$json_text\n";

        # buffered write errors only surface at close
        close($fh) or die "Couldn't write $filename: $!\n";
    };

    if ($self->{save_queue_timeout}) {
        # enqueue the save to prevent save-thrashing
        $self->{pbot}->{event_queue}->replace_subref_or_enqueue_event(
            $subref,
            $self->{save_queue_timeout},
            "save $self->{name}",
        );
    } else {
        # execute it right now
        $subref->();
    }
}
|
|
|
|
# Discards all in-memory data by replacing the hash with a fresh empty one.
# Nothing is written to the file here.
sub clear {
    my ($self) = @_;

    $self->{hash} = {};
}
|
|
|
|
# Builds a human-readable list of index names that are similar to the
# requested one, for "not found; similar matches" messages.
#
# Parameters:
#   $primary_index   - primary index to look for (default '.*')
#   $secondary_index - when true, secondary indexes are compared instead of
#                      primary indexes
#   $distance        - maximum edit distance as a fraction of the longer
#                      string's length (default 0.60)
#   $strictnamespace - when true, only the '.*' and the requested primary
#                      index are searched
#
# Returns a string such as `foo, "bar baz" or qux`, or 'none' when nothing
# matches (or the primary index is unknown in secondary mode).
sub levenshtein_matches {
    my ($self, $primary_index, $secondary_index, $distance, $strictnamespace) = @_;

    my $comma  = '';
    my $result = "";

    $distance = 0.60 if not defined $distance;

    $primary_index = '.*' if not defined $primary_index;

    if (not $secondary_index) {
        # stored keys are lowercase, so compare against the lowercased term
        my $wanted = lc $primary_index;

        foreach my $index (sort keys %{$self->{hash}}) {
            # NOTE(review): presumably undef when the distance exceeds the
            # maximum of 20 -- confirm against Text::Levenshtein::XS
            my $distance_result = distance($wanted, $index, 20);
            next if not defined $distance_result;

            my $length = (length($wanted) > length($index)) ? length($wanted) : length($index);

            # two empty strings are identical; avoid dividing by zero
            my $ratio = $length ? $distance_result / $length : 0;

            if ($ratio < $distance) {
                my $name = $self->get_key_name($index);
                if ($name =~ / /) { $result .= $comma . "\"$name\""; }
                else              { $result .= $comma . $name; }
                $comma = ", ";
            }
        }
    } else {
        my $lc_primary_index = lc $primary_index;
        if (not exists $self->{hash}->{$lc_primary_index}) { return 'none'; }

        # stored keys are lowercase, so compare against the lowercased term
        my $wanted = lc $secondary_index;

        my $last_header = "";
        my $header      = "";

        foreach my $index1 (sort keys %{$self->{hash}}) {
            $header = "[" . $self->get_key_name($index1) . "] ";
            $header = '[global] ' if $header eq '[.*] ';

            if ($strictnamespace) {
                next unless $index1 eq '.*' or $index1 eq $lc_primary_index;
                $header = "" unless $header eq '[global] ';
            }

            foreach my $index2 (sort keys %{$self->{hash}->{$index1}}) {
                # `_name` holds the display name of the primary index (a plain
                # string), not an entry; get_key_name() would dereference it
                next if $index2 eq '_name';

                my $distance_result = distance($wanted, $index2, 20);
                next if not defined $distance_result;

                my $length = (length($wanted) > length($index2)) ? length($wanted) : length($index2);

                # two empty strings are identical; avoid dividing by zero
                my $ratio = $length ? $distance_result / $length : 0;

                if ($ratio < $distance) {
                    my $name = $self->get_key_name($index1, $index2);

                    # print each group header only once, before its first match
                    $header      = "" if $last_header eq $header;
                    $last_header = $header;

                    # groups are separated by '; ', matches within a group by ', '
                    $comma = '; ' if $comma ne '' and $header ne '';

                    if ($name =~ / /) { $result .= $comma . $header . "\"$name\""; }
                    else              { $result .= $comma . $header . $name; }
                    $comma = ", ";
                }
            }
        }
    }

    # turn the last ', ' into ' or '
    $result =~ s/(.*), /$1 or /;
    $result = 'none' if $comma eq '';
    return $result;
}
|
|
|
|
# Sets, shows or lists the data keys of an existing entry.
#
# Parameters:
#   $primary_index, $secondary_index - entry to operate on (case-insensitive)
#   $key       - data key; when undefined, all keys of the entry are listed
#   $value     - new value; when undefined, the current value is reported
#   $dont_save - when true, the change is not written to the file
#
# Returns a message describing the outcome. When either index does not exist,
# the message lists similar index names instead.
sub set {
    my ($self, $primary_index, $secondary_index, $key, $value, $dont_save) = @_;

    my $lc_primary   = lc $primary_index;
    my $lc_secondary = lc $secondary_index;
    my $store        = $self->{hash};

    unless (exists $store->{$lc_primary}) {
        my $message = "$self->{name}: $primary_index not found; similiar matches: ";
        return $message . $self->levenshtein_matches($primary_index);
    }

    unless (exists $store->{$lc_primary}->{$lc_secondary}) {
        my $shown;
        if ($secondary_index =~ / /) {
            $shown = "\"$secondary_index\"";
        } else {
            $shown = $secondary_index;
        }

        my $message = "$self->{name}: [" . $self->get_key_name($lc_primary) . "] $shown not found; similiar matches: ";
        return $message . $self->levenshtein_matches($primary_index, $secondary_index);
    }

    # display names for the result messages
    my $name1 = $self->get_key_name($lc_primary);
    my $name2 = $self->get_key_name($lc_primary, $lc_secondary);

    if ($name1 eq '.*') {
        $name1 = 'global';
    }

    if ($name2 =~ / /) {
        $name2 = "\"$name2\"";
    }

    # no key given: list every data key of the entry
    unless (defined $key) {
        my @listing;

        for my $field (sort keys %{$store->{$lc_primary}->{$lc_secondary}}) {
            next if $field eq '_name';
            push @listing, "$field: " . $store->{$lc_primary}->{$lc_secondary}->{$field};
        }

        return "[$name1] $name2 keys:\n" . (@listing ? join(";\n", @listing) : "none");
    }

    if (defined $value) {
        $store->{$lc_primary}->{$lc_secondary}->{$key} = $value;
        $self->save unless $dont_save;
    } else {
        # no value given: report the current one
        $value = $store->{$lc_primary}->{$lc_secondary}->{$key};
    }

    my $status = defined $value ? "set to $value" : "is not set.";
    return "[$name1] $name2: $key " . $status;
}
|
|
|
|
# Removes a single data key from an existing entry and saves.
#
# Parameters:
#   $primary_index, $secondary_index - entry to operate on (case-insensitive)
#   $key - data key to delete
#
# Returns a message describing the outcome. When either index does not exist,
# the message lists similar index names instead.
sub unset {
    my ($self, $primary_index, $secondary_index, $key) = @_;

    my $lc_primary_index   = lc $primary_index;
    my $lc_secondary_index = lc $secondary_index;

    if (not exists $self->{hash}->{$lc_primary_index}) {
        my $result = "$self->{name}: $primary_index not found; similiar matches: ";
        $result .= $self->levenshtein_matches($primary_index);
        return $result;
    }

    my $name1 = $self->get_key_name($lc_primary_index);
    $name1 = 'global' if $name1 eq '.*';

    if (not exists $self->{hash}->{$lc_primary_index}->{$lc_secondary_index}) {
        my $result = "$self->{name}: [$name1] $secondary_index not found; similiar matches: ";
        $result .= $self->levenshtein_matches($primary_index, $secondary_index);
        return $result;
    }

    my $name2 = $self->get_key_name($lc_primary_index, $lc_secondary_index);
    $name2 = "\"$name2\"" if $name2 =~ / /;

    # Test with exists rather than `defined delete`: a key whose stored value
    # is undef (e.g. JSON null) is still present, and deleting it is a
    # modification that has to be reported and saved.
    if (exists $self->{hash}->{$lc_primary_index}->{$lc_secondary_index}->{$key}) {
        delete $self->{hash}->{$lc_primary_index}->{$lc_secondary_index}->{$key};
        $self->save;
        return "$self->{name}: [$name1] $name2: $key unset.";
    }

    return "$self->{name}: [$name1] $name2: $key does not exist.";
}
|
|
|
|
# Tests whether a primary index, an entry, or a data key is present.
#
# Each argument after $primary_index is optional; the check stops at the
# first undefined argument. Index names are matched case-insensitively,
# $data_index is matched exactly.
#
# Returns 0 when $primary_index is undefined or a level is missing,
# otherwise a true value.
sub exists {
    my ($self, $primary_index, $secondary_index, $data_index) = @_;

    return 0 unless defined $primary_index;

    my $table = $self->{hash};
    my $pkey  = lc $primary_index;

    return 0 unless exists $table->{$pkey};
    return 1 unless defined $secondary_index;

    my $skey = lc $secondary_index;

    return 0 unless exists $table->{$pkey}->{$skey};
    return 1 unless defined $data_index;

    return exists $table->{$pkey}->{$skey}->{$data_index};
}
|
|
|
|
# Returns the display name of an index.
#
# With only $primary_index, returns the primary index's stored `_name`;
# with $secondary_index as well, returns the entry's stored `_name`.
# Falls back to the lowercased index when no `_name` is stored or when the
# index does not exist. An unknown primary index always yields the
# lowercased primary index, even if a secondary index was given.
sub get_key_name {
    my ($self, $primary_index, $secondary_index) = @_;

    my $table      = $self->{hash};
    my $lc_primary = lc $primary_index;

    unless (exists $table->{$lc_primary}) {
        return $lc_primary;
    }

    unless (defined $secondary_index) {
        return exists $table->{$lc_primary}->{_name}
            ? $table->{$lc_primary}->{_name}
            : $lc_primary;
    }

    my $lc_secondary = lc $secondary_index;

    unless (exists $table->{$lc_primary}->{$lc_secondary}) {
        return $lc_secondary;
    }

    return exists $table->{$lc_primary}->{$lc_secondary}->{_name}
        ? $table->{$lc_primary}->{$lc_secondary}->{_name}
        : $lc_secondary;
}
|
|
|
|
# Lists keys at one of three levels, depending on the arguments given:
#
#   no arguments           - all primary indexes except '$metadata$'
#   $primary_index         - that index's secondary indexes, without
#                            '_name' and '$metadata$'
#   both indexes           - the entry's data keys, without '_name'
#
# Index names are matched case-insensitively. Returns an empty list when the
# requested index or entry does not exist.
sub get_keys {
    my ($self, $primary_index, $secondary_index) = @_;

    my $table = $self->{hash};

    unless (defined $primary_index) {
        return grep { $_ ne '$metadata$' } keys %{$table};
    }

    my $pkey = lc $primary_index;

    unless (defined $secondary_index) {
        return () unless exists $table->{$pkey};
        return grep { $_ ne '_name' and $_ ne '$metadata$' } keys %{$table->{$pkey}};
    }

    my $skey = lc $secondary_index;

    unless (exists $table->{$pkey} and exists $table->{$pkey}->{$skey}) {
        return ();
    }

    return grep { $_ ne '_name' } keys %{$table->{$pkey}->{$skey}};
}
|
|
|
|
# Fetches data at one of three levels, depending on the arguments given:
#
#   $primary_index                 - the primary index's hash reference
#   + $secondary_index             - the entry's hash reference
#   + $data_index                  - the value stored under that data key
#
# Index names are matched case-insensitively; $data_index is matched exactly.
# Returns undef when any requested level does not exist. The lookup never
# creates missing levels.
sub get_data {
    my ($self, $primary_index, $secondary_index, $data_index) = @_;

    $primary_index   = lc $primary_index   if defined $primary_index;
    $secondary_index = lc $secondary_index if defined $secondary_index;

    # `return undef` (not a bare return) is kept deliberately so the result
    # stays a single value when the method is called in list context.
    return undef if not defined $primary_index;
    return undef if not exists $self->{hash}->{$primary_index};
    return $self->{hash}->{$primary_index} if not defined $secondary_index;

    # Stop here when the entry is missing: dereferencing one level further
    # would autovivify an empty entry, which exists() would then report and
    # save() would write to the file.
    return undef if not exists $self->{hash}->{$primary_index}->{$secondary_index};

    return $self->{hash}->{$primary_index}->{$secondary_index} if not defined $data_index;
    return $self->{hash}->{$primary_index}->{$secondary_index}->{$data_index};
}
|
|
|
|
# Adds an entry, or fills in missing keys of an existing entry.
#
# Parameters:
#   $primary_index, $secondary_index - where to store the entry; stored
#       lowercased, with the original spelling kept in `_name`
#   $data      - hash reference with the entry's data. It is stored by
#                reference for a new entry; for an existing entry only keys
#                that are not yet present are copied over.
#   $dont_save - when true, nothing is written to the file and nothing is logged
#   $quiet     - when true, nothing is logged
#
# Returns a message stating that the entry was added.
sub add {
    my ($self, $primary_index, $secondary_index, $data, $dont_save, $quiet) = @_;

    my $lc_primary   = lc $primary_index;
    my $lc_secondary = lc $secondary_index;
    my $table        = $self->{hash};

    # preserve case of a primary index that is being created
    if (not exists $table->{$lc_primary} and $primary_index ne $lc_primary) {
        $table->{$lc_primary}->{_name} = $primary_index;
    }

    # preserve case of the secondary index
    $data->{_name} = $secondary_index if $secondary_index ne $lc_secondary;

    if (exists $table->{$lc_primary}->{$lc_secondary}) {
        # existing entry: never overwrite, only add what is missing
        for my $field (keys %{$data}) {
            next if exists $table->{$lc_primary}->{$lc_secondary}->{$field};
            $table->{$lc_primary}->{$lc_secondary}->{$field} = $data->{$field};
        }
    } else {
        $table->{$lc_primary}->{$lc_secondary} = $data;
    }

    $self->save() unless $dont_save;

    # display names for the result message
    my $name1 = $self->get_key_name($lc_primary);
    my $name2 = $self->get_key_name($lc_primary, $lc_secondary);

    if ($name1 eq '.*') {
        $name1 = 'global';
    }

    if ($name2 =~ / /) {
        $name2 = "\"$name2\"";
    }

    unless ($dont_save or $quiet) {
        $self->{pbot}->{logger}->log("$self->{name}: [$name1]: $name2 added.\n");
    }

    return "$self->{name}: [$name1]: $name2 added.";
}
|
|
|
|
# Removes a primary index, an entry, or a single data key.
#
# Parameters:
#   $primary_index   - primary index (case-insensitive)
#   $secondary_index - optional; when undefined the whole primary index is removed
#   $data_index      - optional; when undefined the whole entry is removed,
#                      otherwise only that data key
#   $dont_save       - when true, the change is not written to the file
#
# Removing the last entry of a primary index removes the primary index too.
#
# Returns a message describing the outcome. When an index does not exist,
# the message lists similar index names instead.
sub remove {
    my ($self, $primary_index, $secondary_index, $data_index, $dont_save) = @_;

    my $lc_primary_index = lc $primary_index;

    # the secondary index is optional; do not lowercase undef
    my $lc_secondary_index = defined $secondary_index ? lc $secondary_index : undef;

    if (not exists $self->{hash}->{$lc_primary_index}) {
        my $result = "$self->{name}: $primary_index not found; similiar matches: ";
        $result .= $self->levenshtein_matches($primary_index);
        return $result;
    }

    if (not defined $secondary_index) {
        my $data = delete $self->{hash}->{$lc_primary_index};

        if (defined $data) {
            my $name = exists $data->{_name} ? $data->{_name} : $lc_primary_index;
            $name = 'global' if $name eq '.*';
            $self->save unless $dont_save;
            return "$self->{name}: $name removed.";
        } else {
            return "$self->{name}: $primary_index does not exist.";
        }
    }

    my $name1 = $self->get_key_name($lc_primary_index);
    $name1 = 'global' if $name1 eq '.*';

    if (not exists $self->{hash}->{$lc_primary_index}->{$lc_secondary_index}) {
        my $result = "$self->{name}: [$name1] $secondary_index not found; similiar matches: ";
        $result .= $self->levenshtein_matches($primary_index, $secondary_index);
        return $result;
    }

    if (not defined $data_index) {
        my $data = delete $self->{hash}->{$lc_primary_index}->{$lc_secondary_index};

        if (defined $data) {
            my $name2 = exists $data->{_name} ? $data->{_name} : $lc_secondary_index;
            $name2 = "\"$name2\"" if $name2 =~ / /;

            # remove primary group if no more secondaries
            if ((grep { $_ ne '_name' } keys %{$self->{hash}->{$lc_primary_index}}) == 0) {
                delete $self->{hash}->{$lc_primary_index};
            }

            $self->save unless $dont_save;
            return "$self->{name}: [$name1] $name2 removed.";
        } else {
            return "$self->{name}: [$name1] $secondary_index does not exist.";
        }
    }

    my $name2 = $self->get_key_name($lc_primary_index, $lc_secondary_index);

    # Test with exists rather than `defined delete`: a key whose stored value
    # is undef is still present and must be reported as removed.
    if (exists $self->{hash}->{$lc_primary_index}->{$lc_secondary_index}->{$data_index}) {
        delete $self->{hash}->{$lc_primary_index}->{$lc_secondary_index}->{$data_index};

        # persist the change like the other removal branches do
        $self->save unless $dont_save;
        return "$self->{name}: [$name1] $name2.$data_index removed.";
    }

    return "$self->{name}: [$name1] $name2.$data_index does not exist.";
}
|
|
|
|
# for compatibility with DualIndexSQLiteObject
|
|
sub create_metadata {
    # Deliberately does nothing and returns nothing.
    return;
}
|
|
|
|
# todo:
|
|
sub get_each {
    # Placeholder: not implemented yet; returns nothing.
    return;
}
|
|
sub get_next {
    # Placeholder: not implemented yet; returns nothing.
    return;
}
|
|
sub get_all {
    # Placeholder: not implemented yet; returns nothing.
    return;
}
|
|
|
|
1;
|