3
0
mirror of https://github.com/pragma-/pbot.git synced 2025-01-12 21:12:33 +01:00
pbot/PBot/DualIndexHashObject.pm

386 lines
15 KiB
Perl
Raw Normal View History

# File: DualIndexHashObject.pm
# Author: pragma_
#
2019-06-26 18:34:19 +02:00
# Purpose: Provides a hash-table object with an abstracted API that includes
# setting and deleting values, saving to and loading from files, etc. This
# extends the HashObject with an additional index key. Provides case-insensitive
# access to both index keys, while preserving original case when displaying the
# keys.
#
# Data is stored in working memory for lightning fast performance. If you have
# a huge amount of data, consider DualIndexSQLiteObject instead.
License project under MPL2 This patch adds the file LICENSE which is the verbatim copy of the Mozilla Public License Version 2.0 as retreived from https://www.mozilla.org/media/MPL/2.0/index.815ca599c9df.txt on 2017-03-05. This patch also places license headers for the MPL2 type A variant of the license header in the following files: PBot/AntiFlood.pm PBot/BanTracker.pm PBot/BlackList.pm PBot/BotAdminCommands.pm PBot/BotAdmins.pm PBot/ChanOpCommands.pm PBot/ChanOps.pm PBot/Channels.pm PBot/Commands.pm PBot/DualIndexHashObject.pm PBot/EventDispatcher.pm PBot/FactoidCommands.pm PBot/FactoidModuleLauncher.pm PBot/Factoids.pm PBot/HashObject.pm PBot/IRCHandlers.pm PBot/IgnoreList.pm PBot/IgnoreListCommands.pm PBot/Interpreter.pm PBot/LagChecker.pm PBot/Logger.pm PBot/MessageHistory.pm PBot/MessageHistory_SQLite.pm PBot/NickList.pm PBot/PBot.pm PBot/Plugins.pm PBot/Plugins/AntiAway.pm PBot/Plugins/AntiKickAutoRejoin.pm PBot/Plugins/AntiRepeat.pm PBot/Plugins/AntiTwitter.pm PBot/Plugins/AutoRejoin.pm PBot/Plugins/Counter.pm PBot/Plugins/Quotegrabs.pm PBot/Plugins/Quotegrabs/Quotegrabs_Hashtable.pm PBot/Plugins/Quotegrabs/Quotegrabs_SQLite.pm PBot/Plugins/UrlTitles.pm PBot/Plugins/_Example.pm PBot/Refresher.pm PBot/Registerable.pm PBot/Registry.pm PBot/RegistryCommands.pm PBot/SQLiteLogger.pm PBot/SQLiteLoggerLayer.pm PBot/SelectHandler.pm PBot/StdinReader.pm PBot/Timer.pm PBot/Utils/ParseDate.pm PBot/VERSION.pm build/update-version.pl modules/acronym.pl modules/ago.pl modules/c11std.pl modules/c2english.pl modules/c2english/CGrammar.pm modules/c2english/c2eng.pl modules/c99std.pl modules/cdecl.pl modules/cfaq.pl modules/cjeopardy/IRCColors.pm modules/cjeopardy/QStatskeeper.pm modules/cjeopardy/Scorekeeper.pm modules/cjeopardy/cjeopardy.pl modules/cjeopardy/cjeopardy_answer.pl modules/cjeopardy/cjeopardy_filter.pl modules/cjeopardy/cjeopardy_hint.pl modules/cjeopardy/cjeopardy_qstats.pl modules/cjeopardy/cjeopardy_scores.pl modules/cjeopardy/cjeopardy_show.pl 
modules/codepad.pl modules/compiler_block.pl modules/compiler_client.pl modules/compiler_vm/Diff.pm modules/compiler_vm/cc modules/compiler_vm/compiler_client.pl modules/compiler_vm/compiler_server.pl modules/compiler_vm/compiler_server_vbox_win32.pl modules/compiler_vm/compiler_server_watchdog.pl modules/compiler_vm/compiler_vm_client.pl modules/compiler_vm/compiler_vm_server.pl modules/compiler_vm/compiler_watchdog.pl modules/compiler_vm/languages/_c_base.pm modules/compiler_vm/languages/_default.pm modules/compiler_vm/languages/bash.pm modules/compiler_vm/languages/bc.pm modules/compiler_vm/languages/bf.pm modules/compiler_vm/languages/c11.pm modules/compiler_vm/languages/c89.pm modules/compiler_vm/languages/c99.pm modules/compiler_vm/languages/clang.pm modules/compiler_vm/languages/clang11.pm modules/compiler_vm/languages/clang89.pm modules/compiler_vm/languages/clang99.pm modules/compiler_vm/languages/clangpp.pm modules/compiler_vm/languages/clisp.pm modules/compiler_vm/languages/cpp.pm modules/compiler_vm/languages/freebasic.pm modules/compiler_vm/languages/go.pm modules/compiler_vm/languages/haskell.pm modules/compiler_vm/languages/java.pm modules/compiler_vm/languages/javascript.pm modules/compiler_vm/languages/ksh.pm modules/compiler_vm/languages/lua.pm modules/compiler_vm/languages/perl.pm modules/compiler_vm/languages/python.pm modules/compiler_vm/languages/python3.pm modules/compiler_vm/languages/qbasic.pm modules/compiler_vm/languages/scheme.pm modules/compiler_vm/languages/server/_c_base.pm modules/compiler_vm/languages/server/_default.pm modules/compiler_vm/languages/server/c11.pm modules/compiler_vm/languages/server/c89.pm modules/compiler_vm/languages/server/c99.pm modules/compiler_vm/languages/server/clang.pm modules/compiler_vm/languages/server/clang11.pm modules/compiler_vm/languages/server/clang89.pm modules/compiler_vm/languages/server/clang99.pm modules/compiler_vm/languages/server/cpp.pm modules/compiler_vm/languages/server/freebasic.pm 
modules/compiler_vm/languages/server/haskell.pm modules/compiler_vm/languages/server/java.pm modules/compiler_vm/languages/server/qbasic.pm modules/compiler_vm/languages/server/tendra.pm modules/compiler_vm/languages/sh.pm modules/compiler_vm/languages/tendra.pm modules/compliment modules/cstd.pl modules/define.pl modules/dice_roll.pl modules/excuse.sh modules/expand_macros.pl modules/fnord.pl modules/funnyish_quote.pl modules/g.pl modules/gdefine.pl modules/gen_cfacts.pl modules/gencstd.pl modules/get_title.pl modules/getcfact.pl modules/google.pl modules/gspy.pl modules/gtop10.pl modules/gtop15.pl modules/headlines.pl modules/horoscope modules/horrorscope modules/ideone.pl modules/insult.pl modules/love_quote.pl modules/man.pl modules/map.pl modules/math.pl modules/prototype.pl modules/qalc.pl modules/random_quote.pl modules/seen.pl modules/urban modules/weather.pl modules/wikipedia.pl pbot.pl pbot.sh It is highly recommended that this list of files is reviewed to ensure that all files are the copyright of the sole maintainer of the repository. If any files with license headers contain the intellectual property of anyone else, it is recommended that a request is made to revise this patch or that the explicit permission of the co-author is gained to allow for the license of the work to be changed. I (Tomasz Kramkowski), the contributor, take no responsibility for any legal action taken against the maintainer of this repository for incorrectly claiming copyright to any work not owned by the maintainer of this repository.
2017-03-05 22:33:31 +01:00
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
package PBot::DualIndexHashObject;
use warnings; use strict;
2019-07-11 03:40:53 +02:00
use feature 'unicode_strings';
use Text::Levenshtein qw(fastdistance);
use JSON;
# Constructor.
#
# Arguments (key/value pairs):
#   pbot     - required; stored on the object as $self->{pbot}
#   name     - optional; forwarded to initialize()
#   filename - optional; forwarded to initialize()
#
# May be invoked on a class name or on an existing instance (in which case
# the new object is blessed into the instance's class). Croaks when the
# 'pbot' key is absent. Returns the new object.
sub new {
    my $invocant = shift;
    my %conf     = @_;

    my $self = bless {}, (ref($invocant) || $invocant);

    if (not exists $conf{pbot}) {
        Carp::croak("Missing pbot reference to " . __FILE__);
    }

    $self->{pbot} = $conf{pbot};
    $self->initialize(%conf);

    return $self;
}
# Sets up the object's name, backing filename and empty in-memory hash.
#
# Arguments (key/value pairs):
#   name     - optional display name; defaults to 'Dual Index hash object'
#   filename - optional path used by load() and save()
#
# When no filename is supplied a warning is emitted and the filename is left
# undefined, which load() and save() detect and skip.
sub initialize {
    my ($self, %conf) = @_;

    $self->{name}     = $conf{name} // 'Dual Index hash object';
    $self->{filename} = $conf{filename};

    # Warn as a separate statement: carp() returns a true value, so using it
    # as the right-hand side of `//` would store 1 as the filename.
    if (not defined $self->{filename}) {
        Carp::carp("Missing filename to DualIndexHashObject, will not be able to save to or load from file.");
    }

    $self->{hash} = {};
}
# Loads the hash from a JSON file, replacing the in-memory contents.
#
# Arguments:
#   $filename - optional; defaults to $self->{filename}
#
# If no filename is available a warning is emitted and nothing is loaded. If
# the file cannot be opened the failure is logged and nothing is loaded.
# An empty file leaves the current hash untouched.
#
# After loading, entries are normalised: every primary and secondary entry
# gets a `_name` field holding its original spelling, and entries lacking
# `_name` whose key is not lowercase are re-keyed under the lowercased key.
# Croaks if such a re-key would collide with an existing lowercase entry.
sub load {
    my ($self, $filename) = @_;
    $filename = $self->{filename} if not defined $filename;

    if (not defined $filename) {
        Carp::carp("No $self->{name} filename specified -- skipping loading from file");
        return;
    }

    $self->{pbot}->{logger}->log("Loading $self->{name} from $filename ...\n");

    # Three-argument open with a lexical handle: the filename is never parsed
    # for mode characters or whitespace, and no global handle is clobbered.
    my $fh;
    if (not open($fh, '<', $filename)) {
        $self->{pbot}->{logger}->log("Skipping loading from file: Couldn't open $filename: $!\n");
        return;
    }

    my $contents = do {
        local $/;    # slurp the whole file
        <$fh>;
    };
    close $fh;

    # decode_json expects raw UTF-8 bytes, so the handle has no encoding layer.
    $self->{hash} = decode_json($contents) if defined $contents and length $contents;

    # update existing entries to use _name to preserve case
    # and lowercase any non-lowercased entries
    foreach my $primary_index (keys %{$self->{hash}}) {
        if (not exists $self->{hash}->{$primary_index}->{_name}) {
            if (lc $primary_index eq $primary_index) {
                $self->{hash}->{$primary_index}->{_name} = $primary_index;
            }
            else {
                if (exists $self->{hash}->{lc $primary_index}) {
                    Carp::croak("Cannot update $self->{name} primary index $primary_index; duplicate object found");
                }

                my $data = delete $self->{hash}->{$primary_index};
                $data->{_name} = $primary_index;
                $primary_index = lc $primary_index;
                $self->{hash}->{$primary_index} = $data;
            }
        }

        foreach my $secondary_index (keys %{$self->{hash}->{$primary_index}}) {
            next if $secondary_index eq '_name';
            next if exists $self->{hash}->{$primary_index}->{$secondary_index}->{_name};

            if (lc $secondary_index eq $secondary_index) {
                $self->{hash}->{$primary_index}->{$secondary_index}->{_name} = $secondary_index;
            }
            else {
                if (exists $self->{hash}->{$primary_index}->{lc $secondary_index}) {
                    Carp::croak("Cannot update $self->{name} $primary_index sub-object $secondary_index; duplicate object found");
                }

                my $data = delete $self->{hash}->{$primary_index}->{$secondary_index};
                $data->{_name} = $secondary_index;
                $self->{hash}->{$primary_index}->{lc $secondary_index} = $data;
            }
        }
    }
}
# Writes the in-memory hash to a file as pretty-printed, canonically ordered,
# UTF-8 encoded JSON followed by a newline.
#
# Arguments:
#   $filename - optional; when no argument is passed, $self->{filename} is used
#
# If no filename is available a warning is emitted and nothing is written.
# Dies if the file cannot be opened, written or closed.
sub save {
    my $self     = shift;
    my $filename = @_ ? shift : $self->{filename};

    if (not defined $filename) {
        Carp::carp("No $self->{name} filename specified -- skipping saving to file.\n");
        return;
    }

    $self->{pbot}->{logger}->log("Saving $self->{name} to $filename\n");

    my $json      = JSON->new;
    my $json_text = $json->pretty->canonical->utf8->encode($self->{hash});

    # Three-argument open with a lexical handle: the filename cannot be
    # misread as a mode, and no global handle is shared with other code.
    open(my $fh, '>', $filename) or die "Couldn't open $filename: $!\n";
    print {$fh} "$json_text\n" or die "Couldn't write $filename: $!\n";

    # Buffered write errors (e.g. a full disk) only surface at close.
    close($fh) or die "Couldn't close $filename: $!\n";
}
# Discards every entry by replacing the in-memory hash with a fresh empty
# one. Nothing is written to disk. Returns the new (empty) hash reference.
sub clear {
    my ($self) = @_;
    return $self->{hash} = {};
}
# Builds a human-readable list of index names that are close to the given
# name, for use in "not found; similar matches" messages.
#
# Arguments:
#   $primary_index   - primary index to look for; defaults to '.*'
#   $secondary_index - when true, secondary indexes are searched instead of
#                      primary ones
#   $distance        - threshold on (edit distance / length of the longer
#                      string); candidates strictly below it are reported.
#                      Defaults to 0.60
#   $strictnamespace - when true, only the '.*' group and the given primary
#                      group are searched for secondary matches
#
# Returns the matches joined with ", " (the last separator becomes " or "),
# with names containing spaces wrapped in double quotes, or 'none'. When
# searching secondaries, each group's matches are prefixed with "[group] "
# ('.*' is shown as "[global] ") and groups are separated by "; ".
sub levenshtein_matches {
    my ($self, $primary_index, $secondary_index, $distance, $strictnamespace) = @_;
    my $comma  = '';
    my $result = "";

    $distance      = 0.60 if not defined $distance;
    $primary_index = '.*' if not defined $primary_index;

    if (not $secondary_index) {
        # Stored keys are lowercase; lowercase the query too so that case
        # differences are not counted as edits.
        my $wanted = lc $primary_index;

        foreach my $index (sort keys %{$self->{hash}}) {
            my $distance_result = fastdistance($wanted, $index);
            my $length = (length($wanted) > length($index)) ? length($wanted) : length($index);

            # Two empty strings are identical; avoid dividing by zero.
            my $ratio = $length ? $distance_result / $length : 0;

            if ($ratio < $distance) {
                my $name = $self->{hash}->{$index}->{_name};
                if ($name =~ / /) { $result .= $comma . "\"$name\""; }
                else              { $result .= $comma . $name; }
                $comma = ", ";
            }
        }
    } else {
        my $lc_primary_index = lc $primary_index;
        if (not exists $self->{hash}->{$lc_primary_index}) { return 'none'; }

        my $wanted      = lc $secondary_index;
        my $last_header = "";
        my $header      = "";

        foreach my $index1 (sort keys %{$self->{hash}}) {
            $header = "[$self->{hash}->{$index1}->{_name}] ";
            $header = '[global] ' if $header eq '[.*] ';

            if ($strictnamespace) {
                next unless $index1 eq '.*' or $index1 eq $lc_primary_index;
                $header = "" unless $header eq '[global] ';
            }

            foreach my $index2 (sort keys %{$self->{hash}->{$index1}}) {
                # `_name` holds the group's display name (a string), not a
                # secondary entry; treating it as one would dereference a
                # string as a hash.
                next if $index2 eq '_name';

                my $distance_result = fastdistance($wanted, $index2);
                my $length = (length($wanted) > length($index2)) ? length($wanted) : length($index2);
                my $ratio  = $length ? $distance_result / $length : 0;

                if ($ratio < $distance) {
                    my $name = $self->{hash}->{$index1}->{$index2}->{_name};

                    # Print each group header only once.
                    $header      = "" if $last_header eq $header;
                    $last_header = $header;
                    $comma       = '; ' if $comma ne '' and $header ne '';

                    if ($name =~ / /) { $result .= $comma . $header . "\"$name\""; }
                    else              { $result .= $comma . $header . $name; }
                    $comma = ", ";
                }
            }
        }
    }

    # Turn the last ", " into " or " for readability.
    $result =~ s/(.*), /$1 or /;
    $result = 'none' if $comma eq '';
    return $result;
}
# Views or changes a key on an existing entry.
#
# Arguments:
#   $primary_index, $secondary_index - identify the entry (case-insensitive)
#   $key       - optional; when undefined, all keys of the entry are listed
#   $value     - optional; when undefined, the current value is reported
#   $dont_save - when true, the change is not written to disk
#
# Returns a message string: a "not found" message with similar matches when
# either index is unknown, a listing of keys, or the key's (new) value.
sub set {
    my ($self, $primary_index, $secondary_index, $key, $value, $dont_save) = @_;

    my $lc_primary   = lc $primary_index;
    my $lc_secondary = lc $secondary_index;
    my $hash         = $self->{hash};

    unless (exists $hash->{$lc_primary}) {
        return "$self->{name}: $primary_index not found; similiar matches: "
            . $self->levenshtein_matches($primary_index);
    }

    unless (exists $hash->{$lc_primary}->{$lc_secondary}) {
        my $shown = $secondary_index;
        $shown = "\"$secondary_index\"" if $secondary_index =~ / /;
        return "$self->{name}: [$hash->{$lc_primary}->{_name}] $shown not found; similiar matches: "
            . $self->levenshtein_matches($primary_index, $secondary_index);
    }

    my $entry = $hash->{$lc_primary}->{$lc_secondary};

    # Display names: the '.*' group is shown as "global", and names
    # containing spaces are quoted.
    my $group_name = $hash->{$lc_primary}->{_name};
    my $entry_name = $entry->{_name};
    $group_name = 'global' if $group_name eq '.*';
    $entry_name = "\"$entry_name\"" if $entry_name =~ / /;

    # No key given: list everything except the internal _name field.
    if (not defined $key) {
        my @pairs = map { "$_ => " . $entry->{$_} }
                    grep { $_ ne '_name' }
                    sort keys %{$entry};
        return "[$group_name] $entry_name keys:\n" . (@pairs ? join(";\n", @pairs) : "none");
    }

    if (defined $value) {
        $entry->{$key} = $value;
        $self->save unless $dont_save;
    }
    else {
        # No value given: report the current one.
        $value = $entry->{$key};
    }

    my $status = defined $value ? "set to $value" : "is not set.";
    return "[$group_name] $entry_name: $key " . $status;
}
# Removes a single key from an existing entry and saves the hash.
#
# Arguments:
#   $primary_index, $secondary_index - identify the entry (case-insensitive)
#   $key - the key to remove
#
# Returns a message string: a "not found" message with similar matches when
# either index is unknown, "... does not exist." when the entry has no such
# key, or "... unset." after the key has been removed and the hash saved.
sub unset {
    my ($self, $primary_index, $secondary_index, $key) = @_;
    my $lc_primary_index   = lc $primary_index;
    my $lc_secondary_index = lc $secondary_index;

    if (not exists $self->{hash}->{$lc_primary_index}) {
        my $result = "$self->{name}: $primary_index not found; similiar matches: ";
        $result .= $self->levenshtein_matches($primary_index);
        return $result;
    }

    my $name1 = $self->{hash}->{$lc_primary_index}->{_name};
    $name1 = 'global' if $name1 eq '.*';

    if (not exists $self->{hash}->{$lc_primary_index}->{$lc_secondary_index}) {
        my $result = "$self->{name}: [$name1] $secondary_index not found; similiar matches: ";
        $result .= $self->levenshtein_matches($primary_index, $secondary_index);
        return $result;
    }

    my $entry = $self->{hash}->{$lc_primary_index}->{$lc_secondary_index};
    my $name2 = $entry->{_name};
    $name2 = "\"$name2\"" if $name2 =~ / /;

    # Test presence with exists rather than the deleted value's definedness,
    # so a key holding undef is still reported as removed and gets saved.
    if (not exists $entry->{$key}) {
        return "$self->{name}: [$name1] $name2: $key does not exist.";
    }

    delete $entry->{$key};
    $self->save;
    return "$self->{name}: [$name1] $name2: $key unset.";
}
# Tests whether something is present at the given depth.
#
# Arguments (each deeper level is optional):
#   $primary_index   - compared case-insensitively
#   $secondary_index - compared case-insensitively
#   $data_index      - compared exactly as given
#
# Returns 0 when $primary_index is undefined or any requested level is
# missing, 1 when the deepest requested index exists; for a $data_index the
# result of Perl's exists() on that key is returned.
sub exists {
    my ($self, $primary_index, $secondary_index, $data_index) = @_;

    if (not defined $primary_index) {
        return 0;
    }

    my $lc_primary = lc $primary_index;

    if (not exists $self->{hash}->{$lc_primary}) {
        return 0;
    }

    if (not defined $secondary_index) {
        return 1;
    }

    my $lc_secondary = lc $secondary_index;

    if (not exists $self->{hash}->{$lc_primary}->{$lc_secondary}) {
        return 0;
    }

    if (not defined $data_index) {
        return 1;
    }

    return exists $self->{hash}->{$lc_primary}->{$lc_secondary}->{$data_index};
}
# Lists keys at the requested depth.
#
# Arguments:
#   $primary_index   - optional; when undefined, all primary keys are returned
#   $secondary_index - optional; when undefined, the secondary keys of the
#                      primary group are returned
#
# Both indexes are matched case-insensitively. The internal `_name` key is
# filtered out of secondary-key and data-key listings. Returns an empty list
# when the requested group or entry does not exist.
sub get_keys {
    my ($self, $primary_index, $secondary_index) = @_;
    my $hash = $self->{hash};

    return keys %{$hash} unless defined $primary_index;

    my $lc_primary = lc $primary_index;

    if (defined $secondary_index) {
        my $lc_secondary = lc $secondary_index;

        if (exists $hash->{$lc_primary} and exists $hash->{$lc_primary}->{$lc_secondary}) {
            return grep { $_ ne '_name' } keys %{ $hash->{$lc_primary}->{$lc_secondary} };
        }

        return ();
    }

    return () unless exists $hash->{$lc_primary};
    return grep { $_ ne '_name' } keys %{ $hash->{$lc_primary} };
}
# Fetches data at the requested depth without modifying the hash.
#
# Arguments:
#   $primary_index   - matched case-insensitively
#   $secondary_index - optional; matched case-insensitively
#   $data_index      - optional; matched exactly as given
#
# Returns the primary group's hash ref, the secondary entry's hash ref, or
# the value stored under $data_index, depending on how many indexes are
# given. Returns undef when any requested level is missing.
#
# `return undef` (rather than a bare `return`) is kept deliberately so that
# callers using the result in list context still receive exactly one value.
sub get_data {
    my ($self, $primary_index, $secondary_index, $data_index) = @_;

    return undef if not defined $primary_index;
    my $lc_primary = lc $primary_index;

    return undef if not exists $self->{hash}->{$lc_primary};
    return $self->{hash}->{$lc_primary} if not defined $secondary_index;

    my $lc_secondary = lc $secondary_index;

    # Check the secondary level explicitly: dereferencing a missing entry to
    # reach $data_index would autovivify an empty entry (with no _name) that
    # exists(), get_keys() and save() would then treat as real.
    return undef if not exists $self->{hash}->{$lc_primary}->{$lc_secondary};

    my $entry = $self->{hash}->{$lc_primary}->{$lc_secondary};
    return $entry if not defined $data_index;
    return $entry->{$data_index};
}
# Stores an entry under the given pair of indexes, replacing any existing
# entry at that position.
#
# Arguments:
#   $primary_index, $secondary_index - keys; stored lowercased, with the
#       original spelling kept in each level's `_name` field
#   $data      - hash ref to store; its `_name` field is overwritten
#   $dont_save - when true, the hash is not written to disk and nothing is
#                logged
#   $quiet     - when true, nothing is logged
#
# A primary group that already exists keeps its original `_name`.
# Returns the "... added." message string.
sub add {
    my ($self, $primary_index, $secondary_index, $data, $dont_save, $quiet) = @_;

    my ($lc_primary, $lc_secondary) = (lc $primary_index, lc $secondary_index);
    my $hash = $self->{hash};

    # First entry in this group: remember the group's original spelling.
    $hash->{$lc_primary}->{_name} = $primary_index unless exists $hash->{$lc_primary};

    $data->{_name} = $secondary_index;    # preserve case
    $hash->{$lc_primary}->{$lc_secondary} = $data;

    $self->save() unless $dont_save;

    # Display names: '.*' is shown as "global"; names with spaces are quoted.
    my $group_name = $hash->{$lc_primary}->{_name};
    my $entry_name = $hash->{$lc_primary}->{$lc_secondary}->{_name};
    $group_name = 'global' if $group_name eq '.*';
    $entry_name = "\"$entry_name\"" if $entry_name =~ / /;

    my $message = "$self->{name}: [$group_name]: $entry_name added.";

    if (not($dont_save or $quiet)) {
        $self->{pbot}->{logger}->log("$message\n");
    }

    return $message;
}
# Removes a primary group, a secondary entry, or a single data key,
# depending on how many indexes are supplied.
#
# Arguments:
#   $primary_index   - group to act on (case-insensitive)
#   $secondary_index - optional; when undefined the whole group is removed
#   $data_index      - optional; when undefined the whole entry is removed,
#                      otherwise only this key of the entry
#   $dont_save       - when true, the change is not written to disk
#
# Removing the last entry of a group also removes the group itself.
# Returns a message string describing the outcome ("removed", "does not
# exist", or "not found" with similar matches).
sub remove {
    my ($self, $primary_index, $secondary_index, $data_index, $dont_save) = @_;
    my $lc_primary_index = lc $primary_index;

    if (not exists $self->{hash}->{$lc_primary_index}) {
        my $result = "$self->{name}: $primary_index not found; similiar matches: ";
        $result .= $self->levenshtein_matches($primary_index);
        return $result;
    }

    if (not defined $secondary_index) {
        my $data = delete $self->{hash}->{$lc_primary_index};
        if (defined $data) {
            my $name = $data->{_name};
            $name = 'global' if $name eq '.*';
            $self->save unless $dont_save;
            return "$self->{name}: $name removed.";
        } else {
            return "$self->{name}: $primary_index does not exist.";
        }
    }

    # Lowercase only once the secondary index is known to be defined, so the
    # primary-only call above does not raise an uninitialized-value warning.
    my $lc_secondary_index = lc $secondary_index;

    my $name1 = $self->{hash}->{$lc_primary_index}->{_name};
    $name1 = 'global' if $name1 eq '.*';

    if (not exists $self->{hash}->{$lc_primary_index}->{$lc_secondary_index}) {
        my $result = "$self->{name}: [$name1] $secondary_index not found; similiar matches: ";
        $result .= $self->levenshtein_matches($primary_index, $secondary_index);
        return $result;
    }

    if (not defined $data_index) {
        my $data = delete $self->{hash}->{$lc_primary_index}->{$lc_secondary_index};
        if (defined $data) {
            my $name2 = $data->{_name};
            $name2 = "\"$name2\"" if $name2 =~ / /;

            # remove primary group if no more secondaries (only key left should be the _name key)
            if (keys %{$self->{hash}->{$lc_primary_index}} == 1) { delete $self->{hash}->{$lc_primary_index}; }

            $self->save unless $dont_save;
            return "$self->{name}: [$name1] $name2 removed.";
        } else {
            return "$self->{name}: [$name1] $secondary_index does not exist.";
        }
    }

    my $entry = $self->{hash}->{$lc_primary_index}->{$lc_secondary_index};
    my $name2 = $entry->{_name};

    # Test presence with exists so a key holding undef still counts.
    if (not exists $entry->{$data_index}) {
        return "$self->{name}: [$name1] $name2.$data_index does not exist.";
    }

    delete $entry->{$data_index};

    # Persist like the other removal paths do.
    $self->save unless $dont_save;
    return "$self->{name}: [$name1] $name2.$data_index removed.";
}
# for compatibility with DualIndexSQLiteObject
# Intentionally does nothing and returns nothing.
sub create_metadata {
    return;
}
# todo: not implemented yet. Each of these placeholders currently does
# nothing and returns nothing.
sub get_each {
    return;
}

sub get_next {
    return;
}

sub get_all {
    return;
}
1;