Refactor DualIndexSqliteObject

- use event queue for decaching
- refactor levenshtein_matches
- misc clean-ups
This commit is contained in:
Pragmatic Software 2021-07-19 16:57:02 -07:00
parent 9ab0355f86
commit eecf756b07
1 changed files with 176 additions and 85 deletions

View File

@ -16,8 +16,8 @@ use Text::Levenshtein qw(fastdistance);
sub new {
my ($class, %args) = @_;
my $self = bless {}, $class;
Carp::croak("Missing pbot reference to " . __FILE__) unless exists $args{pbot};
my $self = bless {}, $class;
$self->{pbot} = delete $args{pbot};
$self->initialize(%args);
return $self;
@ -25,20 +25,17 @@ sub new {
sub initialize {
my ($self, %conf) = @_;
$self->{name} = $conf{name} // 'Dual Index SQLite object';
$self->{filename} = $conf{filename} // Carp::croak("Missing filename in " . __FILE__);
$self->{dbh} = undef;
$self->{cache} = {};
$self->{cache_timeouts} = {};
$self->{debug} = $conf{debug} // '';
$self->{dbh} = undef;
$self->{cache} = {};
$self->{pbot}->{registry}->add_default('text', 'dualindexsqliteobject', "debug_$self->{name}", 0);
$self->{pbot}->{registry}->add_trigger('dualindexsqliteobject', "debug_$self->{name}", sub { $self->sqlite_debug_trigger(@_) });
$self->{pbot}->{atexit}->register(sub { $self->end; return; });
$self->{pbot}->{event_queue}->enqueue(sub {$self->trim_cache }, 60, "Trim $self->{name} cache");
$self->{pbot}->{atexit}->register(sub { $self->end });
$self->begin;
}
@ -65,7 +62,9 @@ sub begin {
$self->{dbh}->trace($self->{dbh}->parse_trace_flags("SQL|$sqlite_debug"), $self->{trace_layer});
};
if ($@) { $self->{pbot}->{logger}->log("Error initializing $self->{name} database: $@\n"); }
if ($@) {
$self->{pbot}->{logger}->log("Error initializing $self->{name} database: $@\n");
}
}
sub end {
@ -75,6 +74,7 @@ sub end {
if (defined $self->{dbh}) {
$self->{dbh}->disconnect;
close $self->{trace_layer};
$self->{dbh} = undef;
}
@ -90,7 +90,12 @@ sub load {
sub save {
my ($self) = @_;
return if not $self->{dbh};
$self->{dbh}->commit;
eval { $self->{dbh}->commit };
if ($@) {
$self->{pbot}->{logger}->log("Error saving $self->{name}: $@");
}
}
sub create_database {
@ -107,23 +112,35 @@ SQL
$self->{dbh}->do('CREATE INDEX IF NOT EXISTS idx1 ON Stuff (index1, index2)');
};
$self->{pbot}->{logger}->log($@) if $@;
$self->{pbot}->{logger}->log("Error creating $self->{name} databse: $@") if $@;
}
sub create_cache {
my ($self) = @_;
$self->{cache} = {};
my ($index1_count, $index2_count) = (0, 0);
foreach my $index1 ($self->get_keys(undef, undef, 1)) {
$index1_count++;
my $lc_index1 = lc $index1;
$index1_count++;
foreach my $index2 ($self->get_keys($lc_index1, undef, 1)) {
my $lc_index2 = lc $index2;
$index2_count++;
$self->{cache}->{$lc_index1}->{lc $index2} = {};
$self->{cache}->{$lc_index1}->{_name} = $index1 if $index1 ne $lc_index1;
$self->{cache}->{$lc_index1}->{$lc_index2}->{_name} = $index2 if $index2 ne $lc_index2;
$self->{cache}->{$lc_index1}->{$lc_index2} = {};
# _name contains original typographical case
if ($index1 ne $lc_index1) {
$self->{cache}->{$lc_index1}->{_name} = $index1;
}
if ($index2 ne $lc_index2) {
$self->{cache}->{$lc_index1}->{$lc_index2}->{_name} = $index2;
}
}
}
@ -134,31 +151,36 @@ sub cache_remove {
my ($self, $index1, $index2) = @_;
if (not defined $index2) {
# remove index1
delete $self->{cache}->{$index1};
delete $self->{cache_timeouts}->{$index1};
} else {
# remove index2
delete $self->{cache}->{$index1}->{$index2};
delete $self->{cache}->{$index1} if not grep { $_ ne '_name' } keys %{$self->{cache}->{$index1}};
delete $self->{cache_timeouts}->{$index1}->{$index2};
delete $self->{cache_timeouts}->{$index1} if not keys %{$self->{cache_timeouts}->{$index1}};
# remove index1 if it has no more keys left (aside from _name)
if (not grep { $_ ne '_name' } keys %{$self->{cache}->{$index1}}) {
delete $self->{cache}->{$index1};
}
}
}
sub trim_cache {
my ($self) = @_;
sub enqueue_decache {
my ($self, $index1, $index2) = @_;
my $now = time;
foreach my $index1 (keys %{$self->{cache_timeouts}}) {
foreach my $index2 (keys %{$self->{cache_timeouts}->{$index1}}) {
if ($now >= $self->{cache_timeouts}->{$index1}->{$index2}) {
my $name = $self->{cache}->{$index1}->{$index2}->{_name};
$self->{cache}->{$index1}->{$index2} = {};
$self->{cache}->{$index1}->{$index2}->{_name} = $name if defined $name;
delete $self->{cache_timeouts}->{$index1}->{$index2};
delete $self->{cache_timeouts}->{$index1} if not keys %{$self->{cache_timeouts}->{$index1}};
}
}
}
my $timeout = $self->{pbot}->{registry}->get_value('dualindexsqliteobject', 'cache_timeout') // 60 * 30;
$self->{pbot}->{event_queue}->enqueue_event(
sub {
# save _name
my $name = $self->{cache}->{$index1}->{$index2}->{_name};
# clear cache
$self->{cache}->{$index1}->{$index2} = {};
# put _name back
$self->{cache}->{$index1}->{$index2}->{_name} = $name if defined $name;
}, $timeout, "Decache $self->{name} " . ($index1 eq '.*' ? 'global' : $index1) . ".$index2"
);
}
sub create_metadata {
@ -170,17 +192,21 @@ sub create_metadata {
eval {
my %existing = ();
foreach my $col (@{$self->{dbh}->selectall_arrayref("PRAGMA TABLE_INFO(Stuff)")}) { $existing{$col->[1]} = $col->[2]; }
foreach my $col (@{$self->{dbh}->selectall_arrayref("PRAGMA TABLE_INFO(Stuff)")}) {
$existing{$col->[1]} = $col->[2];
}
foreach my $col (sort keys %$columns) {
unless (exists $existing{$col}) { $self->{dbh}->do("ALTER TABLE Stuff ADD COLUMN \"$col\" $columns->{$col}"); }
unless (exists $existing{$col}) {
$self->{dbh}->do("ALTER TABLE Stuff ADD COLUMN \"$col\" $columns->{$col}");
}
}
$self->{dbh}->commit;
};
if ($@) {
$self->{pbot}->{logger}->log($@);
$self->{pbot}->{logger}->log("Error creating metadata for $self->{name}: $@");
$self->{dbh}->rollback;
}
}
@ -188,60 +214,83 @@ sub create_metadata {
sub levenshtein_matches {
my ($self, $index1, $index2, $distance, $strictnamespace) = @_;
my $comma = '';
my $result = '';
$index1 //= '.*';
$distance //= 0.60;
$distance = 0.60 if not defined $distance;
$index1 = '.*' if not defined $index1;
my $output = 'none';
if (not $index2) {
my @matches;
my $length_a = length $index1;
foreach my $index (sort $self->get_keys) {
my $distance_result = fastdistance($index1, $index);
my $length = (length $index1 > length $index) ? length $index1 : length $index;
my $length_b = length $index;
my $length = $length_a > $length_b ? $length_a : $length_b;
if ($distance_result / $length < $distance) {
my $name = $self->get_data($index, '_name');
if ($name =~ / /) { $result .= $comma . "\"$name\""; }
else { $result .= $comma . $name; }
$comma = ", ";
if ($name =~ / /) {
push @matches, "\"$name\"";
} else {
push @matches, $name;
}
}
}
} else {
if (not $self->exists($index1)) { return 'none'; }
my $last_header = '';
my $header = '';
$output = join ', ', @matches;
} else {
if (not $self->exists($index1)) {
return $output;
}
my %sections;
my $section;
my $length_a = length $index2;
foreach my $i1 (sort $self->get_keys) {
$header = '[' . $self->get_data($i1, '_name') . '] ';
$header = '[global] ' if $header eq '[.*] ';
$section = $self->get_data($i1, '_name');
$section = 'global' if $section eq '.*';
if ($strictnamespace) {
next unless $i1 eq '.*' or lc $i1 eq lc $index1;
$header = "" unless $header eq '[global] ';
}
foreach my $i2 (sort $self->get_keys($i1)) {
my $distance_result = fastdistance($index2, $i2);
my $length = (length $index2 > length $i2) ? length $index2 : length $i2;
my $length_b = length $i2;
my $length = $length_a > $length_b ? $length_a : $length_b;
if ($distance_result / $length < $distance) {
my $name = $self->get_data($i1, $i2, '_name');
$header = "" if $last_header eq $header;
$last_header = $header;
$comma = '; ' if $comma ne '' and $header ne '';
if ($name =~ / /) { $result .= $comma . $header . "\"$name\""; }
else { $result .= $comma . $header . $name; }
$comma = ", ";
if ($name =~ / /) {
push @{$sections{$section}}, "\"$name\"";
} else {
push @{$sections{$section}}, $name;
}
}
}
}
$output = '';
foreach $section (sort keys %sections) {
$output .= "[$section] ";
$output .= join ', ', @{$sections{$section}};
$output .= '; ';
}
$output =~ s/; $//;
}
$result =~ s/(.*), /$1 or /;
$result = 'none' if $comma eq '';
return $result;
$output =~ s/(.*), /$1 or /;
return $output;
}
sub exists {
@ -258,13 +307,17 @@ sub exists {
sub get_keys {
my ($self, $index1, $index2, $nocache) = @_;
my @keys;
if (not defined $index1) {
if (not $nocache) { return keys %{$self->{cache}}; }
if (not $nocache) {
return keys %{$self->{cache}};
}
@keys = eval {
my $context = $self->{dbh}->selectall_arrayref('SELECT DISTINCT index1 FROM Stuff');
if (@$context) {
return map { $_->[0] } @$context;
} else {
@ -283,12 +336,17 @@ sub get_keys {
$index1 = lc $index1;
if (not defined $index2) {
if (not $nocache) { return grep { $_ ne '_name' } keys %{$self->{cache}->{$index1}}; }
if (not $nocache) {
return grep { $_ ne '_name' } keys %{$self->{cache}->{$index1}};
}
@keys = eval {
my $sth = $self->{dbh}->prepare('SELECT index2 FROM Stuff WHERE index1 = ?');
$sth->execute($index1);
my $context = $sth->fetchall_arrayref;
if (@$context) {
return map { $_->[0] } @$context;
} else {
@ -313,20 +371,23 @@ sub get_keys {
@keys = eval {
my $sth = $self->{dbh}->prepare('SELECT * FROM Stuff WHERE index1 = ? AND index2 = ?');
$sth->execute($index1, $index2);
my $context = $sth->fetchall_arrayref({});
my @k = ();
my @k = ();
return @k if not @{$context};
my ($lc_index1, $lc_index2) = (lc $index1, lc $index2);
foreach my $key (keys %{$context->[0]}) {
next if $key eq 'index1' or $key eq 'index2';
push @k, $key if defined $context->[0]->{$key};
$self->{cache}->{$lc_index1}->{$lc_index2}->{$key} = $context->[0]->{$key};
}
my $timeout = $self->{pbot}->{registry}->get_value('dualindexsqliteobject', 'cache_timeout') // 60 * 30;
$self->{cache_timeouts}->{$lc_index1}->{$lc_index2} = time + $timeout;
$self->enqueue_decache($lc_index1, $lc_index2);
return @k;
};
@ -343,11 +404,12 @@ sub get_each {
my ($self, @opts) = @_;
my $sth = eval {
my $sql = 'SELECT ';
my @keys = ();
my $sql = 'SELECT ';
my @keys = ();
my @values = ();
my @where = ();
my @sort = ();
my @where = ();
my @sort = ();
my $everything = 0;
foreach my $expr (@opts) {
@ -398,8 +460,10 @@ sub get_each {
$sql .= ' FROM Stuff WHERE';
my $in_or = 0;
for (my $i = 0; $i < @where; $i++) {
my ($prefix, $key, $op) = @{$where[$i]};
my ($next_prefix, $next_key) = ('', '');
if ($i < @where - 1) {
@ -469,7 +533,7 @@ sub get_all {
};
if ($@) {
$self->{pbot}->{logger}->log("Error getting data: $@\n");
$self->{pbot}->{logger}->log("Error getting all data: $@\n");
return undef;
}
@ -481,7 +545,9 @@ sub get_key_name {
my $lc_index1 = lc $index1;
return $lc_index1 if not exists $self->{cache}->{$lc_index1};
if (not exists $self->{cache}->{$lc_index1}) {
return $lc_index1;
}
if (not defined $index2) {
if (exists $self->{cache}->{$lc_index1}->{_name}) {
@ -493,7 +559,9 @@ sub get_key_name {
my $lc_index2 = lc $index2;
return $lc_index2 if not exists $self->{cache}->{$lc_index1}->{$lc_index2};
if (not exists $self->{cache}->{$lc_index1}->{$lc_index2}) {
return $lc_index2;
}
if (exists $self->{cache}->{$lc_index1}->{$lc_index2}->{_name}) {
return $self->{cache}->{$lc_index1}->{$lc_index2}->{_name};
@ -508,8 +576,13 @@ sub get_data {
my $lc_index1 = lc $index1;
my $lc_index2 = lc $index2;
return undef if not exists $self->{cache}->{$lc_index1};
return undef if not exists $self->{cache}->{$lc_index1}->{$lc_index2} and $lc_index2 ne '_name';
if (not exists $self->{cache}->{$lc_index1}) {
return undef;
}
if (not exists $self->{cache}->{$lc_index1}->{$lc_index2} and $lc_index2 ne '_name') {
return undef;
}
if (not defined $data_index) {
# special case for compatibility with DualIndexHashObject
@ -527,16 +600,17 @@ sub get_data {
my $context = $sth->fetchall_arrayref({});
my $d = {};
foreach my $key (keys %{$context->[0]}) {
next if $key eq 'index1' or $key eq 'index2';
if (defined $context->[0]->{$key}) {
$self->{cache}->{$lc_index1}->{$lc_index2}->{$key} = $context->[0]->{$key};
$d->{$key} = $context->[0]->{$key};
}
}
my $timeout = $self->{pbot}->{registry}->get_value('dualindexsqliteobject', 'cache_timeout') // 60 * 30;
$self->{cache_timeouts}->{$lc_index1}->{$lc_index2} = time + $timeout;
$self->enqueue_decache($lc_index1, $lc_index2);
return $d;
};
@ -572,8 +646,7 @@ sub get_data {
$self->{cache}->{$lc_index1}->{$lc_index2}->{$key} = $context->[0]->{$key};
}
my $timeout = $self->{pbot}->{registry}->get_value('dualindexsqliteobject', 'cache_timeout') // 60 * 30;
$self->{cache_timeouts}->{$lc_index1}->{$lc_index2} = time + $timeout;
$self->enqueue_decache($lc_index1, $lc_index2);
return $context->[0]->{$data_index};
};
@ -626,7 +699,11 @@ sub add {
# no errors updating SQL -- now we update cache
my ($lc_index1, $lc_index2) = (lc $index1, lc $index2);
$self->{cache}->{$lc_index1}->{_name} = $index1 if $index1 ne $lc_index1 and not exists $self->{cache}->{$lc_index1}->{_name};
if ($index1 ne $lc_index1 and not exists $self->{cache}->{$lc_index1}->{_name}) {
$self->{cache}->{$lc_index1}->{_name} = $index1;
}
if (grep { $_ ne '_name' } keys %{$self->{cache}->{$lc_index1}->{$lc_index2}}) {
foreach my $key (sort keys %$data) {
next if not exists $self->{columns}->{$key};
@ -635,7 +712,10 @@ sub add {
} else {
$self->{cache}->{$lc_index1}->{lc $index2} = {}
}
$self->{cache}->{$lc_index1}->{$lc_index2}->{_name} = $index2 if not exists $self->{cache}->{$lc_index1}->{$lc_index2}->{_name} and $index2 ne $lc_index2;
if (not exists $self->{cache}->{$lc_index1}->{$lc_index2}->{_name} and $index2 ne $lc_index2) {
$self->{cache}->{$lc_index1}->{$lc_index2}->{_name} = $index2;
}
};
if ($@) {
@ -646,8 +726,11 @@ sub add {
$index1 = 'global' if $index1 eq '.*';
$index2 = "\"$index2\"" if $index2 =~ / /;
$self->{pbot}->{logger}->log("$self->{name}: [$index1]: $index2 added.\n") unless $quiet;
$self->save unless $quiet;
return "$index2 added to $name1.";
}
@ -662,6 +745,7 @@ sub remove {
my $name1 = $self->get_data($index1, '_name');
$name1 = 'global' if $name1 eq '.*';
my $lc_index1 = lc $index1;
if (not defined $index2) {
@ -690,6 +774,7 @@ sub remove {
my $name2 = $self->get_data($index1, $index2, '_name');
$name2 = "\"$name2\"" if $name2 =~ / /;
my $lc_index2 = lc $index2;
if (not defined $data_index) {
@ -744,7 +829,7 @@ sub set {
if (not $self->exists($index1, $index2)) {
my $secondary_text = $index2 =~ / / ? "\"$index2\"" : $index2;
my $result = "$self->{name}: [" . $self->get_data($index1, '_name') . "] $secondary_text not found; similiar matches: ";
my $result = "$self->{name}: [" . $self->get_data($index1, '_name') . "] $secondary_text not found; similiar matches: ";
$result .= $self->levenshtein_matches($index1, $index2);
return $result;
}
@ -758,12 +843,18 @@ sub set {
if (not defined $key) {
my $result = "[$name1] $name2 keys:\n";
my @metadata = ();
foreach my $key (sort $self->get_keys($index1, $index2)) {
my $value = $self->get_data($index1, $index2, $key);
push @metadata, "$key => $value" if defined $value;
}
if (not @metadata) { $result .= "none"; }
else { $result .= join ";\n", @metadata; }
if (not @metadata) {
$result .= "none";
} else {
$result .= join ";\n", @metadata;
}
return $result;
}