mirror of
https://github.com/pragma-/pbot.git
synced 2024-11-23 04:19:27 +01:00
Spinach: Add stop-word filter to validation
This commit is contained in:
parent
aeb9130345
commit
5828f595a4
@ -44,10 +44,12 @@ sub initialize {
|
|||||||
|
|
||||||
$self->{leaderboard_filename} = $self->{pbot}->{registry}->get_value('general', 'data_dir') . '/spinach/spinachlb.sqlite3';
|
$self->{leaderboard_filename} = $self->{pbot}->{registry}->get_value('general', 'data_dir') . '/spinach/spinachlb.sqlite3';
|
||||||
$self->{questions_filename} = $self->{pbot}->{registry}->get_value('general', 'data_dir') . '/spinach/spinachq.json';
|
$self->{questions_filename} = $self->{pbot}->{registry}->get_value('general', 'data_dir') . '/spinach/spinachq.json';
|
||||||
|
$self->{stopwords_filename} = $self->{pbot}->{registry}->get_value('general', 'data_dir') . '/spinach/stopwords';
|
||||||
|
|
||||||
$self->create_database;
|
$self->create_database;
|
||||||
$self->create_states;
|
$self->create_states;
|
||||||
$self->load_questions;
|
$self->load_questions;
|
||||||
|
$self->load_stopwords;
|
||||||
|
|
||||||
$self->{channel} = '##spinach';
|
$self->{channel} = '##spinach';
|
||||||
|
|
||||||
@ -112,6 +114,21 @@ sub load_questions {
|
|||||||
$self->{pbot}->{logger}->log("Spinach: Loaded $questions questions in $categories categories.\n");
|
$self->{pbot}->{logger}->log("Spinach: Loaded $questions questions in $categories categories.\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Loads the stop-word list from $self->{stopwords_filename} into the
# $self->{stopwords} lookup hash (word => 1).
#
# The file is read as one word per line. Each line is trimmed of surrounding
# whitespace (which also removes the newline and a trailing "\r" from CRLF
# files) and lower-cased, because validate_lie looks words up with `lc $_`.
# Blank lines are skipped. Words are added to whatever is already present in
# $self->{stopwords}.
#
# If the file cannot be opened, the failure is logged and the sub returns
# nothing without touching $self->{stopwords}. Returns true on success.
sub load_stopwords {
  my $self = shift;

  open my $fh, '<', $self->{stopwords_filename} or do {
    $self->{pbot}->{logger}->log("Spinach: Failed to open $self->{stopwords_filename}: $!\n");
    return;
  };

  my $added = 0;

  # Read line by line instead of slurping the whole file into a list.
  while (my $word = <$fh>) {
    $word =~ s/^\s+|\s+$//g;
    next if not length $word;

    my $key = lc $word;

    # Count only words that were not already known, so duplicate lines in the
    # file do not inflate the number reported below.
    $added++ if not exists $self->{stopwords}{$key};
    $self->{stopwords}{$key} = 1;
  }

  close $fh;

  $self->{pbot}->{logger}->log("Spinach: Loaded $added stop words.\n");
  return 1;
}
|
||||||
|
|
||||||
sub create_database {
|
sub create_database {
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
|
|
||||||
@ -470,6 +487,15 @@ sub spinach_cmd {
|
|||||||
return "/msg $self->{channel} $nick has chosen $self->{state_data}->{current_category}!";
|
return "/msg $self->{channel} $nick has chosen $self->{state_data}->{current_category}!";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
when ('n') {
|
||||||
|
return $self->normalize_text($arguments);
|
||||||
|
}
|
||||||
|
|
||||||
|
when ('v') {
|
||||||
|
my ($truth, $lie) = split /;/, $arguments;
|
||||||
|
return $self->validate_lie($self->normalize_text($truth), $self->normalize_text($lie));
|
||||||
|
}
|
||||||
|
|
||||||
when ('lie') {
|
when ('lie') {
|
||||||
if ($self->{current_state} !~ /getlies$/) {
|
if ($self->{current_state} !~ /getlies$/) {
|
||||||
return "$nick: It is not time to submit a lie!";
|
return "$nick: It is not time to submit a lie!";
|
||||||
@ -1059,10 +1085,10 @@ sub normalize_text {
|
|||||||
sub validate_lie {
|
sub validate_lie {
|
||||||
my ($self, $truth, $lie) = @_;
|
my ($self, $truth, $lie) = @_;
|
||||||
|
|
||||||
my %truth_words = @{stem map { $_ => 1 } grep { /^\w+$/ } split /\b/, $truth};
|
my %truth_words = @{stem map { $_ => 1 } grep { /^\w+$/ and not exists $self->{stopwords}{lc $_} } split /\b/, $truth};
|
||||||
my $truth_word_count = keys %truth_words;
|
my $truth_word_count = keys %truth_words;
|
||||||
|
|
||||||
my %lie_words = @{stem map { $_ => 1 } grep { /^\w+$/ } split /\b/, $lie};
|
my %lie_words = @{stem map { $_ => 1 } grep { /^\w+$/ and not exists $self->{stopwords}{lc $_} } split /\b/, $lie};
|
||||||
my $lie_word_count = keys %lie_words;
|
my $lie_word_count = keys %lie_words;
|
||||||
|
|
||||||
my $count = 0;
|
my $count = 0;
|
||||||
@ -1083,7 +1109,7 @@ sub validate_lie {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($count == $truth_word_count) {
|
if ($count == $lie_word_count) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
225
data/spinach/stopwords
Normal file
225
data/spinach/stopwords
Normal file
@ -0,0 +1,225 @@
|
|||||||
|
a
|
||||||
|
about
|
||||||
|
above
|
||||||
|
after
|
||||||
|
again
|
||||||
|
against
|
||||||
|
all
|
||||||
|
am
|
||||||
|
an
|
||||||
|
and
|
||||||
|
any
|
||||||
|
are
|
||||||
|
aren't
|
||||||
|
arent
|
||||||
|
as
|
||||||
|
at
|
||||||
|
be
|
||||||
|
because
|
||||||
|
been
|
||||||
|
before
|
||||||
|
being
|
||||||
|
below
|
||||||
|
between
|
||||||
|
both
|
||||||
|
but
|
||||||
|
by
|
||||||
|
can't
|
||||||
|
cant
|
||||||
|
cannot
|
||||||
|
could
|
||||||
|
couldn't
|
||||||
|
couldnt
|
||||||
|
did
|
||||||
|
didn't
|
||||||
|
didnt
|
||||||
|
do
|
||||||
|
does
|
||||||
|
doesn't
|
||||||
|
doesnt
|
||||||
|
doing
|
||||||
|
don't
|
||||||
|
dont
|
||||||
|
down
|
||||||
|
during
|
||||||
|
each
|
||||||
|
few
|
||||||
|
for
|
||||||
|
from
|
||||||
|
further
|
||||||
|
had
|
||||||
|
hadn't
|
||||||
|
hadnt
|
||||||
|
has
|
||||||
|
hasn't
|
||||||
|
hasnt
|
||||||
|
have
|
||||||
|
haven't
|
||||||
|
havent
|
||||||
|
having
|
||||||
|
he
|
||||||
|
he'd
|
||||||
|
hed
|
||||||
|
he'll
|
||||||
|
hell
|
||||||
|
he's
|
||||||
|
hes
|
||||||
|
her
|
||||||
|
here
|
||||||
|
here's
|
||||||
|
heres
|
||||||
|
hers
|
||||||
|
herself
|
||||||
|
him
|
||||||
|
himself
|
||||||
|
his
|
||||||
|
how
|
||||||
|
how's
|
||||||
|
hows
|
||||||
|
i
|
||||||
|
i'd
|
||||||
|
id
|
||||||
|
i'll
|
||||||
|
ill
|
||||||
|
i'm
|
||||||
|
im
|
||||||
|
i've
|
||||||
|
ive
|
||||||
|
if
|
||||||
|
in
|
||||||
|
into
|
||||||
|
is
|
||||||
|
isn't
|
||||||
|
isnt
|
||||||
|
it
|
||||||
|
it's
|
||||||
|
its
|
||||||
|
itself
|
||||||
|
let's
|
||||||
|
lets
|
||||||
|
me
|
||||||
|
more
|
||||||
|
most
|
||||||
|
mustn't
|
||||||
|
mustnt
|
||||||
|
my
|
||||||
|
myself
|
||||||
|
no
|
||||||
|
nor
|
||||||
|
not
|
||||||
|
of
|
||||||
|
off
|
||||||
|
on
|
||||||
|
one
|
||||||
|
once
|
||||||
|
only
|
||||||
|
or
|
||||||
|
other
|
||||||
|
ought
|
||||||
|
our
|
||||||
|
ours
|
||||||
|
ourselves
|
||||||
|
out
|
||||||
|
over
|
||||||
|
own
|
||||||
|
same
|
||||||
|
shan't
|
||||||
|
shant
|
||||||
|
she
|
||||||
|
she'd
|
||||||
|
shed
|
||||||
|
she'll
|
||||||
|
shell
|
||||||
|
she's
|
||||||
|
shes
|
||||||
|
should
|
||||||
|
shouldn't
|
||||||
|
shouldnt
|
||||||
|
so
|
||||||
|
some
|
||||||
|
such
|
||||||
|
than
|
||||||
|
that
|
||||||
|
that's
|
||||||
|
thats
|
||||||
|
the
|
||||||
|
their
|
||||||
|
theirs
|
||||||
|
them
|
||||||
|
themselves
|
||||||
|
then
|
||||||
|
there
|
||||||
|
there's
|
||||||
|
theres
|
||||||
|
these
|
||||||
|
they
|
||||||
|
they'd
|
||||||
|
theyd
|
||||||
|
they'll
|
||||||
|
theyll
|
||||||
|
they're
|
||||||
|
theyre
|
||||||
|
they've
|
||||||
|
theyve
|
||||||
|
this
|
||||||
|
those
|
||||||
|
through
|
||||||
|
to
|
||||||
|
too
|
||||||
|
under
|
||||||
|
until
|
||||||
|
up
|
||||||
|
very
|
||||||
|
was
|
||||||
|
wasn't
|
||||||
|
wasnt
|
||||||
|
we
|
||||||
|
we'd
|
||||||
|
wed
|
||||||
|
we'll
|
||||||
|
well
|
||||||
|
we're
|
||||||
|
were
|
||||||
|
we've
|
||||||
|
weve
|
||||||
|
were
|
||||||
|
weren't
|
||||||
|
werent
|
||||||
|
what
|
||||||
|
what's
|
||||||
|
whats
|
||||||
|
when
|
||||||
|
when's
|
||||||
|
whens
|
||||||
|
where
|
||||||
|
where's
|
||||||
|
wheres
|
||||||
|
which
|
||||||
|
while
|
||||||
|
who
|
||||||
|
who's
|
||||||
|
whos
|
||||||
|
whom
|
||||||
|
why
|
||||||
|
why's
|
||||||
|
whys
|
||||||
|
will
|
||||||
|
with
|
||||||
|
won't
|
||||||
|
wont
|
||||||
|
would
|
||||||
|
wouldn't
|
||||||
|
wouldnt
|
||||||
|
you
|
||||||
|
you'd
|
||||||
|
youd
|
||||||
|
you'll
|
||||||
|
youll
|
||||||
|
you're
|
||||||
|
youre
|
||||||
|
you've
|
||||||
|
youve
|
||||||
|
your
|
||||||
|
yours
|
||||||
|
yourself
|
||||||
|
yourselves
|
Loading…
Reference in New Issue
Block a user