From 5828f595a45557fffd318912fa68af42f54d097e Mon Sep 17 00:00:00 2001 From: Pragmatic Software Date: Sat, 10 Feb 2018 17:13:26 -0800 Subject: [PATCH] Spinach: Add stop-word filter to validation --- PBot/Plugins/Spinach.pm | 32 +++++- data/spinach/stopwords | 225 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 254 insertions(+), 3 deletions(-) create mode 100644 data/spinach/stopwords diff --git a/PBot/Plugins/Spinach.pm b/PBot/Plugins/Spinach.pm index d0a78b8e..297193fd 100644 --- a/PBot/Plugins/Spinach.pm +++ b/PBot/Plugins/Spinach.pm @@ -44,10 +44,12 @@ sub initialize { $self->{leaderboard_filename} = $self->{pbot}->{registry}->get_value('general', 'data_dir') . '/spinach/spinachlb.sqlite3'; $self->{questions_filename} = $self->{pbot}->{registry}->get_value('general', 'data_dir') . '/spinach/spinachq.json'; + $self->{stopwords_filename} = $self->{pbot}->{registry}->get_value('general', 'data_dir') . '/spinach/stopwords'; $self->create_database; $self->create_states; $self->load_questions; + $self->load_stopwords; $self->{channel} = '##spinach'; @@ -112,6 +114,21 @@ sub load_questions { $self->{pbot}->{logger}->log("Spinach: Loaded $questions questions in $categories categories.\n"); } +sub load_stopwords { + my $self = shift; + + open my $fh, '<', $self->{stopwords_filename} or do { + $self->{pbot}->{logger}->log("Spinach: Failed to open $self->{stopwords_filename}: $!\n"); + return; + }; + + foreach my $word (<$fh>) { + chomp $word; + $self->{stopwords}{$word} = 1; + } + close $fh; +} + sub create_database { my $self = shift; @@ -470,6 +487,15 @@ sub spinach_cmd { return "/msg $self->{channel} $nick has chosen $self->{state_data}->{current_category}!"; } + when ('n') { + return $self->normalize_text($arguments); + } + + when ('v') { + my ($truth, $lie) = split /;/, $arguments; + return $self->validate_lie($self->normalize_text($truth), $self->normalize_text($lie)); + } + when ('lie') { if ($self->{current_state} !~ /getlies$/) { return "$nick: It is not time to submit a lie!"; @@ -1059,10 +1085,10 @@ sub normalize_text { sub validate_lie { my ($self, $truth, $lie) = @_; - my %truth_words = @{stem map { $_ => 1 } grep { /^\w+$/ } split /\b/, $truth}; + my %truth_words = @{stem map { $_ => 1 } grep { /^\w+$/ and not exists $self->{stopwords}{lc $_} } split /\b/, $truth}; my $truth_word_count = keys %truth_words; - my %lie_words = @{stem map { $_ => 1 } grep { /^\w+$/ } split /\b/, $lie}; + my %lie_words = @{stem map { $_ => 1 } grep { /^\w+$/ and not exists $self->{stopwords}{lc $_} } split /\b/, $lie}; my $lie_word_count = keys %lie_words; my $count = 0; @@ -1083,7 +1109,7 @@ sub validate_lie { } } - if ($count == $truth_word_count) { + if ($count == $lie_word_count) { return 0; } diff --git a/data/spinach/stopwords b/data/spinach/stopwords new file mode 100644 index 00000000..34512cab --- /dev/null +++ b/data/spinach/stopwords @@ -0,0 +1,225 @@ +a +about +above +after +again +against +all +am +an +and +any +are +aren't +arent +as +at +be +because +been +before +being +below +between +both +but +by +can't +cant +cannot +could +couldn't +couldnt +did +didn't +didnt +do +does +doesn't +doesnt +doing +don't +dont +down +during +each +few +for +from +further +had +hadn't +hadnt +has +hasn't +hasnt +have +haven't +havent +having +he +he'd +hed +he'll +hell +he's +hes +her +here +here's +heres +hers +herself +him +himself +his +how +how's +hows +i +i'd +id +i'll +ill +i'm +im +i've +ive +if +in +into +is +isn't +isnt +it +it's +its +itself +let's +lets +me +more +most +mustn't +mustnt +my +myself +no +nor +not +of +off +on +one +once +only +or +other +ought +our +ours +ourselves +out +over +own +same +shan't +shant +she +she'd +shed +she'll +shell +she's +shes +should +shouldn't +shouldnt +so +some +such +than +that +that's +thats +the +their +theirs +them +themselves +then +there +there's +theres +these +they +they'd +theyd +they'll +theyll +they're +theyre +they've +theyve +this +those +through +to +too +under +until +up +very +was +wasn't +wasnt +we +we'd +wed +we'll +well +we're +were +we've +weve +were +weren't +werent +what +what's +whats +when +when's +whens +where +where's +wheres +which +while +who +who's +whos +whom +why +why's +whys +will +with +won't +wont +would +wouldn't +wouldnt +you +you'd +youd +you'll +youll +you're +youre +you've +youve +your +yours +yourself +yourselves