mirror of
https://github.com/pragma-/pbot.git
synced 2025-01-11 20:42:38 +01:00
Spinach: Improve text normalization
This commit is contained in:
parent
024cd15752
commit
0d847d7d4b
@ -17,8 +17,8 @@ use JSON;
|
|||||||
use Lingua::EN::Fractions qw/fraction2words/;
|
use Lingua::EN::Fractions qw/fraction2words/;
|
||||||
use Lingua::EN::Numbers qw/num2en num2en_ordinal/;
|
use Lingua::EN::Numbers qw/num2en num2en_ordinal/;
|
||||||
use Lingua::EN::Numbers::Years qw/year2en/;
|
use Lingua::EN::Numbers::Years qw/year2en/;
|
||||||
|
|
||||||
use Lingua::Stem qw/stem/;
|
use Lingua::Stem qw/stem/;
|
||||||
|
use Lingua::EN::ABC qw/b2a/;
|
||||||
|
|
||||||
use Data::Dumper;
|
use Data::Dumper;
|
||||||
|
|
||||||
@ -995,31 +995,52 @@ sub normalize_text {
|
|||||||
$text =~ s/^\s+|\s+$//g;
|
$text =~ s/^\s+|\s+$//g;
|
||||||
$text =~ s/\s+/ /g;
|
$text =~ s/\s+/ /g;
|
||||||
|
|
||||||
$text = substr($text, 0, 60);
|
$text = lc substr($text, 0, 80);
|
||||||
|
|
||||||
my @words = split /\b/, $text;
|
$text =~ s/\$\s+(\d)/\$$1/g;
|
||||||
|
$text =~ s/(\d)\s+%/$1%/g;
|
||||||
|
|
||||||
|
my @words = split / /, $text;
|
||||||
my @result;
|
my @result;
|
||||||
|
|
||||||
foreach my $word (@words) {
|
foreach my $word (@words) {
|
||||||
|
my $punct = $1 if $word =~ s/(\p{PosixPunct}+)$//;
|
||||||
my $newword = $word;
|
my $newword = $word;
|
||||||
|
|
||||||
if ($word =~ m/^\d{4}$/ and $word >= 1700 and $word <= 2100) {
|
if ($word =~ m/^\d{4}$/ and $word >= 1700 and $word <= 2100) {
|
||||||
$newword = year2en($word);
|
$newword = year2en($word);
|
||||||
} elsif ($word =~ m/^\d+$/) {
|
} elsif ($word =~ m/^\d+$/) {
|
||||||
$newword = num2en($word);
|
$newword = num2en($word);
|
||||||
|
|
||||||
|
if (defined $punct and $punct eq '%') {
|
||||||
|
$newword .= " percent";
|
||||||
|
$punct = undef;
|
||||||
|
}
|
||||||
} elsif ($word =~ m/^(\d+)(?:st|nd|rd|th)$/i) {
|
} elsif ($word =~ m/^(\d+)(?:st|nd|rd|th)$/i) {
|
||||||
$newword = num2en_ordinal($1);
|
$newword = num2en_ordinal($1);
|
||||||
|
} elsif ($word =~ m/^\$(\d+)(\.\d+)?$/i) {
|
||||||
|
my ($dollars, $cents) = ($1, $2);
|
||||||
|
$word = num2en($dollars);
|
||||||
|
$newword = "$word " . ($dollars == 1 ? "dollar" : "dollars");
|
||||||
|
|
||||||
|
if (defined $cents) {
|
||||||
|
$cents =~ s/^\.0*//;
|
||||||
|
$word = num2en($cents);
|
||||||
|
$newword .= " and $word cent" if $cents == 1;
|
||||||
|
$newword .= " and $word cents" if $cents > 1;
|
||||||
|
}
|
||||||
} elsif ($word =~ m/^(\d+\.\d+)(?:st|nd|rd|th)?$/i) {
|
} elsif ($word =~ m/^(\d+\.\d+)(?:st|nd|rd|th)?$/i) {
|
||||||
$newword = num2en($1);
|
$newword = num2en($1);
|
||||||
} elsif ($word =~ m{^(\d+\s*/\s*\d+)(?:st|nd|rd|th)?$}i) {
|
} elsif ($word =~ m{^(\d+\s*/\s*\d+)(?:st|nd|rd|th)?$}i) {
|
||||||
$newword = fraction2words($1);
|
$newword = fraction2words($1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$newword .= $punct if defined $punct;
|
||||||
push @result, $newword;
|
push @result, $newword;
|
||||||
}
|
}
|
||||||
|
|
||||||
$text = uc join '', @result;
|
$text = uc b2a join ' ', @result;
|
||||||
return substr $text, 0, 60;
|
return substr $text, 0, 80;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub validate_lie {
|
sub validate_lie {
|
||||||
@ -1517,6 +1538,7 @@ sub getplayers {
|
|||||||
if (++$state->{counter} > 4) {
|
if (++$state->{counter} > 4) {
|
||||||
$self->{pbot}->{conn}->privmsg($self->{channel}, "$color{bold}Not all players were ready in time. The game has been stopped.$color{reset}");
|
$self->{pbot}->{conn}->privmsg($self->{channel}, "$color{bold}Not all players were ready in time. The game has been stopped.$color{reset}");
|
||||||
$state->{result} = 'stop';
|
$state->{result} = 'stop';
|
||||||
|
$state->{players} = [];
|
||||||
return $state;
|
return $state;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user