mirror of
https://github.com/pragma-/pbot.git
synced 2024-11-23 04:19:27 +01:00
Spinach: Improve normalization, and laxen validation
This commit is contained in:
parent
16e4a0d1e4
commit
e2f9810d91
@ -1048,29 +1048,35 @@ sub normalize_text {
|
|||||||
|
|
||||||
if ($word =~ m/^\d{4}$/ and $word >= 1700 and $word <= 2100) {
|
if ($word =~ m/^\d{4}$/ and $word >= 1700 and $word <= 2100) {
|
||||||
$newword = year2en($word);
|
$newword = year2en($word);
|
||||||
} elsif ($word =~ m/^\d+$/) {
|
} elsif ($word =~ m/^-?\d+$/) {
|
||||||
$newword = num2en($word);
|
$newword = num2en($word);
|
||||||
|
|
||||||
if (defined $punct and $punct eq '%') {
|
if (defined $punct and $punct eq '%') {
|
||||||
$newword .= " percent";
|
$newword .= " percent";
|
||||||
$punct = undef;
|
$punct = undef;
|
||||||
}
|
}
|
||||||
} elsif ($word =~ m/^(\d+)(?:st|nd|rd|th)$/i) {
|
} elsif ($word =~ m/^(-?\d+)(?:st|nd|rd|th)$/i) {
|
||||||
$newword = num2en_ordinal($1);
|
$newword = num2en_ordinal($1);
|
||||||
} elsif ($word =~ m/^\$(\d+)(\.\d+)?$/i) {
|
} elsif ($word =~ m/^(-)?\$(\d+)?(\.\d+)?$/i) {
|
||||||
my ($dollars, $cents) = ($1, $2);
|
my ($neg, $dollars, $cents) = ($1, $2, $3);
|
||||||
$word = num2en($dollars);
|
$newword = '';
|
||||||
$newword = "$word " . ($dollars == 1 ? "dollar" : "dollars");
|
$dollars = "$neg$dollars" if defined $neg and defined $dollars;
|
||||||
|
|
||||||
|
if (defined $dollars) {
|
||||||
|
$word = num2en($dollars);
|
||||||
|
$newword = "$word " . (abs $dollars == 1 ? "dollar" : "dollars");
|
||||||
|
}
|
||||||
|
|
||||||
if (defined $cents) {
|
if (defined $cents) {
|
||||||
$cents =~ s/^\.0*//;
|
$cents =~ s/^\.0*//;
|
||||||
|
$cents = "$neg$cents" if defined $neg and not defined $dollars;
|
||||||
$word = num2en($cents);
|
$word = num2en($cents);
|
||||||
$newword .= " and $word cent" if $cents == 1;
|
$newword .= " and " if defined $dollars;
|
||||||
$newword .= " and $word cents" if $cents > 1;
|
$newword .= (abs $cents == 1 ? "$word cent" : "$word cents");
|
||||||
}
|
}
|
||||||
} elsif ($word =~ m/^(\d+\.\d+)(?:st|nd|rd|th)?$/i) {
|
} elsif ($word =~ m/^(-?\d*\.\d+)(?:st|nd|rd|th)?$/i) {
|
||||||
$newword = num2en($1);
|
$newword = num2en($1);
|
||||||
} elsif ($word =~ m{^(\d+\s*/\s*\d+)(?:st|nd|rd|th)?$}i) {
|
} elsif ($word =~ m{^(-?\d+\s*/\s*-?\d+)(?:st|nd|rd|th)?$}i) {
|
||||||
$newword = fraction2words($1);
|
$newword = fraction2words($1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1102,6 +1108,7 @@ sub validate_lie {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
=cut
|
||||||
$count = 0;
|
$count = 0;
|
||||||
foreach my $word (keys %lie_words) {
|
foreach my $word (keys %lie_words) {
|
||||||
if (exists $truth_words{$word}) {
|
if (exists $truth_words{$word}) {
|
||||||
@ -1112,6 +1119,7 @@ sub validate_lie {
|
|||||||
if ($count == $lie_word_count) {
|
if ($count == $lie_word_count) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
=cut
|
||||||
|
|
||||||
my $stripped_truth = $truth;
|
my $stripped_truth = $truth;
|
||||||
$stripped_truth =~ s/(?:\s|\p{PosixPunct})+//g;
|
$stripped_truth =~ s/(?:\s|\p{PosixPunct})+//g;
|
||||||
|
Loading…
Reference in New Issue
Block a user