3
0
mirror of https://github.com/pragma-/pbot.git synced 2025-10-23 03:27:32 +02:00
pbot/lib/PBot/Plugin/FuncBuiltins.pm
2025-10-21 01:23:01 -07:00

419 lines
11 KiB
Perl

# File: FuncBuiltins.pm
#
# Purpose: Registers the basic built-in Functions
# SPDX-FileCopyrightText: 2020-2023 Pragmatic Software <pragma78@gmail.com>
# SPDX-License-Identifier: MIT
package PBot::Plugin::FuncBuiltins;
use parent 'PBot::Plugin::Base';
use PBot::Imports;
use PBot::Core::Utils::Indefinite;
use Lingua::EN::Tagger;
use URI::Escape qw/uri_escape_utf8/;
use JSON::XS;
sub initialize($self, %conf) {
$self->{pbot}->{functions}->register(
'title',
{
desc => 'Title-cases text',
usage => 'title <text>',
subref => sub { $self->func_title(@_) }
}
);
$self->{pbot}->{functions}->register(
'ucfirst',
{
desc => 'Uppercases first character',
usage => 'ucfirst <text>',
subref => sub { $self->func_ucfirst(@_) }
}
);
$self->{pbot}->{functions}->register(
'uc',
{
desc => 'Uppercases all characters',
usage => 'uc <text>',
subref => sub { $self->func_uc(@_) }
}
);
$self->{pbot}->{functions}->register(
'lc',
{
desc => 'Lowercases all characters',
usage => 'lc <text>',
subref => sub { $self->func_lc(@_) }
}
);
$self->{pbot}->{functions}->register(
'unquote',
{
desc => 'removes unescaped surrounding quotes and strips escapes from escaped quotes',
usage => 'unquote <text>',
subref => sub { $self->func_unquote(@_) }
}
);
$self->{pbot}->{functions}->register(
'unescape',
{
desc => 'removes unescaped escapes',
usage => 'unescape <text>',
subref => sub { $self->func_unescape(@_) }
}
);
$self->{pbot}->{functions}->register(
'shquote',
{
desc => 'quotes text for sh invocation',
usage => 'shquote <text>',
subref => sub { $self->func_shquote(@_) }
}
);
$self->{pbot}->{functions}->register(
'quotemeta',
{
desc => 'escapes/quotes metacharacters',
usage => 'quotemeta <text>',
subref => sub { $self->func_quotemeta(@_) }
}
);
$self->{pbot}->{functions}->register(
'uri_escape',
{
desc => 'percent-encode unsafe URI characters',
usage => 'uri_escape <text>',
subref => sub { $self->func_uri_escape(@_) }
}
);
$self->{pbot}->{functions}->register(
'ana',
{
desc => 'fix-up a/an article at front of text',
usage => 'ana <text>',
subref => sub { $self->func_ana(@_) }
}
);
$self->{pbot}->{functions}->register(
'maybe-the',
{
desc => 'prepend "the" in front of text depending on the part-of-speech of the first word in text',
usage => 'maybe-the <text>',
subref => sub { $self->func_maybe_the(@_) }
}
);
$self->{pbot}->{functions}->register(
'maybe-to',
{
desc => 'prepend "to" in front of text depending on the part-of-speech of the first word in text',
usage => 'maybe-to <text>',
subref => sub { $self->func_maybe_to(@_) }
}
);
$self->{pbot}->{functions}->register(
'maybe-on',
{
desc => 'prepend "on" in front of text depending on the part-of-speech of the first word in text',
usage => 'maybe-on <text>',
subref => sub { $self->func_maybe_on(@_) }
}
);
$self->{pbot}->{functions}->register(
'length',
{
desc => 'print length of text',
usage => 'length <text>',
subref => sub { $self->func_length(@_) }
}
);
$self->{pbot}->{functions}->register(
'jsonval',
{
desc => 'extract a JSON value from a JSON structure',
usage => 'jsonval <member> <json>; e.g. jsonval .a.b[1] {"a": {"b": [10, 20, 30]}}',
subref => sub { $self->func_jsonval(@_) }
}
);
$self->{tagger} = Lingua::EN::Tagger->new;
}
sub unload($self) {
$self->{pbot}->{functions}->unregister('title');
$self->{pbot}->{functions}->unregister('ucfirst');
$self->{pbot}->{functions}->unregister('uc');
$self->{pbot}->{functions}->unregister('lc');
$self->{pbot}->{functions}->unregister('unquote');
$self->{pbot}->{functions}->unregister('unescape');
$self->{pbot}->{functions}->unregister('shquote');
$self->{pbot}->{functions}->unregister('quotemeta');
$self->{pbot}->{functions}->unregister('uri_escape');
$self->{pbot}->{functions}->unregister('ana');
$self->{pbot}->{functions}->unregister('maybe-the');
$self->{pbot}->{functions}->unregister('maybe-to');
$self->{pbot}->{functions}->unregister('maybe-on');
$self->{pbot}->{functions}->unregister('length');
$self->{pbot}->{functions}->unregister('jsonval');
}
sub func_unquote($self, @rest) {
my $text = "@rest";
$text =~ s/^"(.*?)(?<!\\)"$/$1/ || $text =~ s/^'(.*?)(?<!\\)'$/$1/;
$text =~ s/(?<!\\)\\'/'/g;
$text =~ s/(?<!\\)\\"/"/g;
return $text;
}
sub func_unescape($self, @rest) {
my $text = "@rest";
$text =~ s/(?<!\\)\\//g;
return $text;
}
sub func_title($self, @rest) {
my $text = "@rest";
$text = ucfirst lc $text;
$text =~ s/ (\w)/' ' . uc $1/ge;
return $text;
}
sub func_ucfirst($self, @rest) {
my $text = "@rest";
my ($word) = $text =~ m/^\s*([^',.;: ]+)/;
# don't ucfirst on nicks
if ($self->{pbot}->{nicklist}->is_present_any_channel($word)) {
return $text;
}
return ucfirst $text;
}
sub func_uc($self, @rest) {
my $text = "@rest";
return uc $text;
}
sub func_lc($self, @rest) {
my $text = "@rest";
return lc $text;
}
sub func_shquote($self, @rest) {
my $text = "@rest";
$text =~ s/'/'"'"'/g;
return "'$text'";
}
sub func_quotemeta($self, @rest) {
my $text = "@rest";
return quotemeta $text;
}
sub func_uri_escape($self, @rest) {
my $text = "@rest";
return uri_escape_utf8($text);
}
sub func_ana($self, @rest) {
my $text = "@rest";
if ($text =~ s/\b(an?)(\s+)//i) {
my ($article, $spaces) = ($1, $2);
my $fixed_article = select_indefinite_article $text;
if ($article eq 'AN') {
$fixed_article = uc $fixed_article;
} elsif ($article eq 'An' or $article eq 'A') {
$fixed_article = ucfirst $fixed_article;
}
$text = $fixed_article . $spaces . $text;
}
return $text;
}
sub func_maybe_the($self, @rest) {
my $text = "@rest";
my ($word) = $text =~ m/^\s*([^',.;: ]+)/;
# don't prepend if a proper-noun nick follows
if ($self->{pbot}->{nicklist}->is_present_any_channel($word)) {
return $text;
}
# special-case some indefinite nouns that Lingua::EN::Tagger treats as plain nouns
if ($word =~ m/(some|any|every|no)(thing|one|body|how|way|where|when|time|place)/i) {
return $text;
}
my $tagged = $self->{tagger}->add_tags($word);
if ($tagged !~ m/^\s*<(?:det|prps?|cd|in|nnp|to|rb|wdt|rbr|jjr)>/) {
$text = "the $text";
}
return $text;
}
sub func_maybe_to($self, @rest) {
my $text = "@rest";
$text =~ s/^to (?:the )?//;
my ($word) = $text =~ m/^\s*([^',.;: ]+)/;
if ($self->{pbot}->{nicklist}->is_present_any_channel($word)) {
return "to $text";
}
# special-case some indefinite nouns that Lingua::EN::Tagger treats as plain nouns
if ($word =~ m/(some|any|every|no)(thing|one|body|how|way|where|when|time|place)/i) {
return "to $text";
}
my $tagged = $self->{tagger}->add_tags($word);
if ($tagged !~ m/^\s*<(?:det|prps?|cd|in|nnp|to|rb|wdt|rbr|jjr)>/) {
$text = "to the $text";
} else {
unless ($tagged =~ m/^\s*<(?:in)>/) {
$text = "to $text";
}
}
return $text;
}
sub func_maybe_on($self, @rest) {
my $text = "@rest";
$text =~ s/^on (?:the )?//;
my ($word) = $text =~ m/^\s*([^',.;: ]+)/;
if ($self->{pbot}->{nicklist}->is_present_any_channel($word)) {
return "on $text";
}
# special-case some indefinite nouns that Lingua::EN::Tagger treats as plain nouns
if ($word =~ m/(?:some|any|every|no)(?:thing|one|body|how|way|where|when|time|place)/i) {
return "on $text";
}
my $tagged = $self->{tagger}->add_tags($word);
if ($tagged !~ m/^\s*<(?:det|prps?|cd|in|nnp|to|rb|wdt|rbr|jjr)>/) {
$text = "on the $text";
} else {
unless ($tagged =~ m/^\s*<(?:in)>/) {
$text = "on $text";
}
}
return $text;
}
sub func_length($self, @rest) {
my $text = "@rest";
my $count = 0;
while ($text =~ /\X/g) { $count++ }; # count graphemes
return $count;
}
sub func_jsonval($self, @rest) {
my $member = shift @rest;
my $text = "@rest";
if (!defined $member || !length $member) {
return 'Usage: jsonval <member> <json>; e.g. jsonval .a.b[1] {"a": {"b": [10, 20, 30]}}';
}
my $h = eval {
my $decoder = JSON::XS->new;
return $decoder->decode($text);
};
if (my $exception = $@) {
$exception =~ s/(.*) at .*/$1/;
return $exception;
}
my ($memb) = $member =~ m/^([^\[.]+)/;
if (defined $memb && length $memb) {
$h = eval {
return $h->{$memb};
};
if (my $exception = $@) {
$exception =~ s/(.*) at .*/$1/;
return $exception;
}
}
my $result = eval {
while ($member =~ m/
(?:
\['(?<key>(?:[^'\\]|\\.)*)'\]
| \["(?<key>(?:[^"\\]|\\.)*)"\]
| \[(?<index>[^\]]+)\]
| \.'(?<key>(?:[^'\\]|\\.)*)'
| \."(?<key>(?:[^"\\]|\\.)*)"
| \.(?<key>[^\[\.]+)
| (?<invalid>.)
)
/gx)
{
if (defined $+{invalid}) {
return "Unexpected $+{invalid} at position $-[0]";
}
if (defined $+{index}) {
my $index = $+{index};
if ($index !~ /^\d+$/) {
return "Invalid index $index at position " . ($-[0] + 1);
}
$h = $h->[$index];
}
if (defined $+{key}) {
my $key = $+{key};
$key =~ s/(?<!\\)\\//g;
$h = $h->{$key};
}
if (not defined $h) {
return 'null';
}
}
};
if (my $exception = $@) {
$exception =~ s/(.*) at .*/$1/;
return $exception;
}
if (defined $result && length $result) {
return $result;
}
if (not defined $h) {
return 'null';
}
if (ref $h eq 'HASH' || ref $h eq 'ARRAY') {
my $encoder = JSON::XS->new;
return $encoder->space_after->encode($h);
}
return $h;
}
1;