pbot/lib/PBot/Plugin/FuncBuiltins.pm

# File: FuncBuiltins.pm
#
# Purpose: Registers the basic built-in Functions

# SPDX-FileCopyrightText: 2020-2023 Pragmatic Software <pragma78@gmail.com>
# SPDX-License-Identifier: MIT

package PBot::Plugin::FuncBuiltins;
use parent 'PBot::Plugin::Base';

use PBot::Imports;

use PBot::Core::Utils::Indefinite;

use Lingua::EN::Tagger;
use URI::Escape qw/uri_escape_utf8/;

# Registers every built-in function with the bot's function registry
# ($self->{pbot}->{functions}) and creates the Lingua::EN::Tagger instance
# that func_maybe_the uses for part-of-speech tagging.
#
# %conf is accepted but not used here.
sub initialize($self, %conf) {
    # one row per function: registered name, description, usage, handler method
    my @builtins = (
        ['title',      'Title-cases text',                     'title <text>',      'func_title'],
        ['ucfirst',    'Uppercases first character',           'ucfirst <text>',    'func_ucfirst'],
        ['uc',         'Uppercases all characters',            'uc <text>',         'func_uc'],
        ['lc',         'Lowercases all characters',            'lc <text>',         'func_lc'],
        ['unquote',    'removes unescaped surrounding quotes and strips escapes from escaped quotes', 'unquote <text>', 'func_unquote'],
        ['uri_escape', 'percent-encode unsafe URI characters', 'uri_escape <text>', 'func_uri_escape'],
        ['ana',        'fix-up a/an article at front of text', 'ana <text>',        'func_ana'],
        ['maybe-the',  'prepend "the" in front of text depending on the part-of-speech of the first word in text', 'maybe-the <text>', 'func_maybe_the'],
    );

    my $registry = $self->{pbot}->{functions};

    foreach my $builtin (@builtins) {
        my ($name, $desc, $usage, $handler) = @$builtin;

        $registry->register(
            $name,
            {
                desc   => $desc,
                usage  => $usage,
                # forward the call to the matching func_* method on this plugin
                subref => sub { $self->$handler(@_) },
            }
        );
    }

    $self->{tagger} = Lingua::EN::Tagger->new;
}

# Unregisters every function that initialize() registered, in the same order.
sub unload($self) {
    my $registry = $self->{pbot}->{functions};

    $registry->unregister($_) foreach qw(title ucfirst uc lc unquote uri_escape ana);

    # the final call stays in tail position so unload's return value is
    # still whatever the last unregister call returns
    $registry->unregister('maybe-the');
}

# Removes one pair of unescaped surrounding quotes (a double-quote pair is
# tried first, a single-quote pair only if that did not match) and then
# drops the backslash from backslash-escaped quote characters.
sub func_unquote($self, @rest) {
    my $unquoted = "@rest";

    # fall back to single quotes only when no double-quote pair was stripped
    unless ($unquoted =~ s/^"(.*?)(?<!\\)"$/$1/) {
        $unquoted =~ s/^'(.*?)(?<!\\)'$/$1/;
    }

    # \' -> ' and \" -> ", unless that backslash is itself preceded by a backslash
    $unquoted =~ s/(?<!\\)\\'/'/g;
    $unquoted =~ s/(?<!\\)\\"/"/g;

    return $unquoted;
}

# Title-cases text: lowercases everything, uppercases the first character,
# then uppercases each word character that directly follows a space.
sub func_title($self, @rest) {
    my $titled = ucfirst lc "@rest";

    # \U uppercases the captured character in the replacement
    $titled =~ s/ (\w)/ \U$1/g;

    return $titled;
}

# Uppercases the first character of the text, unless the leading word is
# reported by the nicklist as a nick present in any channel, in which case
# the text is returned untouched.
#
# Returns the (possibly modified) text.
sub func_ucfirst($self, @rest) {
    my $text = "@rest";

    # leading word: everything up to the first quote, punctuation or space;
    # undef when the text is empty or starts with one of those characters
    my ($word) = $text =~ m/^\s*([^',.;: ]+)/;

    # don't ucfirst on nicks (and never hand an undefined word to the
    # nicklist lookup when there is no leading word at all)
    if (defined $word and $self->{pbot}->{nicklist}->is_present_any_channel($word)) {
        return $text;
    }

    return ucfirst $text;
}

# Joins the arguments into one string and returns it in upper case.
sub func_uc($self, @rest) {
    return uc "@rest";
}

# Joins the arguments into one string and returns it in lower case.
sub func_lc($self, @rest) {
    return lc "@rest";
}

# Joins the arguments into one string and returns the result of passing it
# to uri_escape_utf8 (imported from URI::Escape).
sub func_uri_escape($self, @rest) {
    return uri_escape_utf8("@rest");
}

# Fixes up an "a"/"an" article at the front of the text.
#
# When the text begins (after optional whitespace) with "a" or "an" followed
# by whitespace, that article is replaced with whatever
# select_indefinite_article returns for the remaining text; the original
# spacing and the casing style of the old article are kept:
#   AN        -> replacement is fully uppercased
#   An or A   -> replacement gets an uppercase first letter
#   otherwise -> replacement is used as returned
#
# Text that does not start with an article is returned unchanged. The match
# is anchored to the start on purpose: an article found in the middle of the
# text must not be removed and re-attached at the front.
sub func_ana($self, @rest) {
    my $text = "@rest";

    if ($text =~ s/^(\s*)(an?)(\s+)//i) {
        my ($leading, $article, $spaces) = ($1, $2, $3);

        # $text now holds only what followed the article
        my $fixed_article = select_indefinite_article($text);

        if ($article eq 'AN') {
            $fixed_article = uc $fixed_article;
        } elsif ($article eq 'An' or $article eq 'A') {
            $fixed_article = ucfirst $fixed_article;
        }

        $text = $leading . $fixed_article . $spaces . $text;
    }

    return $text;
}

# Prepends "the " to the text depending on the part-of-speech tag that
# $self->{tagger} assigns to the first word.
#
# The text is returned unchanged when:
#   - it has no leading word (empty, or starts with quote/punctuation),
#   - the leading word is reported by the nicklist as a present nick,
#   - the leading word contains an indefinite compound such as
#     "something", "anyone", "nowhere",
#   - the tagger output starts with one of the tags listed in the final
#     pattern (det, prp, prps, cd, in, nnp, to, rb, wdt, rbr, jjr).
# Otherwise "the " is prepended.
sub func_maybe_the($self, @rest) {
    my $text = "@rest";

    # leading word: everything up to the first quote, punctuation or space
    my ($word) = $text =~ m/^\s*([^',.;: ]+)/;

    # nothing to classify: leave the text alone rather than passing an
    # undefined word to the nicklist, the pattern match and the tagger
    return $text if not defined $word;

    # don't prepend "the" if a proper-noun nick follows
    if ($self->{pbot}->{nicklist}->is_present_any_channel($word)) {
        return $text;
    }

    # special-case some indefinite nouns that Lingua::EN::Tagger treats as plain nouns
    if ($word =~ m/(some|any|every|no)(thing|one|body|how|way|where|when|time|place)/i) {
        return $text;
    }

    my $tagged = $self->{tagger}->add_tags($word);

    # only words whose tag is not in this list get the article
    if ($tagged !~ m/^\s*<(?:det|prps?|cd|in|nnp|to|rb|wdt|rbr|jjr)>/) {
        $text = "the $text";
    }

    return $text;
}

1;
Add FuncBuiltins plugin 2020-02-14 07:37:09 +01:00			`# File: FuncBuiltins.pm`
			`#`
			`# Purpose: Registers the basic built-in Functions`

Update copyright 2023-02-21 06:31:52 +01:00			`# SPDX-FileCopyrightText: 2020-2023 Pragmatic Software <pragma78@gmail.com>`
Relicense under MIT 2021-07-11 00:00:22 +02:00			`# SPDX-License-Identifier: MIT`
Add FuncBuiltins plugin 2020-02-14 07:37:09 +01:00
Make directory structure more Perlish 2021-07-14 04:45:56 +02:00			`package PBot::Plugin::FuncBuiltins;`
			`use parent 'PBot::Plugin::Base';`
Add FuncBuiltins plugin 2020-02-14 07:37:09 +01:00
Consolidate imports into PBot::Imports 2021-06-19 06:23:34 +02:00			`use PBot::Imports;`
Add FuncBuiltins plugin 2020-02-14 07:37:09 +01:00
Plugin/FuncBuiltins: Add `ana` function to explicitly fix-up a/an articles 2021-08-23 22:36:11 +02:00			`use PBot::Core::Utils::Indefinite;`

Plugin/FuncBuiltin: add `maybe-the` function The `maybe-the` function examines the argument's part-of-speech classification (noun, verb, etc) to determine whether to prepend the word "the". 2022-07-08 18:11:56 +02:00			`use Lingua::EN::Tagger;`
Plugin/FuncBuiltins: Add `ana` function to explicitly fix-up a/an articles 2021-08-23 22:36:11 +02:00			`use URI::Escape qw/uri_escape_utf8/;`

Update plugins to use subroutine signatures 2023-04-14 02:01:23 +02:00			`sub initialize($self, %conf) {`
Tidy things up 2020-02-15 23:38:32 +01:00			`$self->{pbot}->{functions}->register(`
			`'title',`
			`{`
			`desc => 'Title-cases text',`
			`usage => 'title <text>',`
			`subref => sub { $self->func_title(@_) }`
			`}`
			`);`
			`$self->{pbot}->{functions}->register(`
			`'ucfirst',`
			`{`
			`desc => 'Uppercases first character',`
			`usage => 'ucfirst <text>',`
			`subref => sub { $self->func_ucfirst(@_) }`
			`}`
			`);`
			`$self->{pbot}->{functions}->register(`
			`'uc',`
			`{`
			`desc => 'Uppercases all characters',`
			`usage => 'uc <text>',`
			`subref => sub { $self->func_uc(@_) }`
			`}`
			`);`
			`$self->{pbot}->{functions}->register(`
			`'lc',`
			`{`
			`desc => 'Lowercases all characters',`
			`usage => 'lc <text>',`
			`subref => sub { $self->func_lc(@_) }`
			`}`
			`);`
			`$self->{pbot}->{functions}->register(`
			`'unquote',`
			`{`
			`desc => 'removes unescaped surrounding quotes and strips escapes from escaped quotes',`
			`usage => 'unquote <text>',`
			`subref => sub { $self->func_unquote(@_) }`
			`}`
			`);`
			`$self->{pbot}->{functions}->register(`
			`'uri_escape',`
			`{`
			`desc => 'percent-encode unsafe URI characters',`
			`usage => 'uri_escape <text>',`
			`subref => sub { $self->func_uri_escape(@_) }`
			`}`
			`);`
Plugin/FuncBuiltins: Add `ana` function to explicitly fix-up a/an articles 2021-08-23 22:36:11 +02:00			`$self->{pbot}->{functions}->register(`
			`'ana',`
			`{`
			`desc => 'fix-up a/an article at front of text',`
			`usage => 'ana <text>',`
			`subref => sub { $self->func_ana(@_) }`
			`}`
			`);`
Plugin/FuncBuiltin: add `maybe-the` function The `maybe-the` function examines the argument's part-of-speech classification (noun, verb, etc) to determine whether to prepend the word "the". 2022-07-08 18:11:56 +02:00			`$self->{pbot}->{functions}->register(`
			`'maybe-the',`
			`{`
Revert "FuncBuiltins: maybe-the: do it for all words" This reverts commit 7093fa0ae4a81fe8cc6fd8ff4a4d2e832f024db0. 2022-08-03 20:25:05 +02:00			`desc => 'prepend "the" in front of text depending on the part-of-speech of the first word in text',`
Plugin/FuncBuiltin: add `maybe-the` function The `maybe-the` function examines the argument's part-of-speech classification (noun, verb, etc) to determine whether to prepend the word "the". 2022-07-08 18:11:56 +02:00			`usage => 'maybe-the <text>',`
			`subref => sub { $self->func_maybe_the(@_) }`
			`}`
			`);`

			`$self->{tagger} = Lingua::EN::Tagger->new;`
Add FuncBuiltins plugin 2020-02-14 07:37:09 +01:00			`}`

Update plugins to use subroutine signatures 2023-04-14 02:01:23 +02:00			`sub unload($self) {`
Tidy things up 2020-02-15 23:38:32 +01:00			`$self->{pbot}->{functions}->unregister('title');`
			`$self->{pbot}->{functions}->unregister('ucfirst');`
			`$self->{pbot}->{functions}->unregister('uc');`
			`$self->{pbot}->{functions}->unregister('lc');`
			`$self->{pbot}->{functions}->unregister('unquote');`
			`$self->{pbot}->{functions}->unregister('uri_escape');`
Plugin/FuncBuiltins: Add `ana` function to explicitly fix-up a/an articles 2021-08-23 22:36:11 +02:00			`$self->{pbot}->{functions}->unregister('ana');`
Plugin/FuncBuiltin: add `maybe-the` function The `maybe-the` function examines the argument's part-of-speech classification (noun, verb, etc) to determine whether to prepend the word "the". 2022-07-08 18:11:56 +02:00			`$self->{pbot}->{functions}->unregister('maybe-the');`
Add FuncBuiltins plugin 2020-02-14 07:37:09 +01:00			`}`

Update plugins to use subroutine signatures 2023-04-14 02:01:23 +02:00			`sub func_unquote($self, @rest) {`
			`my $text = "@rest";`
Tidy things up 2020-02-15 23:38:32 +01:00			`$text =~ s/^"(.*?)(?<!\\)"$/$1/ \|\| $text =~ s/^'(.*?)(?<!\\)'$/$1/;`
			`$text =~ s/(?<!\\)\\'/'/g;`
			`$text =~ s/(?<!\\)\\"/"/g;`
			`return $text;`
Add FuncBuiltins plugin 2020-02-14 07:37:09 +01:00			`}`

Update plugins to use subroutine signatures 2023-04-14 02:01:23 +02:00			`sub func_title($self, @rest) {`
			`my $text = "@rest";`
Tidy things up 2020-02-15 23:38:32 +01:00			`$text = ucfirst lc $text;`
			`$text =~ s/ (\w)/' ' . uc $1/ge;`
			`return $text;`
Add FuncBuiltins plugin 2020-02-14 07:37:09 +01:00			`}`

Update plugins to use subroutine signatures 2023-04-14 02:01:23 +02:00			`sub func_ucfirst($self, @rest) {`
			`my $text = "@rest";`
Plugin/FuncBuiltins: do not ucfirst on nicks; add more POS to maybe-the 2022-07-10 04:09:27 +02:00
			`my ($word) = $text =~ m/^\s*([^',.;: ]+)/;`

			`# don't ucfirst on nicks`
			`if ($self->{pbot}->{nicklist}->is_present_any_channel($word)) {`
			`return $text;`
			`}`

Tidy things up 2020-02-15 23:38:32 +01:00			`return ucfirst $text;`
Add FuncBuiltins plugin 2020-02-14 07:37:09 +01:00			`}`

Update plugins to use subroutine signatures 2023-04-14 02:01:23 +02:00			`sub func_uc($self, @rest) {`
			`my $text = "@rest";`
Tidy things up 2020-02-15 23:38:32 +01:00			`return uc $text;`
Add FuncBuiltins plugin 2020-02-14 07:37:09 +01:00			`}`

Update plugins to use subroutine signatures 2023-04-14 02:01:23 +02:00			`sub func_lc($self, @rest) {`
			`my $text = "@rest";`
Tidy things up 2020-02-15 23:38:32 +01:00			`return lc $text;`
Add FuncBuiltins plugin 2020-02-14 07:37:09 +01:00			`}`

Update plugins to use subroutine signatures 2023-04-14 02:01:23 +02:00			`sub func_uri_escape($self, @rest) {`
			`my $text = "@rest";`
Tidy things up 2020-02-15 23:38:32 +01:00			`return uri_escape_utf8($text);`
Add uri_escape to FuncBuiltins 2020-02-14 08:22:00 +01:00			`}`

Update plugins to use subroutine signatures 2023-04-14 02:01:23 +02:00			`sub func_ana($self, @rest) {`
			`my $text = "@rest";`
Plugin/FuncBuiltins: Add `ana` function to explicitly fix-up a/an articles 2021-08-23 22:36:11 +02:00
			`if ($text =~ s/\b(an?)(\s+)//i) {`
			`my ($article, $spaces) = ($1, $2);`
			`my $fixed_article = select_indefinite_article $text;`

			`if ($article eq 'AN') {`
			`$fixed_article = uc $fixed_article;`
			`} elsif ($article eq 'An' or $article eq 'A') {`
			`$fixed_article = ucfirst $fixed_article;`
			`}`

			`$text = $fixed_article . $spaces . $text;`
			`}`

			`return $text;`
			`}`

Update plugins to use subroutine signatures 2023-04-14 02:01:23 +02:00			`sub func_maybe_the($self, @rest) {`
			`my $text = "@rest";`
Plugin/FuncBuiltin: add `maybe-the` function The `maybe-the` function examines the argument's part-of-speech classification (noun, verb, etc) to determine whether to prepend the word "the". 2022-07-08 18:11:56 +02:00
Revert "FuncBuiltins: maybe-the: do it for all words" This reverts commit 7093fa0ae4a81fe8cc6fd8ff4a4d2e832f024db0. 2022-08-03 20:25:05 +02:00			`my ($word) = $text =~ m/^\s*([^',.;: ]+)/;`
Plugin/FuncBuiltin: add `maybe-the` function The `maybe-the` function examines the argument's part-of-speech classification (noun, verb, etc) to determine whether to prepend the word "the". 2022-07-08 18:11:56 +02:00
Revert "FuncBuiltins: maybe-the: do it for all words" This reverts commit 7093fa0ae4a81fe8cc6fd8ff4a4d2e832f024db0. 2022-08-03 20:25:05 +02:00			`# don't prepend "the" if a proper-noun nick follows`
			`if ($self->{pbot}->{nicklist}->is_present_any_channel($word)) {`
			`return $text;`
			`}`
FuncBuiltins: maybe-the: special-case some indefinite nouns 2022-07-14 19:15:30 +02:00
Revert "FuncBuiltins: maybe-the: do it for all words" This reverts commit 7093fa0ae4a81fe8cc6fd8ff4a4d2e832f024db0. 2022-08-03 20:25:05 +02:00			`# special-case some indefinite nouns that Lingua::EN::Tagger treats as plain nouns`
			`if ($word =~ m/(some\|any\|every\|no)(thing\|one\|body\|how\|way\|where\|when\|time\|place)/i) {`
			`return $text;`
			`}`
Plugin/FuncBuiltin: add `maybe-the` function The `maybe-the` function examines the argument's part-of-speech classification (noun, verb, etc) to determine whether to prepend the word "the". 2022-07-08 18:11:56 +02:00
Revert "FuncBuiltins: maybe-the: do it for all words" This reverts commit 7093fa0ae4a81fe8cc6fd8ff4a4d2e832f024db0. 2022-08-03 20:25:05 +02:00			`my $tagged = $self->{tagger}->add_tags($word);`
FuncBuiltins: maybe-the: do it for all words 2022-08-03 14:19:45 +02:00
Revert "FuncBuiltins: maybe-the: do it for all words" This reverts commit 7093fa0ae4a81fe8cc6fd8ff4a4d2e832f024db0. 2022-08-03 20:25:05 +02:00			`if ($tagged !~ m/^\s*<(?:det\|prps?\|cd\|in\|nnp\|to\|rb\|wdt\|rbr\|jjr)>/) {`
			`$text = "the $text";`
Plugin/FuncBuiltin: add `maybe-the` function The `maybe-the` function examines the argument's part-of-speech classification (noun, verb, etc) to determine whether to prepend the word "the". 2022-07-08 18:11:56 +02:00			`}`

Revert "FuncBuiltins: maybe-the: do it for all words" This reverts commit 7093fa0ae4a81fe8cc6fd8ff4a4d2e832f024db0. 2022-08-03 20:25:05 +02:00			`return $text;`
Plugin/FuncBuiltin: add `maybe-the` function The `maybe-the` function examines the argument's part-of-speech classification (noun, verb, etc) to determine whether to prepend the word "the". 2022-07-08 18:11:56 +02:00			`}`

Add FuncBuiltins plugin 2020-02-14 07:37:09 +01:00			`1;`