3
0
mirror of https://github.com/pragma-/pbot.git synced 2024-11-22 20:09:43 +01:00
pbot/lib/PBot/Core/Interpreter.pm

1427 lines
48 KiB
Perl
Raw Normal View History

# File: Interpreter.pm
#
# Purpose: Main entry point to parse and interpret a string into bot
# commands and dispatch the commands to registered interpreters.
# Handles argument processing, command piping, command substitution,
# command splitting, command output processing such as truncating long
# text to web paste sites, etc.
2021-07-11 00:00:22 +02:00
# SPDX-FileCopyrightText: 2021 Pragmatic Software <pragma78@gmail.com>
# SPDX-License-Identifier: MIT
License project under MPL2 This patch adds the file LICENSE which is the verbatim copy of the Mozilla Public License Version 2.0 as retreived from https://www.mozilla.org/media/MPL/2.0/index.815ca599c9df.txt on 2017-03-05. This patch also places license headers for the MPL2 type A variant of the license header in the following files: PBot/AntiFlood.pm PBot/BanTracker.pm PBot/BlackList.pm PBot/BotAdminCommands.pm PBot/BotAdmins.pm PBot/ChanOpCommands.pm PBot/ChanOps.pm PBot/Channels.pm PBot/Commands.pm PBot/DualIndexHashObject.pm PBot/EventDispatcher.pm PBot/FactoidCommands.pm PBot/FactoidModuleLauncher.pm PBot/Factoids.pm PBot/HashObject.pm PBot/IRCHandlers.pm PBot/IgnoreList.pm PBot/IgnoreListCommands.pm PBot/Interpreter.pm PBot/LagChecker.pm PBot/Logger.pm PBot/MessageHistory.pm PBot/MessageHistory_SQLite.pm PBot/NickList.pm PBot/PBot.pm PBot/Plugins.pm PBot/Plugins/AntiAway.pm PBot/Plugins/AntiKickAutoRejoin.pm PBot/Plugins/AntiRepeat.pm PBot/Plugins/AntiTwitter.pm PBot/Plugins/AutoRejoin.pm PBot/Plugins/Counter.pm PBot/Plugins/Quotegrabs.pm PBot/Plugins/Quotegrabs/Quotegrabs_Hashtable.pm PBot/Plugins/Quotegrabs/Quotegrabs_SQLite.pm PBot/Plugins/UrlTitles.pm PBot/Plugins/_Example.pm PBot/Refresher.pm PBot/Registerable.pm PBot/Registry.pm PBot/RegistryCommands.pm PBot/SQLiteLogger.pm PBot/SQLiteLoggerLayer.pm PBot/SelectHandler.pm PBot/StdinReader.pm PBot/Timer.pm PBot/Utils/ParseDate.pm PBot/VERSION.pm build/update-version.pl modules/acronym.pl modules/ago.pl modules/c11std.pl modules/c2english.pl modules/c2english/CGrammar.pm modules/c2english/c2eng.pl modules/c99std.pl modules/cdecl.pl modules/cfaq.pl modules/cjeopardy/IRCColors.pm modules/cjeopardy/QStatskeeper.pm modules/cjeopardy/Scorekeeper.pm modules/cjeopardy/cjeopardy.pl modules/cjeopardy/cjeopardy_answer.pl modules/cjeopardy/cjeopardy_filter.pl modules/cjeopardy/cjeopardy_hint.pl modules/cjeopardy/cjeopardy_qstats.pl modules/cjeopardy/cjeopardy_scores.pl modules/cjeopardy/cjeopardy_show.pl modules/codepad.pl modules/compiler_block.pl modules/compiler_client.pl modules/compiler_vm/Diff.pm modules/compiler_vm/cc modules/compiler_vm/compiler_client.pl modules/compiler_vm/compiler_server.pl modules/compiler_vm/compiler_server_vbox_win32.pl modules/compiler_vm/compiler_server_watchdog.pl modules/compiler_vm/compiler_vm_client.pl modules/compiler_vm/compiler_vm_server.pl modules/compiler_vm/compiler_watchdog.pl modules/compiler_vm/languages/_c_base.pm modules/compiler_vm/languages/_default.pm modules/compiler_vm/languages/bash.pm modules/compiler_vm/languages/bc.pm modules/compiler_vm/languages/bf.pm modules/compiler_vm/languages/c11.pm modules/compiler_vm/languages/c89.pm modules/compiler_vm/languages/c99.pm modules/compiler_vm/languages/clang.pm modules/compiler_vm/languages/clang11.pm modules/compiler_vm/languages/clang89.pm modules/compiler_vm/languages/clang99.pm modules/compiler_vm/languages/clangpp.pm modules/compiler_vm/languages/clisp.pm modules/compiler_vm/languages/cpp.pm modules/compiler_vm/languages/freebasic.pm modules/compiler_vm/languages/go.pm modules/compiler_vm/languages/haskell.pm modules/compiler_vm/languages/java.pm modules/compiler_vm/languages/javascript.pm modules/compiler_vm/languages/ksh.pm modules/compiler_vm/languages/lua.pm modules/compiler_vm/languages/perl.pm modules/compiler_vm/languages/python.pm modules/compiler_vm/languages/python3.pm modules/compiler_vm/languages/qbasic.pm modules/compiler_vm/languages/scheme.pm modules/compiler_vm/languages/server/_c_base.pm modules/compiler_vm/languages/server/_default.pm modules/compiler_vm/languages/server/c11.pm modules/compiler_vm/languages/server/c89.pm modules/compiler_vm/languages/server/c99.pm modules/compiler_vm/languages/server/clang.pm modules/compiler_vm/languages/server/clang11.pm modules/compiler_vm/languages/server/clang89.pm modules/compiler_vm/languages/server/clang99.pm modules/compiler_vm/languages/server/cpp.pm modules/compiler_vm/languages/server/freebasic.pm modules/compiler_vm/languages/server/haskell.pm modules/compiler_vm/languages/server/java.pm modules/compiler_vm/languages/server/qbasic.pm modules/compiler_vm/languages/server/tendra.pm modules/compiler_vm/languages/sh.pm modules/compiler_vm/languages/tendra.pm modules/compliment modules/cstd.pl modules/define.pl modules/dice_roll.pl modules/excuse.sh modules/expand_macros.pl modules/fnord.pl modules/funnyish_quote.pl modules/g.pl modules/gdefine.pl modules/gen_cfacts.pl modules/gencstd.pl modules/get_title.pl modules/getcfact.pl modules/google.pl modules/gspy.pl modules/gtop10.pl modules/gtop15.pl modules/headlines.pl modules/horoscope modules/horrorscope modules/ideone.pl modules/insult.pl modules/love_quote.pl modules/man.pl modules/map.pl modules/math.pl modules/prototype.pl modules/qalc.pl modules/random_quote.pl modules/seen.pl modules/urban modules/weather.pl modules/wikipedia.pl pbot.pl pbot.sh It is highly recommended that this list of files is reviewed to ensure that all files are the copyright of the sole maintainer of the repository. If any files with license headers contain the intellectual property of anyone else, it is recommended that a request is made to revise this patch or that the explicit permission of the co-author is gained to allow for the license of the work to be changed. I (Tomasz Kramkowski), the contributor, take no responsibility for any legal action taken against the maintainer of this repository for incorrectly claiming copyright to any work not owned by the maintainer of this repository.
2017-03-05 22:33:31 +01:00
2021-07-21 07:44:51 +02:00
package PBot::Core::Interpreter;
use parent 'PBot::Core::Class', 'PBot::Core::Registerable';
2021-06-19 06:23:34 +02:00
use PBot::Imports;
2019-07-11 03:40:53 +02:00
2021-07-21 07:44:51 +02:00
use PBot::Core::MessageHistory::Constants ':all';
use PBot::Core::Utils::Indefinite;
2021-07-31 00:01:38 +02:00
use PBot::Core::Utils::ValidateString;
use Encode;
2021-07-31 00:01:38 +02:00
use Getopt::Long qw(GetOptionsFromArray);
use Time::Duration;
use Time::HiRes qw(gettimeofday);
use Unicode::Truncate;
sub initialize {
2020-02-15 23:38:32 +01:00
my ($self, %conf) = @_;
2021-07-21 07:44:51 +02:00
# PBot::Core::Interpreter can register multiple interpreter subrefs.
# See also: Commands::interpreter() and Factoids::interpreter()
2021-07-21 07:44:51 +02:00
$self->PBot::Core::Registerable::initialize(%conf);
2010-04-13 06:17:54 +02:00
# registry entry for maximum recursion depth
$self->{pbot}->{registry}->add_default('text', 'interpreter', 'max_recursion', 10);
2012-07-22 21:22:30 +02:00
}
2010-04-13 06:17:54 +02:00
# this is the main entry point for a message to be parsed into commands
# and to execute those commands and process their output
sub process_line {
my ($self, $from, $nick, $user, $host, $text, $is_command) = @_;
# lowercase `from` field for case-insensitivity
$from = lc $from;
# sanitize text a bit
$text =~ s/^\s+|\s+$//g;
$text = validate_string($text, 0);
# context object maintains contextual information about the state and
# processing of this message. this object is passed between various bot
# functions and interfaces, which may themselves add more fields.
my $context = {
from => $from, # source (channel, sender hostmask, 'stdin@pbot', etc)
nick => $nick, # nickname
user => $user, # username
host => $host, # hostname/ip address
hostmask => "$nick!$user\@$host", # full hostmask
text => $text, # message contents
};
# add hostmask to user/message tracking database and get their account id
my $message_account = $self->{pbot}->{messagehistory}->get_message_account($nick, $user, $host);
# add account id to context object
2020-05-02 05:59:51 +02:00
$context->{message_account} = $message_account;
2020-02-15 23:38:32 +01:00
# add message to message history as a chat message
$self->{pbot}->{messagehistory}->add_message($message_account, $context->{hostmask}, $from, $text, MSG_CHAT);
2020-02-15 23:38:32 +01:00
# look up channel-specific flood threshold settings from registry
my $flood_threshold = $self->{pbot}->{registry}->get_value($from, 'chat_flood_threshold');
my $flood_time_threshold = $self->{pbot}->{registry}->get_value($from, 'chat_flood_time_threshold');
2020-02-15 23:38:32 +01:00
# get general flood threshold settings if there are no channel-specific settings
$flood_threshold //= $self->{pbot}->{registry}->get_value('antiflood', 'chat_flood_threshold');
$flood_time_threshold //= $self->{pbot}->{registry}->get_value('antiflood', 'chat_flood_time_threshold');
# perform anti-flood processing on this message
$self->{pbot}->{antiflood}->check_flood(
$from, $nick, $user, $host, $text,
$flood_threshold, $flood_time_threshold,
MSG_CHAT,
$context
);
# get bot nickname
2020-02-15 23:38:32 +01:00
my $botnick = $self->{pbot}->{registry}->get_value('irc', 'botnick');
2017-12-09 04:28:08 +01:00
# get channel-specific bot trigger if available
my $bot_trigger = $self->{pbot}->{registry}->get_value($from, 'trigger');
2017-12-09 04:28:08 +01:00
# otherwise get general bot trigger
$bot_trigger //= $self->{pbot}->{registry}->get_value('general', 'trigger');
2017-12-09 04:28:08 +01:00
# get nick regex from registry entry
my $nick_regex = $self->{pbot}->{registry}->get_value('regex', 'nickname');
2017-12-09 04:28:08 +01:00
# preserve original text and parse $cmd_text for bot commands
2020-02-15 23:38:32 +01:00
my $cmd_text = $text;
$cmd_text =~ s/^\/me\s+//; # remove leading /me
# parse for bot command invocation
my @commands; # all commands parsed out of this text so far
my $command; # current command being parsed
my $embedded = 0; # was command embedded within a message, e.g.: "see the !{help xyz} about that"
my $nick_prefix = undef; # addressed nickname for prefixing output
my $processed = 0; # counts how many commands were successfully processed
# check if we should treat this entire text as a command
# (i.e., it came from /msg or was otherwise flagged as a command)
if ($is_command) {
$command = $cmd_text;
$command =~ s/^$bot_trigger//; # strip leading bot trigger, if any
# restore command if stripping bot trigger makes command empty
# (they wanted to invoke a command named after the trigger itself)
# TODO: this could potentially be confusing when trying to invoke
# commands that are sequential instances of the bot trigger, e.g.
# attempting to invoke a factoid named `...` while the bot trigger
# is `.` could now prove confusing via /msg or stdin. Might need
# to rethink this and just require bot trigger all the time ...
# but for now let's see how this goes and if people can figure it
# out with minimal confusion.
$command = $cmd_text if not length $command;
goto CHECK_EMBEDDED_CMD;
}
# otherwise try to parse any potential commands
if ($cmd_text =~ m/^\s*($nick_regex)[,:]?\s+$bot_trigger\{\s*(.+?)\s*\}\s*$/) {
# "somenick: !{command}"
goto CHECK_EMBEDDED_CMD;
} elsif ($cmd_text =~ m/^\s*$bot_trigger\{\s*(.+?)\s*\}\s*$/) {
# "!{command}"
goto CHECK_EMBEDDED_CMD;
} elsif ($cmd_text =~ m/^\s*($nick_regex)[,:]\s+$bot_trigger\s*(.+)$/) {
# "somenick: !command"
my $possible_nick_prefix = $1;
2020-02-15 23:38:32 +01:00
$command = $2;
# does somenick or similar exist in channel?
my $recipient = $self->{pbot}->{nicklist}->is_present_similar($from, $possible_nick_prefix);
if ($recipient) {
$nick_prefix = $recipient;
} else {
# disregard command if no such nick is present.
$self->{pbot}->{logger}->log("No similar nick for $possible_nick_prefix; disregarding command.\n");
2020-02-15 23:38:32 +01:00
return 0;
}
} elsif ($cmd_text =~ m/^$bot_trigger\s*(.+)$/) {
# "!command"
2020-02-15 23:38:32 +01:00
$command = $1;
} elsif ($cmd_text =~ m/^.?\s*$botnick\s*[[:punct:]]?\s+(.+)$/i) {
# "botnick: command"
2020-02-15 23:38:32 +01:00
$command = $1;
} elsif ($cmd_text =~ m/^(.+?),?\s*$botnick[?!.]*$/i) {
# "command, botnick?"
2020-02-15 23:38:32 +01:00
$command = $1;
2017-12-09 04:28:08 +01:00
}
2020-02-15 23:38:32 +01:00
# check for embedded commands
2017-12-09 04:28:08 +01:00
CHECK_EMBEDDED_CMD:
# if no command was parsed yet (or if we reached this point by one of the gotos above)
# then look for embedded commands, e.g.: "today is !{date} and the weather is !{weather}"
2020-02-15 23:38:32 +01:00
if (not defined $command or $command =~ m/^\{.*\}/) {
# check for an addressed nickname
2020-02-15 23:38:32 +01:00
if ($cmd_text =~ s/^\s*($nick_regex)[,:]\s+//) {
my $possible_nick_prefix = $1;
# does somenick or similar exist in channel?
my $recipient = $self->{pbot}->{nicklist}->is_present_similar($from, $possible_nick_prefix);
if ($recipient) {
$nick_prefix = $recipient;
}
2020-02-15 23:38:32 +01:00
}
# get max embed registry value
my $max_embed = $self->{pbot}->{registry}->get_value('interpreter', 'max_embed') // 3;
# extract embedded commands
for (my $count = 0; $count < $max_embed; $count++) {
2020-02-15 23:38:32 +01:00
my ($extracted, $rest) = $self->extract_bracketed($cmd_text, '{', '}', $bot_trigger);
# nothing to extract found, all done.
2020-02-15 23:38:32 +01:00
last if not length $extracted;
# move command text buffer forwards past extracted text
2020-02-15 23:38:32 +01:00
$cmd_text = $rest;
# trim surrounding whitespace
2020-02-15 23:38:32 +01:00
$extracted =~ s/^\s+|\s+$//g;
# add command to parsed commands.
2020-02-15 23:38:32 +01:00
push @commands, $extracted;
# set embedded flag
2020-02-15 23:38:32 +01:00
$embedded = 1;
}
} else {
# otherwise a single command has already been parsed.
# so, add the command to parsed commands.
2020-02-15 23:38:32 +01:00
push @commands, $command;
}
# set $context's command output recipient field
if ($nick_prefix) {
$context->{nickprefix} = $nick_prefix;
$context->{nickprefix_forced} = 1;
}
# set $context object's embedded flag
2021-07-24 03:26:45 +02:00
$context->{embedded} = $embedded;
# interpret all parsed commands
2020-02-15 23:38:32 +01:00
foreach $command (@commands) {
# check if user is ignored
# the `login` command gets a pass on the ignore filter
if ($command !~ /^login / and $self->{pbot}->{ignorelist}->is_ignored($from, "$nick!$user\@$host")) {
$self->{pbot}->{logger}->log("Disregarding command from ignored user $nick!$user\@$host in $from.\n");
return 1;
2020-02-15 23:38:32 +01:00
}
# update $context command field
2020-05-02 05:59:51 +02:00
$context->{command} = $command;
# reset $context's interpreter recursion depth counter
$context->{interpret_depth} = 0;
2020-02-15 23:38:32 +01:00
# interpet this command
2020-05-02 05:59:51 +02:00
$context->{result} = $self->interpret($context);
# handle command output
2020-05-02 05:59:51 +02:00
$self->handle_result($context);
# increment processed counter
2020-02-15 23:38:32 +01:00
$processed++;
}
# return number of commands processed
2020-02-15 23:38:32 +01:00
return $processed;
}
# main entry point to interpret/execute a bot command.
# takes a $context object containing contextual information about the
# command such as the channel, nick, user, host, command, etc.
2015-03-31 00:04:08 +02:00
sub interpret {
my ($self, $context) = @_;
# log command invocation
$self->{pbot}->{logger}->log("=== [$context->{interpret_depth}] Got command: "
. "($context->{from}) $context->{hostmask}: $context->{command}\n");
2020-02-15 23:38:32 +01:00
# debug flag to trace $context location and contents
2020-02-15 23:38:32 +01:00
if ($self->{pbot}->{registry}->get_value('general', 'debugcontext')) {
use Data::Dumper;
$Data::Dumper::Sortkeys = 1;
$self->{pbot}->{logger}->log("Interpreter::interpret\n");
2020-05-02 05:59:51 +02:00
$self->{pbot}->{logger}->log(Dumper $context);
}
2017-12-05 03:34:34 +01:00
# enforce recursion limit
if (++$context->{interpret_depth} > $self->{pbot}->{registry}->get_value('interpreter', 'max_recursion')) {
return "Too many levels of recursion, aborted.";
}
2020-02-15 23:38:32 +01:00
# sanity check the context fields, none of these should be missing
2020-05-02 05:59:51 +02:00
if (not defined $context->{nick} || not defined $context->{user} || not defined $context->{host} || not defined $context->{command}) {
$self->{pbot}->{logger}->log("Error: Interpreter::interpret: missing field(s)\n");
return '/me coughs weakly.'; # indicate that something went wrong
2020-02-15 23:38:32 +01:00
}
2017-11-17 22:53:23 +01:00
# check for a split command, e.g. "echo Hello ;;; echo world."
2020-05-02 05:59:51 +02:00
if ($context->{command} =~ m/^(.*?)\s*(?<!\\);;;\s*(.*)/ms) {
$context->{command} = $1; # command is the first half of the split
$context->{command_split} = $2; # store the rest of the split, potentially containing more splits
2020-02-15 23:38:32 +01:00
}
# convert command string to list of arguments
2020-05-02 05:59:51 +02:00
my $cmdlist = $self->make_args($context->{command});
$context->{cmdlist} = $cmdlist;
# create context command history if non-existent
if (not exists $context->{commands}) {
$context->{commands} = [];
}
# add command to context command history
2020-05-02 05:59:51 +02:00
push @{$context->{commands}}, $context->{command};
2020-02-15 23:38:32 +01:00
# parse the command into keyword, arguments and recipient
my ($keyword, $arguments, $recipient) = ('', '', undef);
2020-02-15 23:38:32 +01:00
if ($self->arglist_size($cmdlist) >= 4 and lc $cmdlist->[0] eq 'tell' and (lc $cmdlist->[2] eq 'about' or lc $cmdlist->[2] eq 'the')) {
# tell nick about/the cmd [args]; e.g. "tell somenick about malloc" or "tell somenick the date"
# split the list into two fields (keyword and remaining arguments)
# starting at the 4th element and preserving quotes
2020-02-15 23:38:32 +01:00
($keyword, $arguments) = $self->split_args($cmdlist, 2, 3, 1);
# 2nd element is the recipient
$recipient = $cmdlist->[1];
} elsif ($self->arglist_size($cmdlist) >= 3 and lc $cmdlist->[0] eq 'give') {
# give nick cmd [args]; e.g. "give somenick date"
# split the list into two fields (keyword and remaining arguments)
# starting at the 3rd element and preserving quotes
($keyword, $arguments) = $self->split_args($cmdlist, 2, 2, 1);
# 2nd element is the recipient
$recipient = $cmdlist->[1];
} else {
# normal command, split into keywords and arguments while preserving quotes
2020-02-15 23:38:32 +01:00
($keyword, $arguments) = $self->split_args($cmdlist, 2, 0, 1);
}
2020-02-15 23:38:32 +01:00
# limit keyword length (in bytes)
# TODO: make this a registry item
{
# lexical scope for use bytes
use bytes;
if (length $keyword > 128) {
$keyword = truncate_egc $keyword, 128; # safely truncate unicode strings
$self->{pbot}->{logger}->log("Truncating keyword to <= 128 bytes: $keyword\n");
}
}
# ensure we have a $keyword
if (not defined $keyword or not length $keyword) {
$self->{pbot}->{logger}->log("Error: Missing keyword; disregarding command\n");
return undef;
}
# ensure $arguments is a string if none were given
$arguments //= '';
if (defined $recipient) {
# ensure that the recipient is present in the channel
$recipient = $self->{pbot}->{nicklist}->is_present_similar($context->{from}, $recipient);
if ($recipient) {
# if present then set and force the nickprefix
$context->{nickprefix} = $recipient;
$context->{nickprefix_forced} = 1;
} else {
# otherwise discard nickprefix
delete $context->{nickprefix};
delete $context->{nickprefix_forced};
}
2020-02-15 23:38:32 +01:00
}
# parse out a substituted command
if ($arguments =~ m/(?<!\\)&\s*\{/) {
2020-02-15 23:38:32 +01:00
my ($command) = $self->extract_bracketed($arguments, '{', '}', '&', 1);
# did we find a substituted command?
2020-02-15 23:38:32 +01:00
if (length $command) {
# replace it with a placeholder
2020-02-15 23:38:32 +01:00
$arguments =~ s/&\s*\{\Q$command\E\}/&{subcmd}/;
# add it to the list of substituted commands
2020-05-02 05:59:51 +02:00
push @{$context->{subcmd}}, "$keyword $arguments";
# trim surrounding whitespace
2020-02-15 23:38:32 +01:00
$command =~ s/^\s+|\s+$//g;
# replace contextual command
$context->{command} = $command;
# reset contextual command history
2020-05-02 05:59:51 +02:00
$context->{commands} = [];
# add command to contextual command history
2020-05-02 05:59:51 +02:00
push @{$context->{commands}}, $command;
# interpret the substituted command
2020-05-02 05:59:51 +02:00
$context->{result} = $self->interpret($context);
# return the output
2020-05-02 05:59:51 +02:00
return $context->{result};
2020-02-15 23:38:32 +01:00
}
}
2015-03-31 00:04:08 +02:00
2020-02-15 23:38:32 +01:00
# parse out a pipe
if ($arguments =~ m/(?<!\\)\|\s*\{\s*[^}]+\}\s*$/) {
2020-02-15 23:38:32 +01:00
my ($pipe, $rest) = $self->extract_bracketed($arguments, '{', '}', '|', 1);
# strip pipe and everything after it from arguments
2020-02-15 23:38:32 +01:00
$arguments =~ s/\s*(?<!\\)\|\s*{(\Q$pipe\E)}.*$//s;
2015-03-31 00:04:08 +02:00
# trim surrounding whitespace
$pipe =~ s/^\s+|\s+$//g;
# update contextual pipe data
2020-06-21 05:52:54 +02:00
if (exists $context->{pipe}) {
$context->{pipe_rest} = "$rest | { $context->{pipe} }$context->{pipe_rest}";
} else {
$context->{pipe_rest} = $rest;
}
2020-05-02 05:59:51 +02:00
$context->{pipe} = $pipe;
2020-02-15 23:38:32 +01:00
}
# unescape any escaped command splits
$arguments =~ s/\\;;;/;;;/g;
# unescape any escaped substituted commands
$arguments =~ s/\\&\s*\{/&{/g;
# unescape any escaped pipes
$arguments =~ s/\\\|\s*\{/| {/g;
# find factoid channel for dont-replace-pronouns metadata
my ($fact_channel, $fact_trigger);
my @factoids = $self->{pbot}->{factoids}->{data}->find($context->{from}, $keyword, exact_trigger => 1);
if (@factoids == 1) {
# found the factoid's channel
2021-07-31 00:01:38 +02:00
($fact_channel, $fact_trigger) = @{$factoids[0]};
} else {
# more than one factoid found, normally we would prompt to disambiguate
# but in this case we'll just go ahead and assume global
($fact_channel, $fact_trigger) = ('.*', $keyword);
}
if ($self->{pbot}->{commands}->get_meta($keyword, 'dont-replace-pronouns')
or $self->{pbot}->{factoids}->{data}->get_meta($fact_channel, $fact_trigger, 'dont-replace-pronouns'))
2020-02-15 23:38:32 +01:00
{
$context->{'dont-replace-pronouns'} = 1;
}
# replace pronouns like "i", "my", etc, with "nick", "nick's", etc
if (not $context->{'dont-replace-pronouns'}) {
# if command recipient is "me" then replace it with invoker's nick
# e.g., "!tell me about date" or "!give me date", etc
if (defined $context->{nickprefix} and lc $context->{nickprefix} eq 'me') {
$context->{nickprefix} = $context->{nick};
}
# strip trailing sentence-ending punctuators from $keyword
# TODO: why are we doing this? why here? why at all?
$keyword =~ s/(\w+)[?!.]+$/$1/;
# replace pronouns in $arguments.
# but only on the top-level command (not on subsequent recursions).
# all pronouns can be escaped to prevent replacement, e.g. "!give \me date"
if (length $arguments and $context->{interpret_depth} <= 1) {
$arguments =~ s/(?<![\w\/\-\\])i am\b/$context->{nick} is/gi;
$arguments =~ s/(?<![\w\/\-\\])me\b/$context->{nick}/gi;
$arguments =~ s/(?<![\w\/\-\\])my\b/$context->{nick}'s/gi;
# unescape any escaped pronouns
$arguments =~ s/\\i am\b/i am/gi;
$arguments =~ s/\\my\b/my/gi;
$arguments =~ s/\\me\b/me/gi;
}
2020-02-15 23:38:32 +01:00
}
# the bot doesn't like performing bot commands on itself
# unless dont-protect-self is true
if (not $self->{pbot}->{commands}->get_meta($keyword, 'dont-protect-self')
and not $self->{pbot}->{factoids}->{data}->get_meta($context->{from}, $keyword, 'dont-protect-self'))
{
2020-02-15 23:38:32 +01:00
my $botnick = $self->{pbot}->{registry}->get_value('irc', 'botnick');
if ($arguments =~ m/^(your|him|her|its|it|them|their)(self|selves)$/i || $arguments =~ m/^$botnick$/i) {
# build message structure
2020-02-15 23:38:32 +01:00
my $message = {
nick => $context->{nick},
user => $context->{user},
host => $context->{host},
hostmask => $context->{hostmask},
command => $context->{command},
checkflood => 1,
message => "$context->{nick}: Why would I want to do that to myself?",
2020-02-15 23:38:32 +01:00
};
# get a random delay
my $delay = rand(10) + 5;
# add message to output queue
2020-05-02 05:59:51 +02:00
$self->add_message_to_output_queue($context->{from}, $message, $delay);
# log upcoming message + delay
2020-02-15 23:38:32 +01:00
$delay = duration($delay);
$self->{pbot}->{logger}->log("($delay delay) $message->{message}\n");
# no output to return
2020-02-15 23:38:32 +01:00
return undef;
}
}
# set the contextual root root keyword.
# this is the keyword first used to invoke this command. it is not updated
# on subsequent command interpreter recursions.
if (not exists $context->{root_keyword}) {
$context->{root_keyword} = $keyword;
2020-02-15 23:38:32 +01:00
}
2019-08-06 19:44:10 +02:00
# update the contextual keyword field
$context->{keyword} = $keyword;
# update the contextual arguments field
$context->{arguments} = $arguments;
# update the original arguments field.
# the actual arguments field may be manipulated/overridden by
# the interpreters. the arguments field is reset with this
# field after each interpreter finishes.
$context->{original_arguments} = $arguments;
# make the argument list
$context->{arglist} = $self->make_args($arguments);
# reset utf8 flag for arguments
# arguments aren't a utf8 encoded string at this point
2020-05-02 05:59:51 +02:00
delete $context->{args_utf8};
2020-02-15 23:38:32 +01:00
# reset the special behavior
$context->{special} = '';
2020-02-15 23:38:32 +01:00
# execute all registered interpreters
my $result;
2020-02-15 23:38:32 +01:00
foreach my $func (@{$self->{handlers}}) {
# call the interpreter
$result = $func->{subref}->($context);
# exit loop if interpreter returned output
2020-02-15 23:38:32 +01:00
last if defined $result;
# reset any manipulated/overridden arguments
2020-05-02 05:59:51 +02:00
$context->{arguments} = $context->{original_arguments};
delete $context->{args_utf8};
2020-02-15 23:38:32 +01:00
}
# return command output
2020-02-15 23:38:32 +01:00
return $result;
}
# finalizes processing on a command.
# updates pipes, substitutions, splits. truncates to paste site.
# sends final command output to appropriate queues.
sub handle_result {
my ($self, $context, $result) = @_;
2020-02-15 23:38:32 +01:00
# use context result if no result argument given
$result //= $context->{result};
2020-02-15 23:38:32 +01:00
2021-08-24 06:54:07 +02:00
# ensure we have a command result to work with
return if not defined $result or not length $result;
2021-08-24 06:54:07 +02:00
# preservation of consecutive whitespace is disabled by default
$context->{preserve_whitespace} //= 0;
2020-02-15 23:38:32 +01:00
# debug flag to trace $context location and contents
if ($self->{pbot}->{registry}->get_value('general', 'debugcontext')) {
use Data::Dumper;
$Data::Dumper::Sortkeys = 1;
$self->{pbot}->{logger}->log("Interpreter::handle_result [$result]\n");
$self->{pbot}->{logger}->log(Dumper $context);
}
2020-02-15 23:38:32 +01:00
# strip and store /command prefixes
# to be re-added after result processing
if ($result =~ s!^(/say|/me|/msg \S+) !!) {
$context->{result_prefix} = $1;
} else {
delete $context->{result_prefix};
}
2020-02-15 23:38:32 +01:00
# finish piping
if (exists $context->{pipe}) {
my ($pipe, $pipe_rest) = (delete $context->{pipe}, delete $context->{pipe_rest});
2020-02-15 23:38:32 +01:00
if (not $context->{alldone}) {
$context->{command} = "$pipe $result $pipe_rest";
$context->{result} = $self->interpret($context);
}
2020-02-15 23:38:32 +01:00
$self->handle_result($context);
return 0;
}
2020-02-15 23:38:32 +01:00
# finish command substitution
if (exists $context->{subcmd}) {
my $command = pop @{$context->{subcmd}};
2020-02-15 23:38:32 +01:00
if (@{$context->{subcmd}} == 0 or $context->{alldone}) {
delete $context->{subcmd};
}
if ($command =~ s/\b(an?)(\s+)&\{subcmd\}/&{subcmd}/i) {
# fix-up a/an article
my ($article, $spaces) = ($1, $2);
my $fixed_article = select_indefinite_article $result;
if ($article eq 'AN') {
$fixed_article = uc $fixed_article;
} elsif ($article eq 'An' or $article eq 'A') {
$fixed_article = ucfirst $fixed_article;
}
$command =~ s/&\{subcmd\}/$fixed_article$spaces$result/;
} else {
$command =~ s/&\{subcmd\}/$result/;
}
if (not $context->{alldone}) {
$context->{command} = $command;
$context->{result} = $self->interpret($context);
}
$self->handle_result($context);
return 0;
}
# restore /command prefix
if ($context->{result_prefix}) {
$result = "$context->{result_prefix} $result";
}
2020-02-15 23:38:32 +01:00
# finish command split
if ($context->{command_split}) {
my $botnick = $self->{pbot}->{registry}->get_value('irc', 'botnick');
2020-02-15 23:38:32 +01:00
# update contextual command with next command in split
$context->{command} = delete $context->{command_split};
2020-02-15 23:38:32 +01:00
# reformat result to be more suitable for joining together
$result =~ s!^/say !\n!i;
$result =~ s!^/me !\n* $botnick !i;
2020-02-15 23:38:32 +01:00
if (not length $context->{split_result}) {
$result =~ s/^\n//;
$context->{split_result} = $result;
} else {
$context->{split_result} .= $result;
}
$context->{result} = $self->interpret($context);
$self->handle_result($context);
return 0;
}
# join command split
if ($context->{split_result}) {
my $botnick = $self->{pbot}->{registry}->get_value('irc', 'botnick');
# reformat result to be more suitable for joining together
$result =~ s!^/say !\n!i;
$result =~ s!^/me !\n* $botnick !i;
2020-02-15 23:38:32 +01:00
$result = $context->{split_result} . $result;
}
2020-02-15 23:38:32 +01:00
# nothing more to do here if we have no result or keyword
return 0 if not length $result or not exists $context->{keyword};
my $preserve_newlines = $self->{pbot}->{registry}->get_value($context->{from}, 'preserve_newlines');
my $original_result = $result;
$context->{original_result} = $result;
$result =~ s/[\n\r]/ /g unless $preserve_newlines;
$result =~ s/[ \t]+/ /g unless $context->{preserve_whitespace};
my $max_lines = $self->{pbot}->{registry}->get_value($context->{from}, 'max_newlines') // 4;
my $lines = 0;
# split result into lines and go over each line
foreach my $line (split /[\n\r]+/, $result) {
# skip blank lines
next if $line !~ /\S/;
2020-02-15 23:38:32 +01:00
# paste everything if we've output the maximum lines
if (++$lines >= $max_lines) {
my $link = $self->{pbot}->{webpaste}->paste("$context->{from} <$context->{nick}> $context->{text}\n\n$original_result");
2020-02-15 23:38:32 +01:00
my $message = "And that's all I have to say about that. See $link for full text.";
if ($context->{use_output_queue}) {
my $message = {
nick => $context->{nick},
user => $context->{user},
host => $context->{host},
hostmask => $context->{hostmask},
command => $context->{command},
message => $message,
checkflood => 1
};
$self->add_message_to_output_queue($context->{from}, $message, 0);
2020-02-15 23:38:32 +01:00
} else {
unless ($context->{from} eq 'stdin@pbot') {
$self->{pbot}->{conn}->privmsg($context->{from}, $message);
}
2020-02-15 23:38:32 +01:00
}
last;
2020-02-15 23:38:32 +01:00
}
if ($context->{use_output_queue}) {
my $delay = rand(10) + 5;
my $message = {
nick => $context->{nick},
user => $context->{user},
host => $context->{host},
hostmask => $context->{hostmask},
command => $context->{command},
message => $line,
checkflood => 1,
};
$self->add_message_to_output_queue($context->{from}, $message, $delay);
$delay = duration($delay);
$self->{pbot}->{logger}->log("($delay delay) $line\n");
} else {
$context->{output} = $line;
$self->output_result($context);
$self->{pbot}->{logger}->log("$line\n");
}
}
# log a separator bar after command finishes
$self->{pbot}->{logger}->log("---------------------------------------------\n");
# successful command completion
return 1;
}
# truncates a message, optionally pasting to a web paste site.
# $paste_text is the version of text (e.g. with whitespace formatting preserved, etc)
# to send to the paste site.
sub truncate_result {
my ($self, $context, $text, $paste_text) = @_;
my $max_msg_len = $self->{pbot}->{registry}->get_value('irc', 'max_msg_len');
$max_msg_len -= length "PRIVMSG $context->{from} :";
# encode text to utf8 for byte length truncation
$text = encode('UTF-8', $text);
$paste_text = encode('UTF-8', $paste_text);
if (length $text > $max_msg_len) {
my $paste_result;
if (defined $paste_text) {
# limit pastes to 32k by default, overridable via paste.max_length
my $max_paste_len = $self->{pbot}->{registry}->get_value('paste', 'max_length') // 1024 * 32;
# truncate paste to max paste length
$paste_text = truncate_egc $paste_text, $max_paste_len;
# send text to paste site
$paste_result = $self->{pbot}->{webpaste}->paste("$context->{from} <$context->{nick}> $context->{text}\n\n$paste_text");
2020-02-15 23:38:32 +01:00
}
my $trunc = '... <truncated';
if (not defined $paste_result) {
# no paste
$trunc .= '>';
} elsif ($paste_result =~ m/^http/) {
# a link
$trunc .= "; $paste_result>";
} else {
# an error or something else
$trunc .= "; $paste_result>";
}
$paste_result //= 'not pasted';
$self->{pbot}->{logger}->log("Message truncated -- $paste_result\n");
# make room to append the truncation text to the message text
# (third argument to truncate_egc is '' to prevent appending its own ellipsis)
my $text_len = length $text;
my $trunc_len = $text_len < $max_msg_len ? $text_len : $max_msg_len;
$text = truncate_egc $text, $trunc_len - length $trunc, '';
# append the truncation text
$text .= $trunc;
} else {
# decode text from utf8
$text = decode('UTF-8', $text);
}
2020-01-12 02:46:44 +01:00
return $text;
2015-03-31 00:04:08 +02:00
}
my @dehighlight_exclusions = qw/auto if unsigned break inline void case int volatile char long while const register _Alignas continue restrict _Alignof default return _Atomic do short _Bool double signed _Complex else sizeof _Generic enum static _Imaginary extern struct _Noreturn float switch _Static_assert for typedef _Thread_local goto union/;
sub dehighlight_nicks {
my ($self, $line, $channel) = @_;
2020-01-12 02:46:44 +01:00
return $line if $self->{pbot}->{registry}->get_value('general', 'no_dehighlight_nicks');
my @tokens = split / /, $line;
foreach my $token (@tokens) {
my $potential_nick = $token;
$potential_nick =~ s/^[^\w\[\]\-\\\^\{\}]+//;
$potential_nick =~ s/[^\w\[\]\-\\\^\{\}]+$//;
2014-03-15 02:53:33 +01:00
next if length $potential_nick == 1;
next if grep { /\Q$potential_nick/i } @dehighlight_exclusions;
next if not $self->{pbot}->{nicklist}->is_present($channel, $potential_nick);
my $dehighlighted_nick = $potential_nick;
$dehighlighted_nick =~ s/(.)/$1\x{feff}/;
$token =~ s/\Q$potential_nick\E(?!:)/$dehighlighted_nick/;
2020-02-15 23:38:32 +01:00
}
2014-03-15 02:53:33 +01:00
return join ' ', @tokens;
2014-03-15 02:53:33 +01:00
}
sub output_result {
my ($self, $context) = @_;
2020-02-15 23:38:32 +01:00
# debug flag to trace $context location and contents
if ($self->{pbot}->{registry}->get_value('general', 'debugcontext')) {
2020-02-15 23:38:32 +01:00
use Data::Dumper;
$Data::Dumper::Sortkeys = 1;
$self->{pbot}->{logger}->log("Interpreter::output_result\n");
2020-05-02 05:59:51 +02:00
$self->{pbot}->{logger}->log(Dumper $context);
2019-09-05 05:18:32 +02:00
}
my $output = $context->{output};
2020-02-15 23:38:32 +01:00
# nothing to do if we have nothing to do innit
return if not defined $output or not length $output;
2020-02-15 23:38:32 +01:00
# nothing more to do here if the command came from STDIN
return if $context->{from} eq 'stdin@pbot';
2017-12-05 03:34:34 +01:00
my $botnick = $self->{pbot}->{registry}->get_value('irc', 'botnick');
my $to = $context->{from};
# log the message if requested
if ($context->{checkflood}) {
$self->{pbot}->{antiflood}->check_flood($to, $botnick, $self->{pbot}->{registry}->get_value('irc', 'username'), 'pbot', $output, 0, 0, 0);
}
# nothing more to do here if the output is going to the bot
return if $to eq $botnick;
# insert null-width spaces into nicknames to prevent IRC clients
# from unncessarily highlighting people
$output = $self->dehighlight_nicks($output, $to) unless $output =~ m|^/msg |;
# handle various /command prefixes
my $type = 'echo'; # will be set to 'echo' or 'action' depending on /command prefix
if ($output =~ s/^\/say //i) {
# /say stripped off
$output = ' ' if not length $output; # ensure we output something
}
elsif ($output =~ s/^\/me //i) {
# /me stripped off
$type = 'action';
}
elsif ($output =~ s/^\/msg\s+([^\s]+) //i) {
# /msg somenick stripped off
$to = $1; # reset $to to output to somenick
2020-02-15 23:38:32 +01:00
# don't allow /msg nick1,nick2,etc
if ($to =~ /,/) {
$self->{pbot}->{logger}->log("[HACK] Disregarding attempt to /msg multiple users. $context->{hostmask} [$context->{command}] $output\n");
return;
2020-02-15 23:38:32 +01:00
}
# don't allow /msging any nicks that end with "serv" (e.g. ircd services; NickServ, ChanServ, etc)
if ($to =~ /.*serv(?:@.*)?$/i) {
$self->{pbot}->{logger}->log("[HACK] Disregarding attempt to /msg *serv. $context->{hostmask} [$context->{command}] $output]\n");
return;
}
2020-02-15 23:38:32 +01:00
if ($output =~ s/^\/me //i) {
# /me stripped off
$type = 'action';
}
else {
# strip off /say if present
$output =~ s/^\/say //i;
2020-02-15 23:38:32 +01:00
}
}
my $bot_nick = $self->{pbot}->{registry}->get_value('irc', 'botnick');
my $bot_hostmask = "$bot_nick!pbot3\@pbot";
my $bot_account = $self->{pbot}->{messagehistory}->get_message_account($bot_nick, 'pbot3', 'pbot');
if ($type eq 'echo') {
# prepend nickprefix to output
if ($context->{nickprefix} && (! $context->{nickprefix_disabled} || $context->{nickprefix_forced})) {
$output = "$context->{nickprefix}: $output";
}
elsif ($context->{add_nick}) {
$output = "$context->{nick}: $output";
}
2020-02-15 23:38:32 +01:00
# truncate if necessary, pasting original result to a web paste site
$output = $self->truncate_result($context, $output, $context->{original_result});
# add bot's output to message history for recall/grab
if ($to =~ /^#/) {
$self->{pbot}->{messagehistory}->add_message($bot_account, $bot_hostmask, $to, $output, MSG_CHAT);
}
# send the message to the channel/user
$self->{pbot}->{conn}->privmsg($to, $output);
2020-02-15 23:38:32 +01:00
}
elsif ($type eq 'action') {
# truncate if necessary, pasting original result to a web paste site
$output = $self->truncate_result($context, $output, $context->{original_result});
# add bot's output to message history for recall/grab
if ($to =~ /^#/) {
$self->{pbot}->{messagehistory}->add_message($bot_account, $bot_hostmask, $to, "/me $output", MSG_CHAT);
}
# CTCP ACTION the message to the channel/user
$self->{pbot}->{conn}->me($to, $output);
2017-11-26 05:00:55 +01:00
}
}
sub add_message_to_output_queue {
2020-02-15 23:38:32 +01:00
my ($self, $channel, $message, $delay) = @_;
$self->{pbot}->{event_queue}->enqueue_event(
sub {
2020-05-02 05:59:51 +02:00
my $context = {
from => $channel,
nick => $message->{nick},
user => $message->{user},
host => $message->{host},
hostmask => $message->{hostmask},
output => $message->{message},
command => $message->{command},
checkflood => $message->{checkflood}
};
2020-05-02 05:59:51 +02:00
$self->output_result($context);
},
$delay, "output $channel $message->{message}"
);
}
sub add_to_command_queue {
my ($self, $channel, $command, $delay, $repeating) = @_;
$self->{pbot}->{event_queue}->enqueue_event(
sub {
2020-05-02 05:59:51 +02:00
my $context = {
from => $channel,
nick => $command->{nick},
user => $command->{user},
host => $command->{host},
hostmask => $command->{hostmask},
command => $command->{command},
interpret_depth => 0,
checkflood => 0,
preserve_whitespace => 0
};
if (exists $command->{'cap-override'}) {
$self->{pbot}->{logger}->log("[command queue] Override command capability with $command->{'cap-override'}\n");
2020-05-02 05:59:51 +02:00
$context->{'cap-override'} = $command->{'cap-override'};
}
$context->{result} = $self->interpret($context);
$self->handle_result($context);
},
$delay, "command $channel $command->{command}", $repeating
);
}
sub add_botcmd_to_command_queue {
2020-02-15 23:38:32 +01:00
my ($self, $channel, $command, $delay) = @_;
2020-02-15 23:38:32 +01:00
my $botcmd = {
nick => $self->{pbot}->{registry}->get_value('irc', 'botnick'),
user => 'stdin',
host => 'pbot',
command => $command
2020-02-15 23:38:32 +01:00
};
$botcmd->{hostmask} = "$botcmd->{nick}!stdin\@pbot";
2020-02-15 23:38:32 +01:00
$self->add_to_command_queue($channel, $botcmd, $delay);
}
# extracts a bracketed substring, gracefully handling unbalanced quotes
# or brackets. opening and closing brackets may each be more than one character.
# optional prefix may be or begin with a character group.
sub extract_bracketed {
my ($self, $string, $open_bracket, $close_bracket, $optional_prefix, $allow_whitespace) = @_;
# set default values when none provided
$open_bracket //= '{';
$close_bracket //= '}';
$optional_prefix //= '';
$allow_whitespace //= 0;
my @prefix_group;
if ($optional_prefix =~ s/^\[(.*?)\]//) { @prefix_group = split //, $1; }
my @prefixes = split //, $optional_prefix;
my @opens = split //, $open_bracket;
my @closes = split //, $close_bracket;
my $prefix_index = 0;
my $open_index = 0;
my $close_index = 0;
my $result = '';
my $rest = '';
my $extracting = 0;
my $extracted = 0;
my $escaped = 0;
my $token = '';
my $ch = ' ';
my $last_ch;
my $i = 0;
my $bracket_pos;
my $bracket_level = 0;
my $prefix_group_match = @prefix_group ? 0 : 1;
my $prefix_match = @prefixes ? 0 : 1;
my $match = 0;
my @chars = split //, $string;
my $state = 'prefixgroup';
while (1) {
$last_ch = $ch;
if ($i >= @chars) {
if ($extracting) {
# reached end, but unbalanced brackets... reset to beginning and ignore them
$i = $bracket_pos;
$bracket_level = 0;
$state = 'prefixgroup';
$extracting = 0;
$last_ch = ' ';
$token = '';
$result = '';
} else {
# add final token and exit
$token .= '\\' if $escaped;
$rest .= $token if $extracted;
last;
}
}
$ch = $chars[$i++];
if ($escaped) {
$token .= "\\$ch" if $extracting or $extracted;
$escaped = 0;
next;
}
if ($ch eq '\\') {
$escaped = 1;
next;
}
if (not $extracted) {
if ($state eq 'prefixgroup' and @prefix_group and not $extracting) {
foreach my $prefix_ch (@prefix_group) {
if ($ch eq $prefix_ch) {
$prefix_group_match = 1;
$state = 'prefixes';
last;
} else {
$prefix_group_match = 0;
}
}
next if $prefix_group_match;
} elsif ($state eq 'prefixgroup' and not @prefix_group) {
$state = 'prefixes';
$prefix_index = 0;
}
if ($state eq 'prefixes') {
if (@prefixes and $ch eq $prefixes[$prefix_index]) {
$token .= $ch if $extracting;
$prefix_match = 1;
$prefix_index++;
$state = 'openbracket';
next;
} elsif ($state eq 'prefixes' and not @prefixes) {
$state = 'openbracket';
}
}
if ($extracting or ($state eq 'openbracket' and $prefix_group_match and $prefix_match)) {
$prefix_index = 0;
if ($ch eq $opens[$open_index]) {
$match = 1;
$open_index++;
} else {
if ($allow_whitespace and $ch eq ' ' and not $extracting) { next; }
elsif (not $extracting) {
$state = 'prefixgroup';
next;
}
}
}
if ($match) {
$state = 'prefixgroup';
$prefix_group_match = 0 unless not @prefix_group;
$prefix_match = 0 unless not @prefixes;
$match = 0;
$bracket_pos = $i if not $extracting;
if ($open_index == @opens) {
$extracting = 1;
$token .= $ch if $bracket_level > 0;
$bracket_level++;
$open_index = 0;
}
next;
} else {
$open_index = 0;
}
if ($ch eq $closes[$close_index]) {
if ($extracting or $extracted) {
$close_index++;
if ($close_index == @closes) {
$close_index = 0;
if (--$bracket_level == 0) {
$extracting = 0;
$extracted = 1;
$result .= $token;
$token = '';
} else {
$token .= $ch;
}
}
}
next;
} else {
$close_index = 0;
}
}
if ($extracting or $extracted) { $token .= $ch; }
}
return ($result, $rest);
}
# splits line into quoted arguments while preserving quotes.
# a string is considered quoted only if they are surrounded by
# whitespace or json separators.
# handles unbalanced quotes gracefully by treating them as
# part of the argument they were found within.
sub split_line {
my ($self, $line, %opts) = @_;
my %default_opts = (
strip_quotes => 0,
keep_spaces => 0,
preserve_escapes => 1,
strip_commas => 0,
);
%opts = (%default_opts, %opts);
return () if not length $line;
my @chars = split //, $line;
my @args;
my $escaped = 0;
my $quote;
my $token = '';
my $last_token = '';
my $ch = ' ';
my $last_ch;
my $next_ch;
my $i = 0;
my $pos;
my $ignore_quote = 0;
my $spaces = 0;
while (1) {
$last_ch = $ch;
if ($i >= @chars) {
if (defined $quote) {
# reached end, but unbalanced quote... reset to beginning of quote and ignore it
$i = $pos;
$ignore_quote = 1;
$quote = undef;
$last_ch = ' ';
$token = $last_token;
} else {
# add final token and exit
$token .= '\\' if $escaped;
push @args, $token if length $token;
last;
}
}
$ch = $chars[$i++];
$next_ch = $chars[$i];
$spaces = 0 if $ch ne ' ';
if ($escaped) {
if ($opts{preserve_escapes}) { $token .= "\\$ch"; }
else { $token .= $ch; }
$escaped = 0;
next;
}
if ($ch eq '\\') {
$escaped = 1;
next;
}
if (defined $quote) {
if ($ch eq $quote and (not defined $next_ch or $next_ch =~ /[\s,:;})\].+=]/)) {
# closing quote
$token .= $ch unless $opts{strip_quotes};
push @args, $token;
$quote = undef;
$token = '';
} else {
# still within quoted argument
$token .= $ch;
}
next;
}
if (($last_ch =~ /[\s:{(\[.+=,]/) and not defined $quote and ($ch eq "'" or $ch eq '"')) {
if ($ignore_quote) {
# treat unbalanced quote as part of this argument
$token .= $ch;
$ignore_quote = 0;
} else {
# begin potential quoted argument
$pos = $i - 1;
$quote = $ch;
$last_token = $token;
$token .= $ch unless $opts{strip_quotes};
}
next;
}
if ($ch eq ' ' or $ch eq "\n" or $ch eq "\t" or ($opts{strip_commas} and $ch eq ',')) {
if (++$spaces > 1 and $opts{keep_spaces}) {
$token .= $ch;
next;
} else {
unless ($opts{strip_commas} and $token eq ',') {
push @args, $token if length $token;
}
$token = '';
next;
}
}
$token .= $ch;
}
return @args;
}
# creates an array of arguments from a string
sub make_args {
my ($self, $string) = @_;
my @args = $self->split_line($string, keep_spaces => 1);
my @arglist;
my @arglist_unstripped;
while (@args) {
my $arg = shift @args;
# add argument with quotes and spaces preserved
push @arglist_unstripped, $arg;
# strip quotes from argument
if ($arg =~ m/^'.*'$/) {
$arg =~ s/^'//;
$arg =~ s/'$//;
} elsif ($arg =~ m/^".*"$/) {
$arg =~ s/^"//;
$arg =~ s/"$//;
}
# strip leading spaces from argument
$arg =~ s/^\s+//;
# add stripped argument
push @arglist, $arg;
}
# copy unstripped arguments to end of arglist
push @arglist, @arglist_unstripped;
return \@arglist;
}
# returns size of array of arguments
sub arglist_size {
my ($self, $args) = @_;
return @$args / 2;
}
# unshifts new argument to front
sub unshift_arg {
my ($self, $args, $arg) = @_;
splice @$args, @$args / 2, 0, $arg; # add quoted argument
unshift @$args, $arg; # add first argument
return @$args;
}
# shifts first argument off array of arguments
sub shift_arg {
my ($self, $args) = @_;
return undef if not @$args;
splice @$args, @$args / 2, 1; # remove original quoted argument
return shift @$args;
}
# returns list of unquoted arguments
sub unquoted_args {
my ($self, $args) = @_;
return undef if not @$args;
return @$args[0 .. @$args / 2 - 1];
}
# splits array of arguments into array with overflow arguments filling up last position
# split_args(qw/dog cat bird hamster/, 3) => ("dog", "cat", "bird hamster")
sub split_args {
my ($self, $args, $count, $offset, $preserve_quotes) = @_;
my @result;
my $max = $self->arglist_size($args);
$preserve_quotes //= 0;
my $i = $offset // 0;
unless ($count == 1) {
do {
my $arg = $args->[$i++];
push @result, $arg;
} while (--$count > 1 and $i < $max);
}
# join the get rest as a string
my $rest = '';
if ($preserve_quotes) {
# get from second half of args, which contains quotes
foreach my $arg (@$args[@$args / 2 + $i .. @$args - 1]) {
$rest .= ' ' unless not length $rest;
$rest .= $arg;
}
} else {
$rest = join ' ', @$args[$i .. $max - 1];
}
push @result, $rest if length $rest;
return @result;
}
# lowercases array of arguments
sub lc_args {
my ($self, $args) = @_;
for (my $i = 0; $i < @$args; $i++) { $args->[$i] = lc $args->[$i]; }
}
2021-07-31 00:01:38 +02:00
# getopt boilerplate in one place
# 99% of our getopt use is on a string
sub getopt {
my $self = shift;
$self->getopt_from_string(@_);
}
# getopt_from_string() uses our split_line() function instead of
# Getopt::Long::GetOptionsFromString's Text::ParseWords
2021-07-31 00:01:38 +02:00
sub getopt_from_string {
my ($self, $string, $result, $config, @opts) = @_;
my @opt_args = $self->split_line($string, strip_quotes => 1);
return $self->getopt_from_array(\@opt_args, $result, $config, @opts);
}
# the workhorse getopt function
2021-07-31 00:01:38 +02:00
sub getopt_from_array {
my ($self, $opt_args, $result, $config, @opts) = @_;
# emitting errors as Perl warnings instead of using die, weird.
2021-07-31 00:01:38 +02:00
my $opt_error;
local $SIG{__WARN__} = sub {
$opt_error = shift;
chomp $opt_error;
};
Getopt::Long::Configure(@$config);
GetOptionsFromArray($opt_args, $result, @opts);
return ($opt_args, $opt_error);
}
1;