From 3ddd190d8942c0752ddde9cba9e8f17544952782 Mon Sep 17 00:00:00 2001 From: Pragmatic Software Date: Tue, 8 Jun 2021 15:52:47 -0700 Subject: [PATCH] Progress on refactoring and polishing everything --- PBot/Commands.pm | 96 +++++++++++++++++++++----- PBot/Factoids.pm | 48 +++++++++---- PBot/Interpreter.pm | 127 +++++++++++++++++------------------ PBot/Modules.pm | 13 ++++ PBot/Registry.pm | 44 ++++++++---- PBot/Utils/ValidateString.pm | 86 ++++++++++++++++++++---- 6 files changed, 292 insertions(+), 122 deletions(-) diff --git a/PBot/Commands.pm b/PBot/Commands.pm index 42798b4f..97cbb1dc 100644 --- a/PBot/Commands.pm +++ b/PBot/Commands.pm @@ -20,9 +20,11 @@ use Time::Duration qw/duration/; sub initialize { my ($self, %conf) = @_; + + # PBot::Commands can register subrefs $self->PBot::Registerable::initialize(%conf); - # command metadata + # command metadata stored as a HashObject $self->{metadata} = PBot::HashObject->new(pbot => $self->{pbot}, name => 'Command metadata', filename => $conf{filename}); $self->{metadata}->load; @@ -34,15 +36,25 @@ sub initialize { sub cmd_set { my ($self, $context) = @_; + my ($command, $key, $value) = $self->{pbot}->{interpreter}->split_args($context->{arglist}, 3); - return "Usage: cmdset [key [value]]" if not defined $command; + + if (not defined $command) { + return "Usage: cmdset [key [value]]"; + } + return $self->{metadata}->set($command, $key, $value); } sub cmd_unset { my ($self, $context) = @_; + my ($command, $key) = $self->{pbot}->{interpreter}->split_args($context->{arglist}, 2); - return "Usage: cmdunset " if not defined $command or not defined $key; + + if (not defined $command or not defined $key ) { + return "Usage: cmdunset "; + } + return $self->{metadata}->unset($command, $key); } @@ -57,13 +69,19 @@ sub cmd_help { # check built-in commands first if ($self->exists($keyword)) { + + # check for command metadata if ($self->{metadata}->exists($keyword)) { my $name = $self->{metadata}->get_key_name($keyword); my $requires_cap = $self->{metadata}->get_data($keyword, 'requires_cap'); my $help = $self->{metadata}->get_data($keyword, 'help'); my $result = "/say $name: "; - $result .= "[Requires can-$keyword] " if $requires_cap; + + # prefix help text with required capability + if ($requires_cap) { + $result .= "[Requires can-$keyword] "; + } if (not defined $help or not length $help) { $result .= "I have no help text for this command yet. To add help text, use the command `cmdset $keyword help `."; @@ -74,21 +92,35 @@ sub cmd_help { return $result; } + # no command metadata available return "$keyword is a built-in command, but I have no help for it yet."; } # then factoids my $channel_arg = $self->{pbot}->{interpreter}->shift_arg($context->{arglist}); - $channel_arg = $context->{from} if not defined $channel_arg or not length $channel_arg; - $channel_arg = '.*' if $channel_arg !~ m/^#/; + if (not defined $channel_arg or not length $channel_arg) { + # set channel argument to from if no argument was passed + $channel_arg = $context->{from}; + } + + if ($channel_arg !~ /^#/) { + # set channel argument to global if it's not channel-like + $channel_arg = '.*'; + } + + # find factoids my @factoids = $self->{pbot}->{factoids}->find_factoid($channel_arg, $keyword, exact_trigger => 1); - if (not @factoids or not $factoids[0]) { return "I don't know anything about $keyword."; } + if (not @factoids or not $factoids[0]) { + # nothing found + return "I don't know anything about $keyword."; + } my ($channel, $trigger); if (@factoids > 1) { + # ask to disambiguate factoids if found in multiple channels if (not grep { $_->[0] eq $channel_arg } @factoids) { return "/say $keyword found in multiple channels: " @@ -106,44 +138,71 @@ sub cmd_help { ($channel, $trigger) = ($factoids[0]->[0], $factoids[0]->[1]); } + # get canonical channel and trigger names with original typographical casing my $channel_name = $self->{pbot}->{factoids}->{factoids}->get_key_name($channel); my $trigger_name = $self->{pbot}->{factoids}->{factoids}->get_key_name($channel, $trigger); - $channel_name = 'global channel' if $channel_name eq '.*'; - $trigger_name = "\"$trigger_name\"" if $trigger_name =~ / /; - my $result = "/say "; - $result .= "[$channel_name] " if $channel ne $context->{from} and $channel ne '.*'; - $result .= "$trigger_name: "; + # prettify channel name if it's ".*" + if ($channel_name eq '.*') { + $channel_name = 'global channel'; + } + # prettify trigger name with double-quotes if it contains spaces + if ($trigger_name =~ / /) { + $trigger_name = "\"$trigger_name\""; + } + + # get factoid's `help` metadata my $help = $self->{pbot}->{factoids}->{factoids}->get_data($channel, $trigger, 'help'); + # return immediately if no help text if (not defined $help or not length $help) { return "/say $trigger_name is a factoid for $channel_name, but I have no help text for it yet." . " To add help text, use the command `factset $trigger_name help `."; } - $result .= $help; + my $result = "/say "; + + # if factoid doesn't belong to invoked or global channel, + # then prefix with the factoid's channel name. + if ($channel ne $context->{from} and $channel ne '.*') { + $result .= "[$channel_name] "; + } + + $result .= "$trigger_name: $help"; + return $result; } sub register { my ($self, $subref, $name, $requires_cap) = @_; - Carp::croak("Missing parameters to Commands::register") if not defined $subref or not defined $name; + if (not defined $subref or not defined $name) { + Carp::croak("Missing parameters to Commands::register"); + } + + # register subref my $ref = $self->PBot::Registerable::register($subref); + + # update internal metadata $ref->{name} = lc $name; $ref->{requires_cap} = $requires_cap // 0; + # update command metadata if (not $self->{metadata}->exists($name)) { - $self->{metadata}->add($name, {requires_cap => $requires_cap, help => ''}, 1); + $self->{metadata}->add($name, { requires_cap => $requires_cap, help => '' }, 1); } else { + # metadata already exists, just update requires_cap unless it's already set. if (not defined $self->get_meta($name, 'requires_cap')) { $self->{metadata}->set($name, 'requires_cap', $requires_cap, 1); } } - # add can-cmd capability if command requires such - $self->{pbot}->{capabilities}->add("can-$name", undef, 1) if $requires_cap; + # add can- capability to PBot capabilities if required + if ($requires_cap) { + $self->{pbot}->{capabilities}->add("can-$name", undef, 1); + } + return $ref; } @@ -173,10 +232,13 @@ sub get_meta { return $self->{metadata}->get_data($command, $key); } +# main entry point for PBot::Interpreter to interpret a registered bot command +# see also PBot::Factoids::interpreter() for factoid commands sub interpreter { my ($self, $context) = @_; my $result; + # debug flag to trace $context location and contents if ($self->{pbot}->{registry}->get_value('general', 'debugcontext')) { use Data::Dumper; $Data::Dumper::Sortkeys = 1; diff --git a/PBot/Factoids.pm b/PBot/Factoids.pm index f4f67efa..e2f44038 100644 --- a/PBot/Factoids.pm +++ b/PBot/Factoids.pm @@ -1036,35 +1036,55 @@ sub execute_code_factoid_using_vm { $context->{code} = $self->expand_action_arguments($context->{code}, $context->{arguments}, $context->{nick}); } } else { + # otherwise allow nick overriding $context->{no_nickoverride} = 0; } - my %h = ( - nick => $context->{nick}, channel => $context->{from}, lang => $context->{lang}, code => $context->{code}, arguments => $context->{arguments}, - factoid => "$context->{channel}:$context->{keyword}" + # set up `compiler` module arguments + my %args = ( + nick => $context->{nick}, + channel => $context->{from}, + lang => $context->{lang}, + code => $context->{code}, + arguments => $context->{arguments}, + factoid => "$context->{channel}:$context->{keyword}", ); - if ($self->{factoids}->exists($context->{channel}, $context->{keyword}, 'persist-key')) { - $h{'persist-key'} = $self->{factoids}->get_data($context->{channel}, $context->{keyword}, 'persist-key'); + # the vm can persist filesystem data to external storage identified by a key. + # if the `persist-key` factoid metadata is set, then use this key. + my $persist_key = $self->{factoids}->get_data($context->{channel}, $context->{keyword}, 'persist-key'); + + if (defined $persist_key) { + $args{'persist-key'} = $persist_key; } - my $json = encode_json \%h; + # encode args to utf8 json string + my $json = encode_json \%args; - $context->{special} = 'code-factoid'; - $context->{root_channel} = $context->{channel}; - $context->{keyword} = 'compiler'; - $context->{arguments} = $json; - $context->{args_utf8} = 1; + # update context details + $context->{special} = 'code-factoid'; # ensure handle_result(), etc, process this as a code-factoid + $context->{root_channel} = $context->{channel}; # override root channel to current channel + $context->{keyword} = 'compiler'; # code-factoid uses `compiler` command to invoke vm + $context->{arguments} = $json; # set arguments to json string as `compiler` wants + $context->{args_utf8} = 1; # arguments are utf8 encoded by encode_json + # launch the `compiler` module $self->{pbot}->{modules}->execute_module($context); - return ""; + + # return empty string since the module process reader will + # pass the output along to the result handler + return ''; } sub execute_code_factoid { my ($self, @args) = @_; + # this sub used to contain an if-clause that selected + # an alternative method of executing code factoids. + # now it only uses the vm. maybe one day... return $self->execute_code_factoid_using_vm(@args); } +# main entry point for PBot::Interpreter to interpret a factoid command sub interpreter { my ($self, $context) = @_; my $pbot = $self->{pbot}; @@ -1096,7 +1116,9 @@ sub interpreter { $context->{arguments} = "" if not defined $context->{arguments}; # factoid > nick redirection - if ($context->{arguments} =~ s/> ([_a-zA-Z0-9\[\]{}`\\-]+)$//) { + my $nick_regex = $self->{pbot}->{registry}->get_value('regex', 'nickname'); + + if ($context->{arguments} =~ s/> ($nick_regex)$//) { my $rcpt = $1; if ($self->{pbot}->{nicklist}->is_present($context->{from}, $rcpt)) { $context->{nickoverride} = $rcpt; diff --git a/PBot/Interpreter.pm b/PBot/Interpreter.pm index 15bc439a..1049238c 100644 --- a/PBot/Interpreter.pm +++ b/PBot/Interpreter.pm @@ -17,83 +17,84 @@ use utf8; use Time::HiRes qw/gettimeofday/; use Time::Duration; + +use Encode; use Unicode::Truncate; use PBot::Utils::ValidateString; sub initialize { my ($self, %conf) = @_; + + # PBot::Interpreter can register multiple interpreter subrefs. + # See also: Commands::interpreter() and Factoids::interpreter() $self->PBot::Registerable::initialize(%conf); - $self->{pbot}->{registry}->add_default('text', 'general', 'compile_blocks', $conf{compile_blocks} // 1); - $self->{pbot}->{registry}->add_default('array', 'general', 'compile_blocks_channels', $conf{compile_blocks_channels} // '.*'); - $self->{pbot}->{registry}->add_default('array', 'general', 'compile_blocks_ignore_channels', $conf{compile_blocks_ignore_channels} // 'none'); - $self->{pbot}->{registry}->add_default('text', 'interpreter', 'max_recursion', 10); + # registry entry for maximum recursion depth + $self->{pbot}->{registry}->add_default('text', 'interpreter', 'max_recursion', 10); } +# this is the main entry point for a message to be parsed and interpreted sub process_line { my ($self, $from, $nick, $user, $host, $text) = @_; - $from = lc $from if defined $from; + # lowercase `from` field for case-insensitivity + $from = lc $from; - my $context = {from => $from, nick => $nick, user => $user, host => $host, hostmask => "$nick!$user\@$host", text => $text}; - my $pbot = $self->{pbot}; + # context object maintains contextual information about the state and + # processing of this message. this object is passed between various bot + # functions and interfaces, which may themselves add more fields. + my $context = { + from => $from, # source (channel, sender hostmask, "pbot@stdin", etc) + nick => $nick, # nickname + user => $user, # username + host => $host, # hostname/ip address + hostmask => "$nick!$user\@$host", # full hostmask + text => $text, # message contents + }; - my $message_account = $pbot->{messagehistory}->get_message_account($nick, $user, $host); - $pbot->{messagehistory}->add_message($message_account, $context->{hostmask}, $from, $text, $pbot->{messagehistory}->{MSG_CHAT}); + # add hostmask to user/message tracking database and get their account id + my $message_account = $self->{pbot}->{messagehistory}->get_message_account($nick, $user, $host); + + # add account id to context object $context->{message_account} = $message_account; - my $flood_threshold = $pbot->{registry}->get_value($from, 'chat_flood_threshold'); - my $flood_time_threshold = $pbot->{registry}->get_value($from, 'chat_flood_time_threshold'); + # add message to message history as a chat message + $self->{pbot}->{messagehistory}->add_message($message_account, $context->{hostmask}, $from, $text, $self->{pbot}->{messagehistory}->{MSG_CHAT}); - $flood_threshold = $pbot->{registry}->get_value('antiflood', 'chat_flood_threshold') if not defined $flood_threshold; - $flood_time_threshold = $pbot->{registry}->get_value('antiflood', 'chat_flood_time_threshold') if not defined $flood_time_threshold; + # look up channel-specific flood threshold settings from registry + my $flood_threshold = $self->{pbot}->{registry}->get_value($from, 'chat_flood_threshold'); + my $flood_time_threshold = $self->{pbot}->{registry}->get_value($from, 'chat_flood_time_threshold'); + # get general flood threshold settings if there are no channel-specific settings + $flood_threshold //= $self->{pbot}->{registry}->get_value('antiflood', 'chat_flood_threshold'); + $flood_time_threshold //= $self->{pbot}->{registry}->get_value('antiflood', 'chat_flood_time_threshold'); -=cut - if (defined $from and $from =~ m/^#/) { - my $chanmodes = $self->{pbot}->{channels}->get_meta($from, 'MODE'); - if (defined $chanmodes and $chanmodes =~ m/z/) { - $context->{'chan-z'} = 1; - if ($self->{pbot}->{banlist}->{quietlist}->exists($from, '$~a')) { - my $nickserv = $self->{pbot}->{messagehistory}->{database}->get_current_nickserv_account($message_account); - if (not defined $nickserv or not length $nickserv) { $context->{unidentified} = 1; } - } - - $context->{banned} = 1 if $self->{pbot}->{banlist}->is_banned($nick, $user, $host, $from); - } - } -=cut - - $pbot->{antiflood}->check_flood( - $from, $nick, $user, $host, $text, - $flood_threshold, $flood_time_threshold, - $pbot->{messagehistory}->{MSG_CHAT}, $context - ) if defined $from; - -=cut - if ($context->{banned} or $context->{unidentified}) { - $self->{pbot}->{logger}->log("Disregarding banned/unidentified user message (channel $from is +z).\n"); - return 1; - } -=cut + # perform anti-flood processing on this message + $self->{pbot}->{antiflood}->check_flood( + $from, $nick, $user, $host, $text, + $flood_threshold, $flood_time_threshold, + $self->{pbot}->{messagehistory}->{MSG_CHAT}, + $context + ); + # get bot nickname my $botnick = $self->{pbot}->{registry}->get_value('irc', 'botnick'); - # get channel-specific trigger if available - my $bot_trigger = $pbot->{registry}->get_value($from, 'trigger'); + # get channel-specific bot trigger if available + my $bot_trigger = $self->{pbot}->{registry}->get_value($from, 'trigger'); - # otherwise get general trigger - if (not defined $bot_trigger) { $bot_trigger = $pbot->{registry}->get_value('general', 'trigger'); } + # otherwise get general bot trigger + $bot_trigger //= $self->{pbot}->{registry}->get_value('general', 'trigger'); - my $nick_regex = qr/[^%!,:\(\)\+\*\/ ]+/; + # get nick regex from registry entry + my $nick_regex = $self->{pbot}->{registry}->get_value('regex', 'nickname'); - my $nick_override; + my $nick_override = undef; my $processed = 0; my $preserve_whitespace = 0; - $text =~ s/^\s+//; - $text =~ s/\s+$//; + $text =~ s/^\s+|\s+$//g; $text = validate_string($text, 0); my $cmd_text = $text; @@ -147,7 +148,7 @@ sub process_line { foreach $command (@commands) { # check if user is ignored (and command isn't `login`) - if ($command !~ /^login / && defined $from && $pbot->{ignorelist}->is_ignored($from, "$nick!$user\@$host")) { + if ($command !~ /^login / && defined $from && $self->{pbot}->{ignorelist}->is_ignored($from, "$nick!$user\@$host")) { $self->{pbot}->{logger}->log("Disregarding command from ignored user $nick!$user\@$host in $from.\n"); return 1; } @@ -330,8 +331,6 @@ sub interpret { # unescape any escaped pipes $arguments =~ s/\\\|\s*\{/| {/g if defined $arguments; - $arguments = validate_string($arguments); - # set arguments as a plain string $context->{arguments} = $arguments; delete $context->{args_utf8}; @@ -359,10 +358,11 @@ sub interpret { sub extract_bracketed { my ($self, $string, $open_bracket, $close_bracket, $optional_prefix, $allow_whitespace) = @_; - $open_bracket = '{' if not defined $open_bracket; - $close_bracket = '}' if not defined $close_bracket; - $optional_prefix = '' if not defined $optional_prefix; - $allow_whitespace = 0 if not defined $allow_whitespace; + # set default values when none provided + $open_bracket //= '{'; + $close_bracket //= '}'; + $optional_prefix //= ''; + $allow_whitespace //= 0; my @prefix_group; @@ -745,9 +745,9 @@ sub truncate_result { $max_msg_len -= length "PRIVMSG $from :"; - # encode texts to utf8 - utf8::encode $paste_text; - utf8::encode $text; + # encode text to utf8 for byte length truncation + $text = encode('UTF-8', $text); + $paste_text = encode('UTF-8', $paste_text); if (length $text > $max_msg_len) { my $paste_result; @@ -759,11 +759,6 @@ sub truncate_result { # truncate paste to max paste length $paste_text = truncate_egc $paste_text, $max_paste_len; - $self->{pbot}->{logger}->log("Truncated paste to $max_paste_len bytes\n"); - - # decode paste text from utf8 because webpaste encodes to utf8 - utf8::decode $paste_text; - # send text to paste site $paste_result = $self->{pbot}->{webpaste}->paste("($from) $nick: $command\n\n$paste_text"); } @@ -793,11 +788,11 @@ sub truncate_result { # append the truncation text $text .= $trunc; + } else { + # decode text from utf8 + $text = decode('UTF-8', $text); } - # decode text from utf8 - utf8::decode $text; - return $text; } diff --git a/PBot/Modules.pm b/PBot/Modules.pm index 5e6bcfae..6c6a472d 100644 --- a/PBot/Modules.pm +++ b/PBot/Modules.pm @@ -138,7 +138,12 @@ sub launch_module { my $args = $context->{arguments}; if (not $context->{args_utf8}) { + $self->{pbot}->{logger}->log("encoding args ($args)\n"); $args = encode('UTF-8', $args); + $self->{pbot}->{logger}->log("encoded args ($args)\n"); + + my $test = decode('UTF-8', $args); + $self->{pbot}->{logger}->log("decoded test ($test)\n"); } my @cmdline = ("./$module", $self->{pbot}->{interpreter}->split_line($args)); @@ -151,8 +156,16 @@ sub launch_module { my $exitval = $? >> 8; + $self->{pbot}->{logger}->log("stdout before: $stdout\n"); + utf8::decode $stdout; utf8::decode $stderr; + # decode('UTF-8', $stdout); + # decode('UTF-8', $stderr); + # decode('utf8', $stdout); + # decode('utf8', $stderr); + + $self->{pbot}->{logger}->log("stdout after: $stdout\n"); return ($exitval, $stdout, $stderr); }; diff --git a/PBot/Registry.pm b/PBot/Registry.pm index c74e6782..a0a7e837 100644 --- a/PBot/Registry.pm +++ b/PBot/Registry.pm @@ -37,14 +37,23 @@ sub initialize { # prepare registry-specific bot commands PBot::RegistryCommands->new(pbot => $self->{pbot}); + # load existing registry entries from file (if exists) + if (-e $filename) { + $self->load; + } else { + $self->{pbot}->{logger}->log("No registry found at $filename, using defaults.\n"); + } + # add default registry items $self->add_default('text', 'general', 'data_dir', $conf{data_dir}); $self->add_default('text', 'general', 'module_dir', $conf{module_dir}); $self->add_default('text', 'general', 'plugin_dir', $conf{plugin_dir}); $self->add_default('text', 'general', 'update_dir', $conf{update_dir}); + # bot trigger $self->add_default('text', 'general', 'trigger', $conf{trigger} // '!'); + # irc $self->add_default('text', 'irc', 'debug', $conf{irc_debug} // 0); $self->add_default('text', 'irc', 'show_motd', $conf{show_motd} // 1); $self->add_default('text', 'irc', 'max_msg_len', $conf{max_msg_len} // 425); @@ -59,16 +68,13 @@ sub initialize { $self->add_default('text', 'irc', 'identify_password', $conf{identify_password} // ''); $self->add_default('text', 'irc', 'log_default_handler', 1); + # make sensitive entries private $self->set_default('irc', 'SSL_ca_file', 'private', 1); $self->set_default('irc', 'SSL_ca_path', 'private', 1); $self->set_default('irc', 'identify_password', 'private', 1); - # load existing registry entries from file (if exists) to overwrite defaults - if (-e $filename) { - $self->load; - } else { - $self->{pbot}->{logger}->log("No registry found at $filename, using defaults.\n"); - } + # customizable regular expressions + $self->add_default('text', 'regex', 'nickname', '[_a-zA-Z0-9\[\]{}`\\-]+'); # update important paths $self->set('general', 'data_dir', 'value', $conf{data_dir}, 0, 1); @@ -140,24 +146,34 @@ sub add_default { } sub add { - my $self = shift; - my ($type, $section, $item, $value, $is_default) = @_; + my ($self, $type, $section, $item, $value, $is_default) = @_; + $type = lc $type; - if ($is_default) { - # don't replace existing registry values if we're just adding a default value - return if $self->{registry}->exists($section, $item); - } - if (not $self->{registry}->exists($section, $item)) { + # registry entry does not exist + my $data = { value => $value, type => $type, }; + $self->{registry}->add($section, $item, $data, 1); } else { + # registry entry already exists + + if ($is_default) { + # don't replace existing registry values if we're just adding a default value + return; + } + + # update value $self->{registry}->set($section, $item, 'value', $value, 1); - $self->{registry}->set($section, $item, 'type', $type, 1) unless $self->{registry}->exists($section, $item, 'type'); + + # update type only if it doesn't exist + unless ($self->{registry}->exists($section, $item, 'type')) { + $self->{registry}->set($section, $item, 'type', $type, 1); + } } unless ($is_default) { diff --git a/PBot/Utils/ValidateString.pm b/PBot/Utils/ValidateString.pm index bd4eac53..672da7ac 100644 --- a/PBot/Utils/ValidateString.pm +++ b/PBot/Utils/ValidateString.pm @@ -4,38 +4,100 @@ use warnings; use strict; use feature 'unicode_strings'; +use utf8; +# export validate_string() subroutine require Exporter; our @ISA = qw/Exporter/; our @EXPORT = qw/validate_string/; use JSON; +use Unicode::Truncate; + +# validate_string converts a given string to one that conforms to +# PBot's limitations for internal strings. This means ensuring the +# string is not too long, does not have undesired characters, etc. +# +# If the given string contains a JSON structure, it will be parsed +# and each value will be validated. JSON structures must have a depth +# of one level only. +# +# Note that $max_length represents bytes, not characters. The string +# is encoded to utf8, validated, and then decoded back. Truncation +# uses Unicode::Truncate to find the longest Unicode string that can +# fit within $max_length bytes without corruption of the characters. +# +# if $max_length is undefined, it defaults to 8k. +# +# if $max_length is 0, no truncation occurs. sub validate_string { my ($string, $max_length) = @_; - return $string if not defined $string or not length $string; - $max_length = 1024 * 8 if not defined $max_length; + if (not defined $string or not length $string) { + # nothing to validate; return as-is. + return $string; + } + + # set default max length if none given + $max_length //= 1024 * 8; local $@; eval { - my $h = decode_json($string); + # attempt to decode as a JSON string + # throws exception if fails + my $data = decode_json($string); - if (not defined $h) { - $string = 'null'; - } else { - foreach my $k (keys %$h) { $h->{$k} = substr $h->{$k}, 0, $max_length unless $max_length <= 0; } - $string = encode_json($h); + # no exception thrown, must be JSON. + # so we validate all of its values. + + if (not defined $data) { + # decode_json decodes "null" to undef. so we just + # go ahead and return "null" as-is. otherwise, if we allow + # encode_json to encode it back to a string, the string + # will be "{}". bit weird. + return 'null'; } + + # validate values + foreach my $key (keys %$data) { + $data->{$key} = validate_this_string($data->{$key}, $max_length); + } + + # encode back to a JSON string + $string = encode_json($data); }; if ($@) { - # not a json string - $string = substr $string, 0, $max_length unless $max_length <= 0; + # not a JSON string, so validate as a normal string. + $string = validate_this_string($string, $max_length); } - # $string =~ s/(\P{PosixGraph})/my $ch = $1; if ($ch =~ m{[\s\x03\x02\x1d\x1f\x16\x0f]}) { $ch } else { sprintf "\\x%02X", ord $ch }/ge; - # $string = substr $string, 0, $max_length unless $max_length <= 0; + # all validated! + return $string; +} + +# validates the string. +# safely performs Unicode truncation given a byte length, handles +# unwanted characters, etc. +sub validate_this_string { + my ($string, $max_length) = @_; + + # truncate safely + if ($max_length > 0) { + $string = truncate_egc $string, $max_length; + } + + # allow only these characters. + # TODO: probably going to delete this code. + # replace any extraneous characters with escaped-hexadecimal representation + # $string =~ s/(\P{PosixGraph})/ + # my $ch = $1; + # if ($ch =~ m{[\s\x03\x02\x1d\x1f\x16\x0f]}) { + # $ch; + # } else { + # sprintf "\\x%02X", ord $ch; + # }/gxe; return $string; }