# Mirror of https://github.com/pragma-/pbot.git (synced 2025-02-11 03:50:53 +01:00)
# Path: pbot/lib/PBot/Core/Utils/ValidateString.pm

# File: ValidateString.pm
#
# Purpose: ensures that a given string conforms to PBot's limitations
# for internal strings. This means ensuring the string is not too long,
# does not have undesired characters, etc.
#
# SPDX-FileCopyrightText: 2021 Pragmatic Software <pragma78@gmail.com>
# SPDX-License-Identifier: MIT
package PBot::Core::Utils::ValidateString;

use PBot::Imports;

# export validate_string subroutine
require Exporter;
our @ISA    = qw/Exporter/;
our @EXPORT = qw/validate_string/;

# JSON provides decode_json/encode_json, Encode provides encode, and
# Unicode::Truncate provides truncate_egc, all used by the subs below.
use JSON;
use Encode;
use Unicode::Truncate;

# validate_string converts a given string to one that conforms to
# PBot's limitations for internal strings. This means ensuring the
# string is not too long, does not have undesired characters, etc.
#
# If the given string contains a JSON object, it is parsed, each of its
# values is validated individually, and the object is encoded back to a
# JSON string. JSON structures must have a depth of one level only;
# nested values are not handled specially here.
#
# Any other input -- including JSON arrays and scalars, and anything that
# fails to parse or validate as a JSON object -- is validated as a plain
# string. Errors raised during that plain-string validation are not caught
# here and propagate to the caller.
#
# Note that $max_length represents bytes, not characters. Truncation
# uses Unicode::Truncate to find the longest Unicode string that can
# fit within $max_length bytes without corruption of the characters.
# NOTE(review): validate_this_string encodes to UTF-8 before truncating
# and performs no explicit decode afterwards; confirm whether callers
# expect bytes or characters back.
#
# Parameters:
#   $string     - the string to validate; undef and '' are returned as-is.
#   $max_length - maximum length in bytes. If undefined, it defaults to
#                 8k (1024 * 8). If 0, no truncation occurs.
#
# Returns the validated string (or 'null' when the input is JSON null).

sub validate_string {
    my ($string, $max_length) = @_;

    # nothing to validate; return as-is.
    return $string if not defined $string or not length $string;

    # set default max length if none given
    $max_length //= 1024 * 8;

    my $is_null = 0;    # set when the input decodes to JSON null
    my $json;           # re-encoded JSON object; set only on full success

    # keep the caller's $@ intact; success is detected through the eval's
    # final true value rather than by inspecting $@ afterwards.
    local $@;

    my $ok = eval {
        # attempt to decode as a JSON string; throws exception if it fails
        my $data = decode_json($string);

        if (not defined $data) {
            # decode_json decodes "null" to undef. Flag it so that 'null'
            # is returned once we are outside the eval; a `return` here
            # only leaves the eval block, not validate_string itself.
            $is_null = 1;
            return 1;
        }

        # only JSON objects carry key/value pairs to validate. Reject
        # arrays and scalars explicitly instead of depending on a
        # dereference error to reach the plain-string fallback.
        die "not a JSON object\n" if ref $data ne 'HASH';

        # validate values
        foreach my $key (keys %$data) {
            $data->{$key} = validate_this_string($data->{$key}, $max_length);
        }

        # encode back to a JSON string
        $json = encode_json($data);

        1;
    };

    return 'null' if $is_null;
    return $json  if $ok;

    # not a JSON object (or a value failed to validate), so validate the
    # original input as a normal string.
    return validate_this_string($string, $max_length);
}

# validate_this_string validates a single plain string.
#
# Safely performs Unicode truncation given a byte length, handles
# unwanted characters, etc.
#
# Parameters:
#   $string     - the string to validate.
#   $max_length - maximum length in bytes. When greater than 0, the string
#                 is encoded to UTF-8 and truncated with truncate_egc.
#                 When 0, the string is returned untouched (not encoded).
#
# Returns the validated string.
# NOTE(review): no explicit decode is performed after truncation; confirm
# whether truncate_egc hands back bytes or characters.

sub validate_this_string {
    my ($string, $max_length) = @_;

    # truncate safely
    if ($max_length > 0) {
        # the limit is expressed in bytes, so work on the UTF-8 encoding
        $string = encode('UTF-8', $string);
        $string = truncate_egc($string, $max_length);
    }

    # allow only these characters.
    # TODO: probably going to delete this code.
    # replace any extraneous characters with escaped-hexadecimal representation
    # $string =~ s/(\P{PosixGraph})/
    # my $ch = $1;
    # if ($ch =~ m{[\s\x03\x02\x1d\x1f\x16\x0f]}) {
    # $ch;
    # } else {
    # sprintf "\\x%02X", ord $ch;
    # }/gxe;

    return $string;
}

1;