mirror of
https://github.com/pragma-/pbot.git
synced 2024-11-16 08:59:34 +01:00
106 lines
3.1 KiB
Perl
106 lines
3.1 KiB
Perl
# File: ValidateString.pm
|
|
#
|
|
# Purpose: ensures that a given string conforms to PBot's limitations
|
|
# for internal strings. This means ensuring the string is not too long,
|
|
# does not have undesired characters, etc.
|
|
|
|
# SPDX-FileCopyrightText: 2017-2023 Pragmatic Software <pragma78@gmail.com>
|
|
# SPDX-License-Identifier: MIT
|
|
|
|
package PBot::Core::Utils::ValidateString;
|
|
|
|
use PBot::Imports;
|
|
|
|
# export validate_string subroutine
|
|
require Exporter;
|
|
our @ISA = qw/Exporter/;
|
|
our @EXPORT = qw/validate_string/;
|
|
|
|
use JSON;
|
|
use Encode;
|
|
use Unicode::Truncate;
|
|
|
|
# validate_string converts a given string to one that conforms to
# PBot's limitations for internal strings. At present this means
# ensuring the string is not too long; the character filtering in
# validate_this_string() is commented out.
#
# If the given string is a JSON object, it is parsed, each of its
# top-level values is validated individually and the object is encoded
# back to a JSON string. JSON structures must have a depth of one level
# only. JSON null values inside the object are left untouched.
#
# The JSON document "null" is returned exactly as given. Any other JSON
# document (array, number, string, boolean) and anything that does not
# parse as JSON is validated as one plain string.
#
# Note that $max_length represents bytes, not characters. Truncation
# is done by validate_this_string(), which encodes the string to UTF-8
# and uses Unicode::Truncate to find the longest Unicode string that
# can fit within $max_length bytes without corruption of the characters.
#
# Parameters:
#   $string     - the string to validate.
#   $max_length - maximum length in bytes. If omitted or undefined, it
#                 defaults to 8k. If 0, no truncation occurs.
#
# Returns the validated string. An undefined or empty $string is
# returned as-is.

sub validate_string($string, $max_length = 1024 * 8) {
    # nothing to validate; return as-is.
    return $string if not defined $string or not length $string;

    # the signature default only applies when the argument is omitted;
    # an explicitly passed undef must get the documented 8k default too.
    $max_length //= 1024 * 8;

    my $validated;

    local $@;

    # the block ends in a true value, so a false result from eval means
    # that something inside it threw an exception.
    my $is_json = eval {
        # attempt to decode as a JSON string.
        # throws exception if fails.
        my $data = decode_json($string);

        if (not defined $data) {
            # decode_json decodes "null" to undef. there is nothing to
            # validate, so hand the input back untouched instead of
            # re-encoding it.
            $validated = $string;
        } elsif (ref $data eq 'HASH') {
            # a JSON object: validate each of its values.
            foreach my $key (keys %$data) {
                # JSON null has nothing to truncate and must stay null.
                next if not defined $data->{$key};

                $data->{$key} = validate_this_string($data->{$key}, $max_length);
            }

            # encode back to a JSON string
            $validated = encode_json($data);
        } else {
            # arrays, numbers, strings and booleans are not supported
            # structures; bail out so they are validated as plain text.
            die "not a JSON object\n";
        }

        1;
    };

    # all validated!
    return $validated if $is_json;

    # not a (supported) JSON string, so validate as a normal string.
    return validate_this_string($string, $max_length);
}
|
|
|
|
# validate_this_string validates one plain string.
#
# When $max_length is greater than 0, the string is encoded to UTF-8
# and passed to Unicode::Truncate's truncate_egc together with
# $max_length, which is a length in bytes. When $max_length is 0, the
# string is returned without being encoded or truncated.
#
# NOTE(review): the encoded string is not explicitly decoded again
# here; whether callers receive characters or UTF-8 bytes depends on
# what truncate_egc returns -- confirm against Unicode::Truncate.
#
# Parameters:
#   $string     - the string to validate.
#   $max_length - maximum length in bytes. If omitted or undefined, it
#                 defaults to 8k. If 0, no truncation occurs.
#
# Returns the validated string. An undefined $string is returned as-is.
sub validate_this_string($string, $max_length = 1024 * 8) {
    # nothing to validate; return as-is.
    return $string if not defined $string;

    # the signature default only applies when the argument is omitted;
    # an explicitly passed undef gets the same 8k default.
    $max_length //= 1024 * 8;

    # truncate safely
    if ($max_length > 0) {
        $string = encode('UTF-8', $string);
        $string = truncate_egc($string, $max_length);
    }

    # allow only these characters.
    # TODO: probably going to delete this code.
    # replace any extraneous characters with escaped-hexadecimal representation
    # $string =~ s/(\P{PosixGraph})/
    #    my $ch = $1;
    #    if ($ch =~ m{[\s\x03\x02\x1d\x1f\x16\x0f]}) {
    #        $ch;
    #    } else {
    #        sprintf "\\x%02X", ord $ch;
    #    }/gxe;

    return $string;
}
|
|
|
|
1;
|