mirror of
https://github.com/pragma-/pbot.git
synced 2024-11-26 13:59:47 +01:00
Fix remaining paste truncation Unicode issues
Uses Unicode::Truncate CPAN module to truncate UTF-8 strings to byte lengths without corruption.
This commit is contained in:
parent
45d6576b5a
commit
33f82c6523
1
MODULES
1
MODULES
@ -42,6 +42,7 @@ Text::Unidecode
|
|||||||
Time::Duration
|
Time::Duration
|
||||||
Time::ParseDate
|
Time::ParseDate
|
||||||
re::engine::RE2
|
re::engine::RE2
|
||||||
|
Unicode::Truncate
|
||||||
URI::Escape
|
URI::Escape
|
||||||
WebService::UrbanDictionary
|
WebService::UrbanDictionary
|
||||||
WWW::Google::CustomSearch
|
WWW::Google::CustomSearch
|
||||||
|
@ -17,6 +17,7 @@ use utf8;
|
|||||||
|
|
||||||
use Time::HiRes qw/gettimeofday/;
|
use Time::HiRes qw/gettimeofday/;
|
||||||
use Time::Duration;
|
use Time::Duration;
|
||||||
|
use Unicode::Truncate;
|
||||||
|
|
||||||
use PBot::Utils::ValidateString;
|
use PBot::Utils::ValidateString;
|
||||||
|
|
||||||
@ -244,7 +245,7 @@ sub interpret {
|
|||||||
$arguments = '' if not defined $arguments;
|
$arguments = '' if not defined $arguments;
|
||||||
}
|
}
|
||||||
|
|
||||||
# FIXME: make this a registry item
|
# TODO: make this a registry item
|
||||||
if (length $keyword > 128) {
|
if (length $keyword > 128) {
|
||||||
$keyword = substr($keyword, 0, 128);
|
$keyword = substr($keyword, 0, 128);
|
||||||
$self->{pbot}->{logger}->log("Truncating keyword to 128 chars: $keyword\n");
|
$self->{pbot}->{logger}->log("Truncating keyword to 128 chars: $keyword\n");
|
||||||
@ -742,8 +743,9 @@ sub truncate_result {
|
|||||||
|
|
||||||
my $max_msg_len = $self->{pbot}->{registry}->get_value('irc', 'max_msg_len');
|
my $max_msg_len = $self->{pbot}->{registry}->get_value('irc', 'max_msg_len');
|
||||||
|
|
||||||
$max_msg_len -= length "PRIVMSG $from :" if defined $from;
|
$max_msg_len -= length "PRIVMSG $from :";
|
||||||
|
|
||||||
|
# encode texts to utf8
|
||||||
utf8::encode $paste_text;
|
utf8::encode $paste_text;
|
||||||
utf8::encode $text;
|
utf8::encode $text;
|
||||||
|
|
||||||
@ -755,9 +757,11 @@ sub truncate_result {
|
|||||||
my $max_paste_len = $self->{pbot}->{registry}->get_value('paste', 'max_length') // 1024 * 32;
|
my $max_paste_len = $self->{pbot}->{registry}->get_value('paste', 'max_length') // 1024 * 32;
|
||||||
|
|
||||||
# truncate paste to max paste length
|
# truncate paste to max paste length
|
||||||
# FIXME: this potentially chops unicode characters in wrong places
|
$paste_text = truncate_egc $paste_text, $max_paste_len;
|
||||||
$paste_text = substr $paste_text, 0, $max_paste_len;
|
|
||||||
|
|
||||||
|
$self->{pbot}->{logger}->log("Truncated paste to $max_paste_len bytes\n");
|
||||||
|
|
||||||
|
# decode paste text from utf8 because webpaste encodes to utf8
|
||||||
utf8::decode $paste_text;
|
utf8::decode $paste_text;
|
||||||
|
|
||||||
# send text to paste site
|
# send text to paste site
|
||||||
@ -782,13 +786,18 @@ sub truncate_result {
|
|||||||
$self->{pbot}->{logger}->log("Message truncated -- $paste_result\n");
|
$self->{pbot}->{logger}->log("Message truncated -- $paste_result\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# make room to append the truncation text to the message text
|
||||||
|
# (third argument to truncate_egc is '' to prevent appending its own ellipsis)
|
||||||
my $trunc_len = length $text < $max_msg_len ? length $text : $max_msg_len;
|
my $trunc_len = length $text < $max_msg_len ? length $text : $max_msg_len;
|
||||||
# FIXME: this potentially chops unicode characters in wrong places
|
$text = truncate_egc $text, $trunc_len - length $trunc, '';
|
||||||
$text = substr($text, 0, $trunc_len);
|
|
||||||
substr($text, $trunc_len - length $trunc) = $trunc;
|
# append the truncation text
|
||||||
|
$text .= $trunc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# decode text from utf8
|
||||||
utf8::decode $text;
|
utf8::decode $text;
|
||||||
|
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user