mirror of
https://github.com/pragma-/pbot.git
synced 2025-01-01 15:42:37 +01:00
691 lines
18 KiB
Perl
Executable File
691 lines
18 KiB
Perl
Executable File
#!/usr/bin/perl
|
||
|
||
# File: guest-gdb
|
||
#
|
||
# Purpose: Wraps a GNU Debugger instance around a program. Watches for
|
||
# invocations of a gdb() function to handle gdb commands from the program.
|
||
# Reports information about local variables when signals are detected.
|
||
#
|
||
# Usage: guest-gdb [executable]
|
||
#
|
||
# If [executable] is omitted, it will default to `prog`. Don't forget to
|
||
# compile with -g for debugging symbols.
|
||
|
||
# The DEBUG environment variable can be set to an integer value to enable
|
||
# internal debugging output. Set it to 1 for minimal debugging; to 2 to
|
||
# also dump the parsed GDB/MI data structures.
|
||
|
||
# SPDX-FileCopyrightText: 2022 Pragmatic Software <pragma78@gmail.com>
|
||
# SPDX-License-Identifier: MIT
|
||
|
||
use 5.020;
|
||
|
||
use warnings;
|
||
use strict;
|
||
|
||
use feature qw(signatures);
|
||
no warnings qw(experimental::signatures);
|
||
|
||
use IPC::Open2;
|
||
use JSON::XS;
|
||
use Data::Dumper;
|
||
use Encode qw(decode encode);
|
||
|
||
# set debug flag from DEBUG env or here
|
||
my $debug = $ENV{DEBUG} // 0;
|
||
|
||
# output from gdb will be prefixed with GDB_PREFIX and suffixed with GDB_SUFFIX.
|
||
# e.g., to wrap gdb output with <>'s set GDB_PREFIX to "<" and GDB_SUFFIX to ">".
|
||
use constant {
|
||
GDB_PREFIX => "\n",
|
||
GDB_SUFFIX => "\n\n",
|
||
};
|
||
|
||
# files to read stdin and write stdout
|
||
use constant {
|
||
INPUT_FILENAME => '.input',
|
||
OUTPUT_FILENAME => '.output',
|
||
};
|
||
|
||
# GDB/MI message types
|
||
use constant {
|
||
STATUS => '+',
|
||
EXEC => '*',
|
||
NOTIFY => '=',
|
||
CONSOLE => '~',
|
||
TARGET => '@',
|
||
LOG => '&',
|
||
RESULT => '^',
|
||
};
|
||
|
||
# send a command to gdb
|
||
sub gdb_send($in, $command) {
|
||
print STDERR "-> $command\n" if $debug;
|
||
print $in "$command\n";
|
||
}
|
||
|
||
# read a line from gdb
|
||
sub gdb_read($out) {
|
||
my $line;
|
||
|
||
while ($line = <$out>) {
|
||
chomp $line;
|
||
print STDERR "<- [$line]\n" if $debug;
|
||
next if $line eq '(gdb) '; # ignore gdb prompt
|
||
last;
|
||
}
|
||
|
||
return undef if not defined $line;
|
||
return parse_gdbmi_output($line);
|
||
}
|
||
|
||
# send a command to gdb and return all ~console and ^result output
|
||
sub gdb_send_and_read_console($context, $command, $report_error = 1) {
|
||
gdb_send($context->{gdb_input}, $command);
|
||
|
||
my $gdb = $context->{gdb_output};
|
||
my @console;
|
||
my $output;
|
||
|
||
while ($output = gdb_read_console($gdb)) {
|
||
push @console, $output;
|
||
last if $output->{_type} eq RESULT;
|
||
}
|
||
|
||
if ($report_error && $output->{_type} eq RESULT && $output->{_class} eq 'error') {
|
||
print_gdb_output($context, $output->{msg});
|
||
}
|
||
|
||
return \@console;
|
||
}
|
||
|
||
# send a command to gdb and return only the ^result output
|
||
sub gdb_send_and_read_result($context, $command, $report_error = 1) {
|
||
gdb_send($context->{gdb_input}, $command);
|
||
|
||
my $gdb = $context->{gdb_output};
|
||
my $output;
|
||
|
||
while ($output = gdb_read($gdb)) {
|
||
last if $output->{_type} eq RESULT;
|
||
}
|
||
|
||
if ($report_error && $output->{_class} eq 'error') {
|
||
print_gdb_output($context, $output->{msg});
|
||
}
|
||
|
||
return $output;
|
||
}
|
||
|
||
# alias to send_and_read_result
|
||
*gdb_send_and_discard = \&gdb_send_and_read_result;
|
||
|
||
# read line from gdb, discarding types other than ~console or ^result
|
||
sub gdb_read_console($out) {
|
||
my $output;
|
||
|
||
while ($output = gdb_read($out)) {
|
||
last if $output->{_type} eq RESULT;
|
||
last if $output->{_type} eq CONSOLE;
|
||
}
|
||
|
||
return $output;
|
||
}
|
||
|
||
# read and discard all output until ^result is reached
|
||
sub gdb_read_result($out) {
|
||
my $output;
|
||
|
||
while ($output = gdb_read($out)) {
|
||
last if $output->{_type} eq RESULT;
|
||
}
|
||
|
||
return $output;
|
||
}
|
||
|
||
# convert gdb/mi text to hashtable, converting to JSON first if necessary
|
||
sub gdbmi_to_hash($text, $makejson = 1) {
|
||
# convert to JSON first if necessary (gdb/mi is nearly JSON already!)
|
||
$text =~ s/([\w-]+)=/"$1":/g if $makejson;
|
||
|
||
# decode gdb octal escapes
|
||
# (sometimes \\123, othertimes \123 and I can't be arsed to investigate
|
||
# when and why the backslashes double up)
|
||
$text =~ s/\\+(\d{3})/$1 >= 0x20 ? chr oct $1 : "\\\\$1"/ge;
|
||
|
||
# escape malformed unicode
|
||
my $octets = decode('UTF-8', $text, sub { sprintf '\\\\x%X', shift });
|
||
$text = encode('UTF-8', $octets, Encode::FB_CROAK);
|
||
|
||
# escape invalid JSON characters
|
||
$text =~ s/([\x10-\x1f])/sprintf '\\\\x%X', ord $1/ge;
|
||
|
||
# return hashtable decoded from json
|
||
return decode_json("{$text}");
|
||
}
|
||
|
||
sub parse_gdbmi_output($line) {
|
||
my ($type, $text) = $line =~ /^\d*(.)(.*)/;
|
||
|
||
my $class = 'none';
|
||
my $makejson = 0;
|
||
|
||
if ($type =~ /[+*=]/) {
|
||
($class, $text) = split /,/, $text, 2;
|
||
$makejson = 1 if length $text;
|
||
} elsif ($type eq RESULT) {
|
||
($class, $text) = split /,/, $text, 2;
|
||
$text //= '';
|
||
$makejson = 1 if length $text;
|
||
} else {
|
||
$text = "\"_text\":$text";
|
||
}
|
||
|
||
my $output = gdbmi_to_hash($text, $makejson);
|
||
|
||
if (exists $output->{_text}) {
|
||
chomp $output->{_text};
|
||
}
|
||
|
||
$output->{_type} = $type;
|
||
$output->{_class} = $class;
|
||
|
||
print STDERR Dumper($output), "\n" if $debug >= 2;
|
||
|
||
return $output;
|
||
}
|
||
|
||
# get local variables at current frame
|
||
sub get_locals($context) {
|
||
# use `info locals` gdb command
|
||
my $console = gdb_send_and_read_console($context, "info locals");
|
||
my @locals;
|
||
|
||
foreach my $output (@$console) {
|
||
last if $output->{_type} eq RESULT;
|
||
|
||
if ($output->{_text} =~ /([^=]+)\s+=\s+(.*)/) {
|
||
push @locals, [$1, $2];
|
||
}
|
||
}
|
||
|
||
return \@locals;
|
||
}
|
||
|
||
# compare two lists of locals and returns list containing just the
|
||
# newly modified locals
|
||
sub compare_locals($old, $new) {
|
||
# build hashtable of left-side locals for easier access
|
||
my %ht;
|
||
|
||
foreach my $local (@$old) {
|
||
my ($ident, $value) = @$local;
|
||
$ht{$ident} = $value;
|
||
}
|
||
|
||
# check for modified locals and add to modified list
|
||
my @modified;
|
||
|
||
foreach my $local (@$new) {
|
||
my ($ident, $value) = @$local;
|
||
if (!exists $ht{$ident} || $ht{$ident} ne $value) {
|
||
push @modified, [$ident, $value];
|
||
}
|
||
}
|
||
|
||
return \@modified;
|
||
}
|
||
|
||
sub locals_to_string($locals) {
|
||
my @strings;
|
||
|
||
foreach my $local (@$locals) {
|
||
my ($ident, $value) = @$local;
|
||
push @strings, "$ident = $value";
|
||
}
|
||
|
||
return join '; ', @strings;
|
||
}
|
||
|
||
sub args_to_string($args) {
|
||
my @strings;
|
||
|
||
foreach my $arg (@$args) {
|
||
push @strings, "$arg->{name}=$arg->{value}";
|
||
}
|
||
|
||
return join ', ', @strings;
|
||
}
|
||
|
||
# determine on which line numbers the main() function begins and ends
|
||
sub get_main_start_end($context) {
|
||
gdb_send_and_discard($context, 'set listsize unlimited');
|
||
|
||
my ($start, $end);
|
||
|
||
# use `info functions` to find where main starts
|
||
my $console = gdb_send_and_read_console($context, 'info functions -q ^main$');
|
||
|
||
foreach my $output (@$console) {
|
||
last if $output->{_type} eq RESULT;
|
||
($start) = $output->{_text} =~ /^(\d+)/;
|
||
}
|
||
|
||
$console = gdb_send_and_read_console($context, 'list main');
|
||
|
||
# gdb shows extra context surrounding main, so we have to parse the output
|
||
# and count the braces to find the true end of main()
|
||
my $braces = 0;
|
||
|
||
foreach my $output (@$console) {
|
||
last if $output->{_type} eq RESULT;
|
||
|
||
# we have already found end; don't parse output
|
||
last if $end;
|
||
|
||
my ($line, $code) = split /\t/, $output->{_text};
|
||
|
||
# this line isn't part of main() yet
|
||
next if $line < $start;
|
||
|
||
# blank out contents of string and char literals so we don't count
|
||
# any braces within
|
||
$code =~ s/(?:\"((?:\\\"|(?!\").)*)\")/'"' . ('-' x length $1) . '"'/ge;
|
||
$code =~ s/(?:\'((?:\\\'|(?!\').)*)\')/"'" . ('-' x length $1) . "'"/ge;
|
||
|
||
my @chars = split //, $code;
|
||
foreach my $char (@chars) {
|
||
if ($char eq '{') {
|
||
$braces++;
|
||
} elsif ($char eq '}') {
|
||
$braces--;
|
||
|
||
if ($braces == 0) {
|
||
$end = $line - 1;
|
||
last;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
return ($start, $end);
|
||
}
|
||
|
||
sub get_backtrace($context) {
|
||
my $console = gdb_send_and_read_console($context, "bt");
|
||
|
||
my @backtrace;
|
||
|
||
foreach my $output (@$console) {
|
||
last if $output->{_type} eq RESULT;
|
||
$output->{_text} =~ /in (\w+) (.*) at/;
|
||
push @backtrace, { func => $1, args => $2 };
|
||
}
|
||
|
||
return \@backtrace;
|
||
}
|
||
|
||
# returns the source line at a given line number
|
||
sub get_lineno($context, $lineno) {
|
||
my $console = gdb_send_and_read_console($context, "list $lineno,$lineno", 0);
|
||
|
||
my $line;
|
||
|
||
foreach my $output (@$console) {
|
||
last if $output->{_type} eq RESULT;
|
||
($line) = $output->{_text} =~ /\t\s+(.*)/;
|
||
}
|
||
|
||
return $line;
|
||
}
|
||
|
||
sub handle_breakpoint_hit($context, $data) {
|
||
if ($data->{frame}->{func} eq 'main') {
|
||
if ($data->{frame}->{line} == $context->{main_end}) {
|
||
$context->{locals_end} = get_locals($context);
|
||
}
|
||
}
|
||
elsif ($data->{frame}->{func} eq 'gdb') {
|
||
my $command = $data->{frame}->{args}->[0]->{value};;
|
||
|
||
# strip gdb junk from command
|
||
$command =~ s/^.*?"//;
|
||
$command =~ s/"$//;
|
||
|
||
dispatch_user_command($context, $command);
|
||
}
|
||
|
||
gdb_send_and_discard($context, "cont");
|
||
}
|
||
|
||
# gdb commands invoked by end-users
|
||
sub dispatch_user_command($context, $command) {
|
||
state %commands = (
|
||
'ptype' => \&cmd_ptype,
|
||
'whatis' => \&cmd_whatis,
|
||
'print' => \&cmd_print,
|
||
'dump' => \&cmd_print,
|
||
|
||
# special PBot command
|
||
'print_last_statement' => \&cmd_print_last_statement,
|
||
);
|
||
|
||
$command = unescape($command);
|
||
|
||
# move out of gdb() function to caller
|
||
gdb_send_and_discard($context, "up");
|
||
|
||
my ($cmd, $args) = split /\s+/, $command, 2;
|
||
|
||
if (not exists $commands{$cmd}) {
|
||
cmd_gdb($context, $command);
|
||
} else {
|
||
$commands{$cmd}->($context, $args);
|
||
}
|
||
}
|
||
|
||
sub cmd_gdb($context, $command) {
|
||
my $console = gdb_send_and_read_console($context, "$command");
|
||
|
||
my $text = '';
|
||
|
||
foreach my $output (@$console) {
|
||
last if $output->{_type} eq RESULT;
|
||
$text .= "$output->{_text}\n";
|
||
}
|
||
|
||
print_gdb_output($context, $text);
|
||
}
|
||
|
||
sub cmd_ptype($context, $args) {
|
||
my $console = gdb_send_and_read_console($context, "ptype $args");
|
||
|
||
my $text = '';
|
||
|
||
foreach my $output (@$console) {
|
||
last if $output->{_type} eq RESULT;
|
||
|
||
if ($output->{_text} =~ /type\s*=\s*(.*)/) {
|
||
$text .= "$args = $1"
|
||
} else {
|
||
$output->{_text} =~ s/^\s+//;
|
||
$text .= " $output->{_text}";
|
||
}
|
||
}
|
||
|
||
print_gdb_output($context, $text);
|
||
}
|
||
|
||
sub cmd_whatis($context, $args) {
|
||
my $console = gdb_send_and_read_console($context, "whatis $args");
|
||
|
||
foreach my $output (@$console) {
|
||
last if $output->{_type} eq RESULT;
|
||
|
||
if ($output->{_text} =~ /type\s*=\s*(.*)/) {
|
||
print_gdb_output($context, "$args = $1");
|
||
}
|
||
}
|
||
}
|
||
|
||
sub cmd_print($context, $args) {
|
||
my $console = gdb_send_and_read_console($context, "print $args");
|
||
|
||
foreach my $output (@$console) {
|
||
last if $output->{_type} eq RESULT;
|
||
|
||
if ($output->{_text} =~ /^\$\d+\s*=\s*(.*)/) {
|
||
print_gdb_output($context, "$args = $1");
|
||
}
|
||
}
|
||
}
|
||
|
||
sub cmd_print_last_statement($context, $args) {
|
||
# invoke the last statement
|
||
my $console = gdb_send_and_read_console($context, "print $args");
|
||
|
||
# don't print last statement if there was program output
|
||
return if -s OUTPUT_FILENAME;
|
||
|
||
foreach my $output (@$console) {
|
||
last if $output->{_type} eq RESULT;
|
||
|
||
if ($output->{_text} =~ /^\$\d+\s*=\s*(.*)/) {
|
||
print_gdb_output($context, "no output: $args = $1");
|
||
}
|
||
}
|
||
}
|
||
|
||
sub handle_program_exit($context, $data) {
|
||
if (not -s OUTPUT_FILENAME) { # -s gets size of file
|
||
my $locals = locals_to_string($context->{locals_end});
|
||
|
||
if (length $locals) {
|
||
print_gdb_output($context, "no output: $locals");
|
||
}
|
||
}
|
||
|
||
if (exists $data->{'exit-code'} && $data->{'exit-code'} != 0) {
|
||
$context->{'exit-code'} = oct $data->{'exit-code'};
|
||
}
|
||
|
||
_exit($context);
|
||
}
|
||
|
||
sub handle_program_signal($context, $data) {
|
||
my $locals = locals_to_string(get_locals($context));
|
||
|
||
my $args = args_to_string($data->{frame}->{args});
|
||
|
||
my $text = "Program received signal $data->{'signal-name'}, $data->{'signal-meaning'} ";
|
||
|
||
$text .= "in $data->{frame}->{func} ($args) ";
|
||
|
||
my $line;
|
||
|
||
if (exists $data->{frame}->{line}) {
|
||
if ($line = get_lineno($context, $data->{frame}->{line})) {
|
||
$text .= "at statement: $line ";
|
||
}
|
||
}
|
||
|
||
my $backtrace = get_backtrace($context);
|
||
|
||
shift @$backtrace; # remove current frame
|
||
|
||
foreach my $trace (@$backtrace) {
|
||
my $console = gdb_send_and_read_console($context, "up");
|
||
|
||
foreach my $output (@$console) {
|
||
last if $output->{_type} eq RESULT;
|
||
|
||
if ($output->{_text} =~ /^\d+\t\s+(.*)/) {
|
||
$line = $1;
|
||
}
|
||
}
|
||
|
||
$text .= "called by $trace->{func} $trace->{args} " if $trace->{func};
|
||
$text .= "at statement: $line " if $line;
|
||
}
|
||
|
||
if (length $locals) {
|
||
$text .= "<local variables: $locals>"
|
||
} else {
|
||
$text =~ s/\s+$//;
|
||
}
|
||
|
||
print_gdb_output($context, $text);
|
||
_exit($context);
|
||
}
|
||
|
||
sub unescape($text) {
|
||
state %unescape = (
|
||
"\\n" => "\n",
|
||
"\\t" => "\t",
|
||
"\\\\" => "\\",
|
||
"\\\"" => '"',
|
||
);
|
||
|
||
$text =~ s/(\\n|\\t|\\\\|\\")/$unescape{$1}/g;
|
||
return $text;
|
||
}
|
||
|
||
# dispatch output from gdb commands to handlers
|
||
sub dispatch_gdbmi_output($context, $output) {
|
||
state %dispatch = (
|
||
STATUS , \&handle_status_async_output,
|
||
EXEC , \&handle_exec_async_output,
|
||
NOTIFY , \&handle_notify_async_output,
|
||
CONSOLE , \&handle_console_stream_output,
|
||
TARGET , \&handle_target_stream_output,
|
||
LOG , \&handle_log_stream_output,
|
||
RESULT , \&handle_result_output,
|
||
);
|
||
|
||
$dispatch{$output->{_type}}->($context, $output);
|
||
}
|
||
|
||
# (+) status-async-output contains on-going status information about the progress
|
||
# of a slow operation. It can be discarded.
|
||
sub handle_status_async_output($context, $output) {
|
||
}
|
||
|
||
# (*) exec-async-output contains asynchronous state change on the target (stopped,
|
||
# started, disappeared).
|
||
sub handle_exec_async_output($context, $output) {
|
||
if ($output->{_class} eq 'stopped') {
|
||
my $reason = $output->{reason};
|
||
|
||
if ($reason eq 'breakpoint-hit') {
|
||
handle_breakpoint_hit($context, $output);
|
||
}
|
||
elsif ($reason eq 'exited-normally' || $reason eq 'exited') {
|
||
handle_program_exit($context, $output);
|
||
}
|
||
elsif ($reason eq 'signal-received') {
|
||
handle_program_signal($context, $output);
|
||
}
|
||
}
|
||
}
|
||
|
||
# (=) notify-async-output contains supplementary information that the client should
|
||
# handle (e.g., a new breakpoint information).
|
||
sub handle_notify_async_output($context, $output) {
|
||
}
|
||
|
||
# (~) console-stream-output is output that should be displayed as is in the console.
|
||
# It is the textual response to a CLI command.
|
||
sub handle_console_stream_output($context, $output) {
|
||
if ($output->{_text} =~ /^Reading symbols/) {
|
||
perform_preamble($context);
|
||
run_program($context);
|
||
}
|
||
}
|
||
|
||
# (@) target-stream-output is the output produced by the target program.
|
||
sub handle_target_stream_output($context, $output) {
|
||
}
|
||
|
||
# (&) log-stream-output is output text coming from GDB’s internals, for instance
|
||
# messages that should be displayed as part of an error log.
|
||
sub handle_log_stream_output($context, $output) {
|
||
}
|
||
|
||
# (^) result-output "done" | "running" | "connected" | "error" | "exit"
|
||
sub handle_result_output($context, $output) {
|
||
}
|
||
|
||
sub start_timeout($context, $timeout) {
|
||
$SIG{ALRM} = sub {
|
||
print_prog_output($context, "[gdb time-out]\n");
|
||
_exit($context);
|
||
};
|
||
alarm $timeout;
|
||
}
|
||
|
||
sub _exit($context) {
|
||
close $context->{prog_output};
|
||
|
||
open my $fh, '<', OUTPUT_FILENAME or die "could not open ".OUTPUT_FILENAME." for read: $!\n";
|
||
my $output = do { local $/; <$fh> };
|
||
close $fh;
|
||
|
||
print STDOUT "$output";
|
||
|
||
exit ($context->{'exit-code'} // 0);
|
||
}
|
||
|
||
# send text to OUTPUT_FILENAME file
|
||
sub print_prog_output($context, $text) {
|
||
print { $context->{prog_output} } $text;
|
||
$context->{prog_output}->flush();
|
||
}
|
||
|
||
# send gdb output to OUTPUT_FILENAME file, wrapped with prefix and suffix
|
||
sub print_gdb_output($context, $text) {
|
||
print { $context->{prog_output} } GDB_PREFIX . $text . GDB_SUFFIX;
|
||
$context->{prog_output}->flush();
|
||
}
|
||
|
||
sub perform_preamble($context) {
|
||
gdb_send_and_discard($context, "set charset UTF-8");
|
||
gdb_send_and_discard($context, "set print sevenbit-strings on");
|
||
|
||
# get start and end line numbers for main() function
|
||
my ($start, $end) = get_main_start_end($context);
|
||
|
||
$context->{main_start} = $start;
|
||
$context->{main_end} = $end;
|
||
|
||
# break on main start and end so we can get locals
|
||
gdb_send_and_discard($context, "break $start");
|
||
gdb_send_and_discard($context, "break $end");
|
||
|
||
# break on gdb() function so we can pass along user-submitted gdb commands
|
||
gdb_send_and_discard($context, "break gdb");
|
||
gdb_send_and_discard($context, "set print null-stop on");
|
||
}
|
||
|
||
sub shellquote_args_to_string(@args) {
|
||
my $string = '';
|
||
|
||
foreach my $arg (@args) {
|
||
$arg =~ s/'/'"'"'/g;
|
||
$string .= "'$arg' ";
|
||
}
|
||
|
||
return $string;
|
||
}
|
||
|
||
sub run_program($context) {
|
||
my $cmdline_args = shellquote_args_to_string(@ARGV);
|
||
gdb_send($context->{gdb_input}, "run $cmdline_args < ".INPUT_FILENAME." >> ".OUTPUT_FILENAME);
|
||
}
|
||
|
||
sub main {
|
||
# first command-line argument can override file to debug
|
||
my $prog = shift @ARGV // './prog';
|
||
|
||
# start gdb and grab references to its input and output streams
|
||
open2(my $out, my $in, "LIBC_FATAL_STDERR=1 MALLOC_CHECK_=1 gdb -i mi3 -q -nx $prog");
|
||
|
||
my $context = {
|
||
gdb_output => $out, # gdb output stream
|
||
gdb_input => $in, # gdb input stream
|
||
};
|
||
|
||
# open OUTPUT_FILENAME file to send gdb output
|
||
unlink OUTPUT_FILENAME;
|
||
open($context->{prog_output}, '>>:encoding(UTF-8)', OUTPUT_FILENAME) or die "Could not open ".OUTPUT_FILENAME." for append: $!";
|
||
|
||
# time-out after a few seconds
|
||
start_timeout($context, 8);
|
||
|
||
# handle gdb output
|
||
while (my $output = gdb_read($out)) {
|
||
dispatch_gdbmi_output($context, $output);
|
||
}
|
||
}
|
||
|
||
main();
|