3
0
mirror of https://github.com/pragma-/pbot.git synced 2024-11-09 21:49:34 +01:00
pbot/applets/pbot-vm/guest/bin/guest-gdb

698 lines
18 KiB
Perl
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/perl
# File: guest-gdb
#
# Purpose: Wraps a GNU Debugger instance around a program. Watches for
# invocations of a gdb() function to handle gdb commands from the program.
# Reports information about local variables when signals are detected.
#
# Usage: guest-gdb [executable]
#
# If [executable] is omitted, it will default to `prog`. Don't forget to
# compile with -g for debugging symbols.
# The DEBUG environment variable can be set to an integer value to enable
# internal debugging output. Set it to 1 for minimal debugging; to 2 to
# also dump the parsed GDB/MI data structures.
# SPDX-FileCopyrightText: 2022 Pragmatic Software <pragma78@gmail.com>
# SPDX-License-Identifier: MIT
use 5.020;
use warnings;
use strict;
use feature qw(signatures);
no warnings qw(experimental::signatures);
use IPC::Open2;
use JSON::XS;
use Data::Dumper;
use Encode qw(decode encode);
# set debug flag from DEBUG env or here
my $debug = $ENV{DEBUG} // 0;
# output from gdb will be prefixed with GDB_PREFIX and suffixed with GDB_SUFFIX.
# e.g., to wrap gdb output with <>'s set GDB_PREFIX to "<" and GDB_SUFFIX to ">".
use constant {
GDB_PREFIX => "\n",
GDB_SUFFIX => "\n\n",
};
# files to read stdin and write stdout
use constant {
INPUT_FILENAME => '.input',
OUTPUT_FILENAME => '.output',
};
# GDB/MI message types
use constant {
STATUS => '+',
EXEC => '*',
NOTIFY => '=',
CONSOLE => '~',
TARGET => '@',
LOG => '&',
RESULT => '^',
};
# send a command to gdb
sub gdb_send($in, $command) {
print STDERR "-> $command\n" if $debug;
print $in "$command\n";
}
# read a line from gdb
sub gdb_read($out) {
my $line;
while ($line = <$out>) {
chomp $line;
print STDERR "<- [$line]\n" if $debug;
next if $line eq '(gdb) '; # ignore gdb prompt
last;
}
return undef if not defined $line;
return parse_gdbmi_output($line);
}
# send a command to gdb and return all ~console and ^result output
sub gdb_send_and_read_console($context, $command, $report_error = 1) {
gdb_send($context->{gdb_input}, $command);
my $gdb = $context->{gdb_output};
my @console;
my $output;
while ($output = gdb_read_console($gdb)) {
push @console, $output;
last if $output->{_type} eq RESULT;
}
if ($report_error && $output->{_type} eq RESULT && $output->{_class} eq 'error') {
print_gdb_output($context, $output->{msg});
}
return \@console;
}
# send a command to gdb and return only the ^result output
sub gdb_send_and_read_result($context, $command, $report_error = 1) {
gdb_send($context->{gdb_input}, $command);
my $gdb = $context->{gdb_output};
my $output;
while ($output = gdb_read($gdb)) {
last if $output->{_type} eq RESULT;
}
if ($report_error && $output->{_class} eq 'error') {
print_gdb_output($context, $output->{msg});
}
return $output;
}
# alias to send_and_read_result
*gdb_send_and_discard = \&gdb_send_and_read_result;
# read line from gdb, discarding types other than ~console or ^result
sub gdb_read_console($out) {
my $output;
while ($output = gdb_read($out)) {
last if $output->{_type} eq RESULT;
last if $output->{_type} eq CONSOLE;
}
return $output;
}
# read and discard all output until ^result is reached
sub gdb_read_result($out) {
my $output;
while ($output = gdb_read($out)) {
last if $output->{_type} eq RESULT;
}
return $output;
}
# convert gdb/mi text to hashtable, converting to JSON first if necessary
sub gdbmi_to_hash($text, $makejson = 1) {
# convert to JSON first if necessary (gdb/mi is nearly JSON already!)
$text =~ s/"(?:[^"\\]|\\.)*"(*SKIP)(*F)|([\w-]+)=/"$1":/g if $makejson;
# decode gdb octal escapes
# (sometimes \\123, othertimes \123 and I can't be arsed to investigate
# when and why the backslashes double up)
$text =~ s/\\+(\d{3})/$1 >= 0x20 ? chr oct $1 : "\\\\$1"/ge;
# escape malformed unicode
my $octets = decode('UTF-8', $text, sub { sprintf '\\\\x%02X', shift });
$text = encode('UTF-8', $octets, Encode::FB_CROAK);
# escape invalid JSON characters
$text =~ s/([\x10-\x1f])/sprintf '\\\\x%02X', ord $1/ge;
# return hashtable decoded from json
my $result = eval { decode_json("{$text}") };
if ($@) {
print "Error parsing GDB/MI: $@ (input: $text)\n";
exit 1;
}
return $result;
}
sub parse_gdbmi_output($line) {
my ($type, $text) = $line =~ /^\d*(.)(.*)/;
my $class = 'none';
my $makejson = 0;
if ($type =~ /[+*=]/) {
($class, $text) = split /,/, $text, 2;
$makejson = 1 if length $text;
} elsif ($type eq RESULT) {
($class, $text) = split /,/, $text, 2;
$text //= '';
$makejson = 1 if length $text;
} else {
$text = "\"_text\":$text";
}
my $output = gdbmi_to_hash($text, $makejson);
if (exists $output->{_text}) {
chomp $output->{_text};
}
$output->{_type} = $type;
$output->{_class} = $class;
print STDERR Dumper($output), "\n" if $debug >= 2;
return $output;
}
# get local variables at current frame
sub get_locals($context) {
# use `info locals` gdb command
my $console = gdb_send_and_read_console($context, "info locals");
my @locals;
foreach my $output (@$console) {
last if $output->{_type} eq RESULT;
if ($output->{_text} =~ /([^=]+)\s+=\s+(.*)/) {
push @locals, [$1, $2];
}
}
return \@locals;
}
# compare two lists of locals and returns list containing just the
# newly modified locals
sub compare_locals($old, $new) {
# build hashtable of left-side locals for easier access
my %ht;
foreach my $local (@$old) {
my ($ident, $value) = @$local;
$ht{$ident} = $value;
}
# check for modified locals and add to modified list
my @modified;
foreach my $local (@$new) {
my ($ident, $value) = @$local;
if (!exists $ht{$ident} || $ht{$ident} ne $value) {
push @modified, [$ident, $value];
}
}
return \@modified;
}
sub locals_to_string($locals) {
my @strings;
foreach my $local (@$locals) {
my ($ident, $value) = @$local;
push @strings, "$ident = $value";
}
return join '; ', @strings;
}
sub args_to_string($args) {
my @strings;
foreach my $arg (@$args) {
push @strings, "$arg->{name}=$arg->{value}";
}
return join ', ', @strings;
}
# determine on which line numbers the main() function begins and ends
sub get_main_start_end($context) {
gdb_send_and_discard($context, 'set listsize unlimited');
my ($start, $end);
# use `info functions` to find where main starts
my $console = gdb_send_and_read_console($context, 'info functions -q ^main$');
foreach my $output (@$console) {
last if $output->{_type} eq RESULT;
($start) = $output->{_text} =~ /^(\d+)/;
}
$console = gdb_send_and_read_console($context, 'list main');
# gdb shows extra context surrounding main, so we have to parse the output
# and count the braces to find the true end of main()
my $braces = 0;
foreach my $output (@$console) {
last if $output->{_type} eq RESULT;
# we have already found end; don't parse output
last if $end;
my ($line, $code) = split /\t/, $output->{_text};
# this line isn't part of main() yet
next if $line < $start;
# blank out contents of string and char literals so we don't count
# any braces within
$code =~ s/(?:\"((?:\\\"|(?!\").)*)\")/'"' . ('-' x length $1) . '"'/ge;
$code =~ s/(?:\'((?:\\\'|(?!\').)*)\')/"'" . ('-' x length $1) . "'"/ge;
my @chars = split //, $code;
foreach my $char (@chars) {
if ($char eq '{') {
$braces++;
} elsif ($char eq '}') {
$braces--;
if ($braces == 0) {
$end = $line - 1;
last;
}
}
}
}
return ($start, $end);
}
sub get_backtrace($context) {
my $console = gdb_send_and_read_console($context, "bt");
my @backtrace;
foreach my $output (@$console) {
last if $output->{_type} eq RESULT;
$output->{_text} =~ /in (\w+) (.*) at/;
push @backtrace, { func => $1, args => $2 };
}
return \@backtrace;
}
# returns the source line at a given line number
sub get_lineno($context, $lineno) {
my $console = gdb_send_and_read_console($context, "list $lineno,$lineno", 0);
my $line;
foreach my $output (@$console) {
last if $output->{_type} eq RESULT;
($line) = $output->{_text} =~ /\t\s+(.*)/;
}
return $line;
}
sub handle_breakpoint_hit($context, $data) {
if ($data->{frame}->{func} eq 'main') {
if ($data->{frame}->{line} == $context->{main_end}) {
$context->{locals_end} = get_locals($context);
}
}
elsif ($data->{frame}->{func} eq 'gdb') {
my $command = $data->{frame}->{args}->[0]->{value};;
# strip gdb junk from command
$command =~ s/^.*?"//;
$command =~ s/"$//;
dispatch_user_command($context, $command);
}
gdb_send_and_discard($context, "cont");
}
# gdb commands invoked by end-users
sub dispatch_user_command($context, $command) {
state %commands = (
'ptype' => \&cmd_ptype,
'whatis' => \&cmd_whatis,
'print' => \&cmd_print,
'dump' => \&cmd_print,
# special PBot command
'print_last_statement' => \&cmd_print_last_statement,
);
$command = unescape($command);
# move out of gdb() function to caller
gdb_send_and_discard($context, "up");
my ($cmd, $args) = split /\s+/, $command, 2;
if (not exists $commands{$cmd}) {
cmd_gdb($context, $command);
} else {
$commands{$cmd}->($context, $args);
}
}
sub cmd_gdb($context, $command) {
my $console = gdb_send_and_read_console($context, "$command");
my $text = '';
foreach my $output (@$console) {
last if $output->{_type} eq RESULT;
$text .= "$output->{_text}\n";
}
print_gdb_output($context, $text);
}
sub cmd_ptype($context, $args) {
my $console = gdb_send_and_read_console($context, "ptype $args");
my $text = '';
foreach my $output (@$console) {
last if $output->{_type} eq RESULT;
if ($output->{_text} =~ /type\s*=\s*(.*)/) {
$text .= "$args = $1"
} else {
$output->{_text} =~ s/^\s+//;
$text .= " $output->{_text}";
}
}
print_gdb_output($context, $text);
}
sub cmd_whatis($context, $args) {
my $console = gdb_send_and_read_console($context, "whatis $args");
foreach my $output (@$console) {
last if $output->{_type} eq RESULT;
if ($output->{_text} =~ /type\s*=\s*(.*)/) {
print_gdb_output($context, "$args = $1");
}
}
}
sub cmd_print($context, $args) {
my $console = gdb_send_and_read_console($context, "print $args");
foreach my $output (@$console) {
last if $output->{_type} eq RESULT;
if ($output->{_text} =~ /^\$\d+\s*=\s*(.*)/) {
print_gdb_output($context, "$args = $1");
}
}
}
sub cmd_print_last_statement($context, $args) {
# invoke the last statement
my $console = gdb_send_and_read_console($context, "print $args");
# don't print last statement if there was program output
return if -s OUTPUT_FILENAME;
foreach my $output (@$console) {
last if $output->{_type} eq RESULT;
if ($output->{_text} =~ /^\$\d+\s*=\s*(.*)/) {
print_gdb_output($context, "no output: $args = $1");
}
}
}
sub handle_program_exit($context, $data) {
if (not -s OUTPUT_FILENAME) { # -s gets size of file
my $locals = locals_to_string($context->{locals_end});
if (length $locals) {
print_gdb_output($context, "no output: $locals");
}
}
if (exists $data->{'exit-code'} && $data->{'exit-code'} != 0) {
$context->{'exit-code'} = oct $data->{'exit-code'};
}
_exit($context);
}
sub handle_program_signal($context, $data) {
my $locals = locals_to_string(get_locals($context));
my $args = args_to_string($data->{frame}->{args});
my $text = "Program received signal $data->{'signal-name'}, $data->{'signal-meaning'} ";
$text .= "in $data->{frame}->{func} ($args) ";
my $line;
if (exists $data->{frame}->{line}) {
if ($line = get_lineno($context, $data->{frame}->{line})) {
$text .= "at statement: $line ";
}
}
my $backtrace = get_backtrace($context);
shift @$backtrace; # remove current frame
foreach my $trace (@$backtrace) {
my $console = gdb_send_and_read_console($context, "up");
foreach my $output (@$console) {
last if $output->{_type} eq RESULT;
if ($output->{_text} =~ /^\d+\t\s+(.*)/) {
$line = $1;
}
}
$text .= "called by $trace->{func} $trace->{args} " if $trace->{func};
$text .= "at statement: $line " if $line;
}
if (length $locals) {
$text .= "<local variables: $locals>"
} else {
$text =~ s/\s+$//;
}
print_gdb_output($context, $text);
_exit($context);
}
sub unescape($text) {
state %unescape = (
"\\n" => "\n",
"\\t" => "\t",
"\\\\" => "\\",
"\\\"" => '"',
);
$text =~ s/(\\n|\\t|\\\\|\\")/$unescape{$1}/g;
return $text;
}
# dispatch output from gdb commands to handlers
sub dispatch_gdbmi_output($context, $output) {
state %dispatch = (
STATUS , \&handle_status_async_output,
EXEC , \&handle_exec_async_output,
NOTIFY , \&handle_notify_async_output,
CONSOLE , \&handle_console_stream_output,
TARGET , \&handle_target_stream_output,
LOG , \&handle_log_stream_output,
RESULT , \&handle_result_output,
);
$dispatch{$output->{_type}}->($context, $output);
}
# (+) status-async-output contains on-going status information about the progress
# of a slow operation. It can be discarded.
sub handle_status_async_output($context, $output) {
}
# (*) exec-async-output contains asynchronous state change on the target (stopped,
# started, disappeared).
sub handle_exec_async_output($context, $output) {
if ($output->{_class} eq 'stopped') {
my $reason = $output->{reason};
if ($reason eq 'breakpoint-hit') {
handle_breakpoint_hit($context, $output);
}
elsif ($reason eq 'exited-normally' || $reason eq 'exited') {
handle_program_exit($context, $output);
}
elsif ($reason eq 'signal-received') {
handle_program_signal($context, $output);
}
}
}
# (=) notify-async-output contains supplementary information that the client should
# handle (e.g., a new breakpoint information).
sub handle_notify_async_output($context, $output) {
}
# (~) console-stream-output is output that should be displayed as is in the console.
# It is the textual response to a CLI command.
sub handle_console_stream_output($context, $output) {
if ($output->{_text} =~ /^Reading symbols/) {
perform_preamble($context);
run_program($context);
}
}
# (@) target-stream-output is the output produced by the target program.
sub handle_target_stream_output($context, $output) {
}
# (&) log-stream-output is output text coming from GDBs internals, for instance
# messages that should be displayed as part of an error log.
sub handle_log_stream_output($context, $output) {
}
# (^) result-output "done" | "running" | "connected" | "error" | "exit"
sub handle_result_output($context, $output) {
}
sub start_timeout($context, $timeout) {
$SIG{ALRM} = sub {
print_prog_output($context, "[gdb time-out]\n");
_exit($context);
};
alarm $timeout;
}
sub _exit($context) {
close $context->{prog_output};
open my $fh, '<', OUTPUT_FILENAME or die "could not open ".OUTPUT_FILENAME." for read: $!\n";
my $output = do { local $/; <$fh> };
close $fh;
print STDOUT "$output";
exit ($context->{'exit-code'} // 0);
}
# send text to OUTPUT_FILENAME file
sub print_prog_output($context, $text) {
print { $context->{prog_output} } $text;
$context->{prog_output}->flush();
}
# send gdb output to OUTPUT_FILENAME file, wrapped with prefix and suffix
sub print_gdb_output($context, $text) {
print { $context->{prog_output} } GDB_PREFIX . $text . GDB_SUFFIX;
$context->{prog_output}->flush();
}
sub perform_preamble($context) {
gdb_send_and_discard($context, "set charset UTF-8");
gdb_send_and_discard($context, "set print sevenbit-strings on");
# get start and end line numbers for main() function
my ($start, $end) = get_main_start_end($context);
$context->{main_start} = $start;
$context->{main_end} = $end;
# break on main start and end so we can get locals
gdb_send_and_discard($context, "break $start");
gdb_send_and_discard($context, "break $end");
# break on gdb() function so we can pass along user-submitted gdb commands
gdb_send_and_discard($context, "break gdb");
gdb_send_and_discard($context, "set print null-stop on");
}
sub shellquote_args_to_string(@args) {
my $string = '';
foreach my $arg (@args) {
$arg =~ s/'/'"'"'/g;
$string .= "'$arg' ";
}
return $string;
}
sub run_program($context) {
my $cmdline_args = shellquote_args_to_string(@ARGV);
gdb_send($context->{gdb_input}, "run $cmdline_args < ".INPUT_FILENAME." >> ".OUTPUT_FILENAME);
}
sub main {
# first command-line argument can override file to debug
my $prog = shift @ARGV // './prog';
# start gdb and grab references to its input and output streams
open2(my $out, my $in, "LIBC_FATAL_STDERR=1 MALLOC_CHECK_=1 gdb -i mi3 -q -nx $prog");
my $context = {
gdb_output => $out, # gdb output stream
gdb_input => $in, # gdb input stream
};
# open OUTPUT_FILENAME file to send gdb output
unlink OUTPUT_FILENAME;
open($context->{prog_output}, '>>:encoding(UTF-8)', OUTPUT_FILENAME) or die "Could not open ".OUTPUT_FILENAME." for append: $!";
# time-out after a few seconds
start_timeout($context, 8);
# handle gdb output
while (my $output = gdb_read($out)) {
dispatch_gdbmi_output($context, $output);
}
}
main();