#!/usr/bin/perl

# File: guest-gdb
#
# Purpose: Wraps a GNU Debugger instance around a program. Watches for
# invocations of a gdb() function to handle gdb commands from the program.
# Reports information about local variables when signals are detected.
#
# Usage: guest-gdb [executable]
#
# If [executable] is omitted, it will default to `prog`. Don't forget to
# compile with -g for debugging symbols.

# The DEBUG environment variable can be set to an integer value to enable
# internal debugging output. Set it to 1 for minimal debugging; to 2 to
# also dump the parsed GDB/MI data structures.

# SPDX-FileCopyrightText: 2022 Pragmatic Software <pragma78@gmail.com>
# SPDX-License-Identifier: MIT

use 5.020;

use warnings;
use strict;

use feature qw(signatures);
no warnings qw(experimental::signatures);

use IPC::Open2;
use JSON::XS;
use Data::Dumper;
use Encode qw(decode encode);

# set debug flag from DEBUG env or here
my $debug = $ENV{DEBUG} // 0;

# output from gdb will be prefixed with GDB_PREFIX and suffixed with GDB_SUFFIX.
# e.g., to wrap gdb output with <>'s set GDB_PREFIX to "<" and GDB_SUFFIX to ">".
use constant {
    GDB_PREFIX => "\n",
    GDB_SUFFIX => "\n\n",
};

# files to read stdin and write stdout
use constant {
    INPUT_FILENAME  => '.input',
    OUTPUT_FILENAME => '.output',
};

# GDB/MI message types
use constant {
    STATUS  => '+',
    EXEC    => '*',
    NOTIFY  => '=',
    CONSOLE => '~',
    TARGET  => '@',
    LOG     => '&',
    RESULT  => '^',
};

# send a command to gdb
sub gdb_send($in, $command) {
    print STDERR "-> $command\n" if $debug;
    print $in "$command\n";
}

# read a line from gdb
sub gdb_read($out) {
    my $line;

    while ($line = <$out>) {
        chomp $line;
        print STDERR "<- [$line]\n" if $debug;
        next if $line eq '(gdb) '; # ignore gdb prompt
        last;
    }

    return undef if not defined $line;
    return parse_gdbmi_output($line);
}

# send a command to gdb and return all ~console and ^result output
sub gdb_send_and_read_console($context, $command, $report_error = 1) {
    gdb_send($context->{gdb_input}, $command);

    my $gdb = $context->{gdb_output};
    my @console;
    my $output;

    while ($output = gdb_read_console($gdb)) {
        push @console, $output;
        last if $output->{_type} eq RESULT;
    }

    if ($report_error && $output->{_type} eq RESULT && $output->{_class} eq 'error') {
        print_gdb_output($context, $output->{msg});
    }

    return \@console;
}

# send a command to gdb and return only the ^result output
sub gdb_send_and_read_result($context, $command, $report_error = 1) {
    gdb_send($context->{gdb_input}, $command);

    my $gdb = $context->{gdb_output};
    my $output;

    while ($output = gdb_read($gdb)) {
        last if $output->{_type} eq RESULT;
    }

    if ($report_error && $output->{_class} eq 'error') {
        print_gdb_output($context, $output->{msg});
    }

    return $output;
}

# alias to send_and_read_result
*gdb_send_and_discard = \&gdb_send_and_read_result;

# read line from gdb, discarding types other than ~console or ^result
sub gdb_read_console($out) {
    my $output;

    while ($output = gdb_read($out)) {
        last if $output->{_type} eq RESULT;
        last if $output->{_type} eq CONSOLE;
    }

    return $output;
}

# read and discard all output until ^result is reached
sub gdb_read_result($out) {
    my $output;

    while ($output = gdb_read($out)) {
        last if $output->{_type} eq RESULT;
    }

    return $output;
}

# convert gdb/mi text to hashtable, converting to JSON first if necessary
sub gdbmi_to_hash($text, $makejson = 1) {
    # convert to JSON first if necessary (gdb/mi is nearly JSON already!)
    $text =~ s/"(?:[^"\\]|\\.)*"(*SKIP)(*F)|([\w-]+)=/"$1":/g if $makejson;

    # decode gdb octal escapes
    # (sometimes \\123, othertimes \123 and I can't be arsed to investigate
    # when and why the backslashes double up)
    $text =~ s/\\+(\d{3})/$1 >= 0x20 ? chr oct $1 : "\\\\$1"/ge;

    # escape malformed unicode
    my $octets = decode('UTF-8', $text, sub { sprintf '\\\\x%02X', shift });
    $text = encode('UTF-8', $octets, Encode::FB_CROAK);

    # escape invalid JSON characters
    $text =~ s/([\x10-\x1f])/sprintf '\\\\x%02X', ord $1/ge;

    # return hashtable decoded from json
    my $result = eval { decode_json("{$text}") };

    if ($@) {
        print "Error parsing GDB/MI: $@ (input: $text)\n";
        exit 1;
    }

    return $result;
}

sub parse_gdbmi_output($line) {
    my ($type, $text) = $line =~ /^\d*(.)(.*)/;

    my $class = 'none';
    my $makejson = 0;

    if ($type =~ /[+*=]/) {
        ($class, $text) = split /,/, $text, 2;
        $makejson = 1 if length $text;
    } elsif ($type eq RESULT) {
        ($class, $text) = split /,/, $text, 2;
        $text //= '';
        $makejson = 1 if length $text;
    } else {
        $text = "\"_text\":$text";
    }

    my $output = gdbmi_to_hash($text, $makejson);

    if (exists $output->{_text}) {
        chomp $output->{_text};
    }

    $output->{_type}  = $type;
    $output->{_class} = $class;

    print STDERR Dumper($output), "\n" if $debug >= 2;

    return $output;
}

# get local variables at current frame
sub get_locals($context) {
    # use `info locals` gdb command
    my $console = gdb_send_and_read_console($context, "info locals");
    my @locals;

    foreach my $output (@$console) {
        last if $output->{_type} eq RESULT;

        if ($output->{_text} =~ /([^=]+)\s+=\s+(.*)/) {
            push @locals, [$1, $2];
        }
    }

    return \@locals;
}

# compare two lists of locals and returns list containing just the
# newly modified locals
sub compare_locals($old, $new) {
    # build hashtable of left-side locals for easier access
    my %ht;

    foreach my $local (@$old) {
        my ($ident, $value) = @$local;
        $ht{$ident} = $value;
    }

    # check for modified locals and add to modified list
    my @modified;

    foreach my $local (@$new) {
        my ($ident, $value) = @$local;
        if (!exists $ht{$ident} || $ht{$ident} ne $value) {
            push @modified, [$ident, $value];
        }
    }

    return \@modified;
}

sub locals_to_string($locals) {
    my @strings;

    foreach my $local (@$locals) {
        my ($ident, $value) = @$local;
        push @strings, "$ident = $value";
    }

    return join '; ', @strings;
}

sub args_to_string($args) {
    my @strings;

    foreach my $arg (@$args) {
        push @strings, "$arg->{name}=$arg->{value}";
    }

    return join ', ', @strings;
}

# determine on which line numbers the main() function begins and ends
sub get_main_start_end($context) {
    gdb_send_and_discard($context, 'set listsize unlimited');

    my ($start, $end);

    # use `info functions` to find where main starts
    my $console = gdb_send_and_read_console($context, 'info functions -q ^main$');

    foreach my $output (@$console) {
        last if $output->{_type} eq RESULT;
        ($start) = $output->{_text} =~ /^(\d+)/;
    }

    $console = gdb_send_and_read_console($context, 'list main');

    # gdb shows extra context surrounding main, so we have to parse the output
    # and count the braces to find the true end of main()
    my $braces = 0;

    foreach my $output (@$console) {
        last if $output->{_type} eq RESULT;

        # we have already found end; don't parse output
        last if $end;

        my ($line, $code) = split /\t/, $output->{_text};

        if (not defined $start) {
            # info functions didn't work, try to find main
            if ($code =~ /\bmain[\b\s\)]*\(/) {
                $start = $line;
            }
            next;
        } else {
            # this line isn't part of main() yet
            next if $line < $start;
        }

        # blank out contents of string and char literals so we don't count
        # any braces within
        $code =~ s/(?:\"((?:\\\"|(?!\").)*)\")/'"' . ('-' x length $1) . '"'/ge;
        $code =~ s/(?:\'((?:\\\'|(?!\').)*)\')/"'" . ('-' x length $1) . "'"/ge;

        my @chars = split //, $code;
        foreach my $char (@chars) {
            if ($char eq '{') {
                $braces++;
            } elsif ($char eq '}') {
                $braces--;

                if ($braces == 0) {
                    $end = $line - 1;
                    last;
                }
            }
        }
    }

    return ($start, $end);
}

sub get_backtrace($context) {
    my $console = gdb_send_and_read_console($context, "bt");

    my @backtrace;

    foreach my $output (@$console) {
        last if $output->{_type} eq RESULT;
        $output->{_text} =~ /in (\w+) (.*) at/;
        push @backtrace, { func => $1, args => $2 };
    }

    return \@backtrace;
}

# returns the source line at a given line number
sub get_lineno($context, $lineno) {
    my $console = gdb_send_and_read_console($context, "list $lineno,$lineno", 0);

    my $line;

    foreach my $output (@$console) {
        last if $output->{_type} eq RESULT;
        ($line) = $output->{_text} =~ /\t\s+(.*)/;
    }

    return $line;
}

sub handle_breakpoint_hit($context, $data) {
    if ($data->{frame}->{func} eq 'main') {
        if ($data->{frame}->{line} == $context->{main_end}) {
            $context->{locals_end} = get_locals($context);
        }
    }
    elsif ($data->{frame}->{func} eq 'gdb') {
        my $command = $data->{frame}->{args}->[0]->{value};;

        # strip gdb junk from command
        $command =~ s/^.*?"//;
        $command =~ s/"$//;

        dispatch_user_command($context, $command);
    }

    gdb_send_and_discard($context, "cont");
}

# gdb commands invoked by end-users
sub dispatch_user_command($context, $command) {
    state %commands = (
        'ptype'  => \&cmd_ptype,
        'whatis' => \&cmd_whatis,
        'print'  => \&cmd_print,
        'dump'   => \&cmd_print,

        # special PBot command
        'print_last_statement' => \&cmd_print_last_statement,
    );

    $command = unescape($command);

    # move out of gdb() function to caller
    gdb_send_and_discard($context, "up");

    my ($cmd, $args) = split /\s+/, $command, 2;

    if (not exists $commands{$cmd}) {
        cmd_gdb($context, $command);
    } else {
        $commands{$cmd}->($context, $args);
    }
}

sub cmd_gdb($context, $command) {
    my $console = gdb_send_and_read_console($context, "$command");

    my $text = '';

    foreach my $output (@$console) {
        last if $output->{_type} eq RESULT;
        $text .= "$output->{_text}\n";
    }

    print_gdb_output($context, $text);
}

sub cmd_ptype($context, $args) {
    my $console = gdb_send_and_read_console($context, "ptype $args");

    my $text = '';

    foreach my $output (@$console) {
        last if $output->{_type} eq RESULT;

        if ($output->{_text} =~ /type\s*=\s*(.*)/) {
            $text .= "$args = $1"
        } else {
            $output->{_text} =~ s/^\s+//;
            $text .= " $output->{_text}";
        }
    }

    print_gdb_output($context, $text);
}

sub cmd_whatis($context, $args) {
    my $console = gdb_send_and_read_console($context, "whatis $args");

    foreach my $output (@$console) {
        last if $output->{_type} eq RESULT;

        if ($output->{_text} =~ /type\s*=\s*(.*)/) {
            print_gdb_output($context, "$args = $1");
        }
    }
}

sub cmd_print($context, $args) {
    my $console = gdb_send_and_read_console($context, "print $args");

    foreach my $output (@$console) {
        last if $output->{_type} eq RESULT;

        if ($output->{_text} =~ /^\$\d+\s*=\s*(.*)/) {
            print_gdb_output($context, "$args = $1");
        }
    }
}

sub cmd_print_last_statement($context, $args) {
    # invoke the last statement
    my $console = gdb_send_and_read_console($context, "print $args");

    # don't print last statement if there was program output
    return if -s OUTPUT_FILENAME;

    foreach my $output (@$console) {
        last if $output->{_type} eq RESULT;

        if ($output->{_text} =~ /^\$\d+\s*=\s*(.*)/) {
            print_gdb_output($context, "no output: $args = $1");
        }
    }
}

sub handle_program_exit($context, $data) {
    if (not -s OUTPUT_FILENAME) { # -s gets size of file
        my $locals = locals_to_string($context->{locals_end});

        if (length $locals) {
            print_gdb_output($context, "no output: $locals");
        }
    }

    if (exists $data->{'exit-code'} && $data->{'exit-code'} != 0) {
        $context->{'exit-code'} = oct $data->{'exit-code'};
    }

    _exit($context);
}

sub handle_program_signal($context, $data) {
    my $locals = locals_to_string(get_locals($context));

    my $args = args_to_string($data->{frame}->{args});

    my $text = "Program received signal $data->{'signal-name'}, $data->{'signal-meaning'} ";

    $text .= "in $data->{frame}->{func} ($args) ";

    my $line;

    if (exists $data->{frame}->{line}) {
        if ($line = get_lineno($context, $data->{frame}->{line})) {
            $text .= "at statement: $line ";
        }
    }

    my $backtrace = get_backtrace($context);

    shift @$backtrace; # remove current frame

    foreach my $trace (@$backtrace) {
        my $console = gdb_send_and_read_console($context, "up");

        foreach my $output (@$console) {
            last if $output->{_type} eq RESULT;

            if ($output->{_text} =~ /^\d+\t\s+(.*)/) {
                $line = $1;
            }
        }

        $text .= "called by $trace->{func} $trace->{args} " if $trace->{func};
        $text .= "at statement: $line " if $line;
    }

    if (length $locals) {
        $text .= "<local variables: $locals>"
    } else {
        $text =~ s/\s+$//;
    }

    print_gdb_output($context, $text);
    _exit($context);
}

sub unescape($text) {
    state %unescape = (
        "\\n"  => "\n",
        "\\t"  => "\t",
        "\\\\" => "\\",
        "\\\"" => '"',
    );

    $text =~ s/(\\n|\\t|\\\\|\\")/$unescape{$1}/g;
    return $text;
}

# dispatch output from gdb commands to handlers
sub dispatch_gdbmi_output($context, $output) {
    state %dispatch = (
        STATUS  , \&handle_status_async_output,
        EXEC    , \&handle_exec_async_output,
        NOTIFY  , \&handle_notify_async_output,
        CONSOLE , \&handle_console_stream_output,
        TARGET  , \&handle_target_stream_output,
        LOG     , \&handle_log_stream_output,
        RESULT  , \&handle_result_output,
    );

    $dispatch{$output->{_type}}->($context, $output);
}

# (+) status-async-output contains on-going status information about the progress
# of a slow operation. It can be discarded.
sub handle_status_async_output($context, $output) {
}

# (*) exec-async-output contains asynchronous state change on the target (stopped,
# started, disappeared).
sub handle_exec_async_output($context, $output) {
    if ($output->{_class} eq 'stopped') {
        my $reason = $output->{reason};

        if ($reason eq 'breakpoint-hit') {
            handle_breakpoint_hit($context, $output);
        }
        elsif ($reason eq 'exited-normally' || $reason eq 'exited') {
            handle_program_exit($context, $output);
        }
        elsif ($reason eq 'signal-received') {
            handle_program_signal($context, $output);
        }
    }
}

# (=) notify-async-output contains supplementary information that the client should
# handle (e.g., a new breakpoint information).
sub handle_notify_async_output($context, $output) {
}

# (~) console-stream-output is output that should be displayed as is in the console.
# It is the textual response to a CLI command.
sub handle_console_stream_output($context, $output) {
    if ($output->{_text} =~ /^Reading symbols/) {
        perform_preamble($context);
        run_program($context);
    }
}

# (@) target-stream-output is the output produced by the target program.
sub handle_target_stream_output($context, $output) {
}

# (&) log-stream-output is output text coming from GDB’s internals, for instance
# messages that should be displayed as part of an error log.
sub handle_log_stream_output($context, $output) {
}

# (^) result-output "done" | "running" | "connected" | "error" | "exit"
sub handle_result_output($context, $output) {
}

sub start_timeout($context, $timeout) {
    $SIG{ALRM} = sub {
        print_prog_output($context, "[gdb time-out]\n");
        _exit($context);
    };
    alarm $timeout;
}

sub _exit($context) {
    close $context->{prog_output};

    open my $fh, '<', OUTPUT_FILENAME or die "could not open ".OUTPUT_FILENAME." for read: $!\n";
    my $output = do { local $/; <$fh> };
    close $fh;

    print STDOUT "$output";

    exit ($context->{'exit-code'} // 0);
}

# send text to OUTPUT_FILENAME file
sub print_prog_output($context, $text) {
    print { $context->{prog_output} } $text;
    $context->{prog_output}->flush();
}

# send gdb output to OUTPUT_FILENAME file, wrapped with prefix and suffix
sub print_gdb_output($context, $text) {
    print { $context->{prog_output} } GDB_PREFIX . $text . GDB_SUFFIX;
    $context->{prog_output}->flush();
}

sub perform_preamble($context) {
    gdb_send_and_discard($context, "set charset UTF-8");
    gdb_send_and_discard($context, "set print sevenbit-strings on");

    # get start and end line numbers for main() function
    my ($start, $end) = get_main_start_end($context);

    $context->{main_start} = $start;
    $context->{main_end}   = $end;

    # break on main start and end so we can get locals
    gdb_send_and_discard($context, "break $start");
    gdb_send_and_discard($context, "break $end");

    # break on gdb() function so we can pass along user-submitted gdb commands
    gdb_send_and_discard($context, "break gdb");
    gdb_send_and_discard($context, "set print null-stop on");
}

sub shellquote_args_to_string(@args) {
    my $string = '';

    foreach my $arg (@args) {
        $arg =~ s/'/'"'"'/g;
        $string .= "'$arg' ";
    }

    return $string;
}

sub run_program($context) {
    my $cmdline_args = shellquote_args_to_string(@ARGV);
    gdb_send($context->{gdb_input}, "run $cmdline_args < ".INPUT_FILENAME." >> ".OUTPUT_FILENAME);
}

sub main {
    # first command-line argument can override file to debug
    my $prog = shift @ARGV // './prog';

    # start gdb and grab references to its input and output streams
    open2(my $out, my $in, "LIBC_FATAL_STDERR=1 MALLOC_CHECK_=1 gdb -i mi3 -q -nx $prog");

    my $context = {
        gdb_output => $out,  # gdb output stream
        gdb_input  => $in,   # gdb input stream
    };

    # open OUTPUT_FILENAME file to send gdb output
    unlink OUTPUT_FILENAME;
    open($context->{prog_output}, '>>:encoding(UTF-8)', OUTPUT_FILENAME) or die "Could not open ".OUTPUT_FILENAME." for append: $!";

    # time-out after a few seconds
    start_timeout($context, 8);

    # handle gdb output
    while (my $output = gdb_read($out)) {
        dispatch_gdbmi_output($context, $output);
    }
}

main();
