pbot/modules/compiler_vm/languages/_c_base.pm

#!/usr/bin/perl

use warnings;
use strict;
use feature "switch";

no if $] >= 5.018, warnings => "experimental::smartmatch";

package _c_base;
use parent '_default';

use Text::Balanced qw/extract_bracketed/;

# Fill in the compiler settings for this C base class: source/executable
# file names, the gcc flag sets, the command-line template, and the block of
# #define/#include lines stored in $self->{prelude}.
#
# Arguments: ($self, %conf) -- %conf is accepted but not consulted here.
# Returns:   the prelude text (the value of the final assignment).
sub initialize {
  my ($self, %conf) = @_;

  # cmdline is single-quoted on purpose: $sourcefile, $options and $execfile
  # stay as literal placeholders in the stored string.
  my %defaults = (
    sourcefile      => 'prog.c',
    execfile        => 'prog',
    default_options => '-Wextra -Wall -Wno-unused -pedantic -Wfloat-equal -Wshadow -std=c11 -lm -Wfatal-errors',
    options_paste   => '-fdiagnostics-show-caret',
    options_nopaste => '-fno-diagnostics-show-caret',
    cmdline         => 'gcc -ggdb -g3 $sourcefile $options -o $execfile',
  );

  $self->{$_} = $defaults{$_} for keys %defaults;

  # Non-interpolating heredoc; kept as the last statement so the sub still
  # evaluates to the prelude string.
  return $self->{prelude} = <<'END';
#define _XOPEN_SOURCE 9001
#define __USE_XOPEN
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <math.h>
#include <limits.h>
#include <sys/types.h>
#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdarg.h>
#include <stdnoreturn.h>
#include <stdalign.h>
#include <ctype.h>
#include <inttypes.h>
#include <float.h>
#include <errno.h>
#include <time.h>
#include <assert.h>
#include <complex.h>
#include <setjmp.h>
#include <wchar.h>
#include <wctype.h>
#include <tgmath.h>
#include <fenv.h>
#include <locale.h>
#include <iso646.h>
#include <signal.h>
#include <uchar.h>
#include <prelude.h>

END
}

# Strip the C-specific command switches out of $self->{code} and register
# each one through $self->add_option (not defined in this file; presumably
# supplied by the parent class).
#
#   -nomain / -noheaders   matched case-insensitively at the start of the
#                          code or after whitespace; removed and recorded.
#   -include NAME          may repeat; each NAME is appended to
#                          $self->{include_options} as "#include <NAME> "
#                          and recorded as "-include NAME".
#
# Returns the remaining code text.
sub process_custom_options {
  my $self = shift;

  foreach my $flag (qw(-nomain -noheaders)) {
    if ($self->{code} =~ s/(?:^|(?<=\s))\Q$flag\E\s*//i) {
      $self->add_option($flag);
    }
  }

  $self->{include_options} = "";

  # Each pass removes one "-include NAME " (trailing whitespace is required
  # by the pattern) until none are left.
  while ($self->{code} =~ s/(?:^|(?<=\s))-include\s+(\S+)\s+//) {
    my $header = $1;
    $self->{include_options} .= "#include <$header> ";
    $self->add_option("-include $header");
  }

  return $self->{code};
}

# Run source text through the external `astyle` formatter and return the
# result.
#
# Arguments: ($self, @parts) -- the defined @parts are concatenated to form
#            the text to format. When no defined part is given, the text
#            falls back to $self->{code}. (Previously `join '', @_` always
#            produced a defined string, so that fallback could never fire.)
# Returns:   the contents of $self->{sourcefile} after astyle has run.
# Dies:      when $self->{sourcefile} cannot be written or read.
#
# Side effect: overwrites $self->{sourcefile} on disk.
sub pretty_format {
  my $self = shift;

  my @parts = grep { defined } @_;
  my $code  = @parts ? join('', @parts) : $self->{code};
  $code = '' if not defined $code;

  # Three-argument open so the file name can never be read as a mode.
  open my $out, '>', $self->{sourcefile} or die "Couldn't write $self->{sourcefile}: $!";
  print $out $code;
  close $out or die "Couldn't write $self->{sourcefile}: $!";

  # Best effort: if astyle is missing or fails, the file is left as written
  # and the unformatted text is returned.
  # NOTE(review): "-A3 -UHpnfq" reaches astyle as ONE argv element because
  # this is the list form of system(); confirm astyle accepts it that way.
  system("astyle", "-A3 -UHpnfq", $self->{sourcefile});

  open my $in, '<', $self->{sourcefile} or die "Couldn't read $self->{sourcefile}: $!";
  my $result = join '', <$in>;
  close $in;

  return $result;
}

# Rewrite the snippet in $self->{code} into a complete C source file.
#
# Steps, in order:
#   1. Prepend the "#include <...> " text held in $self->{include_options}.
#   2. Scan the code one character at a time and insert real newlines after
#      #include directives, after ';' outside parentheses, and after '{' and
#      '}'. A backslash-n sequence outside quotes becomes a real newline.
#   3. Prepend $self->{prelude}, unless the code contains its own #include
#      (outside quoted literals) or the -noheaders option is set.
#   4. Move leading preprocessor lines into a separate prelude and flag a
#      directive that is not newline-terminated (warn_unterminated_define).
#   5. Extract function definitions; whatever is left over is wrapped in
#      `int main(void) { ... }` unless a main was found or -nomain is set.
#   6. Tidy stray semicolons and runs of blank lines.
#
# Reads:  code, include_options, prelude, options, lang, debug
# Writes: code, warn_unterminated_define
#
# Prints debug traces to STDOUT when $self->{debug} is set. Prints an error
# and exits the process when a function body has unmatched brackets.
sub preprocess_code {
  my $self = shift;
  $self->SUPER::preprocess_code;

  my $default_prelude = exists $self->{options}->{'-noheaders'} ? '' : $self->{prelude};

  $self->{code} = $self->{include_options} . $self->{code};

  print "code before: [$self->{code}]\n" if $self->{debug};

  # add newlines to ends of statements and #includes
  my $single_quote = 0;
  my $double_quote = 0;
  my $parens = 0;
  my $cpp = 0; # preprocessor
  my $escaped = 0;

  while($self->{code} =~ m/(.)/msg) {
    my $ch = $1;
    my $pos = pos $self->{code};

    print "adding newlines, ch = [$ch], parens: $parens, cpp: $cpp, single: $single_quote, double: $double_quote, escaped: $escaped, pos: $pos\n" if $self->{debug} >= 10;

    if($ch eq '\\') {
      $escaped = not $escaped;
    } elsif($ch eq '#' and not $cpp and not $escaped and not $single_quote and not $double_quote) {
      $cpp = 1;

      # \G pins the match to the text directly after this '#', and /c keeps
      # pos() intact when a match fails. Without them a failed <...> match
      # reset pos(), the "..." match restarted from offset 0, and an earlier
      # #include "..." was re-matched on every later '#', looping forever.
      if($self->{code} =~ m/\G[ \t]*include\s*<[^>\n]*>/gc
          or $self->{code} =~ m/\G[ \t]*include\s*"[^"\n]*"/gc) {
        # terminate the #include with a newline and resume right before it
        $pos = pos $self->{code};
        substr ($self->{code}, $pos, 0) = "\n";
        pos $self->{code} = $pos;
        $cpp = 0;
      } else {
        # some other directive: stay in preprocessor mode until a newline
        pos $self->{code} = $pos;
      }
    } elsif($ch eq '"') {
      $double_quote = not $double_quote unless $escaped or $single_quote;
      $escaped = 0;
    } elsif($ch eq '(' and not $single_quote and not $double_quote) {
      $parens++;
    } elsif($ch eq ')' and not $single_quote and not $double_quote) {
      $parens--;
      $parens = 0 if $parens < 0;
    } elsif($ch eq ';' and not $cpp and not $single_quote and not $double_quote and $parens == 0) {
      # end of statement (not inside for(;;) parentheses)
      if(not substr($self->{code}, $pos, 1) =~ m/[\n\r]/) {
        substr ($self->{code}, $pos, 0) = "\n";
        pos $self->{code} = $pos + 1;
      }
    } elsif($ch eq "'") {
      $single_quote = not $single_quote unless $escaped or $double_quote;
      $escaped = 0;
    } elsif($ch eq 'n' and $escaped) {
      if(not $single_quote and not $double_quote) {
        print "added newline\n" if $self->{debug} >= 10;
        # Replace the two characters (backslash, n) with one newline. The
        # string is now one character shorter, so the next unread character
        # sits at $pos - 1; resuming at $pos would skip it (e.g. the '#' of
        # a directive that follows the backslash-n).
        substr ($self->{code}, $pos - 2, 2) = "\n";
        pos $self->{code} = $pos - 1;
        $cpp = 0;
      }
      $escaped = 0;
    } elsif($ch eq '{' and not $cpp and not $single_quote and not $double_quote) {
      if(not substr($self->{code}, $pos, 1) =~ m/[\n\r]/) {
        substr ($self->{code}, $pos, 0) = "\n";
        pos $self->{code} = $pos + 1;
      }
    } elsif($ch eq '}' and not $cpp and not $single_quote and not $double_quote) {
      # a following ';' stays on the same line as the closing brace
      if(not substr($self->{code}, $pos, 1) =~ m/[\n\r;]/) {
        substr ($self->{code}, $pos, 0) = "\n";
        pos $self->{code} = $pos + 1;
      }
    } elsif($ch eq "\n" and $cpp and not $single_quote and not $double_quote) {
      # a real newline ends the current preprocessor directive
      $cpp = 0;
    } else {
      $escaped = 0;
    }
  }

  print "code after \\n additions: [$self->{code}]\n" if $self->{debug};

  # white-out contents of quoted literals so content within literals aren't parsed as code
  my $white_code = $self->{code};
  $white_code =~ s/(?:\"((?:\\\"|(?!\").)*)\")/'"' . ('-' x length $1) . '"'/ge;
  $white_code =~ s/(?:\'((?:\\\'|(?!\').)*)\')/"'" . ('-' x length $1) . "'"/ge;

  my $precode;

  # the default headers are only added when the snippet includes nothing itself
  if($white_code =~ m/#include/) {
    $precode = $self->{code};
  } else {
    $precode = $default_prelude . $self->{code};
  }

  $self->{code} = '';

  print "--- precode: [$precode]\n" if $self->{debug};

  $self->{warn_unterminated_define} = 0;

  my $has_main = 0;

  # peel leading preprocessor lines (each with one or two trailing newlines)
  # off the front of $precode and collect them in $prelude
  my $prelude = '';
  while($precode =~ s/^\s*(#.*\n{1,2})//g) {
    $prelude .= $1;
  }

  # a directive line that runs to the end of the text without a newline
  # swallows the rest of the snippet; remember to warn about it later
  if($precode =~ m/^\s*(#.*)/ms) {
    my $line = $1;

    if($line !~ m/\n/) {
      $self->{warn_unterminated_define} = 1;
    }
  }

  print "*** prelude: [$prelude]\n   precode: [$precode]\n" if $self->{debug};

  # $preprecode is a searchable copy of $precode with literals and comments
  # neutralised; matches found in it are then located in $precode
  my $preprecode = $precode;

  # white-out contents of quoted literals
  $preprecode =~ s/(?:\"((?:\\\"|(?!\").)*)\")/'"' . ('-' x length $1) . '"'/ge;
  $preprecode =~ s/(?:\'((?:\\\'|(?!\').)*)\')/"'" . ('-' x length $1) . "'"/ge;

  # strip comments
  # (the same two substitutions run in either branch; only their order differs)
  if ($self->{lang} eq 'c89') {
    $preprecode =~ s#/\*[^*]*\*+([^/*][^*]*\*+)*/# #gs;
    $preprecode =~ s#|//([^\\]|[^\n][\n]?)*?\n|("(\\.|[^"\\])*"|'(\\.|[^'\\])*'|.[^/"'\\]*)#defined $2 ? $2 : ""#gse;
  } else {
    $preprecode =~ s#|//([^\\]|[^\n][\n]?)*?\n|("(\\.|[^"\\])*"|'(\\.|[^'\\])*'|.[^/"'\\]*)#defined $2 ? $2 : ""#gse;
    $preprecode =~ s#/\*[^*]*\*+([^/*][^*]*\*+)*/# #gs;
  }

  print "preprecode: [$preprecode]\n" if $self->{debug};

  print "looking for functions, has main: $has_main\n" if $self->{debug} >= 2;

  # captures: return type, identifier, parameter list, and everything from
  # the opening brace (or its <% digraph / ??< trigraph spelling) onwards
  my $func_regex = qr/^([ *\w]+)\s+([ ()*\w:]+)\s*\(([^;{]*)\s*\)\s*({.*|<%.*|\?\?<.*)/ims;

  # look for potential functions to extract
  while($preprecode =~ /$func_regex/ms) {
    my ($pre_ret, $pre_ident, $pre_params, $pre_potential_body) = ($1, $2, $3, $4);
    my $precode_code;

    print "looking for functions, found [$pre_ret][$pre_ident][$pre_params][$pre_potential_body], has main: $has_main\n" if $self->{debug} >= 1;

    # find the pos at which this function lives, for extracting from precode
    $preprecode =~ m/(\Q$pre_ret\E\s+\Q$pre_ident\E\s*\(\s*\Q$pre_params\E\s*\)\s*\Q$pre_potential_body\E)/g;
    my $extract_pos = (pos $preprecode) - (length $1);

    # now that we have the pos, substitute out the extracted potential function from preprecode
    $preprecode =~ s/$func_regex//ms;

    # create tmpcode object that starts from extract pos, to skip any quoted code
    my $tmpcode = substr($precode, $extract_pos);
    print "tmpcode: [$tmpcode]\n" if $self->{debug};

    $precode = substr($precode, 0, $extract_pos);
    print "precode: [$precode]\n" if $self->{debug};
    $precode_code = $precode;

    $tmpcode =~ m/$func_regex/ms;
    my ($ret, $ident, $params, $potential_body) = ($1, $2, $3, $4);

    print "1st extract: [$ret][$ident][$params][$potential_body]\n" if $self->{debug};

    $ret =~ s/^\s+//;
    $ret =~ s/\s+$//;

    # control-flow statements look like functions to $func_regex; put the
    # text back and keep searching
    if(not length $ret or $ret eq "else" or $ret eq "while" or $ret eq "if" or $ret eq "for" or $ident eq "for" or $ident eq "while" or $ident eq "if") {
      $precode .= "$ret $ident ($params) $potential_body";
      next;
    } else {
      $tmpcode =~ s/$func_regex//ms;
    }

    # normalise digraph/trigraph braces so extract_bracketed can pair them
    $potential_body =~ s/^\s*<%/{/ms;
    $potential_body =~ s/%>\s*$/}/ms;
    $potential_body =~ s/^\s*\?\?</{/ms;
    $potential_body =~ s/\?\?>$/}/ms;

    # split into the balanced {...} body and whatever follows it
    my @extract = extract_bracketed($potential_body, '{}');
    my $body;
    if(not defined $extract[0]) {
      if($self->{debug} == 0) {
        print "error: unmatched brackets\n";
      } else {
        print "error: unmatched brackets for function '$ident';\n";
        print "body: [$potential_body]\n";
      }
      exit;
    } else {
      $body = $extract[0];
      $preprecode = $extract[1];
      $precode = $extract[1];
    }

    print "final extract: [$ret][$ident][$params][$body]\n" if $self->{debug};
    # text that preceded the function is emitted ahead of it
    $self->{code} .= "$precode_code\n$ret $ident($params) $body\n";

    if($self->{debug} >= 2) { print '-' x 20 . "\n" }
    print "     code: [$self->{code}]\n" if $self->{debug} >= 2;
    if($self->{debug} >= 2) { print '-' x 20 . "\n" }
    print "  precode: [$precode]\n" if $self->{debug} >= 2;

    $has_main = 1 if $ident =~ m/^\s*\(?\s*main\s*\)?\s*$/;
  }

  $precode =~ s/^\s+//;
  $precode =~ s/\s+$//;

  # drop one pair of braces wrapping the whole remainder
  $precode =~ s/^{(.*)}$/$1/s;

  if(not $has_main and not exists $self->{options}->{'-nomain'}) {
    # a leading "}" or "};" closes something emitted above (e.g. a struct or
    # class that contained functions), so it goes before main
    if ($precode =~ s/^(};?)//) {
      $self->{code} .= $1;
    }

    $self->{code} = "$prelude\n$self->{code}\n" . "int main(void) {\n$precode\n;\nreturn 0;\n}\n";
  } else {
    $self->{code} = "$prelude\n$self->{code}\n";
  }

  print "after func extract, code: [$self->{code}]\n" if $self->{debug};

  # final tidy-up: a literal |n also becomes a newline; trim; collapse
  # doubled semicolons (also across comments) and lone ';' lines after
  # braces; squeeze runs of blank lines
  $self->{code} =~ s/\|n/\n/g;
  $self->{code} =~ s/^\s+//;
  $self->{code} =~ s/\s+$//;
  $self->{code} =~ s/;\s*;\n/;\n/gs;
  $self->{code} =~ s/;(\s*\/\*.*?\*\/\s*);\n/;$1/gs;
  $self->{code} =~ s/;(\s*\/\/.*?\s*);\n/;$1/gs;
  $self->{code} =~ s/({|})\n\s*;\n/$1\n/gs;
  $self->{code} =~ s/(?:\n\n)+/\n\n/g;

  print "final code: [$self->{code}]\n" if $self->{debug};
}

# Condense the compiler/debugger text in $self->{output}.
#
# After the parent class has done its own post-processing, a fixed sequence
# of substitutions strips file names, positions and boilerplate messages,
# normalises quote characters, rewrites a few gdb renderings, applies
# backspace characters, and finally prepends a warning when
# $self->{warn_unterminated_define} is 1. The substitutions are
# order-dependent: later patterns expect the text produced by earlier ones.
#
# Reads:  output, sourcefile, execfile, options, got_run,
#         warn_unterminated_define
# Writes: output
sub postprocess_output {
  my $self = shift;
  $self->SUPER::postprocess_output;

  my $output = $self->{output};

  # File names are matched literally. Interpolated raw, the '.' in a name
  # such as prog.c would match any character.
  my $src_re = quotemeta $self->{sourcefile};
  my $exe_re = quotemeta $self->{execfile};

  # include chains, assertion prefixes and system header locations
  $output =~ s/In file included from .*?:\d+:\d+.\s*from ${src_re}:\d+.\s*//msg;
  $output =~ s/In file included from .*?:\d+:\d+.\s*//msg;
  $output =~ s/\s*from ${src_re}:\d+.\s*//g;
  $output =~ s/${exe_re}: ${src_re}:\d+: [^:]+: Assertion/Assertion/g;
  $output =~ s,/usr/include/[^:]+:\d+:\d+:\s+,,g;

  # outside paste mode positions are dropped entirely; in paste mode each
  # diagnostic starts on its own line with its line number kept
  unless(exists $self->{options}->{'-paste'} or (defined $self->{got_run} and $self->{got_run} eq "paste")) {
    $output =~ s/ Line \d+ ://g;
    $output =~ s/${src_re}:[:\d]*//g;
  } else {
    $output =~ s/${src_re}:(\d+)/\n$1/g;
    $output =~ s/${src_re}://g;
  }

  # compiler and linker boilerplate
  $output =~ s/;?\s?__PRETTY_FUNCTION__ = "[^"]+"//g;
  $output =~ s/(\d+:\d+:\s*)*cc1: (all\s+)?warnings being treated as errors//;
  $output =~ s/(\d+:\d+:\s*)* \(first use in this function\)//g;
  $output =~ s/(\d+:\d+:\s*)*error: \(Each undeclared identifier is reported only once.*?\)//msg;
  $output =~ s/(\d+:\d+:\s*)*ld: warning: cannot find entry symbol _start; defaulting to [^ ]+//;
  $output =~ s/(\d+:\d+:\s*)*\/tmp\/.*\.o://g;
  $output =~ s/(\d+:\d+:\s*)*collect2: ld returned \d+ exit status//g;
  $output =~ s/\(\.text\+[^)]+\)://g;
  $output =~ s/\[ In/[In/;
  $output =~ s/(\d+:\d+:\s*)*warning: Can't read pathname for load map: Input.output error.//g;

  # normalise quotes: the three-byte sequences 226,128,152 and 226,128,153
  # as well as backticks all become a plain apostrophe
  my $left_quote = chr(226) . chr(128) . chr(152);
  my $right_quote = chr(226) . chr(128) . chr(153);
  $output =~ s/$left_quote/'/msg;
  $output =~ s/$right_quote/'/msg;
  $output =~ s/`/'/msg;
  $output =~ s/\t/   /g;

  # "In function 'main':" is removed when main is the first function named,
  # otherwise it is kept in a normalised form
  if($output =~ /In function '([^']+)':/) {
    if($1 eq 'main') {
      $output =~ s/(\d+:\d+:\s*)*\s?In function .main.:\s*//g;
    } else {
      $output =~ s/(\d+:\d+:\s*)*\s?In function .main.:\s?/In function 'main':/g;
    }
  }

  # %b format diagnostics are reported as an informational note instead
  $output =~ s/(\d+:\d+:\s*)*warning: unknown conversion type character 'b' in format \[-Wformat=?\]\s+(\d+:\d+:\s*)*warning: too many arguments for format \[-Wformat-extra-args\]/info: %b is a candide extension/g; #gcc
  $output =~ s/(\d+:\d+:\s*)*warning: invalid conversion specifier 'b' \[-Wformat-invalid-specifier\]/info: %b is a candide extension/g; #clang
  $output =~ s/(\d+:\d+:\s*)*warning: unknown conversion type character 'b' in format \[-Wformat=?\]//g;
  $output =~ s/\s\(core dumped\)/./;
  $output =~ s/ \[enabled by default\]//g;
  $output =~ s/initializer\s+warning: \(near/initializer (near/g;
  $output =~ s/(\d+:\d+:\s*)*note: each undeclared identifier is reported only once for each function it appears in//g;

  # debugger renderings: prompts, repeat counts and NUL padding
  $output =~ s/\(gdb\)//g;
  $output =~ s/", '\\(\d{3})' <repeats \d+ times>,? ?"/\\$1/g;
  $output =~ s/, '\\(\d{3})' <repeats \d+ times>\s*//g;
  $output =~ s/(\\000)+/\\0/g;
  $output =~ s/\\0[^">']+/\\0/g;
  $output =~ s/= (\d+) '\\0'/= $1/g;
  $output =~ s/\\0"/"/g;
  $output =~ s/"\\0/"/g;
  $output =~ s/\.\.\.>/>/g;
  $output =~ s/<\s*included at \/home\/compiler\/>\s*//g;
  $output =~ s/\s*compilation terminated due to -Wfatal-errors\.//g;
  $output =~ s/^======= Backtrace.*\[vsyscall\]\s*$//ms;
  $output =~ s/glibc detected \*\*\* \/home\/compiler\/${exe_re}: //;
  $output =~ s/: \/home\/compiler\/${exe_re} terminated//;
  $output =~ s/<Defined at \/home\/compiler\/>/<Defined at \/home\/compiler\/$self->{sourcefile}:0>/g;
  $output =~ s/\s*In file included from\s+\/usr\/include\/.*?:\d+:\d+:\s*/, /g;
  $output =~ s/\s*collect2: error: ld returned 1 exit status//g;
  # Backticks were already rewritten to apostrophes above, so accept either
  # opening quote; the backtick-only pattern could never match here.
  $output =~ s/In function\s*['`]main':\s*\/home\/compiler\/ undefined reference to/error: undefined reference to/g;
  $output =~ s/\/home\/compiler\///g;
  $output =~ s/compilation terminated.//;
  $output =~ s/'(.*?)' = char/'$1' = int/g; $output =~ s/(\(\s*char\s*\)\s*'.*?') = int/$1 = char/; # gdb thinks 'a' is type char, which is not true for C
  $output =~ s/= (-?\d+) ''/= $1/g;
  $output =~ s/, <incomplete sequence >//g;
  $output =~ s/\s*warning: shadowed declaration is here \[-Wshadow\]//g unless exists $self->{options}->{'-paste'} or (defined $self->{got_run} and $self->{got_run} eq 'paste');
  $output =~ s/\s*note: shadowed declaration is here//g unless exists $self->{options}->{'-paste'} or (defined $self->{got_run} and $self->{got_run} eq 'paste');
  $output =~ s/preprocessor macro>\s+<at\s+>/preprocessor macro>/g;
  $output =~ s/<No symbol table is loaded.  Use the "file" command.>\s*//g;
  $output =~ s/cc1: all warnings being treated as; errors//g;
  $output =~ s/, note: this is the location of the previous definition//g;
  $output =~ s/ called by gdb \(\) at statement: void gdb\(\) { __asm__\(""\); }//g;
  $output =~ s/called by \?\? \(\) //g;
  $output =~ s/\s0x[a-z0-9]+: note: pointer points here.*?\^//gms;

  # warnings that are dropped outright; if any was removed, tidy up the
  # bracket padding they leave behind
  my $removed_warning = 0;

  $removed_warning++ if $output =~ s/\s*warning: ISO C forbids nested functions \[-pedantic\]\s*/ /g;
  $removed_warning++ if $output =~ s/\s*warning: too many arguments in call to 'gdb'\s+note: expanded from macro '.*?'\s*/ /msg;

  if($removed_warning) {
    $output =~ s/^\[\s*\]\s//;
    $output =~ s/^\[\s+/[/m;
    $output =~ s/\s+\]$/]/m;
  }

  $output =~ s/^\[\s+(warning:|info:)/[$1/;  # remove leading spaces in first warning/info

  # backspace
  # Rebuild the text character by character: "\b" moves the write position
  # back one place (never below 0) and later characters overwrite from there.
  my $boutput = "";
  my $active_position = 0;
  $output =~ s/\n$//;
  while($output =~ /(.)/gms) {
    my $c = $1;
    if($c eq "\b") {
      if(--$active_position <= 0) {
        $active_position = 0;
      }
      next;
    }
    substr($boutput, $active_position++, 1) = $c;
  }
  $output = $boutput;

  # merge the unterminated-directive warning into an existing leading
  # [warning:/info: block, or put it in a block of its own
  if($self->{warn_unterminated_define} == 1) {
    if($output =~ m/^\[(warning:|info:)/) {
      $output =~ s/^\[/[warning: preprocessor directive not terminated by \\n, the remainder of the line will be part of this directive /;
    } else {
      $output =~ s/^/[warning: preprocessor directive not terminated by \\n, the remainder of the line will be part of this directive] /;
    }
  }

  $self->{output} = $output;
}

1;
Add compiler_vm support for clang Also renamed c11.pm to _c_base.pm and set the C languages to inherit from _c_base.pm instead of c11.pm. 2015-01-15 10:21:18 +01:00			`#!/usr/bin/perl`

			`use warnings;`
			`use strict;`
			`use feature "switch";`

			`no if $] >= 5.018, warnings => "experimental::smartmatch";`

			`package _c_base;`
			`use parent '_default';`

			`use Text::Balanced qw/extract_bracketed/;`

			`sub initialize {`
			`my ($self, %conf) = @_;`

			`$self->{sourcefile} = 'prog.c';`
			`$self->{execfile} = 'prog';`
			`$self->{default_options} = '-Wextra -Wall -Wno-unused -pedantic -Wfloat-equal -Wshadow -std=c11 -lm -Wfatal-errors';`
Add options conditional on whether code is being pasted 2015-01-17 13:41:50 +01:00			`$self->{options_paste} = '-fdiagnostics-show-caret';`
			`$self->{options_nopaste} = '-fno-diagnostics-show-caret';`
Add compiler_vm support for clang Also renamed c11.pm to _c_base.pm and set the C languages to inherit from _c_base.pm instead of c11.pm. 2015-01-15 10:21:18 +01:00			`$self->{cmdline} = 'gcc -ggdb -g3 $sourcefile $options -o $execfile';`

			`$self->{prelude} = <<'END';`
			`#define _XOPEN_SOURCE 9001`
			`#define __USE_XOPEN`
			`#include <stdio.h>`
			`#include <stdlib.h>`
			`#include <string.h>`
			`#include <unistd.h>`
			`#include <math.h>`
			`#include <limits.h>`
			`#include <sys/types.h>`
			`#include <stdint.h>`
			`#include <stdbool.h>`
			`#include <stddef.h>`
			`#include <stdarg.h>`
			`#include <stdnoreturn.h>`
			`#include <stdalign.h>`
			`#include <ctype.h>`
			`#include <inttypes.h>`
			`#include <float.h>`
			`#include <errno.h>`
			`#include <time.h>`
			`#include <assert.h>`
			`#include <complex.h>`
			`#include <setjmp.h>`
			`#include <wchar.h>`
			`#include <wctype.h>`
			`#include <tgmath.h>`
			`#include <fenv.h>`
			`#include <locale.h>`
			`#include <iso646.h>`
			`#include <signal.h>`
			`#include <uchar.h>`
			`#include <prelude.h>`

			`END`
			`}`

			`sub process_custom_options {`
			`my $self = shift;`
			`$self->{code} = $self->{code};`

			`$self->add_option("-nomain") if $self->{code} =~ s/(?:^\|(?<=\s))-nomain\s*//i;`
Add -noheaders option to disable pre-included headers 2015-01-25 04:01:44 +01:00			`$self->add_option("-noheaders") if $self->{code} =~ s/(?:^\|(?<=\s))-noheaders\s*//i;`
Add compiler_vm support for clang Also renamed c11.pm to _c_base.pm and set the C languages to inherit from _c_base.pm instead of c11.pm. 2015-01-15 10:21:18 +01:00
			`$self->{include_options} = "";`
			`while ($self->{code} =~ s/(?:^\|(?<=\s))-include\s+(\S+)\s+//) {`
			`$self->{include_options} .= "#include <$1> ";`
			`$self->add_option("-include $1");`
			`}`

			`$self->{code} = $self->{code};`
			`}`

			`sub pretty_format {`
			`my $self = shift;`
			`my $code = join '', @_;`
			`my $result;`

			`$code = $self->{code} if not defined $code;`

			`open my $fh, ">$self->{sourcefile}" or die "Couldn't write $self->{sourcefile}: $!";`
			`print $fh $code;`
			`close $fh;`

Improve indentation for pretty-pasting; remove newline replacement code 2015-04-05 11:24:56 +02:00			`system("astyle", "-A3 -UHpnfq", $self->{sourcefile});`
Add compiler_vm support for clang Also renamed c11.pm to _c_base.pm and set the C languages to inherit from _c_base.pm instead of c11.pm. 2015-01-15 10:21:18 +01:00
			`open $fh, "<$self->{sourcefile}" or die "Couldn't read $self->{sourcefile}: $!";`
			`$result = join '', <$fh>;`
			`close $fh;`

			`return $result;`
			`}`

			`sub preprocess_code {`
			`my $self = shift;`
			`$self->SUPER::preprocess_code;`

Add -noheaders option to disable pre-included headers 2015-01-25 04:01:44 +01:00			`my $default_prelude = exists $self->{options}->{'-noheaders'} ? '' : $self->{prelude};`
Add compiler_vm support for clang Also renamed c11.pm to _c_base.pm and set the C languages to inherit from _c_base.pm instead of c11.pm. 2015-01-15 10:21:18 +01:00
			`$self->{code} = $self->{include_options} . $self->{code};`

			`print "code before: [$self->{code}]\n" if $self->{debug};`

			`# add newlines to ends of statements and #includes`
			`my $single_quote = 0;`
			`my $double_quote = 0;`
			`my $parens = 0;`
			`my $cpp = 0; # preprocessor`
Improve indentation for pretty-pasting; remove newline replacement code 2015-04-05 11:24:56 +02:00			`my $escaped = 0;`
Add compiler_vm support for clang Also renamed c11.pm to _c_base.pm and set the C languages to inherit from _c_base.pm instead of c11.pm. 2015-01-15 10:21:18 +01:00
			`while($self->{code} =~ m/(.)/msg) {`
			`my $ch = $1;`
			`my $pos = pos $self->{code};`

			`print "adding newlines, ch = [$ch], parens: $parens, cpp: $cpp, single: $single_quote, double: $double_quote, escaped: $escaped, pos: $pos\n" if $self->{debug} >= 10;`

			`if($ch eq '\\') {`
			`$escaped = not $escaped;`
			`} elsif($ch eq '#' and not $cpp and not $escaped and not $single_quote and not $double_quote) {`
			`$cpp = 1;`

			`if($self->{code} =~ m/include\s<([^>\n])>/msg) {`
			`my $match = $1;`
			`$pos = pos $self->{code};`
			`substr ($self->{code}, $pos, 0) = "\n";`
			`pos $self->{code} = $pos;`
			`$cpp = 0;`
			`} elsif($self->{code} =~ m/include\s"([^"\n])"/msg) {`
			`my $match = $1;`
			`$pos = pos $self->{code};`
			`substr ($self->{code}, $pos, 0) = "\n";`
			`pos $self->{code} = $pos;`
			`$cpp = 0;`
			`} else {`
			`pos $self->{code} = $pos;`
			`}`
			`} elsif($ch eq '"') {`
			`$double_quote = not $double_quote unless $escaped or $single_quote;`
			`$escaped = 0;`
			`} elsif($ch eq '(' and not $single_quote and not $double_quote) {`
			`$parens++;`
			`} elsif($ch eq ')' and not $single_quote and not $double_quote) {`
			`$parens--;`
			`$parens = 0 if $parens < 0;`
			`} elsif($ch eq ';' and not $cpp and not $single_quote and not $double_quote and $parens == 0) {`
			`if(not substr($self->{code}, $pos, 1) =~ m/[\n\r]/) {`
			`substr ($self->{code}, $pos, 0) = "\n";`
			`pos $self->{code} = $pos + 1;`
			`}`
			`} elsif($ch eq "'") {`
			`$single_quote = not $single_quote unless $escaped or $double_quote;`
			`$escaped = 0;`
			`} elsif($ch eq 'n' and $escaped) {`
			`if(not $single_quote and not $double_quote) {`
			`print "added newline\n" if $self->{debug} >= 10;`
			`substr ($self->{code}, $pos - 2, 2) = "\n";`
			`pos $self->{code} = $pos;`
			`$cpp = 0;`
			`}`
			`$escaped = 0;`
			`} elsif($ch eq '{' and not $cpp and not $single_quote and not $double_quote) {`
			`if(not substr($self->{code}, $pos, 1) =~ m/[\n\r]/) {`
			`substr ($self->{code}, $pos, 0) = "\n";`
			`pos $self->{code} = $pos + 1;`
			`}`
			`} elsif($ch eq '}' and not $cpp and not $single_quote and not $double_quote) {`
			`if(not substr($self->{code}, $pos, 1) =~ m/[\n\r;]/) {`
			`substr ($self->{code}, $pos, 0) = "\n";`
			`pos $self->{code} = $pos + 1;`
			`}`
			`} elsif($ch eq "\n" and $cpp and not $single_quote and not $double_quote) {`
			`$cpp = 0;`
			`} else {`
			`$escaped = 0;`
			`}`
			`}`

			`print "code after \\n additions: [$self->{code}]\n" if $self->{debug};`

			`# white-out contents of quoted literals so content within literals aren't parsed as code`
			`my $white_code = $self->{code};`
			`$white_code =~ s/(?:\"((?:\\\"\|(?!\").)*)\")/'"' . ('-' x length $1) . '"'/ge;`
			`$white_code =~ s/(?:\'((?:\\\'\|(?!\').)*)\')/"'" . ('-' x length $1) . "'"/ge;`

			`my $precode;`

			`if($white_code =~ m/#include/) {`
			`$precode = $self->{code};`
			`} else {`
			`$precode = $default_prelude . $self->{code};`
			`}`

			`$self->{code} = '';`

			`print "--- precode: [$precode]\n" if $self->{debug};`

			`$self->{warn_unterminated_define} = 0;`

			`my $has_main = 0;`

			`my $prelude = '';`
			`while($precode =~ s/^\s(#.\n{1,2})//g) {`
			`$prelude .= $1;`
			`}`

			`if($precode =~ m/^\s(#.)/ms) {`
			`my $line = $1;`

			`if($line !~ m/\n/) {`
			`$self->{warn_unterminated_define} = 1;`
			`}`
			`}`

			`print "*** prelude: [$prelude]\n precode: [$precode]\n" if $self->{debug};`

			`my $preprecode = $precode;`

			`# white-out contents of quoted literals`
			`$preprecode =~ s/(?:\"((?:\\\"\|(?!\").)*)\")/'"' . ('-' x length $1) . '"'/ge;`
			`$preprecode =~ s/(?:\'((?:\\\'\|(?!\').)*)\')/"'" . ('-' x length $1) . "'"/ge;`

			`# strip comments`
			`if ($self->{lang} eq 'c89') {`
			`$preprecode =~ s#/\[^]\+([^/][^]\+)*/# #gs;`
			`$preprecode =~ s#\|//([^\\]\|[^\n][\n]?)?\n\|("(\\.\|[^"\\])"\|'(\\.\|[^'\\])'\|.[^/"'\\])#defined $2 ? $2 : ""#gse;`
			`} else {`
			`$preprecode =~ s#\|//([^\\]\|[^\n][\n]?)?\n\|("(\\.\|[^"\\])"\|'(\\.\|[^'\\])'\|.[^/"'\\])#defined $2 ? $2 : ""#gse;`
			`$preprecode =~ s#/\[^]\+([^/][^]\+)*/# #gs;`
			`}`

			`print "preprecode: [$preprecode]\n" if $self->{debug};`

			`print "looking for functions, has main: $has_main\n" if $self->{debug} >= 2;`

Improve indentation for pretty-pasting; remove newline replacement code 2015-04-05 11:24:56 +02:00			`my $func_regex = qr/^([ \w]+)\s+([ ()\w:]+)\s\(([^;{])\s\)\s({.\|<%.\|\?\?<.*)/ims;`
Add compiler_vm support for clang Also renamed c11.pm to _c_base.pm and set the C languages to inherit from _c_base.pm instead of c11.pm. 2015-01-15 10:21:18 +01:00
			`# look for potential functions to extract`
			`while($preprecode =~ /$func_regex/ms) {`
			`my ($pre_ret, $pre_ident, $pre_params, $pre_potential_body) = ($1, $2, $3, $4);`
			`my $precode_code;`

			`print "looking for functions, found [$pre_ret][$pre_ident][$pre_params][$pre_potential_body], has main: $has_main\n" if $self->{debug} >= 1;`

			`# find the pos at which this function lives, for extracting from precode`
			`$preprecode =~ m/(\Q$pre_ret\E\s+\Q$pre_ident\E\s\(\s\Q$pre_params\E\s\)\s\Q$pre_potential_body\E)/g;`
			`my $extract_pos = (pos $preprecode) - (length $1);`

			`# now that we have the pos, substitute out the extracted potential function from preprecode`
			`$preprecode =~ s/$func_regex//ms;`

			`# create tmpcode object that starts from extract pos, to skip any quoted code`
			`my $tmpcode = substr($precode, $extract_pos);`
			`print "tmpcode: [$tmpcode]\n" if $self->{debug};`

			`$precode = substr($precode, 0, $extract_pos);`
			`print "precode: [$precode]\n" if $self->{debug};`
			`$precode_code = $precode;`

			`$tmpcode =~ m/$func_regex/ms;`
			`my ($ret, $ident, $params, $potential_body) = ($1, $2, $3, $4);`

			`print "1st extract: [$ret][$ident][$params][$potential_body]\n" if $self->{debug};`

			`$ret =~ s/^\s+//;`
			`$ret =~ s/\s+$//;`

			`if(not length $ret or $ret eq "else" or $ret eq "while" or $ret eq "if" or $ret eq "for" or $ident eq "for" or $ident eq "while" or $ident eq "if") {`
			`$precode .= "$ret $ident ($params) $potential_body";`
			`next;`
			`} else {`
			`$tmpcode =~ s/$func_regex//ms;`
			`}`

			`$potential_body =~ s/^\s*<%/{/ms;`
			`$potential_body =~ s/%>\s*$/}/ms;`
			`$potential_body =~ s/^\s*\?\?</{/ms;`
			`$potential_body =~ s/\?\?>$/}/ms;`

			`my @extract = extract_bracketed($potential_body, '{}');`
			`my $body;`
			`if(not defined $extract[0]) {`
			`if($self->{debug} == 0) {`
			`print "error: unmatched brackets\n";`
			`} else {`
			`print "error: unmatched brackets for function '$ident';\n";`
			`print "body: [$potential_body]\n";`
			`}`
			`exit;`
			`} else {`
			`$body = $extract[0];`
			`$preprecode = $extract[1];`
			`$precode = $extract[1];`
			`}`

			`print "final extract: [$ret][$ident][$params][$body]\n" if $self->{debug};`
			`$self->{code} .= "$precode_code\n$ret $ident($params) $body\n";`

			`if($self->{debug} >= 2) { print '-' x 20 . "\n" }`
			`print " code: [$self->{code}]\n" if $self->{debug} >= 2;`
			`if($self->{debug} >= 2) { print '-' x 20 . "\n" }`
			`print " precode: [$precode]\n" if $self->{debug} >= 2;`

			`$has_main = 1 if $ident =~ m/^\s\(?\smain\s\)?\s$/;`
			`}`

			`$precode =~ s/^\s+//;`
			`$precode =~ s/\s+$//;`

			`$precode =~ s/^{(.*)}$/$1/s;`

			`if(not $has_main and not exists $self->{options}->{'-nomain'}) {`
Improve support for C++ functions within structs/classes 2015-04-09 20:03:42 +02:00			`if ($precode =~ s/^(};?)//) {`
			`$self->{code} .= $1;`
			`}`

Add compiler_vm support for clang Also renamed c11.pm to _c_base.pm and set the C languages to inherit from _c_base.pm instead of c11.pm. 2015-01-15 10:21:18 +01:00			`$self->{code} = "$prelude\n$self->{code}\n" . "int main(void) {\n$precode\n;\nreturn 0;\n}\n";`
			`} else {`
			`$self->{code} = "$prelude\n$self->{code}\n";`
			`}`

			`print "after func extract, code: [$self->{code}]\n" if $self->{debug};`

			`$self->{code} =~ s/\\|n/\n/g;`
			`$self->{code} =~ s/^\s+//;`
			`$self->{code} =~ s/\s+$//;`
			`$self->{code} =~ s/;\s*;\n/;\n/gs;`
			`$self->{code} =~ s/;(\s\/\.?\\/\s*);\n/;$1/gs;`
			`$self->{code} =~ s/;(\s\/\/.?\s*);\n/;$1/gs;`
			`$self->{code} =~ s/({\|})\n\s*;\n/$1\n/gs;`
			`$self->{code} =~ s/(?:\n\n)+/\n\n/g;`

			`print "final code: [$self->{code}]\n" if $self->{debug};`
			`}`

			`sub postprocess_output {`
			`my $self = shift;`
			`$self->SUPER::postprocess_output;`

			`my $output = $self->{output};`

			`$output =~ s/In file included from .?:\d+:\d+.\sfrom $self->{sourcefile}:\d+.\s*//msg;`
			`$output =~ s/In file included from .?:\d+:\d+.\s//msg;`
			`$output =~ s/\sfrom $self->{sourcefile}:\d+.\s//g;`
			`$output =~ s/$self->{execfile}: $self->{sourcefile}:\d+: [^:]+: Assertion/Assertion/g;`
			`$output =~ s,/usr/include/[^:]+:\d+:\d+:\s+,,g;`

			`unless(exists $self->{options}->{'-paste'} or (defined $self->{got_run} and $self->{got_run} eq "paste")) {`
			`$output =~ s/ Line \d+ ://g;`
			`$output =~ s/$self->{sourcefile}:[:\d]*//g;`
			`} else {`
			`$output =~ s/$self->{sourcefile}:(\d+)/\n$1/g;`
			`$output =~ s/$self->{sourcefile}://g;`
			`}`

			`$output =~ s/;?\s?__PRETTY_FUNCTION__ = "[^"]+"//g;`
			`$output =~ s/(\d+:\d+:\s)cc1: (all\s+)?warnings being treated as errors//;`
			`$output =~ s/(\d+:\d+:\s) \(first use in this function\)//g;`
			`$output =~ s/(\d+:\d+:\s)error: \(Each undeclared identifier is reported only once.*?\)//msg;`
			`$output =~ s/(\d+:\d+:\s)ld: warning: cannot find entry symbol _start; defaulting to [^ ]+//;`
			`$output =~ s/(\d+:\d+:\s)\/tmp\/.*\.o://g;`
			`$output =~ s/(\d+:\d+:\s)collect2: ld returned \d+ exit status//g;`
			`$output =~ s/\(\.text\+[^)]+\)://g;`
			`$output =~ s/\[ In/[In/;`
			`$output =~ s/(\d+:\d+:\s)warning: Can't read pathname for load map: Input.output error.//g;`
			`my $left_quote = chr(226) . chr(128) . chr(152);`
			`my $right_quote = chr(226) . chr(128) . chr(153);`
			`$output =~ s/$left_quote/'/msg;`
			`$output =~ s/$right_quote/'/msg;`
			$output =~ s/`/'/msg;
			`$output =~ s/\t/ /g;`
			`if($output =~ /In function '([^']+)':/) {`
			`if($1 eq 'main') {`
			`$output =~ s/(\d+:\d+:\s)\s?In function .main.:\s*//g;`
			`} else {`
			`$output =~ s/(\d+:\d+:\s)\s?In function .main.:\s?/In function 'main':/g;`
			`}`
			`}`
Postprocess clang output for invalid %b format specifier 2015-01-18 14:43:16 +01:00			`$output =~ s/(\d+:\d+:\s)warning: unknown conversion type character 'b' in format \[-Wformat=?\]\s+(\d+:\d+:\s)warning: too many arguments for format \[-Wformat-extra-args\]/info: %b is a candide extension/g; #gcc`
			`$output =~ s/(\d+:\d+:\s)warning: invalid conversion specifier 'b' \[-Wformat-invalid-specifier\]/info: %b is a candide extension/g; #clang`
Add compiler_vm support for clang Also renamed c11.pm to _c_base.pm and set the C languages to inherit from _c_base.pm instead of c11.pm. 2015-01-15 10:21:18 +01:00			`$output =~ s/(\d+:\d+:\s)warning: unknown conversion type character 'b' in format \[-Wformat=?\]//g;`
			`$output =~ s/\s\(core dumped\)/./;`
			`$output =~ s/ \[enabled by default\]//g;`
			`$output =~ s/initializer\s+warning: \(near/initializer (near/g;`
			`$output =~ s/(\d+:\d+:\s)note: each undeclared identifier is reported only once for each function it appears in//g;`
			`$output =~ s/\(gdb\)//g;`
			`$output =~ s/", '\\(\d{3})' <repeats \d+ times>,? ?"/\\$1/g;`
			`$output =~ s/, '\\(\d{3})' <repeats \d+ times>\s*//g;`
			`$output =~ s/(\\000)+/\\0/g;`
			`$output =~ s/\\0[^">']+/\\0/g;`
			`$output =~ s/= (\d+) '\\0'/= $1/g;`
			`$output =~ s/\\0"/"/g;`
			`$output =~ s/"\\0/"/g;`
			`$output =~ s/\.\.\.>/>/g;`
			`$output =~ s/<\sincluded at \/home\/compiler\/>\s//g;`
			`$output =~ s/\s*compilation terminated due to -Wfatal-errors\.//g;`
			`$output =~ s/^======= Backtrace.\[vsyscall\]\s$//ms;`
			`$output =~ s/glibc detected \\\* \/home\/compiler\/$self->{execfile}: //;`
			`$output =~ s/: \/home\/compiler\/$self->{execfile} terminated//;`
			`$output =~ s/<Defined at \/home\/compiler\/>/<Defined at \/home\/compiler\/$self->{sourcefile}:0>/g;`
			`$output =~ s/\sIn file included from\s+\/usr\/include\/.?:\d+:\d+:\s*/, /g;`
			`$output =~ s/\s*collect2: error: ld returned 1 exit status//g;`
			$output =~ s/In function\s`main':\s\/home\/compiler\/ undefined reference to/error: undefined reference to/g;
			`$output =~ s/\/home\/compiler\///g;`
			`$output =~ s/compilation terminated.//;`
			`$output =~ s/'(.?)' = char/'$1' = int/g; $output =~ s/(\(\schar\s\)\s'.*?') = int/$1 = char/; # gdb thinks 'a' is type char, which is not true for C`
			`$output =~ s/= (-?\d+) ''/= $1/g;`
			`$output =~ s/, <incomplete sequence >//g;`
			`$output =~ s/\s*warning: shadowed declaration is here \[-Wshadow\]//g unless exists $self->{options}->{'-paste'} or (defined $self->{got_run} and $self->{got_run} eq 'paste');`
			`$output =~ s/\s*note: shadowed declaration is here//g unless exists $self->{options}->{'-paste'} or (defined $self->{got_run} and $self->{got_run} eq 'paste');`
			`$output =~ s/preprocessor macro>\s+<at\s+>/preprocessor macro>/g;`
			`$output =~ s/<No symbol table is loaded. Use the "file" command.>\s*//g;`
			`$output =~ s/cc1: all warnings being treated as; errors//g;`
			`$output =~ s/, note: this is the location of the previous definition//g;`
			`$output =~ s/ called by gdb \(\) at statement: void gdb\(\) { __asm__\(""\); }//g;`
Improve indentation for pretty-pasting; remove newline replacement code 2015-04-05 11:24:56 +02:00			`$output =~ s/called by \?\? \(\) //g;`
			`$output =~ s/\s0x[a-z0-9]+: note: pointer points here.*?\^//gms;`
Add compiler_vm support for clang Also renamed c11.pm to _c_base.pm and set the C languages to inherit from _c_base.pm instead of c11.pm. 2015-01-15 10:21:18 +01:00
			`my $removed_warning = 0;`

Improve indentation for pretty-pasting; remove newline replacement code 2015-04-05 11:24:56 +02:00			`$removed_warning++ if $output =~ s/\swarning: ISO C forbids nested functions \[-pedantic\]\s/ /g;`
			`$removed_warning++ if $output =~ s/\swarning: too many arguments in call to 'gdb'\s+note: expanded from macro '.?'\s*/ /msg;`
Add compiler_vm support for clang Also renamed c11.pm to _c_base.pm and set the C languages to inherit from _c_base.pm instead of c11.pm. 2015-01-15 10:21:18 +01:00
			`if($removed_warning) {`
Improve indentation for pretty-pasting; remove newline replacement code 2015-04-05 11:24:56 +02:00			`$output =~ s/^\[\s*\]\s//;`
			`$output =~ s/^\[\s+/[/m;`
			`$output =~ s/\s+\]$/]/m;`
Add compiler_vm support for clang Also renamed c11.pm to _c_base.pm and set the C languages to inherit from _c_base.pm instead of c11.pm. 2015-01-15 10:21:18 +01:00			`}`

			`$output =~ s/^\[\s+(warning:\|info:)/[$1/; # remove leading spaces in first warning/info`

			`# backspace`
			`my $boutput = "";`
			`my $active_position = 0;`
			`$output =~ s/\n$//;`
			`while($output =~ /(.)/gms) {`
			`my $c = $1;`
			`if($c eq "\b") {`
			`if(--$active_position <= 0) {`
			`$active_position = 0;`
			`}`
			`next;`
			`}`
			`substr($boutput, $active_position++, 1) = $c;`
			`}`
			`$output = $boutput;`

			`if($self->{warn_unterminated_define} == 1) {`
			`if($output =~ m/^\[(warning:\|info:)/) {`
			`$output =~ s/^\[/[warning: preprocessor directive not terminated by \\n, the remainder of the line will be part of this directive /;`
			`} else {`
			`$output =~ s/^/[warning: preprocessor directive not terminated by \\n, the remainder of the line will be part of this directive] /;`
			`}`
			`}`

			`$self->{output} = $output;`
			`}`

			`1;`