3
0
mirror of https://github.com/pragma-/pbot.git synced 2024-11-24 12:59:35 +01:00

applets: add C23 draft (n3047) command

I really, really need to clean this mess up and combine
c99std.pl, c11std.pl and c23std.pl into one script!
This commit is contained in:
Pragmatic Software 2022-08-07 19:10:34 -07:00
parent c4602bc295
commit 9ddd38038a
7 changed files with 129373 additions and 1 deletions

272
applets/c23std.pl vendored Executable file
View File

@ -0,0 +1,272 @@
#!/usr/bin/env perl
# SPDX-FileCopyrightText: 2021 Pragmatic Software <pragma78@gmail.com>
# SPDX-License-Identifier: MIT
use warnings;
use strict;
my $debug = 0;
# for paragraphs
my $USER_SPECIFIED = 1;
my $RESULTS_SPECIFIED = 2;
my $search = join ' ', @ARGV;
if (not length $search) {
print
"Usage: c23std [-list] [-n#] [-section <section>] [search text] [-text <regex>] -- `section` must be in the form of `X.Y[pZ]` where `X` and `Y` are section/chapter and, optionally, `pZ` is paragraph. If both `section` and `search text` are specified, then the search space will be within the specified section. Use `-n <n>` to skip to the nth match. To list only the section numbers containing 'search text', add -list. To display specific text, use `-text <regex>`.\n";
exit 0;
}
my ($section, $paragraph, $section_specified, $paragraph_specified, $match, $list_only, $list_titles, $match_text);
$section_specified = 0;
$paragraph_specified = 0;
if ($search =~ s/-section\s*([A-Z0-9\.p]+)//i or $search =~ s/\b([A-Z0-9]+\.[0-9\.p]+)//i) {
$section = $1;
if ($section =~ s/p(\d+)//i) {
$paragraph = $1;
$paragraph_specified = $USER_SPECIFIED;
} else {
$paragraph = 1;
}
$section = "$section." if $section =~ m/^[A-Z0-9]+$/i;
$section_specified = 1;
}
if ($search =~ s/-n\s*(\d+)//) {
$match = $1;
} else {
$match = 1;
}
if ($search =~ s/-list//i) {
$list_only = 1;
$list_titles = 1; # Added here instead of removing -titles option
}
if ($search =~ s/-titles//i) {
$list_only = 1;
$list_titles = 1;
}
if ($search =~ s/-text ([^ ]+)//) {
$match_text = $1;
}
$search =~ s/^\s+//;
$search =~ s/\s+$//;
if (not defined $section) {
$section = "1.";
$paragraph = 1;
}
if ($list_only and not length $search) {
print "You must specify some search text to use with -list.\n";
exit 0;
}
open FH, "<n3047.out" or die "Could not open n3047: $!";
my @contents = <FH>;
close FH;
my $text = join '', @contents;
$text =~ s/\r//g;
my $result;
my $found_section = "";
my $found_section_title = "";
my $section_title;
my $found_paragraph;
my $found = 0;
my $matches = 0;
my $this_section;
my $comma = "";
if ($list_only) { $result = "Sections containing '$search':\n "; }
my $qsearch = quotemeta $search;
$qsearch =~ s/\\ / /g;
$qsearch =~ s/\s+/\\s+/g;
while ($text =~ m/^\s{0,4}([0-9A-Z]+\.[0-9\.]*)/msg) {
$this_section = $1;
print "----------------------------------\n" if $debug >= 2;
print "Processing section [$this_section]\n" if $debug;
if ($section_specified and $this_section !~ m/^$section/i) {
print "No section match, skipping.\n" if $debug >= 4;
next;
}
my $section_text;
if ($text =~ m/(.*?)^(?=\s{0,4}(?!FOOTNOTE)[0-9A-Z]+\.)/msg) { $section_text = $1; }
else {
print "No section text, end of file marker found.\n" if $debug >= 4;
last;
}
if ($section =~ /FOOTNOTE/i) {
$section_text =~ s/^\s{4}//ms;
$section_text =~ s/^\s{4}FOOTNOTE.*//msi;
$section_text =~ s/^\d.*//ms;
} elsif ($section_text =~ m/(.*?)$/msg) {
$section_title = $1 if length $1;
$section_title =~ s/^\s+//;
$section_title =~ s/\s+$//;
}
print "$this_section [$section_title]\n" if $debug >= 2;
while ($section_text =~ m/^(\d+)\s(.*?)^(?=\d)/msgic or $section_text =~ m/^(\d+)\s(.*)/msgi) {
my $p = $1;
my $t = $2;
print "paragraph $p: [$t]\n" if $debug >= 3;
if ($paragraph_specified == $USER_SPECIFIED and not length $search and $p == $paragraph) {
$result = $t if not $found;
$found_paragraph = $p;
$found_section = $this_section;
$found_section_title = $section_title;
$found = 1;
last;
}
if (length $search) {
eval {
if ($t =~ m/\b$qsearch\b/mis or $section_title =~ m/\b$qsearch\b/mis) {
$matches++;
if ($matches >= $match) {
if ($list_only) {
$result .= sprintf("%s%-15s", $comma, $this_section . "p" . $p);
$result .= " $section_title" if $list_titles;
$comma = ",\n ";
} else {
if (not $found) {
$result = $t;
$found_section = $this_section;
$found_section_title = $section_title;
$found_paragraph = $p;
$paragraph_specified = $RESULTS_SPECIFIED;
}
$found = 1;
}
}
}
};
if ($@) {
print "Error in search regex; you may need to escape characters such as *, ?, ., etc.\n";
exit 0;
}
}
}
last if $found && $paragraph_specified == $USER_SPECIFIED;
if ($paragraph_specified == $USER_SPECIFIED) {
if (length $search) { print "No such text '$search' in paragraph $paragraph of section $section of n3047.\n"; }
else { print "No such paragraph $paragraph in section $section of n3047.\n"; }
exit 0;
}
if (defined $section_specified and not length $search) {
$found = 1;
$found_section = $this_section;
$found_section_title = $section_title;
$found_paragraph = $paragraph;
$result = $section_text;
last;
}
}
if (not $found and $comma eq "") {
$search =~ s/\\s\+/ /g;
if ($section_specified) {
print "No such text '$search' found within section '$section' in C23 Draft Standard (n3047).\n" if length $search;
print "No such section '$section' in C23 Draft Standard (n3047).\n" if not length $search;
exit 0;
}
print "No such section '$section' in C23 Draft Standard (n3047).\n" if not length $search;
print "No such text '$search' found in C23 Draft Standard (n3047).\n" if length $search;
exit 0;
}
$result =~ s/$found_section_title// if length $found_section_title;
$result =~ s/^\s+//;
$result =~ s/\s+$//;
=cut
$result =~ s/\s+/ /g;
$result =~ s/[\n\r]/ /g;
=cut
if ($matches > 1 and not $list_only) { print "Displaying $match of $matches matches: "; }
if ($comma eq "") {
=cut
print $found_section;
print "p" . $found_paragraph if $paragraph_specified;
=cut
print "http://www.iso-9899.info/n3047.html\#$found_section";
print "p" . $found_paragraph if $paragraph_specified;
print "\n\n";
print "[", $found_section_title, "]\n\n" if length $found_section_title;
}
$result =~ s/\s*Constraints\s*$//;
$result =~ s/\s*Semantics\s*$//;
$result =~ s/\s*Description\s*$//;
$result =~ s/\s*Returns\s*$//;
$result =~ s/\s*Runtime-constraints\s*$//;
$result =~ s/\s*Recommended practice\s*$//;
if (length $match_text) {
my $match_result = $result;
$match_result =~ s/\s+/ /g;
my $match = eval {
my @matches = ($match_result =~ m/($match_text)/msp);
if (@matches > 1) {
shift @matches;
@matches = grep { length $_ } @matches;
}
return [${^PREMATCH}, join (' ... ', @matches), ${^POSTMATCH}];
};
if ($@) {
print "Error in -text option: $@\n";
exit 1;
}
$result = '';
if (length $match->[0]) {
$result = '... ';
}
if (length $match->[1]) {
$result .= $match->[1];
} else {
$result = "No text found for `$match_text`.";
}
if (length $match->[2]) {
$result .= ' ...';
}
}
print "$result\n";

553
applets/gencstd23.pl vendored Executable file
View File

@ -0,0 +1,553 @@
#!/usr/bin/env perl
# SPDX-FileCopyrightText: 2021 Pragmatic Software <pragma78@gmail.com>
# SPDX-License-Identifier: MIT
# ugly and hacked together
# pdftotext -layout -nopgbrk -y 75 -H 700 -W 1000 n3047.pdf n3047.txt
use warnings;
use strict;
use HTML::Entities;
use Data::Dumper;
my $debug = 100;
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");
my $input = "@ARGV";
if (not length $input) {
print STDERR "Usage: $0 <input .txt file>\n";
exit 1;
}
open FH, "<:encoding(UTF-8)", $input or die "Could not open $input: $!";
my @contents = <FH>;
close FH;
my $text = join '', @contents;
$text =~ s/\r//g;
my $section_title;
my $this_section = '';
my %sections;
my @last_section_number;
my @section_number;
my $last_section = '';
my @footnotes;
my $footnote = 0;
my $last_footnote = 0;
gen_data();
gen_txt();
#gen_html();
sub gen_data {
while ($text =~ m/^\f?\s{0,5}([0-9A-Z]+\.[0-9\.]*)/msg) {
$last_section = $this_section;
$this_section = $1;
@last_section_number = @section_number;
@section_number = split /\./, $this_section;
print STDERR "----------------------------------\n" if $debug;
print STDERR "Processing section [$this_section]\n" if $debug;
validate_section_difference();
my $section_text;
if ($text =~ m/(.*?)^(?=\f?\s{0,4}[0-9A-Z]+\.)/msg) {
$section_text = $1;
} else {
print STDERR "No section text, end of file marker found.\n";
last;
}
if ($section_text =~ m/(.*?)$/msg) {
if (length $1) {
$section_title = $1;
$section_title =~ s/^\s+//;
$section_title =~ s/\s+$//;
print STDERR "+++ set new section title: [$section_title]\n" if $debug;
} else {
print STDERR "--- no length for section title\n" if $debug;
}
} else {
print STDERR "--- no new section title\n" if $debug;
}
$sections{$this_section}{title} = $section_title;
($section_text) = $section_text =~ m/\s*(.*)/msg;
print STDERR "+++ $this_section [$section_title]\n" if $debug >= 2;
print STDERR "+++ section text: [$section_text]\n" if $debug >= 2;
if (not $section_text =~ m/^(?=\d+\s)/msg) {
print STDERR "??? no paragraphs in section\n" if $debug;
$section_text =~ s/~~//msg;
$section_text =~ s/ZZZ//msg;
$sections{$this_section}{text} = $section_text;
} else {
my $last_p = 0;
my $p = 0;
print STDERR "+++ getting paragraphs for $this_section\n" if $debug;
my $pretext;
if ($section_text =~ m/^(?!\f?\d+\s)/) {
($pretext) = $section_text =~ m/^(.*?)^(?=\f?\d+\s)/ms;
print STDERR "pretext captured: [$pretext]\n";
}
while ($section_text =~ m/^\f?(\d+)\s(.*?)^(?=\f?\d)/msgc or $section_text =~ m/^\f?(\d+)\s(.*)/msg) {
$last_p = $p;
$p = $1;
my $t = $2;
if (length $pretext) {
$t = "$pretext $t";
$pretext = '';
}
print STDERR "paragraph $p: [$t]\n" if $debug >= 3;
if ($p - $last_p != 1) { die "Paragraph diff invalid"; }
# check for footnotes
my @new_footnotes;
while ($t =~ m/^\s*(\d+)\)\s*(.*?)$/mgc) {
$footnote = $1;
my $footnote_text = "$2\n";
print STDERR "processing 1st footnote $footnote [last: $last_footnote]\n" if $debug;
print STDERR "footnote text [$footnote_text]\n" if $debug;
if ($last_footnote - $footnote != -1) {
die "Footnote diff invalid";
}
$last_footnote = $footnote;
push @new_footnotes, $footnote;
print STDERR "footnote $footnote text: [$footnote_text]\n" if $debug >= 4;
while ($t =~ m/^(.*?)$/mgc) {
my $line = $1;
print STDERR "processing [$line]\n" if $debug;
if ($line =~ m/^\f/mg) {
print STDERR "end of footnote $footnote\n";
last;
}
if (not length $line or $line =~ m/^\s+$/) {
print STDERR "skipping empty line\n";
next;
}
if ($line =~ m/^\s*(\d+)\)\s*(.*?)$/mg) {
print STDERR "----------------\n" if $debug >= 1;
print STDERR "+++ added footnote $footnote: [$footnote_text]\n" if $debug >= 1;
$footnotes[$footnote] = $footnote_text;
print STDERR "----------------\n" if $debug >= 1;
$footnote = $1;
$footnote_text = "$2\n";
print STDERR "processing 2nd footnote $footnote [last: $last_footnote]\n" if $debug;
if ($last_footnote - $footnote != -1) {
die "Footnote diff invalid";
}
$last_footnote = $footnote;
push @new_footnotes, $footnote;
print STDERR "footnote $footnote text: [$footnote_text]\n" if $debug >= 4;
next;
}
if (not length $line or $line =~ m/^\s+$/) {
print STDERR "footnote $footnote: skipping empty line\n";
} else {
$footnote_text .= "$line\n";
print STDERR "footnote $footnote text: appending [$line]\n" if $debug >= 3;
}
}
print STDERR "----------------\n" if $debug >= 1;
print STDERR "+++ added footnote $footnote: [$footnote_text]\n" if $debug >= 1;
$footnotes[$footnote] = $footnote_text;
print STDERR "----------------\n" if $debug >= 1;
}
# strip footnotes from section text
foreach my $fn (@new_footnotes) {
my $sub = quotemeta $footnotes[$fn];
$sub =~ s/(\\ )+/\\s*/g;
#print STDERR "subbing out [$footnote) $sub]\n";
$t =~ s/^\s*$fn\)\s*$sub//ms;
}
$t =~ s/\f//g;
$t =~ s/~~//msg;
$t =~ s/ZZZ//msg;
$sections{$this_section . "p$p"}{text} = "$p $t";
print STDERR "+++ added ${this_section}p$p:\n$p $t\n" if $debug;
}
print STDERR "+++ paragraphs done\n" if $debug;
}
}
}
sub bysection {
my $inverse = 1;
my ($a1, $p1) = split /p/, $a;
my ($b1, $p2) = split /p/, $b;
$p1 //= 0;
$p2 //= 0;
my @k1 = split /\./, $a1;
my @k2 = split /\./, $b1;
my @r;
if ($#k2 > $#k1) {
my @tk = @k1;
@k1 = @k2;
@k2 = @tk;
my $tp = $p1;
$p1 = $p2;
$p2 = $tp;
$inverse = -1;
} else {
$inverse = 1;
}
my $i = 0;
for (; $i < $#k1 + 1; $i++) {
if (not defined $k2[$i]) { $r[$i] = 1; }
else {
if ($i == 0) { $r[$i] = $k1[$i] cmp $k2[$i]; }
else { $r[$i] = $k1[$i] <=> $k2[$i]; }
}
}
$r[$i] = ($p1 <=> $p2);
my $ret = 0;
foreach my $rv (@r) {
if ($rv != 0) {
$ret = $rv;
last;
}
}
return $ret * $inverse;
}
sub gen_txt {
my $footer = "";
my $paren = 0;
my $section_head;
my $section_title;
foreach my $this_section (sort bysection keys %sections) {
print STDERR "writing section $this_section\n" if $debug;
if (not $this_section =~ m/p/) {
print " $this_section $sections{$this_section}{title}\n";
$section_head = $this_section;
$section_title = $sections{$this_section}{title};
}
my $section_text = $sections{$this_section}{text};
while ($section_text =~ m/^(.*?)$/msg) {
my $line = $1;
print STDERR "paren reset, line [$line]\n" if $debug >= 8;
my $number = "";
while ($line =~ m/(.)/g) {
my $c = $1;
if ($c =~ m/[0-9]/) { $number .= $c; }
elsif ($c eq ' ') { $number = ""; }
elsif ($c eq '(') {
$paren++;
print STDERR "got $paren (\n" if $debug >= 8;
} elsif ($c eq ')') {
$paren--;
print STDERR "got $paren )\n" if $debug >= 8;
if ($paren == -1) {
if (length $number and defined $footnotes[$number]) {
print STDERR "Got footnote $number here!\n" if $debug;
$footer .= "\nFOOTNOTE.$number) $footnotes[$number]\n";
}
$paren = 0;
}
} else {
$number = "";
}
}
}
print "$section_text\n";
if (length $footer) {
print $footer;
$footer = "";
}
}
}
sub make_link {
my ($text) = @_;
if (exists $sections{$text}) {
return "<a href='#$text'>$text</a>";
} else {
return $text;
}
}
sub linkify {
my ($text) = @_;
$text =~ s/\b((?:[A-Z]|[1-9])\.(?:\.?[0-9]+)*)\b/make_link($1)/ge;
return $text;
}
sub gen_html {
print "<html>\n<body>\n";
my @paragraphs = sort bysection keys %sections;
foreach my $section (qw/ABSTRACT. CONTENTS. FOREWORD. INTRO./) {
foreach my $paragraph (@paragraphs) {
if ($paragraph =~ m/^$section/) {
write_html_section($paragraph);
print STDERR "delete section [$paragraph]\n";
delete $sections{$paragraph};
}
}
delete $sections{$section};
}
foreach my $section (sort bysection keys %sections) {
write_html_section($section);
}
print "\n</body>\n</html>\n";
}
sub write_html_section {
my ($this_section) = @_;
my $footer = "";
my $paren = 0;
print STDERR "writing section [$this_section]\n" if $debug;
print "<a name='", encode_entities($this_section), "'></a>\n";
if (not $this_section =~ m/p/) {
print "<hr>\n<h3>", encode_entities($this_section), " [", encode_entities($sections{$this_section}{title}), "]</h3>\n";
}
my $section_text = $sections{$this_section}{text};
next if not length $section_text;
$section_text = encode_entities $section_text;
while ($section_text =~ m/^(.*?)$/msg) {
my $line = $1;
print STDERR "paren reset, line [$line]\n" if $debug >= 8;
my $number = "";
while ($line =~ m/(.)/g) {
my $c = $1;
if ($c =~ m/[0-9]/) { $number .= $c; }
elsif ($c eq ' ') { $number = ""; }
elsif ($c eq '(') {
$paren++;
print STDERR "got $paren (\n" if $debug >= 8;
} elsif ($c eq ')') {
$paren--;
print STDERR "got $paren )\n" if $debug >= 8;
if ($paren == -1) {
if (length $number and defined $footnotes[$number]) {
print STDERR "Got footnote $number here!\n" if $debug;
$section_text =~ s/$number\)/<a href='#FOOTNOTE.$number'><sup>[$number]<\/sup><\/a>/;
$footer .= "<a name='FOOTNOTE.$number'>\n<pre><i><b>Footnote $number)</b> ".encode_entities($footnotes[$number])."</i></pre>\n</a>\n";
}
$paren = 0;
}
} else {
$number = "";
}
}
}
$section_text = linkify($section_text);
$footer = linkify($footer);
if ($this_section eq 'CONTENTS.') {
$section_text =~ s/Annex ([A-Z])/<a href='#$1.'>Annex $1<\/a>/mg;
$section_text =~ s/^(\d+\.)/<a href='#$1'>$1<\/a>/mg;
$section_text =~ s/^Foreword/<a href='#FOREWORD.'>Foreword<\/a>/mg;
$section_text =~ s/^Introduction/<a href='#INTRO.'>Introduction<\/a>/mg;
}
print "<pre>", $section_text, "</pre>\n";
if (length $footer) {
print $footer;
$footer = '';
}
}
# this mess of code verifies that two given section numbers are within 1 unit of distance of each other
# this ensures that no sections were skipped due to misparses
sub validate_section_difference {
if (@last_section_number && $last_section_number[0] !~ /(?:ABSTRACT|CONTENTS|FOREWORD|INTRO)/) {
my $fail = 0;
my $skip = 0;
print STDERR "comparing last section ", join('.', @last_section_number), " vs ", join('.', @section_number), "\n";
if (@section_number > @last_section_number) {
if (@section_number - @last_section_number != 1) {
$fail = 1;
print STDERR "size difference too great\n";
}
unless ($fail) {
if ($section_number[0] =~ /^[A-Z]+$/) {
if ($last_section_number[0] =~ /^[A-Z]+$/) {
for (my $i = 0; $i < @last_section_number; $i++) {
if ($section_number[$i] ne $last_section_number[$i]) {
$fail = 1;
print STDERR "digits different\n";
last;
}
}
} else {
print STDERR "disregarding section namespace change from number to alphabet\n";
$skip = 1;
}
} else {
for (my $i = 0; $i < @last_section_number; $i++) {
if ($section_number[$i] ne $last_section_number[$i]) {
$fail = 1;
print STDERR "digits different\n";
last;
}
}
}
}
if (!$skip && ($fail || $section_number[$#section_number] != 1)) {
print STDERR "difference too great ", join('.', @last_section_number), " vs ", join('.', @section_number), "\n";
die;
}
} elsif (@last_section_number > @section_number) {
if ($section_number[0] =~ /^[A-Z]+$/) {
if ($last_section_number[0] =~ /^[A-Z]+$/) {
if ($section_number[0] ne $last_section_number[0]) {
if (ord($section_number[0]) - ord($last_section_number[0]) != 1) {
$fail = 1;
print STDERR "letter difference too great\n";
} else {
$skip = 1;
print STDERR "letter difference good\n";
}
}
unless ($fail) {
for (my $i = 1; $i < @section_number - 1; $i++) {
if ($section_number[$i] != $last_section_number[$i]) {
if ($section_number[$i] - $last_section_number[$i] != 1) {
print STDERR "digit difference too great\n";
$fail = 1;
}
last;
}
}
}
} else {
print STDERR "disregarding section namespace change from number to alphabet\n";
$skip = 1;
}
} else {
for (my $i = 0; $i < @section_number - 1; $i++) {
if ($section_number[$i] != $last_section_number[$i]) {
if ($section_number[$i] - $last_section_number[$i] != 1) {
print STDERR "digit difference too great\n";
$fail = 1;
}
last;
}
}
}
if (!$skip && ($fail || $section_number[$#section_number] - $last_section_number[$#section_number] != 1)) {
print STDERR "difference too great ", join('.', @last_section_number), " vs ", join('.', @section_number), "\n";
die;
}
} else {
my @rev_last = reverse @last_section_number;
my @rev_curr = reverse @section_number;
if ($rev_curr[$#rev_curr] =~ /^[A-Z]+$/) {
if ($rev_last[$#rev_last] =~ /^[A-Z]+$/) {
if ($rev_curr[$#rev_curr] ne $rev_last[$#rev_last]) {
if (ord($rev_curr[$#rev_curr]) - ord($rev_last[$#rev_last]) != 1) {
$fail = 1;
print STDERR "letter difference too great\n";
}
}
for (my $i = 1; $i < @rev_curr; $i++) {
if ($rev_curr[$i] != $rev_last[$i]) {
if ($rev_curr[$i] - $rev_last[$i] > 1) {
$fail = 1;
}
last;
}
}
} else {
print STDERR "disregarding section namespace change from number to alphabet\n";
$skip = 1;
}
} else {
for (my $i = 0; $i < @rev_curr; $i++) {
if ($rev_curr[$i] != $rev_last[$i]) {
if ($rev_curr[$i] - $rev_last[$i] > 1) {
$fail = 1;
}
last;
}
}
}
if (!$skip && $fail) {
print STDERR "difference too great ", join('.', @last_section_number), " vs ", join('.', @section_number), "\n";
die;
}
}
}
}

48810
applets/n3047.html vendored Normal file

File diff suppressed because it is too large Load Diff

42805
applets/n3047.out vendored Normal file

File diff suppressed because it is too large Load Diff

36932
applets/n3047.txt vendored Normal file

File diff suppressed because it is too large Load Diff

BIN
data/factoids.sqlite3 vendored

Binary file not shown.

View File

@ -25,7 +25,7 @@ use PBot::Imports;
# These are set by the /misc/update_version script
use constant {
BUILD_NAME => "PBot",
BUILD_REVISION => 4565,
BUILD_REVISION => 4566,
BUILD_DATE => "2022-08-07",
};