mirror of
https://github.com/pragma-/pbot.git
synced 2024-11-09 13:39:34 +01:00
311 lines
8.3 KiB
Perl
Executable File
Vendored
311 lines
8.3 KiB
Perl
Executable File
Vendored
#!/usr/bin/env perl
|
|
|
|
# SPDX-FileCopyrightText: 2021 Pragmatic Software <pragma78@gmail.com>
|
|
# SPDX-License-Identifier: MIT
|
|
|
|
use warnings;
|
|
use strict;
|
|
|
|
use Getopt::Long qw/GetOptionsFromArray/;
|
|
use Encode;
|
|
|
|
my %standards = (
|
|
C99 => 'n1256.out',
|
|
C11 => 'n1570.out',
|
|
C23 => 'n3047.out',
|
|
);
|
|
|
|
binmode(STDOUT, ":utf8");
|
|
binmode(STDERR, ":utf8");
|
|
|
|
@ARGV = map { decode('UTF-8', $_, 1) } @ARGV;
|
|
|
|
my ($std, $search, $section, $paragraph, $debug);
|
|
my ($match, $list_only, $match_text);
|
|
|
|
{
|
|
my $opt_error;
|
|
local $SIG{__WARN__} = sub {
|
|
$opt_error = shift;
|
|
chomp $opt_error;
|
|
};
|
|
|
|
Getopt::Long::Configure("bundling_override");
|
|
|
|
GetOptionsFromArray(
|
|
\@ARGV,
|
|
'std=s' => \$std,
|
|
'section|s=s' => \$section,
|
|
'num|n=i' => \$match,
|
|
'text|t=s' => \$match_text,
|
|
'list|l' => \$list_only,
|
|
'debug|d=i' => \$debug,
|
|
);
|
|
|
|
$std //= 'C99';
|
|
$section //= '';
|
|
$match //= 1;
|
|
$list_only //= 0;
|
|
$debug //= 0;
|
|
|
|
$std = uc $std;
|
|
|
|
if (not exists $standards{$std}) {
|
|
print "Invalid -std=$std selected. Valid -std= values are: ", join(', ', sort keys %standards), "\n";
|
|
exit 1;
|
|
}
|
|
|
|
my $usage = "Usage: $std [-list] [-n#] [-section <section>] [search text] [-text <regex>] -- `section` must be in the form of `X.Y[pZ]` where `X` and `Y` are section/chapter and, optionally, `pZ` is paragraph. If both `section` and `search text` are specified, then the search space will be within the specified section. Use `-n <n>` to skip to the nth match. To list only the section numbers containing 'search text', add -list. To display specific text, use `-text <regex>`.\n";
|
|
|
|
if ($opt_error) {
|
|
print "$opt_error: $usage\n";
|
|
exit 1;
|
|
}
|
|
|
|
$search = "@ARGV";
|
|
|
|
if (!length $section && !length $search) {
|
|
print $usage;
|
|
exit 1;
|
|
}
|
|
}
|
|
|
|
# for paragraphs
|
|
use constant {
|
|
USER_SPECIFIED => 1,
|
|
RESULTS_SPECIFIED => 2,
|
|
};
|
|
|
|
my $section_specified = length $section ? 1 : 0;
|
|
my $paragraph_specified = 0;
|
|
|
|
if ($search =~ s/\b([A-Z0-9]+\.[0-9.p]*)//i) {
|
|
$section = $1;
|
|
|
|
if ($section =~ s/p(\d+)//i) {
|
|
$paragraph = $1;
|
|
$paragraph_specified = USER_SPECIFIED;
|
|
} else {
|
|
$paragraph = 1;
|
|
}
|
|
|
|
$section_specified = 1;
|
|
}
|
|
|
|
# add trailing dot if missing
|
|
if ($section =~ /^[A-Z0-9]+$/i) {
|
|
$section .= '.';
|
|
}
|
|
|
|
$search =~ s/^\s+//;
|
|
$search =~ s/\s+$//;
|
|
|
|
if (not length $section) {
|
|
$section = "1.";
|
|
$paragraph = 1;
|
|
}
|
|
|
|
if ($list_only and not length $search) {
|
|
print "You must specify some search text to use with -list.\n";
|
|
exit 1;
|
|
}
|
|
|
|
open FH, "<:encoding(UTF-8)", $standards{$std} or die "Could not open $standards{$std}: $!";
|
|
my @contents = <FH>;
|
|
close FH;
|
|
|
|
my $text = join '', @contents;
|
|
$text =~ s/\r//g;
|
|
|
|
my $std_name = $standards{$std};
|
|
$std_name =~ s/(.*)\..*$/$1/;
|
|
|
|
my $result;
|
|
my $found_section = "";
|
|
my $found_section_title = "";
|
|
my $section_title;
|
|
my $found_paragraph;
|
|
my $found = 0;
|
|
my $matches = 0;
|
|
my $this_section;
|
|
my $comma = "";
|
|
|
|
if ($list_only) { $result = "Sections containing '$search':\n "; }
|
|
|
|
my $qsearch = quotemeta $search;
|
|
$qsearch =~ s/\\ / /g;
|
|
$qsearch =~ s/\s+/\\s+/g;
|
|
|
|
while ($text =~ m/^([0-9A-Z]+\.[0-9.]*)/msgi) {
|
|
$this_section = $1;
|
|
|
|
print "----------------------------------\n" if $debug >= 2;
|
|
print "Processing section [$this_section]\n" if $debug;
|
|
|
|
if ($section_specified and $this_section !~ m/^\Q$section/i) {
|
|
print "No section match, skipping.\n" if $debug >= 4;
|
|
next;
|
|
}
|
|
|
|
my $section_text;
|
|
|
|
if ($text =~ /(.*?)^(?=(?!Footnote)[0-9A-Z]+\.)/msg) {
|
|
$section_text = $1;
|
|
} else {
|
|
print "No section text, end of file marker found.\n" if $debug >= 4;
|
|
last;
|
|
}
|
|
|
|
if ($section =~ /Footnote/i) {
|
|
$section_text =~ s/^Footnote.*//msi;
|
|
$section_text =~ s/^\d.*//ms;
|
|
$section_text = $this_section . $section_text;
|
|
} elsif ($section_text =~ m/(.*?)$/msg) {
|
|
$section_title = $1 if length $1;
|
|
$section_title =~ s/^\s+//;
|
|
$section_title =~ s/\s+$//;
|
|
}
|
|
|
|
print "$this_section [$section_title]\n" if $debug >= 2;
|
|
|
|
while ($section_text =~ m/^(\d+)\s(.*?)^(?=\d)/msgic or $section_text =~ m/^(\d+)\s(.*)/msgi) {
|
|
my $p = $1;
|
|
my $t = $2;
|
|
|
|
print "paragraph $p: [$t]\n" if $debug >= 3;
|
|
|
|
if ($paragraph_specified == USER_SPECIFIED and not length $search and $p == $paragraph) {
|
|
$result = $t if not $found;
|
|
$found_paragraph = $p;
|
|
$found_section = $this_section;
|
|
$found_section_title = $section_title;
|
|
$found = 1;
|
|
last;
|
|
}
|
|
|
|
if (length $search) {
|
|
eval {
|
|
if ($t =~ m/\b$qsearch\b/mis or $section_title =~ m/\b$qsearch\b/mis) {
|
|
$matches++;
|
|
if ($matches >= $match) {
|
|
if ($list_only) {
|
|
$result .= sprintf("%s%-15s", $comma, $this_section . "p" . $p);
|
|
$result .= " $section_title";
|
|
$comma = ",\n ";
|
|
} else {
|
|
if (not $found) {
|
|
$result = $t;
|
|
$found_section = $this_section;
|
|
$found_section_title = $section_title;
|
|
$found_paragraph = $p;
|
|
$paragraph_specified = RESULTS_SPECIFIED;
|
|
}
|
|
$found = 1;
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
if (my $err = $@) {
|
|
$err =~ s/.* at .*$//;
|
|
print "Error in search regex: $err\n";
|
|
exit 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
last if $found && $paragraph_specified == USER_SPECIFIED;
|
|
|
|
if ($paragraph_specified == USER_SPECIFIED) {
|
|
if (length $search) {
|
|
print "No such text '$search' in paragraph $paragraph of section $section of $std_name.\n";
|
|
} else {
|
|
print "No such paragraph $paragraph in section $section of $std_name.\n";
|
|
}
|
|
exit 1;
|
|
}
|
|
|
|
if (defined $section_specified and not length $search) {
|
|
$found = 1;
|
|
$found_section = $this_section;
|
|
$found_section_title = $section_title;
|
|
$found_paragraph = $paragraph;
|
|
$result = $section_text;
|
|
last;
|
|
}
|
|
}
|
|
|
|
if (not $found and $comma eq "") {
|
|
$search =~ s/\\s\+/ /g;
|
|
if (length $search) {
|
|
print "No such text '$search' found ";
|
|
|
|
if ($section_specified) {
|
|
print "within section '$section' ";
|
|
}
|
|
} else {
|
|
print "No such section '$section' ";
|
|
}
|
|
print "in $std Draft Standard ($std_name).\n";
|
|
exit 1;
|
|
}
|
|
|
|
$result =~ s/\Q$found_section_title// if length $found_section_title;
|
|
$result =~ s/^\s+//;
|
|
$result =~ s/\s+$//;
|
|
|
|
if ($matches > 1 and not $list_only) { print "Displaying $match of $matches matches: "; }
|
|
|
|
if ($comma eq "") {
|
|
$found_section =~ s/Footnote/FOOTNOTE/;
|
|
print "http://www.iso-9899.info/$std_name.html\#$found_section";
|
|
print "p" . $found_paragraph if $paragraph_specified;
|
|
print "\n\n";
|
|
print "[", $found_section_title, "]\n\n" if length $found_section_title;
|
|
}
|
|
|
|
$result =~ s/\s*Constraints\s*$//;
|
|
$result =~ s/\s*Semantics\s*$//;
|
|
$result =~ s/\s*Description\s*$//;
|
|
$result =~ s/\s*Returns\s*$//;
|
|
$result =~ s/\s*Runtime-constraints\s*$//;
|
|
$result =~ s/\s*Recommended practice\s*$//;
|
|
$result =~ s/Footnote\.(\d)/Footnote $1/g;
|
|
|
|
if (length $match_text) {
|
|
my $match_result = $result;
|
|
$match_result =~ s/\s+/ /g;
|
|
|
|
my $match = eval {
|
|
my @matches = ($match_result =~ m/($match_text)/msp);
|
|
if (@matches > 1) {
|
|
shift @matches;
|
|
@matches = grep { length $_ } @matches;
|
|
}
|
|
return [${^PREMATCH}, join (' ... ', @matches), ${^POSTMATCH}];
|
|
};
|
|
|
|
if ($@) {
|
|
print "Error in -text option: $@\n";
|
|
exit 1;
|
|
}
|
|
|
|
$result = '';
|
|
|
|
if (length $match->[0]) {
|
|
$result = '... ';
|
|
}
|
|
|
|
if (length $match->[1]) {
|
|
$result .= $match->[1];
|
|
} else {
|
|
$result = "No text found for `$match_text`.";
|
|
}
|
|
|
|
if (length $match->[2]) {
|
|
$result .= ' ...';
|
|
}
|
|
}
|
|
|
|
print "$result\n";
|