2021-02-07 23:37:12 +01:00
#!/usr/bin/env perl
2012-01-16 16:34:32 +01:00
2023-02-21 06:31:52 +01:00
# SPDX-FileCopyrightText: 2010-2023 Pragmatic Software <pragma78@gmail.com>
2021-07-11 00:00:22 +02:00
# SPDX-License-Identifier: MIT
2017-03-05 22:33:31 +01:00
2012-01-16 16:34:32 +01:00
use warnings ;
use strict ;
2022-08-12 21:28:58 +02:00
use Getopt::Long qw/GetOptionsFromArray/ ;
use Encode ;
my % standards = (
C99 = > 'n1256.out' ,
C11 = > 'n1570.out' ,
C23 = > 'n3047.out' ,
) ;
2022-08-12 22:02:00 +02:00
binmode ( STDOUT , ":utf8" ) ;
binmode ( STDERR , ":utf8" ) ;
2022-08-12 21:28:58 +02:00
@ ARGV = map { decode ( 'UTF-8' , $ _ , 1 ) } @ ARGV ;
my ( $ std , $ search , $ section , $ paragraph , $ debug ) ;
my ( $ match , $ list_only , $ match_text ) ;
{
my $ opt_error ;
local $ SIG { __WARN__ } = sub {
$ opt_error = shift ;
chomp $ opt_error ;
} ;
Getopt::Long:: Configure ( "bundling_override" ) ;
GetOptionsFromArray (
\ @ ARGV ,
'std=s' = > \ $ std ,
'section|s=s' = > \ $ section ,
'num|n=i' = > \ $ match ,
'text|t=s' = > \ $ match_text ,
'list|l' = > \ $ list_only ,
'debug|d=i' = > \ $ debug ,
) ;
$ std // = 'C99' ;
$ section // = '' ;
$ match // = 1 ;
$ list_only // = 0 ;
$ debug // = 0 ;
$ std = uc $ std ;
if ( not exists $ standards { $ std } ) {
print "Invalid -std=$std selected. Valid -std= values are: " , join ( ', ' , sort keys % standards ) , "\n" ;
exit 1 ;
}
2012-01-16 16:34:32 +01:00
2022-08-12 21:28:58 +02:00
my $ usage = "Usage: $std [-list] [-n#] [-section <section>] [search text] [-text <regex>] -- `section` must be in the form of `X.Y[pZ]` where `X` and `Y` are section/chapter and, optionally, `pZ` is paragraph. If both `section` and `search text` are specified, then the search space will be within the specified section. Use `-n <n>` to skip to the nth match. To list only the section numbers containing 'search text', add -list. To display specific text, use `-text <regex>`.\n" ;
if ( $ opt_error ) {
print "$opt_error: $usage\n" ;
exit 1 ;
}
2012-01-16 16:34:32 +01:00
2022-08-12 21:28:58 +02:00
$ search = "@ARGV" ;
2012-01-16 16:34:32 +01:00
2022-08-12 21:28:58 +02:00
if ( ! length $ section && ! length $ search ) {
print $ usage ;
exit 1 ;
}
2012-01-16 16:34:32 +01:00
}
2022-08-12 21:28:58 +02:00
# for paragraphs
use constant {
USER_SPECIFIED = > 1 ,
RESULTS_SPECIFIED = > 2 ,
} ;
2012-01-16 16:34:32 +01:00
2022-08-12 21:28:58 +02:00
my $ section_specified = length $ section ? 1 : 0 ;
my $ paragraph_specified = 0 ;
2012-01-16 16:34:32 +01:00
2022-08-12 21:28:58 +02:00
if ( $ search =~ s/\b([A-Z0-9]+\.[0-9.p]*)//i ) {
2020-02-15 23:38:32 +01:00
$ section = $ 1 ;
2012-01-16 16:34:32 +01:00
2020-02-15 23:38:32 +01:00
if ( $ section =~ s/p(\d+)//i ) {
$ paragraph = $ 1 ;
2022-08-12 21:28:58 +02:00
$ paragraph_specified = USER_SPECIFIED ;
2020-02-15 23:38:32 +01:00
} else {
$ paragraph = 1 ;
}
2012-01-16 16:34:32 +01:00
2020-02-15 23:38:32 +01:00
$ section_specified = 1 ;
2012-01-16 16:34:32 +01:00
}
2022-08-12 21:28:58 +02:00
# add trailing dot if missing
if ( $ section =~ /^[A-Z0-9]+$/i ) {
$ section . = '.' ;
2022-04-04 19:42:42 +02:00
}
2012-01-16 16:34:32 +01:00
$ search =~ s/^\s+// ;
$ search =~ s/\s+$// ;
2022-08-12 21:28:58 +02:00
if ( not length $ section ) {
2020-02-15 23:38:32 +01:00
$ section = "1." ;
$ paragraph = 1 ;
2012-01-16 16:34:32 +01:00
}
2019-05-28 18:19:42 +02:00
if ( $ list_only and not length $ search ) {
2020-02-15 23:38:32 +01:00
print "You must specify some search text to use with -list.\n" ;
2022-08-12 21:28:58 +02:00
exit 1 ;
2012-01-16 16:34:32 +01:00
}
2022-08-12 21:28:58 +02:00
open FH , "<:encoding(UTF-8)" , $ standards { $ std } or die "Could not open $standards{$std}: $!" ;
2012-01-16 16:34:32 +01:00
my @ contents = <FH> ;
close FH ;
my $ text = join '' , @ contents ;
$ text =~ s/\r//g ;
2022-08-12 21:28:58 +02:00
my $ std_name = $ standards { $ std } ;
$ std_name =~ s/(.*)\..*$/$1/ ;
2012-01-16 16:34:32 +01:00
my $ result ;
2020-02-15 23:38:32 +01:00
my $ found_section = "" ;
2012-01-16 16:34:32 +01:00
my $ found_section_title = "" ;
my $ section_title ;
my $ found_paragraph ;
2020-02-15 23:38:32 +01:00
my $ found = 0 ;
2012-01-16 16:34:32 +01:00
my $ matches = 0 ;
my $ this_section ;
my $ comma = "" ;
2020-02-15 23:38:32 +01:00
if ( $ list_only ) { $ result = "Sections containing '$search':\n " ; }
2012-01-16 16:34:32 +01:00
2014-12-30 08:24:54 +01:00
my $ qsearch = quotemeta $ search ;
$ qsearch =~ s/\\ / /g ;
$ qsearch =~ s/\s+/\\s+/g ;
2012-01-16 16:34:32 +01:00
2022-08-12 22:02:00 +02:00
while ( $ text =~ m/^([0-9A-Z]+\.[0-9.]*)/msgi ) {
2020-02-15 23:38:32 +01:00
$ this_section = $ 1 ;
print "----------------------------------\n" if $ debug >= 2 ;
print "Processing section [$this_section]\n" if $ debug ;
2022-08-12 22:02:00 +02:00
if ( $ section_specified and $ this_section !~ m/^\Q$section/i ) {
2020-02-15 23:38:32 +01:00
print "No section match, skipping.\n" if $ debug >= 4 ;
next ;
}
my $ section_text ;
2022-08-26 20:18:20 +02:00
if ( $ text =~ /(.*?)^(?=(?!Footnote)[0-9A-Z]+\.)/msg ) {
2022-08-12 21:28:58 +02:00
$ section_text = $ 1 ;
} else {
2020-02-15 23:38:32 +01:00
print "No section text, end of file marker found.\n" if $ debug >= 4 ;
last ;
2012-01-16 16:34:32 +01:00
}
2022-08-12 21:28:58 +02:00
if ( $ section =~ /Footnote/i ) {
2022-08-26 20:18:20 +02:00
$ section_text =~ s/^Footnote.*//msi ;
2020-02-15 23:38:32 +01:00
$ section_text =~ s/^\d.*//ms ;
2022-08-12 22:02:00 +02:00
$ section_text = $ this_section . $ section_text ;
2020-02-15 23:38:32 +01:00
} elsif ( $ section_text =~ m/(.*?)$/msg ) {
$ section_title = $ 1 if length $ 1 ;
$ section_title =~ s/^\s+// ;
$ section_title =~ s/\s+$// ;
}
print "$this_section [$section_title]\n" if $ debug >= 2 ;
while ( $ section_text =~ m/^(\d+)\s(.*?)^(?=\d)/msgic or $ section_text =~ m/^(\d+)\s(.*)/msgi ) {
my $ p = $ 1 ;
my $ t = $ 2 ;
print "paragraph $p: [$t]\n" if $ debug >= 3 ;
2022-08-12 21:28:58 +02:00
if ( $ paragraph_specified == USER_SPECIFIED and not length $ search and $ p == $ paragraph ) {
2020-02-15 23:38:32 +01:00
$ result = $ t if not $ found ;
$ found_paragraph = $ p ;
$ found_section = $ this_section ;
$ found_section_title = $ section_title ;
$ found = 1 ;
last ;
}
if ( length $ search ) {
eval {
if ( $ t =~ m/\b$qsearch\b/mis or $ section_title =~ m/\b$qsearch\b/mis ) {
$ matches + + ;
if ( $ matches >= $ match ) {
if ( $ list_only ) {
$ result . = sprintf ( "%s%-15s" , $ comma , $ this_section . "p" . $ p ) ;
2022-08-12 21:28:58 +02:00
$ result . = " $section_title" ;
2020-02-15 23:38:32 +01:00
$ comma = ",\n " ;
} else {
if ( not $ found ) {
$ result = $ t ;
$ found_section = $ this_section ;
$ found_section_title = $ section_title ;
$ found_paragraph = $ p ;
2022-08-12 21:28:58 +02:00
$ paragraph_specified = RESULTS_SPECIFIED ;
2020-02-15 23:38:32 +01:00
}
$ found = 1 ;
}
}
}
} ;
2022-08-12 21:28:58 +02:00
if ( my $ err = $@ ) {
$ err =~ s/.* at .*$// ;
print "Error in search regex: $err\n" ;
2020-02-15 23:38:32 +01:00
exit 0 ;
2012-01-16 16:34:32 +01:00
}
}
2020-02-15 23:38:32 +01:00
}
2022-08-12 21:28:58 +02:00
last if $ found && $ paragraph_specified == USER_SPECIFIED ;
2012-01-16 16:34:32 +01:00
2022-08-12 21:28:58 +02:00
if ( $ paragraph_specified == USER_SPECIFIED ) {
if ( length $ search ) {
print "No such text '$search' in paragraph $paragraph of section $section of $std_name.\n" ;
} else {
print "No such paragraph $paragraph in section $section of $std_name.\n" ;
}
exit 1 ;
2012-01-16 16:34:32 +01:00
}
2020-02-15 23:38:32 +01:00
if ( defined $ section_specified and not length $ search ) {
$ found = 1 ;
$ found_section = $ this_section ;
$ found_section_title = $ section_title ;
$ found_paragraph = $ paragraph ;
$ result = $ section_text ;
last ;
2014-02-22 03:42:25 +01:00
}
2012-01-16 16:34:32 +01:00
}
2019-05-28 18:19:42 +02:00
if ( not $ found and $ comma eq "" ) {
2020-02-15 23:38:32 +01:00
$ search =~ s/\\s\+/ /g ;
2022-08-12 21:28:58 +02:00
if ( length $ search ) {
print "No such text '$search' found " ;
2012-01-16 16:34:32 +01:00
2022-08-12 21:28:58 +02:00
if ( $ section_specified ) {
print "within section '$section' " ;
}
} else {
print "No such section '$section' " ;
}
print "in $std Draft Standard ($std_name).\n" ;
exit 1 ;
2012-01-16 16:34:32 +01:00
}
2022-08-12 22:02:00 +02:00
$ result =~ s/\Q$found_section_title// if length $ found_section_title ;
2012-01-16 16:34:32 +01:00
$ result =~ s/^\s+// ;
$ result =~ s/\s+$// ;
2020-02-15 23:38:32 +01:00
if ( $ matches > 1 and not $ list_only ) { print "Displaying $match of $matches matches: " ; }
2012-01-16 16:34:32 +01:00
2019-05-28 18:19:42 +02:00
if ( $ comma eq "" ) {
2022-12-26 03:07:04 +01:00
$ found_section =~ s/Footnote/FOOTNOTE/ ;
2022-08-12 22:02:00 +02:00
print "http://www.iso-9899.info/$std_name.html\#$found_section" ;
print "p" . $ found_paragraph if $ paragraph_specified ;
print "\n\n" ;
print "[" , $ found_section_title , "]\n\n" if length $ found_section_title ;
2012-01-16 16:34:32 +01:00
}
2012-11-02 23:08:20 +01:00
$ result =~ s/\s*Constraints\s*$// ;
$ result =~ s/\s*Semantics\s*$// ;
$ result =~ s/\s*Description\s*$// ;
$ result =~ s/\s*Returns\s*$// ;
$ result =~ s/\s*Runtime-constraints\s*$// ;
$ result =~ s/\s*Recommended practice\s*$// ;
2022-08-12 22:02:00 +02:00
$ result =~ s/Footnote\.(\d)/Footnote $1/g ;
2012-11-02 23:08:20 +01:00
2022-04-04 19:42:42 +02:00
if ( length $ match_text ) {
my $ match_result = $ result ;
$ match_result =~ s/\s+/ /g ;
my $ match = eval {
2022-07-04 18:52:50 +02:00
my @ matches = ( $ match_result =~ m/($match_text)/ms p ) ;
if ( @ matches > 1 ) {
shift @ matches ;
@ matches = grep { length $ _ } @ matches ;
}
return [ $ { ^ PREMATCH } , join ( ' ... ' , @ matches ) , $ { ^ POSTMATCH } ] ;
2022-04-04 19:42:42 +02:00
} ;
if ( $@ ) {
print "Error in -text option: $@\n" ;
exit 1 ;
}
$ result = '' ;
if ( length $ match - > [ 0 ] ) {
$ result = '... ' ;
}
if ( length $ match - > [ 1 ] ) {
$ result . = $ match - > [ 1 ] ;
} else {
$ result = "No text found for `$match_text`." ;
}
if ( length $ match - > [ 2 ] ) {
$ result . = ' ...' ;
}
}
2012-01-16 16:34:32 +01:00
print "$result\n" ;