#!/usr/bin/perl

# Command-line lookup tool for the n1256.txt document; the usage message
# below describes the accepted options.

use strict;
use warnings;

# Diagnostic verbosity. 0 is silent; the rest of the script compares this
# against thresholds from 1 up to 4.
my $debug = 0;

# for paragraphs
# Values stored in $paragraph_specified: the paragraph number either came
# from the command line, or was filled in from a search hit.
my ($USER_SPECIFIED, $RESULTS_SPECIFIED) = (1, 2);

# All command-line arguments are treated as one space-separated string;
# options are later stripped out of it with substitutions.
my $search = join(' ', @ARGV);

# Nothing at all on the command line: show the usage text and stop.
unless (length $search) {
    print "Usage: cstd [-list] [-n#] [-section <section>] [search text] -- 'section' must be in the form of X.YpZ where X and Y are section/chapter and, optionally, Z is paragraph. If both 'section' and 'search text' are specified, then the search space will be within the specified section. You may use -n # to skip to the #th match. To list only the section numbers containing 'search text', add -list.\n";
    exit 0;
}
# State extracted from the command line:
#   $section / $paragraph   - requested section prefix and paragraph number
#   $section_specified      - 1 when a section was given by the user
#   $paragraph_specified    - 0, $USER_SPECIFIED or $RESULTS_SPECIFIED
#   $match                  - which match (1-based) to display
#   $list_only/$list_titles - list matching sections (with titles) only
my ($section, $paragraph, $section_specified, $paragraph_specified, $match, $list_only, $list_titles);

($section_specified, $paragraph_specified) = (0, 0);

# A section is either given explicitly with -section, or is the first bare
# token that looks like "digits." followed by more digits, dots or 'p'.
# Whichever form matches is removed from the search text.
if ($search =~ s/-section\s*([0-9\.p]+)//i or $search =~ s/\b(\d+\.[0-9\.p]*)//i) {
    $section = $1;

    # Paragraph defaults to 1 unless a "pN" suffix is present; the suffix
    # is cut out of the section string.
    $paragraph = 1;
    if ($section =~ s/p(\d+)//i) {
        $paragraph           = $1;
        $paragraph_specified = $USER_SPECIFIED;
    }

    # A bare chapter number such as "6" is normalised to "6.".
    $section .= "." if $section =~ m/^\d+$/;

    $section_specified = 1;
}

# -n <N> selects the Nth match; the first match is the default.
$match = ($search =~ s/-n\s*(\d+)//) ? $1 : 1;

# -list and -titles are synonyms: both switch on list mode with titles.
# Added here instead of removing -titles option
($list_only, $list_titles) = (1, 1) if $search =~ s/-list//i;
($list_only, $list_titles) = (1, 1) if $search =~ s/-titles//i;

# Whatever is left, minus surrounding whitespace, is the search text.
$search =~ s/^\s+//;
$search =~ s/\s+$//;

# No section found on the command line: fall back to "1." paragraph 1.
unless (defined $section) {
    ($section, $paragraph) = ("1.", 1);
}

# List mode is meaningless without something to search for.
if ($list_only && !length($search)) {
    print "You must specify some search text to use with -list.\n";
    exit 0;
}
# Read the whole of n1256.txt into memory. Three-argument open with a
# lexical handle is used so the mode is never taken from the filename and
# no package-global filehandle is created.
open my $fh, '<', 'n1256.txt' or die "Could not open n1256.txt: $!";
my @contents = <$fh>;
close $fh;

# The document is searched as one string; carriage returns are stripped so
# that line-anchored patterns behave the same for CRLF input.
my $text = join '', @contents;
$text =~ s/\r//g;
# Output text: a single paragraph/section body, or the accumulated list of
# matching sections when running in list mode.
my $result;

# Where the displayed result came from.
my $found_section       = "";
my $found_section_title = "";
my $section_title;
my $found_paragraph;

my $found   = 0;     # set once a result to display has been chosen
my $matches = 0;     # number of paragraphs that matched the search text
my $this_section;    # section number currently being scanned
my $comma   = "";    # list separator; non-empty once a list entry exists

# The list header quotes the search text as typed, so it must be built
# before the text is rewritten into a pattern below.
if ($list_only) {
    $result = "Sections containing '$search':\n ";
}

# Let any whitespace in the search text match any run of whitespace in the
# document (including line breaks). Whole runs are collapsed into a single
# \s+ so that accidental double spaces do not demand two or more
# separators in the document.
$search =~ s/\s+/\\s+/g;
# Walk the document one section at a time.
#
# A section header is a line indented by 4-6 whitespace characters that
# starts with a dotted number. Both matches against $text use /g in scalar
# context, so each one resumes where the previous one stopped: the first
# consumes the section number, the second captures everything up to (but
# not including) the next header line.
while ($text =~ m/^\s{4,6}(\d+\.[0-9\.]*)/msg) {
    $this_section = $1;

    print "----------------------------------\n" if $debug >= 2;
    print "Processing section [$this_section]\n" if $debug;

    my $section_text;

    if ($text =~ m/(.*?)^(?=\s{4,6}\d+\.)/msg) {
        $section_text = $1;
    } else {
        # No following header: the last section of the file is not scanned.
        print "No section text, end of file marker found.\n" if $debug >= 4;
        last;
    }

    # The remainder of the header line is the section title. An empty
    # remainder keeps the previous title. The /g here also leaves the match
    # position of $section_text after the title, which is where the
    # paragraph loop below starts.
    if ($section_text =~ m/(.*?)$/msg) {
        $section_title = $1 if length $1;

        # No title seen yet at all: use an empty one rather than operating
        # on an undefined value.
        $section_title = '' if not defined $section_title;

        $section_title =~ s/^\s+//;
        $section_title =~ s/\s+$//;
    }

    # Restrict to sections whose number starts with the requested prefix.
    # The prefix is literal text, so it is quoted to stop '.' acting as a
    # wildcard.
    if ($section_specified and $this_section !~ m/^\Q$section\E/) {
        print "No section match, skipping.\n" if $debug >= 4;
        next;
    }

    print "$this_section [$section_title]\n" if $debug >= 2;

    # Paragraphs start at column 0 with their number. The first pattern
    # takes a paragraph that is followed by another one; /c keeps the match
    # position when it fails so the second pattern can pick up the final
    # paragraph of the section.
    while ($section_text =~ m/^(\d+)\s(.*?)^(?=\d)/msgc or $section_text =~ m/^(\d+)\s(.*)/msg) {
        my $p = $1;
        my $t = $2;

        print "paragraph $p: [$t]\n" if $debug >= 3;

        # Direct lookup of a user-requested paragraph (no search text).
        if ($paragraph_specified == $USER_SPECIFIED and not length $search and $p == $paragraph) {
            $result              = $t if not $found;
            $found_paragraph     = $p;
            $found_section       = $this_section;
            $found_section_title = $section_title;
            $found               = 1;
            last;
        }

        if (length $search) {
            # The search text is used as a regular expression as supplied;
            # eval traps patterns that fail to compile.
            eval {
                if ($t =~ m/\b$search/mis or $section_title =~ m/\b$search/mis) {
                    $matches++;

                    # Matches before the requested -n index are counted but
                    # otherwise skipped.
                    if ($matches >= $match) {
                        if ($list_only) {
                            $result .= sprintf("%s%-15s", $comma, $this_section . "p" . $p);
                            $result .= " $section_title" if $list_titles;
                            $comma = ",\n ";
                        } else {
                            # Only the first qualifying match is kept; later
                            # ones merely increase $matches.
                            if (not $found) {
                                $result              = $t;
                                $found_section       = $this_section;
                                $found_section_title = $section_title;
                                $found_paragraph     = $p;
                                $paragraph_specified = $RESULTS_SPECIFIED;
                            }

                            $found = 1;
                        }
                    }
                }
            };

            if ($@) {
                print "Error in search regex; you may need to escape characters such as *, ?, ., etc.\n";
                exit 0;
            }
        }
    }

    # A requested paragraph was found: nothing more to scan.
    last if $found && $paragraph_specified == $USER_SPECIFIED;

    # A requested paragraph was not present in the first matching section.
    # This only applies to plain paragraph lookups: with search text the
    # paragraph lookup above is never attempted, so reporting a missing
    # paragraph would be wrong and would cut the text search short.
    if ($paragraph_specified == $USER_SPECIFIED and not length $search) {
        print "No such paragraph '$paragraph' in section '$section' of n1256.\n";
        exit 0;
    }

    # No search text: show the whole of the first section that got this far.
    # NOTE(review): $section_specified is initialised to 0 before this loop,
    # so 'defined' is always true here and the test effectively depends on
    # the search text alone. Kept as is to preserve existing behaviour.
    if (defined $section_specified and not length $search) {
        $found               = 1;
        $found_section       = $this_section;
        $found_section_title = $section_title;
        $found_paragraph     = $paragraph;
        $result              = $section_text;
        last;
    }
}
# Nothing was selected for display and no list entry was produced: report
# what could not be found and stop.
if (not $found and $comma eq "") {
    # Turn the \s+ placeholders back into spaces so the search text is
    # shown in readable form.
    $search =~ s/\\s\+/ /g;

    my $message;

    if (not length $search) {
        # Without search text the only thing that can be missing is the
        # section itself.
        $message = "No such section '$section' in n1256.\n";
    } elsif ($section_specified) {
        $message = "No such text '$search' found within section '$section' in n1256.\n";
    } else {
        $message = "No such text '$search' found in n1256.\n";
    }

    print $message;
    exit 0;
}
# Remove the first occurrence of the section title from the result text,
# since the title is printed separately below. The title is literal text
# from the document, so it is quoted to keep any regex metacharacters in it
# from being interpreted.
$result =~ s/\Q$found_section_title\E// if length $found_section_title;

$result =~ s/^\s+//;
$result =~ s/\s+$//;

# Disabled (was enclosed in a =cut ... =cut pair): collapse the result onto
# a single line.
# $result =~ s/\s+/ /g;
# $result =~ s/[\n\r]/ /g;

# Tell the user which of several matches is being shown.
if ($matches > 1 and not $list_only) {
    print "Displaying \#$match of $matches matches: ";
}

# An empty separator means no list was built, so a single result is being
# shown: prefix it with a link to the section and its title.
if ($comma eq "") {
    # Disabled (was enclosed in a =cut ... =cut pair): plain section
    # reference instead of a URL.
    # print $found_section;
    # print "p" . $found_paragraph if $paragraph_specified;

    print "http://blackshell.com/~msmud/cstd.html\#$found_section";
    print "p" . $found_paragraph if $paragraph_specified;
    print " : ";
    print "[", $found_section_title, "] " if length $found_section_title;
}

print "$result\n";