3
0
mirror of https://github.com/pragma-/pbot.git synced 2024-10-03 01:48:38 +02:00

Replaced copy/pasta from PDF with nicely formatted native text and updated cstd.pl to match -- it now processes sections and paragraphs with 100% accuracy; support for footnote search is now possible

This commit is contained in:
Pragmatic Software 2010-04-12 17:34:55 +00:00
parent 456a230813
commit f9cd8ae88d
2 changed files with 15375 additions and 13407 deletions

View File

@ -3,6 +3,8 @@
use warnings; use warnings;
use strict; use strict;
my $debug = 0;
my $search = join ' ', @ARGV; my $search = join ' ', @ARGV;
if(not length $search) { if(not length $search) {
@ -64,22 +66,24 @@ my $matches = 0;
my $this_section; my $this_section;
my $comma = ""; my $comma = "";
#print "search: [$search]; section: [$section]\n";
if($list_only) { if($list_only) {
$result = "Sections containing '$search': "; $result = "Sections containing '$search': ";
} }
$search =~ s/\s+/.*/g; $search =~ s/\s+/.*/g;
while($text =~ m/^(\d+\.[0-9\.]*)/msg) { while($text =~ m/^\s{4}(\d+\.[0-9\.]*)/msg) {
$this_section = $1; $this_section = $1;
print "----------------------------------\n" if $debug >= 2;
print "Processing section [$this_section]\n" if $debug;
my $section_text; my $section_text;
if($text =~ m/(.*?)^(?=\d+\.)/msg) { if($text =~ m/(.*?)^(?=\s{4}\d+\.)/msg) {
$section_text = $1; $section_text = $1;
} else { } else {
print "No section text, skipping.\n" if $debug >= 4;
last; last;
} }
@ -89,21 +93,18 @@ while($text =~ m/^(\d+\.[0-9\.]*)/msg) {
$section_title =~ s/\s+$//; $section_title =~ s/\s+$//;
} }
if(not defined $section_text) { if($section_specified and $this_section !~ m/^$section/) {
print "Fatal error: no section text\n"; print "No section match, skipping.\n" if $debug >= 4;
die; next;
} }
if($section_specified) { print "$this_section [$section_title]\n" if $debug >= 2;
next if $this_section !~ m/^$section/;
}
# print "----------------------------------\n";
# print "$this_section text: [$section_text]\n";
while($section_text =~ m/^(\d+)\s(.*?)^(?=\d)/msgc or $section_text =~ m/^(\d+)\s(.*)/msg) { while($section_text =~ m/^(\d+)\s(.*?)^(?=\d)/msgc or $section_text =~ m/^(\d+)\s(.*)/msg) {
my $p = $1; my $p = $1 ;
my $t = $2; my $t = $2;
print "paragraph $p: [$t]\n" if $debug >= 3;
if($paragraph_specified and not length $search and $p == $paragraph) { if($paragraph_specified and not length $search and $p == $paragraph) {
$found = 1; $found = 1;
@ -116,7 +117,7 @@ while($text =~ m/^(\d+\.[0-9\.]*)/msg) {
# print "$p\n"; # print "$p\n";
# print "[$t]\n"; # print "[$t]\n";
if($t =~ m/$search/msi) { if($t =~ m/$search/ms) {
$matches++; $matches++;
if($matches >= $match) { if($matches >= $match) {
if($list_only) { if($list_only) {

File diff suppressed because it is too large Load Diff