2014-08-02 18:01:51 +02:00
|
|
|
#!/usr/bin/env perl
|
2014-07-22 03:46:16 +02:00
|
|
|
|
2017-03-05 22:33:31 +01:00
|
|
|
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
|
2014-07-22 03:46:16 +02:00
|
|
|
# quick and dirty
|
|
|
|
|
|
|
|
use warnings;
|
|
|
|
use strict;
|
|
|
|
|
2014-08-02 18:01:51 +02:00
|
|
|
use HTML::Entities;
|
|
|
|
|
|
|
|
# Name of the HTML file that is scanned for facts.
my $STD = 'n1570.html';

# Entire contents of $STD as a single string.  The file is slurped in one
# read so that later pattern matches can span line boundaries.
# Dies with the OS error message if the file cannot be opened.
my $text = do {
    open my $fh, '<', $STD or die "Could not open $STD: $!";
    local $/ = undef;    # no record separator: one read returns the whole file
    my $contents = <$fh>;
    close $fh;
    $contents;
};
|
|
|
|
|
|
|
|
# Pattern that picks one candidate "fact" sentence out of the document text.
#
# Capture group 1 is the sentence body.  It still includes whatever
# introduced the sentence: leading whitespace, a "--" bullet, or a "<pre>"
# tag with an optional paragraph number (and, for some alternatives, an
# EXAMPLE or NOTE label).  Callers are expected to strip those afterwards.
#
# The body must be followed by a terminator that is NOT part of the capture:
# either a full stop that is not followed by a digit or the letter "h"
# (presumably so that section numbers such as 6.2.1 and header names such as
# stdio.h do not end a sentence -- confirm), or a closing </pre> tag.
#
# Only group 1 captures; every other group is non-capturing.
#
# NOTE(review): in the "An? ... is" alternative the EXAMPLE/NOTE label is
# mandatory after "<pre>", whereas in the "An? ... describes" alternative it
# is optional.  This looks like an oversight but is kept as found, because
# changing it could alter which sentences are extracted.
#
# Flags: /m and /s let the lazy ".*?" run across newlines, /x allows the
# layout below (whitespace inside the pattern is ignored).
my $cfact_regex = qr/
    (
        \s+\S+\s+which\s+is.*?
      | \s+\S+\s+which\s+expand.*?
      | (?:--\s+|\s+|<pre>\s*\d*\s*(?:EXAMPLE\s*|NOTE\s*)?)An?\s+[^.]+describes.*?
      | (?:--\s+|\s+|<pre>\s*\d*\s*(?:EXAMPLE\s*|NOTE\s*))An?\s+[^.]+is.*?
      | (?:--\s+|\s+|<pre>\s*\d*\s*)[^.]+shall.*?
      | (?:--\s+|\s+|<pre>\s*\d*\s*)If.*?
      | (?:--\s+|\s+|<pre>\s*\d*\s*)[^.]+is\s+named.*?
      | (?:--\s+|\s+|<pre>\s*\d*\s*)[^.]+is\s+known.*?
      | (?:--\s+|\s+|<pre>\s*\d*\s*)[^.]+are\s+known.*?
      | (?:--\s+|\s+|<pre>\s*\d*\s*)[^.]+is\s+called.*?
      | (?:--\s+|\s+|<pre>\s*\d*\s*)[^.]+are\s+called.*?
      | (?:--\s+|\s+|<pre>\s*\d*\s*)When.*?
      | (?:--\s+|\s+|<pre>\s*\d*\s*)The\s+\S+\s+function.*?
    )
    (?:\.(?![\dh])|<\/pre>)
/msx;
|
|
|
|
|
|
|
|
# Index of the document's <h3> headings.
#
# Each element is a two-item array ref:
#   [ offset in $text just past the closing </h3>, heading text ]
# with any square brackets removed from the heading text.
#
# Entries are added with unshift, so the list runs from the LAST heading in
# the document to the FIRST.  A front-to-back scan for the first entry whose
# offset is not greater than a given position therefore yields the nearest
# preceding heading.
my @sections;

while ($text =~ m{^<h3>(.*?)</h3>}mg) {
    my $title = $1;
    $title =~ s/[\[\]]//g;

    # pos($text) is the end of the heading match; the substitution above
    # works on $title only and does not disturb it.
    unshift @sections, [ pos($text), $title ];
}
|
|
|
|
|
|
|
|
# Walk over every match of $cfact_regex in $text, tidy the captured sentence
# and print it on its own line as "[heading] sentence." (the heading prefix is
# omitted when no heading precedes the match).
while ($text =~ /$cfact_regex/gms) {
    my $fact = $1;
    next unless length $fact;

    # Offset just past the current match, used for the heading lookup.
    my $match_end = pos $text;

    # --- Normalise the raw capture.  The order of these steps matters. ---
    $fact =~ s/[\n\r]/ /g;                  # line breaks become spaces
    $fact =~ s/ +/ /g;                      # collapse runs of spaces
    $fact =~ s/^\.\s*//;                    # drop a leading full stop
    $fact =~ s/^\s*--\s*//;                 # drop a leading "--" bullet
    $fact =~ s/^\d+\s*//;                   # drop a leading number
    $fact =~ s/- ([a-z])/-$1/g;             # "foo- bar" -> "foo-bar"
    $fact =~ s/\s+\././g;                   # no whitespace before a full stop
    $fact =~ s/^\s*<pre>\s*\d*\s*//;        # drop a leading <pre> and number
    $fact =~ s/^\s*EXAMPLE\s*//;            # drop a leading EXAMPLE label
    $fact =~ s/^\s*NOTE\s*//;               # drop a leading NOTE label
    $fact =~ s/^\s+//;                      # trim left
    $fact =~ s/\s+$//;                      # trim right

    # --- Find the heading this fact belongs to. ---
    # @sections is scanned front to back and the first entry whose offset is
    # not past the end of the match wins.
    my $section = '';
    foreach my $s (@sections) {
        if ($match_end >= $s->[0]) {
            $section = "[$s->[1]] ";
            last;
        }
    }

    # Turn HTML entities into the characters they stand for.
    $fact = decode_entities($fact);

    # remove footnote markers: digits plus ")" directly after a lowercase
    # letter or one of ; , .  (\K keeps that preceding character)
    $fact =~ s/[a-z;,.]\K\d+\)//g;

    # The terminating full stop was not part of the capture; add it back.
    print "$section$fact.\n";
}
|