C facts: Improve generation of C facts; update cfacts.txt with new facts

This commit is contained in:
Pragmatic Software 2014-08-02 16:01:51 +00:00
parent 9651847165
commit bc871bf4b2
3 changed files with 1442 additions and 527 deletions

View File

@ -13,8 +13,8 @@ use warnings;
# These are set automatically by the build/commit script # These are set automatically by the build/commit script
use constant { use constant {
BUILD_NAME => "PBot", BUILD_NAME => "PBot",
BUILD_REVISION => 758, BUILD_REVISION => 759,
BUILD_DATE => "2014-07-29", BUILD_DATE => "2014-08-02",
}; };
1; 1;

File diff suppressed because it is too large. [Load Diff]

View File

@ -1,11 +1,13 @@
#!/bin/env perl #!/usr/bin/env perl
# quick and dirty # quick and dirty
use warnings; use warnings;
use strict; use strict;
my $STD = 'n1570-cfact.txt'; use HTML::Entities;
my $STD = 'n1570.html';
my $text; my $text;
{ {
@ -17,17 +19,28 @@ my $text;
my $cfact_regex = qr/ my $cfact_regex = qr/
( (
A(n)?\s+[^.]+is[^.]+\. \s+\S+\s+which\s+is.*?
|\.\s+[^.]+shall[^.]+\. |\s+\S+\s+which\s+expand.*?
|If[^.]+\. |(?:\-\-\s+|\s+|<pre>\s*\d*\s*(EXAMPLE\s*|NOTE\s*)?)An?\s+[^.]+describes.*?
|\.\s+[^.]+is\s+known[^.]+\. |(?:\-\-\s+|\s+|<pre>\s*\d*\s*(EXAMPLE\s*|NOTE\s*))An?\s+[^.]+is.*?
|\.\s+[^.]+is\s+called[^.]+\. |(?:\-\-\s+|\s+|<pre>\s*\d*\s*)[^.]+shall.*?
|(?:\-\-\s+|\s+|<pre>\s*\d*\s*)If.*?
|(?:\-\-\s+|\s+|<pre>\s*\d*\s*)[^.]+is\s+named.*?
|(?:\-\-\s+|\s+|<pre>\s*\d*\s*)[^.]+is\s+known.*?
|(?:\-\-\s+|\s+|<pre>\s*\d*\s*)[^.]+are\s+known.*?
|(?:\-\-\s+|\s+|<pre>\s*\d*\s*)[^.]+is\s+called.*?
|(?:\-\-\s+|\s+|<pre>\s*\d*\s*)[^.]+are\s+called.*?
|(?:\-\-\s+|\s+|<pre>\s*\d*\s*)When.*?
|(?:\-\-\s+|\s+|<pre>\s*\d*\s*)The\s+\S+\s+function.*?
) )
(?:\.(?!(\d|h))|<\/pre>)
/msx; /msx;
my @sections; my @sections;
while ($text =~ /^\s{4}([A-Z\d]+\.[0-9\.]* +.*?)\r\n/mg) { while ($text =~ /^<h3>(.*?)<\/h3>/mg) {
unshift @sections, [pos $text, $1]; my $section = $1;
$section =~ s/[\[\]]//g;
unshift @sections, [pos $text, $section];
} }
while ($text =~ /$cfact_regex/gms) { while ($text =~ /$cfact_regex/gms) {
@ -41,6 +54,11 @@ while ($text =~ /$cfact_regex/gms) {
$fact =~ s/^\d+\s*//; $fact =~ s/^\d+\s*//;
$fact =~ s/- ([a-z])/-$1/g; $fact =~ s/- ([a-z])/-$1/g;
$fact =~ s/\s+\././g; $fact =~ s/\s+\././g;
$fact =~ s/^\s*<pre>\s*\d*\s*//;
$fact =~ s/^\s*EXAMPLE\s*//;
$fact =~ s/^\s*NOTE\s*//;
$fact =~ s/^\s+//;
$fact =~ s/\s+$//;
my $section = ''; my $section = '';
foreach my $s (@sections) { foreach my $s (@sections) {
@ -50,7 +68,8 @@ while ($text =~ /$cfact_regex/gms) {
} }
} }
next if length "$section$fact" > 400; $fact = decode_entities($fact);
$fact =~ s/[a-z;,.]\K\d+\)//g; # remove footnote markers
print "$section$fact\n"; print "$section$fact.\n";
} }