3
0
mirror of https://github.com/pragma-/pbot.git synced 2025-01-25 19:44:26 +01:00
pbot/applets/gencstd.pl

371 lines
12 KiB
Perl
Raw Normal View History

2021-02-07 23:37:12 +01:00
#!/usr/bin/env perl
2021-07-11 00:00:22 +02:00
# SPDX-FileCopyrightText: 2021 Pragmatic Software <pragma78@gmail.com>
# SPDX-License-Identifier: MIT
License project under MPL2 This patch adds the file LICENSE which is the verbatim copy of the Mozilla Public License Version 2.0 as retrieved from https://www.mozilla.org/media/MPL/2.0/index.815ca599c9df.txt on 2017-03-05. This patch also places license headers for the MPL2 type A variant of the license header in the following files: PBot/AntiFlood.pm PBot/BanTracker.pm PBot/BlackList.pm PBot/BotAdminCommands.pm PBot/BotAdmins.pm PBot/ChanOpCommands.pm PBot/ChanOps.pm PBot/Channels.pm PBot/Commands.pm PBot/DualIndexHashObject.pm PBot/EventDispatcher.pm PBot/FactoidCommands.pm PBot/FactoidModuleLauncher.pm PBot/Factoids.pm PBot/HashObject.pm PBot/IRCHandlers.pm PBot/IgnoreList.pm PBot/IgnoreListCommands.pm PBot/Interpreter.pm PBot/LagChecker.pm PBot/Logger.pm PBot/MessageHistory.pm PBot/MessageHistory_SQLite.pm PBot/NickList.pm PBot/PBot.pm PBot/Plugins.pm PBot/Plugins/AntiAway.pm PBot/Plugins/AntiKickAutoRejoin.pm PBot/Plugins/AntiRepeat.pm PBot/Plugins/AntiTwitter.pm PBot/Plugins/AutoRejoin.pm PBot/Plugins/Counter.pm PBot/Plugins/Quotegrabs.pm PBot/Plugins/Quotegrabs/Quotegrabs_Hashtable.pm PBot/Plugins/Quotegrabs/Quotegrabs_SQLite.pm PBot/Plugins/UrlTitles.pm PBot/Plugins/_Example.pm PBot/Refresher.pm PBot/Registerable.pm PBot/Registry.pm PBot/RegistryCommands.pm PBot/SQLiteLogger.pm PBot/SQLiteLoggerLayer.pm PBot/SelectHandler.pm PBot/StdinReader.pm PBot/Timer.pm PBot/Utils/ParseDate.pm PBot/VERSION.pm build/update-version.pl modules/acronym.pl modules/ago.pl modules/c11std.pl modules/c2english.pl modules/c2english/CGrammar.pm modules/c2english/c2eng.pl modules/c99std.pl modules/cdecl.pl modules/cfaq.pl modules/cjeopardy/IRCColors.pm modules/cjeopardy/QStatskeeper.pm modules/cjeopardy/Scorekeeper.pm modules/cjeopardy/cjeopardy.pl modules/cjeopardy/cjeopardy_answer.pl modules/cjeopardy/cjeopardy_filter.pl modules/cjeopardy/cjeopardy_hint.pl modules/cjeopardy/cjeopardy_qstats.pl modules/cjeopardy/cjeopardy_scores.pl modules/cjeopardy/cjeopardy_show.pl 
modules/codepad.pl modules/compiler_block.pl modules/compiler_client.pl modules/compiler_vm/Diff.pm modules/compiler_vm/cc modules/compiler_vm/compiler_client.pl modules/compiler_vm/compiler_server.pl modules/compiler_vm/compiler_server_vbox_win32.pl modules/compiler_vm/compiler_server_watchdog.pl modules/compiler_vm/compiler_vm_client.pl modules/compiler_vm/compiler_vm_server.pl modules/compiler_vm/compiler_watchdog.pl modules/compiler_vm/languages/_c_base.pm modules/compiler_vm/languages/_default.pm modules/compiler_vm/languages/bash.pm modules/compiler_vm/languages/bc.pm modules/compiler_vm/languages/bf.pm modules/compiler_vm/languages/c11.pm modules/compiler_vm/languages/c89.pm modules/compiler_vm/languages/c99.pm modules/compiler_vm/languages/clang.pm modules/compiler_vm/languages/clang11.pm modules/compiler_vm/languages/clang89.pm modules/compiler_vm/languages/clang99.pm modules/compiler_vm/languages/clangpp.pm modules/compiler_vm/languages/clisp.pm modules/compiler_vm/languages/cpp.pm modules/compiler_vm/languages/freebasic.pm modules/compiler_vm/languages/go.pm modules/compiler_vm/languages/haskell.pm modules/compiler_vm/languages/java.pm modules/compiler_vm/languages/javascript.pm modules/compiler_vm/languages/ksh.pm modules/compiler_vm/languages/lua.pm modules/compiler_vm/languages/perl.pm modules/compiler_vm/languages/python.pm modules/compiler_vm/languages/python3.pm modules/compiler_vm/languages/qbasic.pm modules/compiler_vm/languages/scheme.pm modules/compiler_vm/languages/server/_c_base.pm modules/compiler_vm/languages/server/_default.pm modules/compiler_vm/languages/server/c11.pm modules/compiler_vm/languages/server/c89.pm modules/compiler_vm/languages/server/c99.pm modules/compiler_vm/languages/server/clang.pm modules/compiler_vm/languages/server/clang11.pm modules/compiler_vm/languages/server/clang89.pm modules/compiler_vm/languages/server/clang99.pm modules/compiler_vm/languages/server/cpp.pm modules/compiler_vm/languages/server/freebasic.pm 
modules/compiler_vm/languages/server/haskell.pm modules/compiler_vm/languages/server/java.pm modules/compiler_vm/languages/server/qbasic.pm modules/compiler_vm/languages/server/tendra.pm modules/compiler_vm/languages/sh.pm modules/compiler_vm/languages/tendra.pm modules/compliment modules/cstd.pl modules/define.pl modules/dice_roll.pl modules/excuse.sh modules/expand_macros.pl modules/fnord.pl modules/funnyish_quote.pl modules/g.pl modules/gdefine.pl modules/gen_cfacts.pl modules/gencstd.pl modules/get_title.pl modules/getcfact.pl modules/google.pl modules/gspy.pl modules/gtop10.pl modules/gtop15.pl modules/headlines.pl modules/horoscope modules/horrorscope modules/ideone.pl modules/insult.pl modules/love_quote.pl modules/man.pl modules/map.pl modules/math.pl modules/prototype.pl modules/qalc.pl modules/random_quote.pl modules/seen.pl modules/urban modules/weather.pl modules/wikipedia.pl pbot.pl pbot.sh It is highly recommended that this list of files is reviewed to ensure that all files are the copyright of the sole maintainer of the repository. If any files with license headers contain the intellectual property of anyone else, it is recommended that a request is made to revise this patch or that the explicit permission of the co-author is gained to allow for the license of the work to be changed. I (Tomasz Kramkowski), the contributor, take no responsibility for any legal action taken against the maintainer of this repository for incorrectly claiming copyright to any work not owned by the maintainer of this repository.
2017-03-05 22:33:31 +01:00
2019-06-26 18:34:19 +02:00
# ugly and hacked together
use warnings;
use strict;
use HTML::Entities;
use Data::Dumper;
2019-06-26 18:34:19 +02:00
# Verbosity of the STDERR trace. Most diagnostics below are gated on their own
# threshold ("if $debug", "if $debug >= 2", ...), so larger values print more.
my $debug = 1000;

# Forward declarations so the generators can be called before they are defined.
sub gen_data;
sub gen_txt;
sub gen_html;

# Plain-text rendering of the document to convert.
my $input_file = 'n1256.txt';
#my $input_file = 'n1570.txt';

# Three-argument open with a lexical handle: the file name can never be
# interpreted as an open mode, and the handle does not leak as a global.
open my $input_fh, '<', $input_file or die "Could not open $input_file: $!";
my @contents = <$input_fh>;
close $input_fh;

# The parser works on the whole document as one string, without carriage returns.
my $text = join '', @contents;
$text =~ s/\r//g;

# Parser state shared between gen_data and the output generators.
my ($section_title, $this_section);
my %sections;                   # "<section>" and "<section>p<paragraph>" => { title, text }
my $last_section_number = 0;
my $section_number      = 0;
my $last_section;
my @footnotes;                  # footnote text indexed by footnote number
my $footnote      = 0;
my $last_footnote = 0;

gen_data;

#gen_txt;
gen_html;
# gen_data - parse the document held in $text into %sections and @footnotes.
#
# Takes no arguments and returns nothing useful; it works entirely on the
# file-level variables:
#   %sections   gets {title} for every section identifier found, and {text}
#               either under the identifier itself (section without numbered
#               paragraphs) or under "<identifier>p<N>" for each paragraph N.
#   @footnotes  gets the text of footnote N at index N.
#
# Dies when paragraph or footnote numbers are not consecutive. Stops early
# (without dying) when section numbers go out of order or no further section
# boundary can be found.
#
# NOTE: every /g and /gc match below deliberately continues from the pos()
# left by the previous match on the same string. Do not reorder the matches
# or drop their flags.
sub gen_data {
    while ($text =~ m/^\s{0,5}([0-9A-Z]+\.[0-9\.]*)/msg) {
        $last_section_number = $section_number;
        $last_section        = $this_section;
        $this_section        = $1;

        # Leading component of the identifier, e.g. "6" for "6.2.5".
        ($section_number) = $this_section =~ /([^.]+)\./;

        print STDERR "----------------------------------\n"                               if $debug;
        print STDERR "Processing section [$this_section]; number [$section_number]\n" if $debug;

        if ($debug >= 2) {
            # $last_section is undef on the first pass and identifiers such as
            # "6.2.5" are only partly numeric. The trace is still wanted, so
            # print the same values without the resulting warnings.
            no warnings qw(numeric uninitialized);
            print STDERR "this_section: [$this_section]; last_section: [$last_section]\n";
            print STDERR "Section diff: ", ($this_section - $last_section), "\n";
        }

        # The section pattern also accepts identifiers that start with a
        # letter; those evaluate to 0 in numeric context, which is what exempts
        # them from the "must not go backwards" check. Only the "isn't numeric"
        # warning is suppressed, the arithmetic is unchanged.
        my ($diff, $is_numbered);
        {
            no warnings 'numeric';
            $diff        = $section_number - $last_section_number;
            $is_numbered = $section_number > 0;
        }
        print STDERR "Diff: $diff\n" if $debug >= 2;

        if (($is_numbered and $diff < 0) or $diff > 1) {
            print STDERR "Diff out of bounds: $diff\n";
            last;
        }

        my $section_text;

        # Everything from the current position up to the start of the next
        # line that looks like a section identifier.
        if ($text =~ m/(.*?)^(?=\s{0,4}[0-9A-Z]+\.)/msg) { $section_text = $1; }
        else {
            print STDERR "No section text, end of file marker found.\n";
            last;
        }

        # The remainder of the identifier's line is the title. An empty
        # remainder leaves the previous title in place.
        if ($section_text =~ m/(.*?)$/msg) {
            $section_title = $1 if length $1;
            $section_title =~ s/^\s+//;
            $section_title =~ s/\s+$//;
        }

        print STDERR "$this_section [$section_title]\n" if $debug >= 2;
        $sections{$this_section}{title} = $section_title;
        print STDERR "section text: [$section_text]\n" if $debug >= 2;

        # No line starting with "<digits><whitespace>": store the text as one unit.
        if (not $section_text =~ m/^(?=\d+\s)/msg) { $sections{$this_section}{text} = $section_text; }
        else {
            my $last_p = 0;
            my $p      = 0;

            # One iteration per numbered paragraph; the second alternative
            # picks up the final paragraph, which has no successor to stop at.
            while ($section_text =~ m/^(\d+)\s(.*?)^(?=\d)/msgc or $section_text =~ m/^(\d+)\s(.*)/msg) {
                $last_p = $p;
                $p      = $1;
                my $t = $2;

                print STDERR "paragraph $p: [$t]\n" if $debug >= 3;

                if (($last_p - $p) != -1) { die "Paragraph diff invalid"; }

                # A line of the form "<spaces><N>)<spaces><text>" starts footnote N.
                while ($t =~ m/^(\s*)(\d+)\)(\s*)(.*?)$/msg) {
                    my $leading_spaces = $1;
                    $footnote = $2;
                    my $middle_spaces = $3;
                    my $footnote_text = "$4\n";

                    print STDERR "1st footnote\n"                                              if $debug;
                    print STDERR "processing footnote $footnote [last: $last_footnote]\n" if $debug >= 2;

                    if ($last_footnote - $footnote != -1) {
                        print STDERR "footnotes dump: \n" if $debug > 5;
                        shift @footnotes;
                        die "Footnote diff invalid";
                    }
                    $last_footnote = $footnote;

                    # Column at which the footnote text starts, minus one
                    # column of slack. Continuation lines must be indented at
                    # least this far.
                    my $indent = (length $leading_spaces) + (length $footnote) + (length ')') + (length $middle_spaces);
                    $indent--;

                    print STDERR "footnote $footnote text [indent=$indent]: [$footnote_text]\n" if $debug >= 4;

                    # Consume the following lines of $t, continuing from the
                    # position of the footnote header matched above.
                    while ($t =~ m/^(.*?)$/msgc) {
                        my $line = $1;

                        print STDERR "processing [$line]\n" if $debug;

                        if ($line =~ m/^(\s*)(\d+)\)(\s*)(.*?)$/msg) {
                            # The next footnote starts here: store the one
                            # collected so far and switch over to the new one.
                            print STDERR "----------------\n"                          if $debug >= 1;
                            print STDERR "footnote $footnote: [$footnote_text]\n" if $debug >= 1;
                            $footnotes[$footnote] = $footnote_text;
                            print STDERR "----------------\n" if $debug >= 1;

                            $leading_spaces = $1;
                            $footnote       = $2;
                            $middle_spaces  = $3;
                            $footnote_text  = "$4\n";

                            print STDERR "2nd footnote\n"                                              if $debug >= 2;
                            print STDERR "processing footnote $footnote [last: $last_footnote]\n" if $debug >= 2;

                            if ($last_footnote - $footnote != -1) {
                                print STDERR "footnotes dump: \n";
                                shift @footnotes;
                                my $dump = Dumper(@footnotes);
                                print STDERR "$dump\n" if $debug >= 3;
                                die "Footnote diff invalid";
                            }
                            $last_footnote = $footnote;

                            # Update the enclosing $indent (no "my") so the
                            # continuation check below uses this footnote's
                            # indent rather than the previous footnote's.
                            $indent = (length $leading_spaces) + (length $footnote) + (length ')') + (length $middle_spaces);
                            $indent--;

                            print STDERR "footnote $footnote text [indent=$indent]: [$footnote_text]\n" if $debug >= 4;
                            next;
                        }

                        # A line indented less than the footnote text ends the footnote.
                        if (not $line =~ m/^\s{$indent}/msg) {
                            print STDERR "INTERRUPTED FOOTNOTE\n";
                            last;
                        }

                        $footnote_text .= "$line\n";
                        print STDERR "footnote $footnote text: appending [$line]\n" if $debug >= 3;
                    }

                    print STDERR "----------------\n"                          if $debug >= 1;
                    print STDERR "footnote $footnote: [$footnote_text]\n" if $debug >= 1;
                    $footnotes[$footnote] = $footnote_text;
                    print STDERR "----------------\n" if $debug >= 1;
                }

                $sections{$this_section . "p$p"}{text} = "$p $t";
            }
        }
    }
}
# bysection - sort comparator for the keys of %sections.
#
# Compares the package variables $a and $b (as set by sort). Keys have the form
# "<section>" or "<section>p<paragraph>", where <section> is a dot-separated
# identifier such as "6.2.5". Returns -1, 0 or 1.
#
# Ordering: identifiers are compared component by component. An identifier
# that is a prefix of another sorts first, and among equal identifiers the
# lower paragraph number sorts first (a key without "p" counts as paragraph 0).
sub bysection {
    my $inverse = 1;

    print STDERR "section cmp $a <=> $b\n" if $debug > 10;

    my ($a1, $p1) = split /p/, $a;
    my ($b1, $p2) = split /p/, $b;

    $p1 = 0 if not defined $p1;
    $p2 = 0 if not defined $p2;

    my @k1 = split /\./, $a1;
    my @k2 = split /\./, $b1;
    my @r;

    # Make @k1 the longer identifier so the loop below can walk every
    # component; remember that the operands were swapped and flip the result.
    if ($#k2 > $#k1) {
        my @t = @k1;
        @k1 = @k2;
        @k2 = @t;
        my $tp = $p1;
        $p1      = $p2;
        $p2      = $tp;
        $inverse = -1;
    } else {
        $inverse = 1;
    }

    # Disabled debugging aid:
    #print STDERR "k1 vals:\n";
    #print STDERR Dumper(@k1), "\n";
    #print STDERR "p1: $p1\n";
    #print STDERR "k2 vals:\n";
    #print STDERR Dumper(@k2), "\n";
    #print STDERR "p2: $p2\n";

    my $i = 0;
    for (; $i < $#k1 + 1; $i++) {
        if (not defined $k2[$i]) { $r[$i] = 1; }    # @k2 ran out: the longer identifier sorts later
        else {
            print STDERR "   cmp k1[$i] ($k1[$i]) vs k2[$i] ($k2[$i])\n" if $debug >= 5;
            if ($i == 0) {
                # The leading component may be a letter, so it cannot always be
                # compared numerically. When both sides are digits, compare
                # them as numbers so that "10" sorts after "9"; otherwise fall
                # back to string order, which puts digits before letters.
                my $both_numeric = ($k1[$i] =~ m/^[0-9]+$/ and $k2[$i] =~ m/^[0-9]+$/);
                $r[$i] = $both_numeric ? ($k1[$i] <=> $k2[$i]) : ($k1[$i] cmp $k2[$i]);
            }
            else { $r[$i] = $k1[$i] <=> $k2[$i]; }
        }
        print STDERR "  r[$i] = $r[$i]\n" if $debug >= 5;
    }

    # The paragraph number is the least significant part of the key.
    $r[$i] = ($p1 <=> $p2);
    print STDERR "  $p1 <=> $p2 => r[$i] = $r[$i]\n" if $debug >= 5;

    # The first non-zero comparison decides.
    my $ret = 0;
    foreach my $rv (@r) {
        print STDERR "  checking r: $rv\n" if $debug >= 5;
        if ($rv != 0) {
            $ret = $rv;
            last;
        }
    }

    $ret = $ret * $inverse;

    print STDERR "ret $ret\n" if $debug >= 5;
    return $ret;
}
# gen_txt - print %sections to STDOUT as plain text.
#
# Takes no arguments. Sections are written in bysection order. Section heads
# (keys without "p") get a title line. The footnote texts collected in
# @footnotes are removed from the body, and every footnote referenced by a
# "<N>)" marker in the body is printed after that body.
sub gen_txt {
    my $footer = "";
    my $paren  = 0;    # running parenthesis depth; carried across lines and sections

    # One pattern per footnote, compiled once. Footnote N lives at index N, so
    # the range must include the last index. Runs of spaces are relaxed to \s*
    # so the pattern still matches when the spacing differs.
    my @footnote_patterns;
    foreach my $n (1 .. $#footnotes) {
        next if not defined $footnotes[$n];
        my $body = quotemeta $footnotes[$n];
        $body =~ s/(\\ )+/\\s*/g;
        push @footnote_patterns, qr/^\s*$n\)\s*$body/ms;
    }

    foreach my $this_section (sort bysection keys %sections) {
        print STDERR "writing section $this_section\n" if $debug;

        if (not $this_section =~ m/p/) {
            print " $this_section $sections{$this_section}{title}\n";
        }

        # A section head whose body was split into paragraphs has no text of its own.
        my $section_text = $sections{$this_section}{text};
        $section_text = '' if not defined $section_text;

        foreach my $pattern (@footnote_patterns) {

            #print STDERR "subbing out [$pattern]\n";
            $section_text =~ s/$pattern//;
        }

        while ($section_text =~ m/^(.*?)$/msg) {
            my $line = $1;

            print STDERR "paren reset, line [$line]\n" if $debug >= 8;

            # Digits seen since the last character that was not a digit or a parenthesis.
            my $number = "";
            while ($line =~ m/(.)/g) {
                my $c = $1;

                if    ($c =~ m/[0-9]/) { $number .= $c; }
                elsif ($c eq ' ')      { $number = ""; }
                elsif ($c eq '(') {
                    $paren++;
                    print STDERR "got $paren (\n" if $debug >= 8;
                } elsif ($c eq ')') {
                    $paren--;
                    print STDERR "got $paren )\n" if $debug >= 8;

                    # A closing parenthesis with no matching opener, directly
                    # after a known footnote number, is a footnote marker.
                    if ($paren == -1) {
                        if (length $number and defined $footnotes[$number]) {
                            print STDERR "Got footnote $number here!\n" if $debug;
                            $footer .= " FOOTNOTE.$number\n $footnotes[$number]\n";
                        }

                        $paren = 0;
                    }
                } else {
                    $number = "";
                }
            }
        }

        print "$section_text\n";

        if (length $footer) {
            print $footer;
            $footer = "";
        }
    }
}
# gen_html - print %sections to STDOUT as one HTML document.
#
# Takes no arguments. Sections are written in bysection order, each wrapped in
# a named anchor. Section heads (keys without "p") get a rule and a heading.
# The footnote texts collected in @footnotes are removed from the body, each
# "<N>)" footnote marker becomes a superscript, the referenced footnotes are
# printed after the body, and parenthesised dotted numbers such as "(6.2.5)"
# are turned into links to the anchor of that name.
#
# encode_entities has no prototype, so it must always be called with
# parentheses here: without them it swallows every following argument of the
# surrounding list and treats the second one as its set of characters to encode.
sub gen_html {
    print "<html>\n<body>\n";

    my $footer = "";
    my $paren  = 0;    # running parenthesis depth; carried across lines and sections

    # One pattern per footnote, compiled once. Footnote N lives at index N, so
    # the range must include the last index. Runs of spaces are relaxed to \s*
    # so the pattern still matches when the spacing differs.
    my @footnote_patterns;
    foreach my $n (1 .. $#footnotes) {
        next if not defined $footnotes[$n];
        my $body = quotemeta $footnotes[$n];
        $body =~ s/(\\ )+/\\s*/g;
        push @footnote_patterns, qr/^\s*$n\)\s*$body/ms;
    }

    foreach my $this_section (sort bysection keys %sections) {
        print STDERR "writing section $this_section\n" if $debug;

        my $anchor = encode_entities($this_section);
        print "<a name='", $anchor, "'>\n";

        if (not $this_section =~ m/p/) {
            my $title = $sections{$this_section}{title};
            $title = '' if not defined $title;
            print "<hr>\n<h3>", $anchor, " [", encode_entities($title), "]</h3>\n";
        }

        # A section head whose body was split into paragraphs has no text of its own.
        my $section_text = $sections{$this_section}{text};
        $section_text = '' if not defined $section_text;

        foreach my $pattern (@footnote_patterns) {

            #print STDERR "subbing out [$pattern]\n";
            $section_text =~ s/$pattern//;
        }

        $section_text = encode_entities($section_text);

        # Rebuild the body in a single pass so each footnote marker is replaced
        # exactly where it was detected, instead of at the first place in the
        # section where the same digits happen to precede a ")".
        my $html = '';
        foreach my $line (split /^/, $section_text) {
            if ($debug >= 8) {
                (my $shown = $line) =~ s/\n\z//;
                print STDERR "paren reset, line [$shown]\n";
            }

            # Digits seen since the last character that was not a digit or a parenthesis.
            my $number = "";
            foreach my $c (split //, $line) {
                if    ($c =~ m/[0-9]/) { $number .= $c; }
                elsif ($c eq ' ')      { $number = ""; }
                elsif ($c eq '(') {
                    $paren++;
                    print STDERR "got $paren (\n" if $debug >= 8;
                } elsif ($c eq ')') {
                    $paren--;
                    print STDERR "got $paren )\n" if $debug >= 8;

                    # A closing parenthesis with no matching opener, directly
                    # after a known footnote number, is a footnote marker.
                    if ($paren == -1) {
                        $paren = 0;

                        if (    length $number
                            and defined $footnotes[$number]
                            and $html =~ s/\Q$number\E\z//)
                        {
                            print STDERR "Got footnote $number here!\n" if $debug;
                            $html   .= "<sup>[$number]</sup>";
                            $footer .= "<a name='FOOTNOTE.$number'>\n<pre><i><b>Footnote $number)</b> "
                              . encode_entities($footnotes[$number])
                              . "</i></pre>\n</a>\n";
                            next;    # the ")" is part of the marker and has been replaced
                        }
                    }
                } else {
                    $number = "";
                }

                $html .= $c;
            }
        }
        $section_text = $html;

        # Link parenthesised section references to the matching anchors.
        $section_text =~ s/\(([0-9.]+)\)/(<a href="#$1">$1<\/a>)/g;
        $footer       =~ s/\(([0-9.]+)\)/(<a href="#$1">$1<\/a>)/g;

        print "<pre>", $section_text, "</pre>\n";
        print "</a>\n";

        if (length $footer) {
            print $footer;
            $footer = "";
        }
    }

    print "\n</body>\n</html>\n";
}