mirror of
https://github.com/pragma-/pbot.git
synced 2024-12-23 03:02:47 +01:00
Spinach: Add misc tools
This commit is contained in:
parent
c032450d82
commit
6dc911c100
174
misc/spinach/ja_convert.pl
Executable file
174
misc/spinach/ja_convert.pl
Executable file
@ -0,0 +1,174 @@
|
||||
#!/usr/bin/perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
use Data::Dumper;
|
||||
use HTML::DOM;
|
||||
use HTML::Entities;
|
||||
use JSON;
|
||||
|
||||
my $questions = { questions => [] };
|
||||
|
||||
my $debug = 0;
|
||||
my $kill_id = 9999999;
|
||||
#my $kill_id = 10;
|
||||
|
||||
my @files = glob '*game*.html';
|
||||
#my @files = glob '*game-1022.html';
|
||||
|
||||
my $id = 0;
|
||||
|
||||
foreach my $file (@files) {
|
||||
print "Processing $file...\n";
|
||||
|
||||
my $text;
|
||||
|
||||
{
|
||||
open my $fh, '<', $file or die "Couldn't open $file: $!";
|
||||
local $/ = undef;
|
||||
$text = <$fh>;
|
||||
close $fh;
|
||||
}
|
||||
|
||||
my $doc = HTML::DOM->new;
|
||||
$doc->write($text);
|
||||
|
||||
my @rounds = $doc->getElementsByClassName('round');
|
||||
my $round_nr = 0;
|
||||
|
||||
foreach my $round (@rounds) {
|
||||
$round_nr++;
|
||||
print " Round $round_nr!\n";
|
||||
|
||||
my ($category, $question, $answer);
|
||||
|
||||
my @categories = $round->getElementsByClassName('category_name');
|
||||
my @clues = $round->getElementsByClassName('clue');
|
||||
|
||||
foreach my $clue (@clues) {
|
||||
my $div = $clue->getElementsByTagName('div');
|
||||
|
||||
if (not defined $div->[0]) {
|
||||
print "No div!\n";
|
||||
next;
|
||||
}
|
||||
|
||||
my $mouseover = $div->[0]->{onmouseover};
|
||||
my $mouseout = $div->[0]->{onmouseout};
|
||||
|
||||
my $clue_values = $div->[0]->getElementsByClassName('clue_value');
|
||||
|
||||
if (not defined $clue_values->[0]) {
|
||||
$clue_values = $div->[0]->getElementsByClassName('clue_value_daily_double');
|
||||
}
|
||||
|
||||
if (not defined $clue_values->[0]) {
|
||||
print "No clue value.\n";
|
||||
die;
|
||||
}
|
||||
|
||||
my $clue_value = $clue_values->[0]->innerHTML;
|
||||
|
||||
if ($debug) {
|
||||
print " mouseover: $mouseover\n";
|
||||
print " mouseout: $mouseout\n";
|
||||
print " clue value: $clue_value\n";
|
||||
}
|
||||
|
||||
my ($col, $row);
|
||||
if ($mouseover =~ m/J_(\d+)_(\d+)/) {
|
||||
$col = $1;
|
||||
$row = $2;
|
||||
} else {
|
||||
print "Failed to find col, row\n";
|
||||
print " mouseover: $mouseover\n";
|
||||
print " mouseout: $mouseout\n";
|
||||
die;
|
||||
}
|
||||
|
||||
if ($mouseover =~ m{<em class="correct_response">(.*?)</em>}) {
|
||||
$answer = $1;
|
||||
} else {
|
||||
print "Failed to find answer.\n";
|
||||
print " mouseover: $mouseover\n";
|
||||
print " mouseout: $mouseout\n";
|
||||
die;
|
||||
}
|
||||
|
||||
if ($mouseout =~ m/toggle\('clue[^']+', '[^']+', '(.*?)'\)$/) {
|
||||
$question = $1;
|
||||
} else {
|
||||
print "Failed to find question.\n";
|
||||
print " mouseover: $mouseover\n";
|
||||
print " mouseout: $mouseout\n";
|
||||
die;
|
||||
}
|
||||
|
||||
print "row: $row, col: $col\n";
|
||||
|
||||
$category = $categories[$col - 1]->innerHTML;
|
||||
|
||||
$category =~ s/\\'/'/g;
|
||||
$question =~ s/\\'/'/g;
|
||||
$answer =~ s/\\'/'/g;
|
||||
|
||||
next if $category =~ m/<a href/;
|
||||
next if $question =~ m/<a href/;
|
||||
next if $answer =~ m/<a href/;
|
||||
|
||||
$category =~ s/<[^>]*>//gs;
|
||||
$question =~ s/<[^>]*>//gs;
|
||||
$answer =~ s/<[^>]*>//gs;
|
||||
|
||||
$category = decode_entities $category;
|
||||
$question = decode_entities $question;
|
||||
$answer = decode_entities $answer;
|
||||
|
||||
$answer =~ s/\(.*?\)//g;
|
||||
|
||||
$category =~ s/^\s+|\s+$//g;
|
||||
$question =~ s/^\s+|\s+$//g;
|
||||
$answer =~ s/^\s+|\s+$//g;
|
||||
|
||||
if ($clue_value =~ m/\$(\d+,?\d+)/) {
|
||||
$clue_value = $1;
|
||||
} elsif ($clue_value =~ m/^(\d+)$/) {
|
||||
$clue_value = $1;
|
||||
}
|
||||
|
||||
if (not $clue_value) {
|
||||
print "Bad clue value.\n";
|
||||
die;
|
||||
}
|
||||
|
||||
print "$id: [$category] $question ($answer) $clue_value\n";
|
||||
|
||||
my @alternates = split / or |\//i, $answer;
|
||||
my $answer = shift @alternates;
|
||||
|
||||
if (@alternates) {
|
||||
print "Has alternates: [$answer] ", join (', ', @alternates), "\n";
|
||||
}
|
||||
|
||||
my $new_question = {
|
||||
alternativeSpellings => \@alternates,
|
||||
suggestions => [],
|
||||
question => $question,
|
||||
id => ++$id,
|
||||
answer => $answer,
|
||||
category => $category,
|
||||
last_seen => 0,
|
||||
value => $clue_value
|
||||
};
|
||||
|
||||
push @{$questions->{questions}}, $new_question;
|
||||
}
|
||||
}
|
||||
last if $id >= $kill_id;
|
||||
}
|
||||
|
||||
my $json = encode_json $questions;
|
||||
open my $fh, '>', 'jeopardy.json';
|
||||
print $fh "$json\n";
|
||||
close $fh;
|
55
misc/spinach/ja_scrape.pl
Executable file
55
misc/spinach/ja_scrape.pl
Executable file
@ -0,0 +1,55 @@
|
||||
#!/usr/bin/perl
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
use LWP::UserAgent::WithCache;
|
||||
|
||||
use Data::Dumper;
|
||||
|
||||
my %cache_opt = (
|
||||
'namespace' => 'ja',
|
||||
'cache_root' => File::Spec->catfile(File::HomeDir->my_home, '.jacache'),
|
||||
'default_expires_in' => 600 * 6 * 24);
|
||||
my $ua = LWP::UserAgent::WithCache->new(\%cache_opt);
|
||||
|
||||
$ua->agent("Mozilla 5.0");
|
||||
$ua->cookie_jar({ file => "$ENV{HOME}/.jacookies" });
|
||||
|
||||
my @seasons = (1 .. 35, 'superjeopardy');
|
||||
|
||||
foreach my $season (@seasons) {
|
||||
print "Downloading season $season ... \n";
|
||||
|
||||
my $response = $ua->get("http://website.com/showseason.php?season=$season");
|
||||
|
||||
if (not $response->is_success) {
|
||||
print Dumper $response;
|
||||
die;
|
||||
}
|
||||
|
||||
my $text = $response->content;
|
||||
|
||||
open my $fh, '>', "season-$season.html";
|
||||
print $fh "$text\n";
|
||||
close $fh;
|
||||
|
||||
while ($text =~ m{http://www.website.com/showgame.php\?game_id=(\d+)}g) {
|
||||
my $gameid = $1;
|
||||
|
||||
print " Downloading game $gameid ...\n";
|
||||
|
||||
$response = $ua->get("http://website.com/showgame.php?game_id=$gameid");
|
||||
|
||||
if (not $response->is_success) {
|
||||
print Dumper $response;
|
||||
die;
|
||||
}
|
||||
|
||||
my $gametext = $response->content;
|
||||
|
||||
open $fh, '>', "game-$gameid.html";
|
||||
print $fh "$gametext\n";
|
||||
close $fh;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user