3
0
mirror of https://github.com/pragma-/pbot.git synced 2024-12-23 19:22:40 +01:00

Add Wiktionary module

This commit is contained in:
Pragmatic Software 2021-07-10 02:33:39 -07:00
parent 7da0ec1ed3
commit 92a2be7406
2 changed files with 215 additions and 0 deletions

192
modules/wiktionary.pl vendored Executable file
View File

@ -0,0 +1,192 @@
#!/usr/bin/env perl
# File: wiktionary.pl
#
# Purpose: Queries Wiktionary website.
#
# This is a rough first draft. There are more parts of the definitions
# to process. Might reconsider --all and --unique.
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
use warnings;
use strict;
use Cache::FileCache;
use Getopt::Long qw/GetOptionsFromArray/;
use JSON;
binmode(STDOUT, ":utf8");
my $usage = "Usage: wiktionary <term> [-e] [-p] [-l --lang <language>] [-s --section <section>] [-n --num <number>] [-a --all] [-u --unique]\n";
my ($term, $lang, $section, $num, $all, $unique, $opt_e, $opt_p);
{
my $opt_error;
local $SIG{__WARN__} = sub {
$opt_error = shift;
chomp $opt_error;
};
Getopt::Long::Configure("bundling_override");
GetOptionsFromArray(
\@ARGV,
'lang|l=s' => \$lang,
'section|s=s' => \$section,
'num|n=i' => \$num,
'all|a' => \$all,
'unique|u' => \$unique,
'p' => \$opt_p,
'e' => \$opt_e,
);
if ($opt_error) {
print "$opt_error: $usage\n";
exit 1;
}
$term = "@ARGV";
if (not length $term) {
print $usage;
exit 1;
}
}
$lang //= 'English';
$section //= 'definitions';
if ($opt_p and $opt_e) {
print "Options -e and -p cannot be used together.\n";
exit 1;
}
if ($opt_p) {
$unique = 1 unless defined $num;
$section = 'pronunciations';
}
if ($opt_e) {
$unique = 1 unless defined $num;
$section = 'etymology';
}
$num //= 1;
my $cache = Cache::FileCache->new({ namespace => 'wiktionary', default_expires_in => '1 week' });
my $cache_id = "$term $lang";
my $entries = $cache->get($cache_id);
if (not defined $entries) {
my $json = `python3 wiktionary.py \Q$term\E \Q$lang\E`;
$entries = decode_json $json;
$cache->set($cache_id, $entries);
}
my @valid_sections = qw/definitions etymology pronunciations/;
if (not grep { $_ eq $section } @valid_sections) {
print "Unknown section `$section`. Available sections are: " . join(', ', sort @valid_sections) . "\n";
exit 1;
}
my $entry_count = @$entries;
my $entries_text = $entry_count == 1 ? 'entry' : 'entries';
if ($num > $entry_count) {
print "No such entry $num. There are $entry_count $entries_text for `$term`.\n";
exit 1;
}
print "$entry_count $entries_text for $term:\n\n";
my $start = 0;
if ($num <= 0 or $all or $unique) {
$num = $entry_count;
} else {
$start = $num - 1;
}
my @results;
for (my $i = $start; $i < $num; $i++) {
my $entry = $entries->[$i];
if ($section eq 'etymology') {
my $ety = $entry->{etymology};
chomp $ety;
if (not length $ety or $ety =~ /This etymology is missing or incomplete./) {
$ety = 'N/A';
}
push @results, $ety;
}
elsif ($section eq 'pronunciations') {
if (exists $entry->{pronunciations}
and exists $entry->{pronunciations}->{text})
{
my $text = join '; ', @{$entry->{pronunciations}->{text}};
$text = 'N/A' if not length $text;
push @results, $text;
} else {
push @results, 'N/A';
}
}
elsif ($section eq 'definitions') {
my $text;
foreach my $definition (@{$entry->{definitions}}) {
$text .= "$definition->{partOfSpeech}) ";
$text .= join("\n\n", @{$definition->{text}}) . "\n\n";
if (@{$definition->{examples}}) {
$text .= "examples:\n\n";
$text .= join("\n\n", @{$definition->{examples}}) . "\n\n";
}
}
push @results, $text;
}
}
if ($unique) {
my %uniq;
my $i = 0;
foreach my $result (@results) {
$i++;
next if not $result or $result eq 'N/A';
$uniq{$result} = $i;
}
if (not keys %uniq) {
print "No $section available for $term.\n";
exit;
}
foreach my $key (sort { $uniq{$a} <=> $uniq{$b} } keys %uniq) {
print "$uniq{$key}) $key\n\n";
}
exit;
}
my $i = @results == 1 ? $num - 1 : 0;
foreach my $result (@results) {
$i++;
next if not $result;
print "$i) $result\n\n";
}

23
modules/wiktionary.py vendored Normal file
View File

@ -0,0 +1,23 @@
# File: wiktionary.py
#
# Purpose: Parses Wiktionary pages into JSON and dumps it to STDOUT.
#
# This was written for wiktionary.pl since Wiktionary::Parser in CPAN
# seems to be broken and abandoned.
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from wiktionaryparser import WiktionaryParser
import sys
import json
word=sys.argv[1]
language=sys.argv[2]
parser = WiktionaryParser()
entries = parser.fetch(word, language=language)
print(json.dumps(entries))