Plugin/Wordle: add Dutch language

2025-12-23 11:28:12 +01:00 · 2025-12-20 02:42:32 -08:00 · 2025-12-20 02:42:32 -08:00 · 6282008a61
commit 6282008a61
parent 235f6332a8
15 changed files with 341286 additions and 2 deletions
--- a/data/wordle/dutch
+++ b/data/wordle/dutch
--- a/lib/PBot/Plugin/Wordle.pm
+++ b/lib/PBot/Plugin/Wordle.pm
@ -81,6 +81,11 @@ my %wordlists = (
        wlist   => '/wordle/canadian',
        glist   => ['insane', 'british', 'urban'],
    },
+    dutch => {
+        name    => 'Dutch',
+        prompt  => 'Raad het Nederlandse woord!',
+        wlist   => '/wordle/dutch',
+    },
    finnish => {
        name    => 'Finnish',
        prompt  => 'Arvaa suomenkielinen sana!',
--- a/lib/PBot/VERSION.pm
+++ b/lib/PBot/VERSION.pm
@ -25,8 +25,8 @@ use PBot::Imports;
 # These are set by the /misc/update_version script
 use constant {
    BUILD_NAME     => "PBot",
-    BUILD_REVISION => 4929,
-    BUILD_DATE     => "2025-12-19",
+    BUILD_REVISION => 4931,
+    BUILD_DATE     => "2025-12-20",
 };

 sub initialize {}
--- a/misc/quotegrabs/import-quotegrabs-from-html.py
+++ b/misc/quotegrabs/import-quotegrabs-from-html.py
@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+import requests
+import csv
+import datetime
+import time
+import re
+from bs4 import BeautifulSoup
+
+#url = 'https://www.iso-9899.info/candide/quotegrabs.html'
+#response = requests.get(url)
+
+with open('quotegrabs.html', 'r') as file:
+    soup = BeautifulSoup(file, 'html.parser')
+
+channels = soup.find_all('h3')
+
+with open('quotes.csv', 'w', newline='') as csvfile:
+    writer = csv.writer(csvfile)
+
+    for channel in channels:
+        table = channel.find_next_sibling('table')
+        rows = table.find_all('tr')
+
+        for row in rows:
+            print(row)
+            tds = row.find_all('td')
+            if len(tds) != 5: continue
+            id, authors, text, date, grabber = [td.text for td in tds]
+            first_author = authors.split(', ')[0]
+            timestamp = time.mktime(datetime.datetime.strptime(date, '%Y/%m/%d %a %H:%M:%S').timetuple())
+
+            if text[0] == '<':
+                text = re.sub(r'^<[^>]+> ', '', text, count=1)
+            else:
+                text = re.sub(r'^\* ([^\s]+)', '/me', text, count=1)
+
+            messages = []
+            authors = []
+
+            for i, message in enumerate(text.split('   ')):
+                message = message.strip()
+                author = re.match(r'^\* ([^ ]+)', message) or re.match(r'<([^>]+)>', message)
+                print(author, message)
+                if i > 0 and not author: continue
+                author = author.group(1) if i > 0 else first_author
+                authors.append(author)
+
+            print(authors, text)
+            writer.writerow([id, '+'.join(authors), channel.text, grabber, text, timestamp])
--- a/misc/wordle/filter
+++ b/misc/wordle/filter
@ -0,0 +1 @@
+for f in *; do [ -f "$f" ] || continue; perl -ne '$x = $_; chomp $x; next if not $x =~ /^[a-z]+$/; print "$x\n" if length $x >= 3 && length $x <= 22' < "$f" > "filtered/$f"; done  # keep lowercase a-z words of 3-22 characters from every regular file; directories such as filtered/ are skipped
--- a/misc/wordle/filter_dutch
+++ b/misc/wordle/filter_dutch
@ -0,0 +1 @@
+cat dutch | perl -CIO -Mutf8 -ne 'use feature "unicode_strings"; $x = $_; chomp $x; next if not $x =~ /^[a-z]+$/; print "$x\n" if length $x >= 3 && length $x <= 22' > filtered/dutch  # keep only lowercase a-z words of 3-22 characters
--- a/misc/wordle/filter_finnish
+++ b/misc/wordle/filter_finnish
@ -0,0 +1,2 @@
+cat finnish | perl -CIO -Mutf8 -ne 'use feature "unicode_strings"; $x = $_; chomp $x; next if not $x =~ /^[a-zåäöšž]+$/; print "$x\n" if length $x >= 5 && length $x <= 8' > filtered/finnish  # keep only lowercase words (a-z plus å ä ö š ž) of 5-8 characters
+
--- a/misc/wordle/filter_french
+++ b/misc/wordle/filter_french
@ -0,0 +1 @@
+cat french | perl -CIO -Mutf8 -ne 'use feature "unicode_strings"; $x = $_; chomp $x; next if not $x =~ /^[a-zéàèùçâêîôûëïü]+$/; print "$x\n" if length $x >= 3 && length $x <= 22' > filtered/french  # keep only lowercase words (a-z plus the listed accented letters) of 3-22 characters
--- a/misc/wordle/filter_german
+++ b/misc/wordle/filter_german
@ -0,0 +1 @@
+cat german | perl -CIO -Mutf8 -ne 'use feature "unicode_strings"; $x = $_; chomp $x; next if not $x =~ /^[A-Z]?[a-zäÄöÖüÜßẞ]+$/; print "$x\n" if length $x >= 3 && length $x <= 22' > filtered/german  # keep words of 3-22 characters: optional leading A-Z, then a-z/umlauts/ß; case is preserved
--- a/misc/wordle/filter_italian
+++ b/misc/wordle/filter_italian
@ -0,0 +1 @@
+cat italian | perl -CIO -Mutf8 -ne 'use feature "unicode_strings"; $x = $_; chomp $x; next if not $x =~ /^[a-zàèéìòù]+$/; print "$x\n" if length $x >= 3 && length $x <= 22' > filtered/italian  # keep only lowercase words (a-z plus à è é ì ò ù) of 3-22 characters
--- a/misc/wordle/filter_ngerman
+++ b/misc/wordle/filter_ngerman
@ -0,0 +1 @@
+cat ngerman | perl -CIO -Mutf8 -ne 'use feature "unicode_strings"; $x = $_; chomp $x; next if not $x =~ /^[A-Z]?[a-zäÄöÖüÜßẞ]+$/; print "$x\n" if length $x >= 3 && length $x <= 22' > filtered/german  # same filter as filter_german; NOTE(review): also writes filtered/german, so whichever script runs last wins — confirm this is intended
--- a/misc/wordle/filter_polish
+++ b/misc/wordle/filter_polish
@ -0,0 +1,2 @@
+cat polish | perl -CIO -Mutf8 -ne 'use feature "unicode_strings"; $x = $_; chomp $x; next if not $x =~ /^[a-zćńóśźżąęł]+$/; print "$x\n" if length $x >= 3 && length $x <= 8' > filtered/polish  # keep only lowercase words (a-z plus ć ń ó ś ź ż ą ę ł) of 3-8 characters
+
--- a/misc/wordle/filter_spanish
+++ b/misc/wordle/filter_spanish
@ -0,0 +1 @@
+cat spanish | perl -CIO -Mutf8 -ne 'use feature "unicode_strings"; $x = $_; chomp $x; next if not $x =~ /^[a-záéíóúüñ]+$/; print "$x\n" if length $x >= 3 && length $x <= 22' > filtered/spanish  # keep only lowercase words (a-z plus á é í ó ú ü ñ) of 3-22 characters
--- a/misc/wordle/filter_udict
+++ b/misc/wordle/filter_udict
@ -0,0 +1 @@
+cat udict_full | perl -CIO -Mutf8 -ne 'use feature "unicode_strings"; $x = $_; chomp $x; next if not $x =~ /^[A-Z]?[a-z]+$/; print "$x\n" if length $x >= 3 && length $x <= 22' > filtered/urban  # keep words of 3-22 characters: optional leading A-Z, then a-z; NOTE(review): writes filtered/urban, the same file filter_urban produces — confirm this is intended
--- a/misc/wordle/filter_urban
+++ b/misc/wordle/filter_urban
@ -0,0 +1 @@
+cat urban | perl -CIO -Mutf8 -ne 'use feature "unicode_strings"; $x = $_; chomp $x; next if not $x =~ /^[a-z]+$/; next if $x =~ /(\w)\1{2,}/; print "$x\n" if length $x >= 3 && length $x <= 22' > filtered/urban  # keep lowercase a-z words of 3-22 characters, dropping any with the same character three or more times in a row
				`@ -0,0 +1 @@`
				`for f in *; do cat $f \| perl -ne '$x = $_; chomp $x; next if not $x =~ /^[a-z]+$/; print "$x\n" if length $x >= 3 && length $x <= 22' > filtered/$f; done`
				`@ -0,0 +1 @@`
				`cat dutch \| perl -CIO -Mutf8 -ne 'use feature "unicode_strings"; $x = $_; chomp $x; next if not $x =~ /^[a-z]+$/; print "$x\n" if length $x >= 3 && length $x <= 22' > filtered/dutch`
				`@ -0,0 +1,2 @@`
				`cat finnish \| perl -CIO -Mutf8 -ne 'use feature "unicode_strings"; $x = $_; chomp $x; next if not $x =~ /^[a-zåäöšž]+$/; print "$x\n" if length $x >= 5 && length $x <= 8' > filtered/finnish`
				`@ -0,0 +1 @@`
				`cat french \| perl -CIO -Mutf8 -ne 'use feature "unicode_strings"; $x = $_; chomp $x; next if not $x =~ /^[a-zéàèùçâêîôûëïü]+$/; print "$x\n" if length $x >= 3 && length $x <= 22' > filtered/french`
				`@ -0,0 +1 @@`
				`cat german \| perl -CIO -Mutf8 -ne 'use feature "unicode_strings"; $x = $_; chomp $x; next if not $x =~ /^[A-Z]?[a-zäÄöÖüÜßẞ]+$/; print "$x\n" if length $x >= 3 && length $x <= 22' > filtered/german`
				`@ -0,0 +1 @@`
				`cat italian \| perl -CIO -Mutf8 -ne 'use feature "unicode_strings"; $x = $_; chomp $x; next if not $x =~ /^[a-zàèéìòù]+$/; print "$x\n" if length $x >= 3 && length $x <= 22' > filtered/italian`
				`@ -0,0 +1 @@`
				`cat ngerman \| perl -CIO -Mutf8 -ne 'use feature "unicode_strings"; $x = $_; chomp $x; next if not $x =~ /^[A-Z]?[a-zäÄöÖüÜßẞ]+$/; print "$x\n" if length $x >= 3 && length $x <= 22' > filtered/german`
				`@ -0,0 +1,2 @@`
				`cat polish \| perl -CIO -Mutf8 -ne 'use feature "unicode_strings"; $x = $_; chomp $x; next if not $x =~ /^[a-zćńóśźżąęł]+$/; print "$x\n" if length $x >= 3 && length $x <= 8' > filtered/polish`
				`@ -0,0 +1 @@`
				`cat spanish \| perl -CIO -Mutf8 -ne 'use feature "unicode_strings"; $x = $_; chomp $x; next if not $x =~ /^[a-záéíóúüñ]+$/; print "$x\n" if length $x >= 3 && length $x <= 22' > filtered/spanish`
				`@ -0,0 +1 @@`
				`cat udict_full \| perl -CIO -Mutf8 -ne 'use feature "unicode_strings"; $x = $_; chomp $x; next if not $x =~ /^[A-Z]?[a-z]+$/; print "$x\n" if length $x >= 3 && length $x <= 22' > filtered/urban`