From 27c513135cb38eb1c7ac104f6c3470707edb1f76 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Mon, 8 Nov 2021 23:22:17 +0100 Subject: [PATCH 01/12] Geography: Early draft, with buggy implementation of timezone_from_qid. --- plugins/Geography/README.md | 1 + plugins/Geography/__init__.py | 71 ++++++++++++++++ plugins/Geography/config.py | 51 +++++++++++ plugins/Geography/local/__init__.py | 1 + plugins/Geography/plugin.py | 46 ++++++++++ plugins/Geography/test.py | 86 +++++++++++++++++++ plugins/Geography/wikidata.py | 127 ++++++++++++++++++++++++++++ 7 files changed, 383 insertions(+) create mode 100644 plugins/Geography/README.md create mode 100644 plugins/Geography/__init__.py create mode 100644 plugins/Geography/config.py create mode 100644 plugins/Geography/local/__init__.py create mode 100644 plugins/Geography/plugin.py create mode 100644 plugins/Geography/test.py create mode 100644 plugins/Geography/wikidata.py diff --git a/plugins/Geography/README.md b/plugins/Geography/README.md new file mode 100644 index 000000000..15d14074e --- /dev/null +++ b/plugins/Geography/README.md @@ -0,0 +1 @@ +Provides geography facts, such as timezones. diff --git a/plugins/Geography/__init__.py b/plugins/Geography/__init__.py new file mode 100644 index 000000000..bbbcabf43 --- /dev/null +++ b/plugins/Geography/__init__.py @@ -0,0 +1,71 @@ +### +# Copyright (c) 2021, Valentin Lorentz +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +### + +""" +Geography: Provides geography facts, such as timezones. +""" + +import sys +import supybot +from supybot import world + +# Use this for the version of this plugin. +__version__ = "" + +# XXX Replace this with an appropriate author or supybot.Author instance. +__author__ = supybot.authors.unknown + +# This is a dictionary mapping supybot.Author instances to lists of +# contributions. +__contributors__ = {} + +# This is a url where the most recent plugin package can be downloaded. +__url__ = '' + +from . import config +from . import plugin +if sys.version_info >= (3, 4): + from importlib import reload +else: + from imp import reload +# In case we're being reloaded. +reload(config) +reload(plugin) +# Add more reloads here if you add third-party modules and want them to be +# reloaded when this plugin is reloaded. Don't forget to import them as well! + +if world.testing: + from . 
import test + +Class = plugin.Class +configure = config.configure + + +# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: diff --git a/plugins/Geography/config.py b/plugins/Geography/config.py new file mode 100644 index 000000000..c71ed0973 --- /dev/null +++ b/plugins/Geography/config.py @@ -0,0 +1,51 @@ +### +# Copyright (c) 2021, Valentin Lorentz +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+ +### + +from supybot import conf, registry +from supybot.i18n import PluginInternationalization +_ = PluginInternationalization('Geography') + + +def configure(advanced): + # This will be called by supybot to configure this module. advanced is + # a bool that specifies whether the user identified themself as an advanced + # user or not. You should effect your configuration by manipulating the + # registry as appropriate. + from supybot.questions import expect, anything, something, yn + conf.registerPlugin('Geography', True) + + +Geography = conf.registerPlugin('Geography') +# This is where your configuration variables (if any) should go. For example: +# conf.registerGlobalValue(Geography, 'someConfigVariableName', +# registry.Boolean(False, _("""Help for someConfigVariableName."""))) + + +# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: diff --git a/plugins/Geography/local/__init__.py b/plugins/Geography/local/__init__.py new file mode 100644 index 000000000..e86e97b86 --- /dev/null +++ b/plugins/Geography/local/__init__.py @@ -0,0 +1 @@ +# Stub so local is a module, used for third-party modules diff --git a/plugins/Geography/plugin.py b/plugins/Geography/plugin.py new file mode 100644 index 000000000..fa395a002 --- /dev/null +++ b/plugins/Geography/plugin.py @@ -0,0 +1,46 @@ +### +# Copyright (c) 2021, Valentin Lorentz +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +### + +from supybot import utils, plugins, ircutils, callbacks +from supybot.commands import * +from supybot.i18n import PluginInternationalization + +_ = PluginInternationalization('Geography') + + +class Geography(callbacks.Plugin): + """Provides geography facts, such as timezones.""" + threaded = True + + +Class = Geography + + +# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: diff --git a/plugins/Geography/test.py b/plugins/Geography/test.py new file mode 100644 index 000000000..09f05cae1 --- /dev/null +++ b/plugins/Geography/test.py @@ -0,0 +1,86 @@ +### +# Copyright (c) 2021, Valentin Lorentz +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +### + +import datetime +from unittest import skipIf + +from supybot.test import * +from supybot import utils + +from . 
import wikidata + +class GeographyTestCase(PluginTestCase): + plugins = ('Geography',) + + +class GeographyWikidataTestCase(SupyTestCase): + @skipIf(not network, "Network test") + def testDirect(self): + """The queried object directly has a timezone property""" + self.assertEqual( + wikidata.timezone_from_qid("Q1384"), # New York + utils.time.iana_timezone("America/New_York") + ) + + @skipIf(not network, "Network test") + def testParent(self): + """The queried object does not have a TZ property + but it is part of an object that does""" + self.assertEqual( + wikidata.timezone_from_qid("Q22690"), # Metz, France + utils.time.iana_timezone("Europe/Paris") + ) + + @skipIf(not network, "Network test") + def testParentAndIgnoreSelf(self): + """The queried object has a TZ property, but it's useless to us; + however it is part of an object that has a useful one.""" + self.assertEqual( + wikidata.timezone_from_qid("Q60"), # New York City, NY + utils.time.iana_timezone("America/New_York") + ) + + @skipIf(not network, "Network test") + def testParentQualifiedIgnorePreferred(self): + """The queried object does not have a TZ property, + and is part of an object that does. + However, this parent's 'preferred' timezone is not the + right one, so we must make sure to select the right one + based on P518 ('applies to part').""" + # La Réunion is a French region, but in UTC+4. + # France has a bunch of timezone statements, and 'Europe/Paris' + # is marked as Preferred because it is the time of metropolitan + # France. However, it is not valid for La Réunion. 
+ self.assertEqual( + wikidata.timezone_from_qid("Q17070"), # La Réunion + datetime.timezone(datetime.timedelta(hours=4)) + ) + +# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: diff --git a/plugins/Geography/wikidata.py b/plugins/Geography/wikidata.py new file mode 100644 index 000000000..cd5f28497 --- /dev/null +++ b/plugins/Geography/wikidata.py @@ -0,0 +1,127 @@ +### +# Copyright (c) 2021, Valentin Lorentz +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+ +### + +import json +import string +import datetime +import urllib.parse + +import supybot.utils as utils + +SPARQL_URL = "https://query.wikidata.org/sparql" + +TIMEZONE_QUERY = string.Template(""" +SELECT ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid (MIN(?area) AS ?min_area) WHERE { + + # find all entities that the subject is part of, recursively + wd:$subject (wdt:P131*) ?item. + + # Get all timezones (returns a superset of "?item wdt:P421 ?timezone", as it does not filter on rank) + ?item p:P421 ?statement. + ?statement ps:P421 ?timezone. + + ?statement wikibase:rank ?rank. + + # fetch the end of validity of the given statement (TODO: check it) + OPTIONAL { ?statement pq:P582 ?endtime. } + + # filter out statements that applies only to a part of ?item... + { + FILTER NOT EXISTS { + ?statement pq:P518 ?appliestopart. + } + } + UNION { + # ... unless it applies to a part that contains what we are interested in + ?statement pq:P518 ?appliestopart. + wd:$subject (wdt:P131*) ?appliestopart. + } + + # Filter out values only valid in certain periods of the year (DST vs normal time) + FILTER NOT EXISTS { + ?statement pq:P1264 ?validinperiod. + } + + # store the identifier of the object the statement applies to + BIND(IF(BOUND(?appliestopart),?appliestopart,?item) AS ?statementsubject). + + #OPTIONAL { ?statementsubject wdt:P1082 ?population. } + OPTIONAL { ?statementsubject wdt:P2046 ?area. } + + # ?timezone wdt:P31 ?timezonetype. + # ?timezone wdt:P31 wd:Q17272692. # only keep IANA timezones + # ?timezone wdt:P31 wd:Q17272482. # only keep UTC offsets + + { + # Get either an IANA timezone ID... + ?timezone wdt:P6687 ?tzid. + } + UNION + { + # ... or an absolute UTC offset that is not subject to DST + ?timezone p:P2907 ?utcoffset_statement. + ?utcoffset_statement ps:P2907 ?utcoffset. + + FILTER NOT EXISTS { + ?utcoffset_statement wdt:P1264 ?utcoffset_validinperiod. + } + } + + SERVICE wikibase:label { bd:serviceParam wikibase:language "en". 
} +} + +# Deduplicate in case there is more than one ?area statement +GROUP BY ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid + +# Get the smallest entities first. As they are more specific, +# they are more likely to be correct. +ORDER BY ?min_area DESC(?tzid) + +LIMIT 1 +""") + +def _query_sparql(query): + params = {"format": "json", "query": query} + url = SPARQL_URL + "?" + urllib.parse.urlencode(params) + content = utils.web.getUrlContent(url) + return json.loads(content) + +def timezone_from_qid(location_qid): + """Returns a :class:datetime.tzinfo object, given a Wikidata Q-ID. + eg. ``"Q60"`` for New York City.""" + data = _query_sparql(TIMEZONE_QUERY.substitute(subject=location_qid)) + results = data["results"]["bindings"] + for result in results: + if "tzid" in result: + return utils.time.iana_timezone(result["tzid"]["value"]) + else: + assert "utcoffset" in result + utc_offset = float(result["utcoffset"]["value"]) + return datetime.timezone(datetime.timedelta(hours=utc_offset)) From 0a6f5982950b37f2f8308a451463d93cf0187682 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Mon, 8 Nov 2021 23:41:26 +0100 Subject: [PATCH 02/12] Geography: Set a better user-agent to access Wikidata --- plugins/Geography/wikidata.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/plugins/Geography/wikidata.py b/plugins/Geography/wikidata.py index cd5f28497..959d2c970 100644 --- a/plugins/Geography/wikidata.py +++ b/plugins/Geography/wikidata.py @@ -110,7 +110,12 @@ LIMIT 1 def _query_sparql(query): params = {"format": "json", "query": query} url = SPARQL_URL + "?" 
+ urllib.parse.urlencode(params) - content = utils.web.getUrlContent(url) + + # Comply with https://meta.wikimedia.org/wiki/User-Agent_policy + headers = utils.web.defaultHeaders.copy() + headers["User-agent"] += " https://github.com/progval/Limnoria/ - Geography plugin" + + content = utils.web.getUrlContent(url, headers=headers) return json.loads(content) def timezone_from_qid(location_qid): From 3b7379ec0f954843b7b4bad84cc20cd9531825f0 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Mon, 8 Nov 2021 23:42:55 +0100 Subject: [PATCH 03/12] Geography: Fix typo, preventing exclusion of non-constant offset statements on timezones eg. it did not exclude the offsets from Q941023 (Eastern Time Zone) --- plugins/Geography/wikidata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/Geography/wikidata.py b/plugins/Geography/wikidata.py index 959d2c970..cc924a90c 100644 --- a/plugins/Geography/wikidata.py +++ b/plugins/Geography/wikidata.py @@ -90,7 +90,7 @@ SELECT ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid (MIN(?are ?utcoffset_statement ps:P2907 ?utcoffset. FILTER NOT EXISTS { - ?utcoffset_statement wdt:P1264 ?utcoffset_validinperiod. + ?utcoffset_statement pq:P1264 ?utcoffset_validinperiod. } } From 849acb21f4dbba2b5edeb4169628b52fad6e38ca Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Mon, 8 Nov 2021 23:46:51 +0100 Subject: [PATCH 04/12] Geography: Clean up the SPARQL statement. 
--- plugins/Geography/wikidata.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/plugins/Geography/wikidata.py b/plugins/Geography/wikidata.py index cc924a90c..5eefa075f 100644 --- a/plugins/Geography/wikidata.py +++ b/plugins/Geography/wikidata.py @@ -40,20 +40,21 @@ SPARQL_URL = "https://query.wikidata.org/sparql" TIMEZONE_QUERY = string.Template(""" SELECT ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid (MIN(?area) AS ?min_area) WHERE { - # find all entities that the subject is part of, recursively + # find all ?item entities that the subject is part of, recursively; wd:$subject (wdt:P131*) ?item. # Get all timezones (returns a superset of "?item wdt:P421 ?timezone", as it does not filter on rank) ?item p:P421 ?statement. ?statement ps:P421 ?timezone. + # TODO: order the final result based on the rank? ?statement wikibase:rank ?rank. # fetch the end of validity of the given statement (TODO: check it) OPTIONAL { ?statement pq:P582 ?endtime. } - # filter out statements that applies only to a part of ?item... { + # filter out statements that apply only to a part of ?item... FILTER NOT EXISTS { ?statement pq:P518 ?appliestopart. } @@ -72,23 +73,20 @@ SELECT ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid (MIN(?are # store the identifier of the object the statement applies to BIND(IF(BOUND(?appliestopart),?appliestopart,?item) AS ?statementsubject). - #OPTIONAL { ?statementsubject wdt:P1082 ?population. } + # Get the area, will be used to order by specificity OPTIONAL { ?statementsubject wdt:P2046 ?area. } - # ?timezone wdt:P31 ?timezonetype. - # ?timezone wdt:P31 wd:Q17272692. # only keep IANA timezones - # ?timezone wdt:P31 wd:Q17272482. # only keep UTC offsets - { # Get either an IANA timezone ID... ?timezone wdt:P6687 ?tzid. } UNION { - # ... or an absolute UTC offset that is not subject to DST + # ... or an absolute UTC offset ?timezone p:P2907 ?utcoffset_statement. 
?utcoffset_statement ps:P2907 ?utcoffset. + # unless it is only valid in certain periods of the year (DST vs normal time) FILTER NOT EXISTS { ?utcoffset_statement pq:P1264 ?utcoffset_validinperiod. } From 1a72a7d6d06ceee954080123bd7814f46006d56f Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Tue, 9 Nov 2021 20:40:35 +0100 Subject: [PATCH 05/12] Geography: run Black --- plugins/Geography/__init__.py | 3 ++- plugins/Geography/config.py | 8 +++++--- plugins/Geography/plugin.py | 3 ++- plugins/Geography/test.py | 2 +- plugins/Geography/wikidata.py | 3 ++- pyproject.toml | 2 +- 6 files changed, 13 insertions(+), 8 deletions(-) diff --git a/plugins/Geography/__init__.py b/plugins/Geography/__init__.py index bbbcabf43..559ff38d9 100644 --- a/plugins/Geography/__init__.py +++ b/plugins/Geography/__init__.py @@ -47,10 +47,11 @@ __author__ = supybot.authors.unknown __contributors__ = {} # This is a url where the most recent plugin package can be downloaded. -__url__ = '' +__url__ = "" from . import config from . import plugin + if sys.version_info >= (3, 4): from importlib import reload else: diff --git a/plugins/Geography/config.py b/plugins/Geography/config.py index c71ed0973..396b7f9c0 100644 --- a/plugins/Geography/config.py +++ b/plugins/Geography/config.py @@ -30,7 +30,8 @@ from supybot import conf, registry from supybot.i18n import PluginInternationalization -_ = PluginInternationalization('Geography') + +_ = PluginInternationalization("Geography") def configure(advanced): @@ -39,10 +40,11 @@ def configure(advanced): # user or not. You should effect your configuration by manipulating the # registry as appropriate. from supybot.questions import expect, anything, something, yn - conf.registerPlugin('Geography', True) + + conf.registerPlugin("Geography", True) -Geography = conf.registerPlugin('Geography') +Geography = conf.registerPlugin("Geography") # This is where your configuration variables (if any) should go. 
For example: # conf.registerGlobalValue(Geography, 'someConfigVariableName', # registry.Boolean(False, _("""Help for someConfigVariableName."""))) diff --git a/plugins/Geography/plugin.py b/plugins/Geography/plugin.py index fa395a002..59cc2d3d3 100644 --- a/plugins/Geography/plugin.py +++ b/plugins/Geography/plugin.py @@ -32,11 +32,12 @@ from supybot import utils, plugins, ircutils, callbacks from supybot.commands import * from supybot.i18n import PluginInternationalization -_ = PluginInternationalization('Geography') +_ = PluginInternationalization("Geography") class Geography(callbacks.Plugin): """Provides geography facts, such as timezones.""" + threaded = True diff --git a/plugins/Geography/test.py b/plugins/Geography/test.py index 09f05cae1..3304ab820 100644 --- a/plugins/Geography/test.py +++ b/plugins/Geography/test.py @@ -37,7 +37,7 @@ from supybot import utils from . import wikidata class GeographyTestCase(PluginTestCase): - plugins = ('Geography',) + plugins = ("Geography",) class GeographyWikidataTestCase(SupyTestCase): diff --git a/plugins/Geography/wikidata.py b/plugins/Geography/wikidata.py index 5eefa075f..68978c10f 100644 --- a/plugins/Geography/wikidata.py +++ b/plugins/Geography/wikidata.py @@ -37,7 +37,8 @@ import supybot.utils as utils SPARQL_URL = "https://query.wikidata.org/sparql" -TIMEZONE_QUERY = string.Template(""" +TIMEZONE_QUERY = string.Template( + """ SELECT ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid (MIN(?area) AS ?min_area) WHERE { # find all ?item entities that the subject is part of, recursively; diff --git a/pyproject.toml b/pyproject.toml index 99248acc8..27271b59c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,4 +5,4 @@ build-backend = "setuptools.build_meta" [tool.black] line-length = 79 -include = 'plugins/(Autocomplete|Fediverse|Poll)/.*\.pyi?$' +include = 'plugins/(Autocomplete|Fediverse|Geography|Poll)/.*\.pyi?$' From 35c1407779f75c833aa4066d2953b60aee06152c Mon Sep 17 00:00:00 2001 From: 
Valentin Lorentz Date: Tue, 9 Nov 2021 20:41:38 +0100 Subject: [PATCH 06/12] Geography: Add support for querying Wikidata from an OSM ID. --- plugins/Geography/test.py | 45 +++++++++++++++++++++++++---------- plugins/Geography/wikidata.py | 32 +++++++++++++++++++++---- 2 files changed, 60 insertions(+), 17 deletions(-) diff --git a/plugins/Geography/test.py b/plugins/Geography/test.py index 3304ab820..563629ae7 100644 --- a/plugins/Geography/test.py +++ b/plugins/Geography/test.py @@ -41,31 +41,47 @@ class GeographyTestCase(PluginTestCase): class GeographyWikidataTestCase(SupyTestCase): + @skipIf(not network, "Network test") + def testOsmidToTimezone(self): + self.assertEqual( + wikidata.uri_from_osmid(450381), + "http://www.wikidata.org/entity/Q22690", + ) + self.assertEqual( + wikidata.uri_from_osmid(192468), + "http://www.wikidata.org/entity/Q47045", + ) + @skipIf(not network, "Network test") def testDirect(self): """The queried object directly has a timezone property""" self.assertEqual( - wikidata.timezone_from_qid("Q1384"), # New York - utils.time.iana_timezone("America/New_York") - ) + # New York + wikidata.timezone_from_uri("http://www.wikidata.org/entity/Q1384"), + utils.time.iana_timezone("America/New_York"), + ) @skipIf(not network, "Network test") def testParent(self): """The queried object does not have a TZ property but it is part of an object that does""" self.assertEqual( - wikidata.timezone_from_qid("Q22690"), # Metz, France - utils.time.iana_timezone("Europe/Paris") - ) + # Metz, France + wikidata.timezone_from_uri( + "http://www.wikidata.org/entity/Q22690" + ), + utils.time.iana_timezone("Europe/Paris"), + ) @skipIf(not network, "Network test") def testParentAndIgnoreSelf(self): """The queried object has a TZ property, but it's useless to us; however it is part of an object that has a useful one.""" self.assertEqual( - wikidata.timezone_from_qid("Q60"), # New York City, NY - utils.time.iana_timezone("America/New_York") - ) + # New York City, NY + 
wikidata.timezone_from_uri("http://www.wikidata.org/entity/Q60"), + utils.time.iana_timezone("America/New_York"), + ) @skipIf(not network, "Network test") def testParentQualifiedIgnorePreferred(self): @@ -79,8 +95,13 @@ class GeographyWikidataTestCase(SupyTestCase): # is marked as Preferred because it is the time of metropolitan # France. However, it is not valid for La Réunion. self.assertEqual( - wikidata.timezone_from_qid("Q17070"), # La Réunion - datetime.timezone(datetime.timedelta(hours=4)) - ) + # La Réunion + wikidata.timezone_from_uri( + "http://www.wikidata.org/entity/Q17070" + ), + datetime.timezone(datetime.timedelta(hours=4)), + ) + + # vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: diff --git a/plugins/Geography/wikidata.py b/plugins/Geography/wikidata.py index 68978c10f..27a06eac6 100644 --- a/plugins/Geography/wikidata.py +++ b/plugins/Geography/wikidata.py @@ -42,7 +42,7 @@ TIMEZONE_QUERY = string.Template( SELECT ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid (MIN(?area) AS ?min_area) WHERE { # find all ?item entities that the subject is part of, recursively; - wd:$subject (wdt:P131*) ?item. + <$subject> (wdt:P131*) ?item. # Get all timezones (returns a superset of "?item wdt:P421 ?timezone", as it does not filter on rank) ?item p:P421 ?statement. @@ -63,7 +63,7 @@ SELECT ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid (MIN(?are UNION { # ... unless it applies to a part that contains what we are interested in ?statement pq:P518 ?appliestopart. - wd:$subject (wdt:P131*) ?appliestopart. + <$subject> (wdt:P131*) ?appliestopart. } # Filter out values only valid in certain periods of the year (DST vs normal time) @@ -104,7 +104,18 @@ GROUP BY ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid ORDER BY ?min_area DESC(?tzid) LIMIT 1 -""") +""" +) + +OSMID_QUERY = string.Template( + """ +SELECT ?item WHERE { + ?item wdt:P402 "$osmid". 
+} +LIMIT 1 +""" +) + def _query_sparql(query): params = {"format": "json", "query": query} @@ -117,10 +128,11 @@ def _query_sparql(query): content = utils.web.getUrlContent(url, headers=headers) return json.loads(content) -def timezone_from_qid(location_qid): + +def timezone_from_uri(location_uri): """Returns a :class:datetime.tzinfo object, given a Wikidata Q-ID. eg. ``"Q60"`` for New York City.""" - data = _query_sparql(TIMEZONE_QUERY.substitute(subject=location_qid)) + data = _query_sparql(TIMEZONE_QUERY.substitute(subject=location_uri)) results = data["results"]["bindings"] for result in results: if "tzid" in result: @@ -129,3 +141,13 @@ def timezone_from_qid(location_qid): assert "utcoffset" in result utc_offset = float(result["utcoffset"]["value"]) return datetime.timezone(datetime.timedelta(hours=utc_offset)) + + return None + + +def uri_from_osmid(location_osmid): + """Returns the wikidata Q-id from an OpenStreetMap ID.""" + data = _query_sparql(OSMID_QUERY.substitute(osmid=location_osmid)) + results = data["results"]["bindings"] + for result in results: + return result["item"]["value"] From 162e974aba9b6659f77a4d2201a59dcb84f7ac9e Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Tue, 9 Nov 2021 20:42:42 +0100 Subject: [PATCH 07/12] Geography: Implement search for OSM IDs using Nominatim. --- plugins/Geography/common.py | 43 ++++++++++++++++++ plugins/Geography/nominatim.py | 80 ++++++++++++++++++++++++++++++++++ plugins/Geography/test.py | 11 +++++ plugins/Geography/wikidata.py | 8 ++-- 4 files changed, 137 insertions(+), 5 deletions(-) create mode 100644 plugins/Geography/common.py create mode 100644 plugins/Geography/nominatim.py diff --git a/plugins/Geography/common.py b/plugins/Geography/common.py new file mode 100644 index 000000000..97f413013 --- /dev/null +++ b/plugins/Geography/common.py @@ -0,0 +1,43 @@ +### +# Copyright (c) 2021, Valentin Lorentz +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+ +### + +import supybot.utils as utils + + +def headers(): + headers = utils.web.defaultHeaders.copy() + + # Comply with https://meta.wikimedia.org/wiki/User-Agent_policy + # and https://operations.osmfoundation.org/policies/nominatim/ + headers[ + "User-agent" + ] += " https://github.com/progval/Limnoria/ - Geography plugin" + + return headers diff --git a/plugins/Geography/nominatim.py b/plugins/Geography/nominatim.py new file mode 100644 index 000000000..b919e87d1 --- /dev/null +++ b/plugins/Geography/nominatim.py @@ -0,0 +1,80 @@ +### +# Copyright (c) 2021, Valentin Lorentz +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +### + + +import json +import time +import string +import datetime +import threading +import urllib.parse + +import supybot.utils as utils +from .common import headers + +NOMINATIM_BASE_URL = "https://nominatim.openstreetmap.org" + +_QUERY_LOCK = threading.Lock() +_LAST_QUERY_TIME = 0 + + +def _wait_before_query(): + """Should be called before any API access. Blocks the current thread + in order to follow the rate limit: + https://operations.osmfoundation.org/policies/nominatim/""" + + global _LAST_QUERY_TIME + + min_time_between_queries = 1.0 + + with _QUERY_LOCK: + time_since_last_query = time.time() - _LAST_QUERY_TIME + if time_since_last_query < min_time_between_queries: + time.sleep(min_time_between_queries - time_since_last_query) + _LAST_QUERY_TIME = time.time() + + +def _query_nominatim(path, params): + url = NOMINATIM_BASE_URL + path + "?" 
+ urllib.parse.urlencode(params) + + _wait_before_query() + + content = utils.web.getUrlContent(url, headers=headers()) + return json.loads(content) + + +def search_osmids(query): + """Queries nominatim's search endpoint and returns a list of OSM ids + + See https://nominatim.org/release-docs/develop/api/Search/ for details + on the query format""" + data = _query_nominatim("/search", {"format": "json", "q": query}) + + return [item["osm_id"] for item in data] diff --git a/plugins/Geography/test.py b/plugins/Geography/test.py index 563629ae7..e16e54901 100644 --- a/plugins/Geography/test.py +++ b/plugins/Geography/test.py @@ -35,6 +35,8 @@ from supybot.test import * from supybot import utils from . import wikidata +from . import nominatim + class GeographyTestCase(PluginTestCase): plugins = ("Geography",) @@ -103,5 +105,14 @@ class GeographyWikidataTestCase(SupyTestCase): ) +class GeographyNominatimTestCase(SupyTestCase): + @skipIf(not network, "Network test") + def testSearch(self): + self.assertIn(450381, nominatim.search_osmids("Metz")) + + results = nominatim.search_osmids("Metz, France") + self.assertEqual(results[0], 450381, results) + + # vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: diff --git a/plugins/Geography/wikidata.py b/plugins/Geography/wikidata.py index 27a06eac6..df7ff7ced 100644 --- a/plugins/Geography/wikidata.py +++ b/plugins/Geography/wikidata.py @@ -35,6 +35,8 @@ import urllib.parse import supybot.utils as utils +from .common import headers + SPARQL_URL = "https://query.wikidata.org/sparql" TIMEZONE_QUERY = string.Template( @@ -121,11 +123,7 @@ def _query_sparql(query): params = {"format": "json", "query": query} url = SPARQL_URL + "?" 
+ urllib.parse.urlencode(params) - # Comply with https://meta.wikimedia.org/wiki/User-Agent_policy - headers = utils.web.defaultHeaders.copy() - headers["User-agent"] += " https://github.com/progval/Limnoria/ - Geography plugin" - - content = utils.web.getUrlContent(url, headers=headers) + content = utils.web.getUrlContent(url, headers=headers()) return json.loads(content) From 696d82ccfe6a2cbd30f35b4a4758d179c13460c6 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Tue, 9 Nov 2021 22:01:59 +0100 Subject: [PATCH 08/12] Geography: Fix crash in nominatim.py when some results are not OSM objects --- plugins/Geography/nominatim.py | 4 +--- plugins/Geography/test.py | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/plugins/Geography/nominatim.py b/plugins/Geography/nominatim.py index b919e87d1..d244e5199 100644 --- a/plugins/Geography/nominatim.py +++ b/plugins/Geography/nominatim.py @@ -31,8 +31,6 @@ import json import time -import string -import datetime import threading import urllib.parse @@ -77,4 +75,4 @@ def search_osmids(query): on the query format""" data = _query_nominatim("/search", {"format": "json", "q": query}) - return [item["osm_id"] for item in data] + return [item["osm_id"] for item in data if item.get("osm_id")] diff --git a/plugins/Geography/test.py b/plugins/Geography/test.py index e16e54901..330fa6626 100644 --- a/plugins/Geography/test.py +++ b/plugins/Geography/test.py @@ -114,5 +114,4 @@ class GeographyNominatimTestCase(SupyTestCase): self.assertEqual(results[0], 450381, results) - # vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: From 36ade18319fd54138fd2632e39f39857853f5a4f Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Tue, 9 Nov 2021 22:32:29 +0100 Subject: [PATCH 09/12] Geography: Add 'timezone' command --- plugins/Geography/plugin.py | 75 ++++++++++++++++++++++++++++++++++++- plugins/Geography/test.py | 73 +++++++++++++++++++++++++++++++----- 2 files changed, 137 insertions(+), 11 deletions(-) diff --git 
a/plugins/Geography/plugin.py b/plugins/Geography/plugin.py index 59cc2d3d3..fca150e40 100644 --- a/plugins/Geography/plugin.py +++ b/plugins/Geography/plugin.py @@ -28,18 +28,91 @@ ### +import datetime + from supybot import utils, plugins, ircutils, callbacks from supybot.commands import * from supybot.i18n import PluginInternationalization +from . import nominatim +from . import wikidata + _ = PluginInternationalization("Geography") class Geography(callbacks.Plugin): - """Provides geography facts, such as timezones.""" + """Provides geography facts, such as timezones. + + This plugin uses data from `Wikidata `_ + and `OSM/Nominatim `. + """ threaded = True + @wrap(["text"]) + def timezone(self, irc, msg, args, query): + """ + + Returns the timezone used in the given location. For example, + the name could be "Paris" or "Paris, France". + This uses data from Wikidata and Nominatim.""" + osmids = nominatim.search_osmids(query) + if not osmids: + irc.error(_("Could not find the location"), Raise=True) + + now = datetime.datetime.now(tz=datetime.timezone.utc) + + for osmid in osmids: + uri = wikidata.uri_from_osmid(osmid) + if not uri: + continue + + # Get the timezone object (and handle various errors) + try: + timezone = wikidata.timezone_from_uri(uri) + except utils.time.UnknownTimeZone as e: + irc.error( + format(_("Could not understand timezone: %s"), e.args[0]), + Raise=True, + ) + except utils.time.MissingTimezoneLibrary: + irc.error( + _( + "Timezone-related commands are not available. " + "Your administrator need to either upgrade Python to " + "version 3.9 or greater, or install pytz." 
+ ), + Raise=True, + ) + except utils.time.TimezoneException as e: + irc.error(e.args[0], Raise=True) + + # Extract a human-friendly name, depending on the type of + # the timezone object: + if hasattr(timezone, "key"): + # instance of zoneinfo.ZoneInfo + irc.reply(timezone.key) + return + elif hasattr(timezone, "zone"): + # instance of pytz.timezone + irc.reply(timezone.zone) + return + else: + # probably datetime.timezone built from a constant offset + try: + offset = int(timezone.utcoffset(now).total_seconds()) + except NotImplementedError: + continue + + sign = "-" if offset < 0 else "+" + hours, minutes = divmod(abs(offset) // 60, 60) + irc.reply("UTC%s%0.2i:%0.2i" % (sign, hours, minutes)) + return + + irc.error( + _("Could not find the timezone of this location."), Raise=True + ) + Class = Geography diff --git a/plugins/Geography/test.py b/plugins/Geography/test.py index 330fa6626..035e424f2 100644 --- a/plugins/Geography/test.py +++ b/plugins/Geography/test.py @@ -29,7 +29,19 @@ ### import datetime +import contextlib from unittest import skipIf +from unittest.mock import patch + +try: + import pytz +except ImportError: + pytz = None + +try: + import zoneinfo +except ImportError: + zoneinfo = None from supybot.test import * from supybot import utils @@ -41,6 +53,43 @@ from . 
import nominatim class GeographyTestCase(PluginTestCase): plugins = ("Geography",) + @skipIf(not pytz, "pytz is not available") + @patch.object(nominatim, "search_osmids", return_value=[42]) + @patch.object(wikidata, "uri_from_osmid", return_value="foo") + def testTimezonePytz(self, _, __): + tz = pytz.timezone("Europe/Paris") + + with patch.object(wikidata, "timezone_from_uri", return_value=tz): + self.assertResponse("timezone Foo Bar", "Europe/Paris") + + @skipIf(not zoneinfo, "Python is older than 3.9") + @patch.object(nominatim, "search_osmids", return_value=[42]) + @patch.object(wikidata, "uri_from_osmid", return_value="foo") + def testTimezoneZoneinfo(self, _, __): + tz = zoneinfo.ZoneInfo("Europe/Paris") + + with patch.object(wikidata, "timezone_from_uri", return_value=tz): + self.assertResponse("timezone Foo Bar", "Europe/Paris") + + @skipIf(not zoneinfo, "Python is older than 3.9") + @patch.object(nominatim, "search_osmids", return_value=[42]) + @patch.object(wikidata, "uri_from_osmid", return_value="foo") + def testTimezoneAbsolute(self, _, __): + tz = datetime.timezone(datetime.timedelta(hours=4)) + + with patch.object(wikidata, "timezone_from_uri", return_value=tz): + self.assertResponse("timezone Foo Bar", "UTC+04:00") + + tz = datetime.timezone(datetime.timedelta(hours=4, minutes=30)) + + with patch.object(wikidata, "timezone_from_uri", return_value=tz): + self.assertResponse("timezone Foo Bar", "UTC+04:30") + + @skipIf(not network, "Network test") + def testTimezoneIntegration(self): + self.assertResponse("timezone Metz, France", "Europe/Paris") + self.assertResponse("timezone Saint-Denis, La Réunion", "UTC+04:00") + class GeographyWikidataTestCase(SupyTestCase): @skipIf(not network, "Network test") @@ -56,7 +105,7 @@ class GeographyWikidataTestCase(SupyTestCase): @skipIf(not network, "Network test") def testDirect(self): - """The queried object directly has a timezone property""" + # The queried object directly has a timezone property 
self.assertEqual( # New York wikidata.timezone_from_uri("http://www.wikidata.org/entity/Q1384"), @@ -65,8 +114,8 @@ class GeographyWikidataTestCase(SupyTestCase): @skipIf(not network, "Network test") def testParent(self): - """The queried object does not have a TZ property - but it is part of an object that does""" + # The queried object does not have a TZ property but it is part + # of an object that does self.assertEqual( # Metz, France wikidata.timezone_from_uri( @@ -77,8 +126,8 @@ class GeographyWikidataTestCase(SupyTestCase): @skipIf(not network, "Network test") def testParentAndIgnoreSelf(self): - """The queried object has a TZ property, but it's useless to us; - however it is part of an object that has a useful one.""" + # The queried object has a TZ property, but it's useless to us; + # however it is part of an object that has a useful one.""" self.assertEqual( # New York City, NY wikidata.timezone_from_uri("http://www.wikidata.org/entity/Q60"), @@ -87,11 +136,12 @@ class GeographyWikidataTestCase(SupyTestCase): @skipIf(not network, "Network test") def testParentQualifiedIgnorePreferred(self): - """The queried object does not have a TZ property, - and is part of an object that does. - However, this parent's 'preferred' timezone is not the - right one, so we must make sure to select the right one - based on P518 ('applies to part').""" + # The queried object does not have a TZ property, + # and is part of an object that does. + # However, this parent's 'preferred' timezone is not the + # right one, so we must make sure to select the right one + # based on P518 ('applies to part'). + # La Réunion is a French region, but in UTC+4. 
# France has a bunch of timezone statements, and 'Europe/Paris' # is marked as Preferred because it is the time of metropolitan @@ -113,5 +163,8 @@ class GeographyNominatimTestCase(SupyTestCase): results = nominatim.search_osmids("Metz, France") self.assertEqual(results[0], 450381, results) + results = nominatim.search_osmids("Saint-Denis, La Réunion") + self.assertEqual(results[0], 192468, results) + # vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: From 93a407a9ac57f2621af8d8c74cb34ec84569359f Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Tue, 9 Nov 2021 23:10:55 +0100 Subject: [PATCH 10/12] Geography: Add 'localtime' command --- plugins/Geography/plugin.py | 74 +++++++++++++++++++++++++++---------- plugins/Geography/test.py | 66 ++++++++++++++++++++++++++++----- 2 files changed, 111 insertions(+), 29 deletions(-) diff --git a/plugins/Geography/plugin.py b/plugins/Geography/plugin.py index fca150e40..07c75122a 100644 --- a/plugins/Geography/plugin.py +++ b/plugins/Geography/plugin.py @@ -30,7 +30,7 @@ import datetime -from supybot import utils, plugins, ircutils, callbacks +from supybot import conf, utils, plugins, ircutils, callbacks from supybot.commands import * from supybot.i18n import PluginInternationalization @@ -40,6 +40,27 @@ from . import wikidata _ = PluginInternationalization("Geography") +def timezone_from_uri(irc, uri): + try: + return wikidata.timezone_from_uri(uri) + except utils.time.UnknownTimeZone as e: + irc.error( + format(_("Could not understand timezone: %s"), e.args[0]), + Raise=True, + ) + except utils.time.MissingTimezoneLibrary: + irc.error( + _( + "Timezone-related commands are not available. " + "Your administrator need to either upgrade Python to " + "version 3.9 or greater, or install pytz." + ), + Raise=True, + ) + except utils.time.TimezoneException as e: + irc.error(e.args[0], Raise=True) + + class Geography(callbacks.Plugin): """Provides geography facts, such as timezones. 
@@ -49,6 +70,38 @@ class Geography(callbacks.Plugin): threaded = True + @wrap(["text"]) + def localtime(self, irc, msg, args, query): + """ + + Returns the current time used in the given location. For example, + the name could be "Paris" or "Paris, France". The response is + formatted according to supybot.reply.format.time + This uses data from Wikidata and Nominatim.""" + osmids = nominatim.search_osmids(query) + if not osmids: + irc.error(_("Could not find the location"), Raise=True) + + for osmid in osmids: + uri = wikidata.uri_from_osmid(osmid) + if not uri: + continue + + # Get the timezone object (and handle various errors) + timezone = timezone_from_uri(irc, uri) + + # Get the local time + now = datetime.datetime.now(tz=timezone) + + format_ = conf.supybot.reply.format.time.getSpecific( + channel=msg.channel, network=irc.network + )() + + # Return it + irc.reply(now.strftime(format_)) + + return + @wrap(["text"]) def timezone(self, irc, msg, args, query): """ @@ -68,24 +121,7 @@ class Geography(callbacks.Plugin): continue # Get the timezone object (and handle various errors) - try: - timezone = wikidata.timezone_from_uri(uri) - except utils.time.UnknownTimeZone as e: - irc.error( - format(_("Could not understand timezone: %s"), e.args[0]), - Raise=True, - ) - except utils.time.MissingTimezoneLibrary: - irc.error( - _( - "Timezone-related commands are not available. " - "Your administrator need to either upgrade Python to " - "version 3.9 or greater, or install pytz." 
- ), - Raise=True, - ) - except utils.time.TimezoneException as e: - irc.error(e.args[0], Raise=True) + timezone = timezone_from_uri(irc, uri) # Extract a human-friendly name, depending on the type of # the timezone object: diff --git a/plugins/Geography/test.py b/plugins/Geography/test.py index 035e424f2..7accfa44b 100644 --- a/plugins/Geography/test.py +++ b/plugins/Geography/test.py @@ -29,6 +29,7 @@ ### import datetime +import functools import contextlib from unittest import skipIf from unittest.mock import patch @@ -50,31 +51,38 @@ from . import wikidata from . import nominatim -class GeographyTestCase(PluginTestCase): +def mock(f): + @functools.wraps(f) + def newf(self): + with patch.object(wikidata, "uri_from_osmid", return_value="foo"): + with patch.object(nominatim, "search_osmids", return_value=[42]): + f(self) + + return newf + + +class GeographyTimezoneTestCase(PluginTestCase): plugins = ("Geography",) @skipIf(not pytz, "pytz is not available") - @patch.object(nominatim, "search_osmids", return_value=[42]) - @patch.object(wikidata, "uri_from_osmid", return_value="foo") - def testTimezonePytz(self, _, __): + @mock + def testTimezonePytz(self): tz = pytz.timezone("Europe/Paris") with patch.object(wikidata, "timezone_from_uri", return_value=tz): self.assertResponse("timezone Foo Bar", "Europe/Paris") @skipIf(not zoneinfo, "Python is older than 3.9") - @patch.object(nominatim, "search_osmids", return_value=[42]) - @patch.object(wikidata, "uri_from_osmid", return_value="foo") - def testTimezoneZoneinfo(self, _, __): + @mock + def testTimezoneZoneinfo(self): tz = zoneinfo.ZoneInfo("Europe/Paris") with patch.object(wikidata, "timezone_from_uri", return_value=tz): self.assertResponse("timezone Foo Bar", "Europe/Paris") @skipIf(not zoneinfo, "Python is older than 3.9") - @patch.object(nominatim, "search_osmids", return_value=[42]) - @patch.object(wikidata, "uri_from_osmid", return_value="foo") - def testTimezoneAbsolute(self, _, __): + @mock + def 
testTimezoneAbsolute(self): tz = datetime.timezone(datetime.timedelta(hours=4)) with patch.object(wikidata, "timezone_from_uri", return_value=tz): @@ -91,6 +99,44 @@ class GeographyTestCase(PluginTestCase): self.assertResponse("timezone Saint-Denis, La Réunion", "UTC+04:00") +class GeographyLocaltimeTestCase(PluginTestCase): + plugins = ("Geography",) + + @skipIf(not pytz, "pytz is not available") + @mock + def testLocaltimePytz(self): + tz = pytz.timezone("Europe/Paris") + + with patch.object(wikidata, "timezone_from_uri", return_value=tz): + self.assertRegexp("localtime Foo Bar", r".*\+0[12]00$") + + @skipIf(not zoneinfo, "Python is older than 3.9") + @mock + def testLocaltimeZoneinfo(self): + tz = zoneinfo.ZoneInfo("Europe/Paris") + + with patch.object(wikidata, "timezone_from_uri", return_value=tz): + self.assertRegexp("localtime Foo Bar", r".*\+0[12]00$") + + @skipIf(not zoneinfo, "Python is older than 3.9") + @mock + def testLocaltimeAbsolute(self): + tz = datetime.timezone(datetime.timedelta(hours=4)) + + with patch.object(wikidata, "timezone_from_uri", return_value=tz): + self.assertRegexp("localtime Foo Bar", r".*\+0400$") + + tz = datetime.timezone(datetime.timedelta(hours=4, minutes=30)) + + with patch.object(wikidata, "timezone_from_uri", return_value=tz): + self.assertRegexp("localtime Foo Bar", r".*\+0430$") + + @skipIf(not network, "Network test") + def testLocaltimeIntegration(self): + self.assertRegexp("localtime Metz, France", r".*\+0[12]00$") + self.assertRegexp("localtime Saint-Denis, La Réunion", r".*\+0400$") + + class GeographyWikidataTestCase(SupyTestCase): @skipIf(not network, "Network test") def testOsmidToTimezone(self): From 76b9d6f0ce6e65c7ce29e373240ef61daa63f8f7 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Tue, 9 Nov 2021 23:38:31 +0100 Subject: [PATCH 11/12] Geography: Properly handle missing timezones --- plugins/Geography/plugin.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/plugins/Geography/plugin.py 
b/plugins/Geography/plugin.py index 07c75122a..95c66acaa 100644 --- a/plugins/Geography/plugin.py +++ b/plugins/Geography/plugin.py @@ -90,6 +90,9 @@ class Geography(callbacks.Plugin): # Get the timezone object (and handle various errors) timezone = timezone_from_uri(irc, uri) + if timezone is None: + continue + # Get the local time now = datetime.datetime.now(tz=timezone) @@ -102,6 +105,10 @@ class Geography(callbacks.Plugin): return + irc.error( + _("Could not find the timezone of this location."), Raise=True + ) + @wrap(["text"]) def timezone(self, irc, msg, args, query): """ @@ -123,6 +130,9 @@ class Geography(callbacks.Plugin): # Get the timezone object (and handle various errors) timezone = timezone_from_uri(irc, uri) + if timezone is None: + continue + # Extract a human-friendly name, depending on the type of # the timezone object: if hasattr(timezone, "key"): From d2091e8f71ce2dda90c86e3b3a4edce3ec723981 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Thu, 11 Nov 2021 19:07:31 +0100 Subject: [PATCH 12/12] Geography: Always prefer IANA timezones over UTC offsets Even if the UTC offsets are given on a more specific entity, they are usually worthless to us (eg. DST). 
--- plugins/Geography/test.py | 6 ++++++ plugins/Geography/wikidata.py | 25 ++++++++++++++++--------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/plugins/Geography/test.py b/plugins/Geography/test.py index 7accfa44b..7cbb7a77b 100644 --- a/plugins/Geography/test.py +++ b/plugins/Geography/test.py @@ -180,6 +180,12 @@ class GeographyWikidataTestCase(SupyTestCase): utils.time.iana_timezone("America/New_York"), ) + self.assertEqual( + # Paris, France + wikidata.timezone_from_uri("http://www.wikidata.org/entity/Q90"), + utils.time.iana_timezone("Europe/Paris"), + ) + @skipIf(not network, "Network test") def testParentQualifiedIgnorePreferred(self): # The queried object does not have a TZ property, diff --git a/plugins/Geography/wikidata.py b/plugins/Geography/wikidata.py index df7ff7ced..712fe01a1 100644 --- a/plugins/Geography/wikidata.py +++ b/plugins/Geography/wikidata.py @@ -79,6 +79,9 @@ SELECT ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid (MIN(?are # Get the area, will be used to order by specificity OPTIONAL { ?statementsubject wdt:P2046 ?area. } + # Require that ?timezone be an instance of... + ?timezone (wdt:P31/wdt:P279*) <$tztype>. + { # Get either an IANA timezone ID... ?timezone wdt:P6687 ?tzid. @@ -130,15 +133,19 @@ def _query_sparql(query): def timezone_from_uri(location_uri): """Returns a :class:datetime.tzinfo object, given a Wikidata Q-ID. eg. 
``"Q60"`` for New York City.""" - data = _query_sparql(TIMEZONE_QUERY.substitute(subject=location_uri)) - results = data["results"]["bindings"] - for result in results: - if "tzid" in result: - return utils.time.iana_timezone(result["tzid"]["value"]) - else: - assert "utcoffset" in result - utc_offset = float(result["utcoffset"]["value"]) - return datetime.timezone(datetime.timedelta(hours=utc_offset)) + for tztype in [ + "http://www.wikidata.org/entity/Q17272692", # IANA timezones first + "http://www.wikidata.org/entity/Q12143", # any timezone as a fallback + ]: + data = _query_sparql(TIMEZONE_QUERY.substitute(subject=location_uri, tztype=tztype)) + results = data["results"]["bindings"] + for result in results: + if "tzid" in result: + return utils.time.iana_timezone(result["tzid"]["value"]) + else: + assert "utcoffset" in result + utc_offset = float(result["utcoffset"]["value"]) + return datetime.timezone(datetime.timedelta(hours=utc_offset)) return None