diff --git a/plugins/Geography/README.md b/plugins/Geography/README.md new file mode 100644 index 000000000..15d14074e --- /dev/null +++ b/plugins/Geography/README.md @@ -0,0 +1 @@ +Provides geography facts, such as timezones. diff --git a/plugins/Geography/__init__.py b/plugins/Geography/__init__.py new file mode 100644 index 000000000..559ff38d9 --- /dev/null +++ b/plugins/Geography/__init__.py @@ -0,0 +1,72 @@ +### +# Copyright (c) 2021, Valentin Lorentz +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+ +### + +""" +Geography: Provides geography facts, such as timezones. +""" + +import sys +import supybot +from supybot import world + +# Use this for the version of this plugin. +__version__ = "" + +# XXX Replace this with an appropriate author or supybot.Author instance. +__author__ = supybot.authors.unknown + +# This is a dictionary mapping supybot.Author instances to lists of +# contributions. +__contributors__ = {} + +# This is a url where the most recent plugin package can be downloaded. +__url__ = "" + +from . import config +from . import plugin + +if sys.version_info >= (3, 4): + from importlib import reload +else: + from imp import reload +# In case we're being reloaded. +reload(config) +reload(plugin) +# Add more reloads here if you add third-party modules and want them to be +# reloaded when this plugin is reloaded. Don't forget to import them as well! + +if world.testing: + from . import test + +Class = plugin.Class +configure = config.configure + + +# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: diff --git a/plugins/Geography/common.py b/plugins/Geography/common.py new file mode 100644 index 000000000..97f413013 --- /dev/null +++ b/plugins/Geography/common.py @@ -0,0 +1,43 @@ +### +# Copyright (c) 2021, Valentin Lorentz +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
def headers():
    """Return the HTTP headers to send with this plugin's web requests.

    The result is a copy of ``utils.web.defaultHeaders`` whose
    ``User-agent`` entry has a suffix identifying Limnoria and this
    plugin appended; the shared default mapping itself is not modified.
    """
    request_headers = utils.web.defaultHeaders.copy()

    # Comply with https://meta.wikimedia.org/wiki/User-Agent_policy
    # and https://operations.osmfoundation.org/policies/nominatim/
    suffix = " https://github.com/progval/Limnoria/ - Geography plugin"
    request_headers["User-agent"] += suffix

    return request_headers
_ = PluginInternationalization("Geography")


def configure(advanced):
    """Register the plugin when supybot configures this module.

    ``advanced`` is a bool that specifies whether the user identified
    themself as an advanced user or not. Configuration is effected by
    manipulating the registry as appropriate; this plugin has nothing to
    ask, so it only registers itself.
    """
    # Kept from the plugin template: helpers for interactive questions.
    from supybot.questions import expect, anything, something, yn

    conf.registerPlugin("Geography", True)


Geography = conf.registerPlugin("Geography")
# This is where your configuration variables (if any) should go.
For example: +# conf.registerGlobalValue(Geography, 'someConfigVariableName', +# registry.Boolean(False, _("""Help for someConfigVariableName."""))) + + +# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: diff --git a/plugins/Geography/local/__init__.py b/plugins/Geography/local/__init__.py new file mode 100644 index 000000000..e86e97b86 --- /dev/null +++ b/plugins/Geography/local/__init__.py @@ -0,0 +1 @@ +# Stub so local is a module, used for third-party modules diff --git a/plugins/Geography/nominatim.py b/plugins/Geography/nominatim.py new file mode 100644 index 000000000..d244e5199 --- /dev/null +++ b/plugins/Geography/nominatim.py @@ -0,0 +1,78 @@ +### +# Copyright (c) 2021, Valentin Lorentz +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
NOMINATIM_BASE_URL = "https://nominatim.openstreetmap.org"

# Serializes API accesses so the rate limit holds across threads.
_QUERY_LOCK = threading.Lock()
# time.monotonic() value of the last API access; None until the first one.
_LAST_QUERY_TIME = None


def _wait_before_query(min_time_between_queries=1.0):
    """Should be called before any API access. Blocks the current thread
    in order to follow the rate limit:
    https://operations.osmfoundation.org/policies/nominatim/

    ``min_time_between_queries`` is the minimum delay, in seconds, enforced
    between two consecutive calls.

    The lock is held while sleeping, so concurrent callers are released
    one at a time, each at least ``min_time_between_queries`` after the
    previous one."""

    global _LAST_QUERY_TIME

    with _QUERY_LOCK:
        if _LAST_QUERY_TIME is not None:
            # A monotonic clock keeps the delay correct even if the wall
            # clock is adjusted between two queries.
            time_since_last_query = time.monotonic() - _LAST_QUERY_TIME
            if time_since_last_query < min_time_between_queries:
                time.sleep(min_time_between_queries - time_since_last_query)
        _LAST_QUERY_TIME = time.monotonic()


def _query_nominatim(path, params):
    """Sends a rate-limited GET request to Nominatim and returns the
    decoded JSON response.

    ``path`` is appended to NOMINATIM_BASE_URL and ``params`` is
    URL-encoded into the query string."""
    url = NOMINATIM_BASE_URL + path + "?" + urllib.parse.urlencode(params)

    _wait_before_query()

    content = utils.web.getUrlContent(url, headers=headers())
    return json.loads(content)


def search_osmids(query):
    """Queries nominatim's search endpoint and returns a list of OSM ids

    Results without an ``osm_id`` (or with a falsy one) are dropped; the
    order of the remaining results is preserved.

    See https://nominatim.org/release-docs/develop/api/Search/ for details
    on the query format"""
    # NOTE(review): ids are returned regardless of the kind of OSM object
    # they belong to -- confirm callers do not need to tell them apart.
    data = _query_nominatim("/search", {"format": "json", "q": query})

    return [item["osm_id"] for item in data if item.get("osm_id")]
_ = PluginInternationalization("Geography")


def timezone_from_uri(irc, uri):
    """Returns the timezone of the Wikidata entity ``uri``, as returned by
    ``wikidata.timezone_from_uri``.

    Timezone-related exceptions are turned into an error reply on ``irc``
    (with ``Raise=True``) instead of being propagated as-is."""
    try:
        return wikidata.timezone_from_uri(uri)
    except utils.time.UnknownTimeZone as e:
        irc.error(
            format(_("Could not understand timezone: %s"), e.args[0]),
            Raise=True,
        )
    except utils.time.MissingTimezoneLibrary:
        irc.error(
            _(
                "Timezone-related commands are not available. "
                "Your administrator needs to either upgrade Python to "
                "version 3.9 or greater, or install pytz."
            ),
            Raise=True,
        )
    except utils.time.TimezoneException as e:
        irc.error(e.args[0], Raise=True)


def _iter_timezones(irc, query):
    """Yields the timezone of each place matching ``query``, in the order
    returned by Nominatim, skipping places without a Wikidata entity or
    without a known timezone.

    Replies with an error (``Raise=True``) if the search has no result."""
    osmids = nominatim.search_osmids(query)
    if not osmids:
        irc.error(_("Could not find the location"), Raise=True)

    for osmid in osmids:
        uri = wikidata.uri_from_osmid(osmid)
        if not uri:
            continue

        # Get the timezone object (and handle various errors)
        timezone = timezone_from_uri(irc, uri)

        if timezone is not None:
            yield timezone


def _format_utc_offset(offset):
    """Formats a ``datetime.timedelta`` UTC offset as ``UTC+HH:MM`` or
    ``UTC-HH:MM``."""
    # Negative timedeltas are normalized to negative days plus positive
    # seconds, so the sign and the magnitude are handled separately
    # instead of reading the ``seconds`` attribute.
    sign = "-" if offset < datetime.timedelta(0) else "+"
    total_minutes = int(abs(offset).total_seconds()) // 60
    hours, minutes = divmod(total_minutes, 60)
    return "UTC%s%02d:%02d" % (sign, hours, minutes)


class Geography(callbacks.Plugin):
    """Provides geography facts, such as timezones.

    This plugin uses data from `Wikidata <https://www.wikidata.org/>`_
    and `OSM/Nominatim <https://nominatim.openstreetmap.org/>`_.
    """

    threaded = True

    @wrap(["text"])
    def localtime(self, irc, msg, args, query):
        """<location name>

        Returns the current time in the given location. For example,
        the name could be "Paris" or "Paris, France". The response is
        formatted according to supybot.reply.format.time
        This uses data from Wikidata and Nominatim."""
        for timezone in _iter_timezones(irc, query):
            # Get the local time
            now = datetime.datetime.now(tz=timezone)

            format_ = conf.supybot.reply.format.time.getSpecific(
                channel=msg.channel, network=irc.network
            )()

            # Return it
            irc.reply(now.strftime(format_))

            return

        irc.error(
            _("Could not find the timezone of this location."), Raise=True
        )

    @wrap(["text"])
    def timezone(self, irc, msg, args, query):
        """<location name>

        Returns the timezone used in the given location. For example,
        the name could be "Paris" or "Paris, France".
        This uses data from Wikidata and Nominatim."""
        now = datetime.datetime.now(tz=datetime.timezone.utc)

        for timezone in _iter_timezones(irc, query):
            # Extract a human-friendly name, depending on the type of
            # the timezone object:
            if hasattr(timezone, "key"):
                # instance of zoneinfo.ZoneInfo
                irc.reply(timezone.key)
                return

            if hasattr(timezone, "zone"):
                # instance of pytz.timezone
                irc.reply(timezone.zone)
                return

            # probably datetime.timezone built from a constant offset
            try:
                offset = timezone.utcoffset(now)
            except NotImplementedError:
                continue

            if offset is None:
                # tzinfo objects may return None when they have no offset
                continue

            irc.reply(_format_utc_offset(offset))
            return

        irc.error(
            _("Could not find the timezone of this location."), Raise=True
        )


Class = Geography


# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:
def mock(f):
    """Decorator for test methods that replaces the network lookups with
    constant results: ``nominatim.search_osmids`` returns ``[42]`` and
    ``wikidata.uri_from_osmid`` returns ``"foo"``."""

    @functools.wraps(f)
    def newf(self):
        with patch.object(wikidata, "uri_from_osmid", return_value="foo"):
            with patch.object(nominatim, "search_osmids", return_value=[42]):
                f(self)

    return newf


class GeographyTimezoneTestCase(PluginTestCase):
    """Tests of the 'timezone' command."""

    plugins = ("Geography",)

    @skipIf(not pytz, "pytz is not available")
    @mock
    def testTimezonePytz(self):
        tz = pytz.timezone("Europe/Paris")

        with patch.object(wikidata, "timezone_from_uri", return_value=tz):
            self.assertResponse("timezone Foo Bar", "Europe/Paris")

    @skipIf(not zoneinfo, "Python is older than 3.9")
    @mock
    def testTimezoneZoneinfo(self):
        tz = zoneinfo.ZoneInfo("Europe/Paris")

        with patch.object(wikidata, "timezone_from_uri", return_value=tz):
            self.assertResponse("timezone Foo Bar", "Europe/Paris")

    # Not skipped when zoneinfo is missing: fixed offsets only rely on
    # datetime.timezone.
    @mock
    def testTimezoneAbsolute(self):
        tz = datetime.timezone(datetime.timedelta(hours=4))

        with patch.object(wikidata, "timezone_from_uri", return_value=tz):
            self.assertResponse("timezone Foo Bar", "UTC+04:00")

        tz = datetime.timezone(datetime.timedelta(hours=4, minutes=30))

        with patch.object(wikidata, "timezone_from_uri", return_value=tz):
            self.assertResponse("timezone Foo Bar", "UTC+04:30")

    @skipIf(not network, "Network test")
    def testTimezoneIntegration(self):
        self.assertResponse("timezone Metz, France", "Europe/Paris")
        self.assertResponse("timezone Saint-Denis, La Réunion", "UTC+04:00")


class GeographyLocaltimeTestCase(PluginTestCase):
    """Tests of the 'localtime' command."""

    plugins = ("Geography",)

    @skipIf(not pytz, "pytz is not available")
    @mock
    def testLocaltimePytz(self):
        tz = pytz.timezone("Europe/Paris")

        with patch.object(wikidata, "timezone_from_uri", return_value=tz):
            self.assertRegexp("localtime Foo Bar", r".*\+0[12]00$")

    @skipIf(not zoneinfo, "Python is older than 3.9")
    @mock
    def testLocaltimeZoneinfo(self):
        tz = zoneinfo.ZoneInfo("Europe/Paris")

        with patch.object(wikidata, "timezone_from_uri", return_value=tz):
            self.assertRegexp("localtime Foo Bar", r".*\+0[12]00$")

    # Not skipped when zoneinfo is missing: fixed offsets only rely on
    # datetime.timezone.
    @mock
    def testLocaltimeAbsolute(self):
        tz = datetime.timezone(datetime.timedelta(hours=4))

        with patch.object(wikidata, "timezone_from_uri", return_value=tz):
            self.assertRegexp("localtime Foo Bar", r".*\+0400$")

        tz = datetime.timezone(datetime.timedelta(hours=4, minutes=30))

        with patch.object(wikidata, "timezone_from_uri", return_value=tz):
            self.assertRegexp("localtime Foo Bar", r".*\+0430$")

    @skipIf(not network, "Network test")
    def testLocaltimeIntegration(self):
        self.assertRegexp("localtime Metz, France", r".*\+0[12]00$")
        self.assertRegexp("localtime Saint-Denis, La Réunion", r".*\+0400$")


class GeographyWikidataTestCase(SupyTestCase):
    """Network tests of the Wikidata queries."""

    @skipIf(not network, "Network test")
    def testOsmidToTimezone(self):
        # Despite its name, this checks the OSM id -> Wikidata URI lookup.
        self.assertEqual(
            wikidata.uri_from_osmid(450381),
            "http://www.wikidata.org/entity/Q22690",
        )
        self.assertEqual(
            wikidata.uri_from_osmid(192468),
            "http://www.wikidata.org/entity/Q47045",
        )

    @skipIf(not network, "Network test")
    def testDirect(self):
        # The queried object directly has a timezone property
        self.assertEqual(
            # New York
            wikidata.timezone_from_uri("http://www.wikidata.org/entity/Q1384"),
            utils.time.iana_timezone("America/New_York"),
        )

    @skipIf(not network, "Network test")
    def testParent(self):
        # The queried object does not have a TZ property but it is part
        # of an object that does
        self.assertEqual(
            # Metz, France
            wikidata.timezone_from_uri(
                "http://www.wikidata.org/entity/Q22690"
            ),
            utils.time.iana_timezone("Europe/Paris"),
        )

    @skipIf(not network, "Network test")
    def testParentAndIgnoreSelf(self):
        # The queried object has a TZ property, but it's useless to us;
        # however it is part of an object that has a useful one.
        self.assertEqual(
            # New York City, NY
            wikidata.timezone_from_uri("http://www.wikidata.org/entity/Q60"),
            utils.time.iana_timezone("America/New_York"),
        )

        self.assertEqual(
            # Paris, France
            wikidata.timezone_from_uri("http://www.wikidata.org/entity/Q90"),
            utils.time.iana_timezone("Europe/Paris"),
        )

    @skipIf(not network, "Network test")
    def testParentQualifiedIgnorePreferred(self):
        # The queried object does not have a TZ property,
        # and is part of an object that does.
        # However, this parent's 'preferred' timezone is not the
        # right one, so we must make sure to select the right one
        # based on P518 ('applies to part').

        # La Réunion is a French region, but in UTC+4.
        # France has a bunch of timezone statements, and 'Europe/Paris'
        # is marked as Preferred because it is the time of metropolitan
        # France. However, it is not valid for La Réunion.
        self.assertEqual(
            # La Réunion
            wikidata.timezone_from_uri(
                "http://www.wikidata.org/entity/Q17070"
            ),
            datetime.timezone(datetime.timedelta(hours=4)),
        )


class GeographyNominatimTestCase(SupyTestCase):
    """Network tests of the Nominatim search."""

    @skipIf(not network, "Network test")
    def testSearch(self):
        self.assertIn(450381, nominatim.search_osmids("Metz"))

        results = nominatim.search_osmids("Metz, France")
        self.assertEqual(results[0], 450381, results)

        results = nominatim.search_osmids("Saint-Denis, La Réunion")
        self.assertEqual(results[0], 192468, results)


# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:
SPARQL_URL = "https://query.wikidata.org/sparql"

# Placeholders: $subject is the URI of the entity to locate, $tztype the
# URI of the class the timezone must be an instance of.
TIMEZONE_QUERY = string.Template(
    """
SELECT ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid (MIN(?area) AS ?min_area) WHERE {

  # find all ?item entities that the subject is part of, recursively;
  <$subject> (wdt:P131*) ?item.

  # Get all timezones (returns a superset of "?item wdt:P421 ?timezone", as it does not filter on rank)
  ?item p:P421 ?statement.
  ?statement ps:P421 ?timezone.

  # TODO: order the final result based on the rank?
  ?statement wikibase:rank ?rank.

  # fetch the end of validity of the given statement (TODO: check it)
  OPTIONAL { ?statement pq:P582 ?endtime. }

  {
    # filter out statements that apply only to a part of ?item...
    FILTER NOT EXISTS {
      ?statement pq:P518 ?appliestopart.
    }
  }
  UNION {
    # ... unless it applies to a part that contains what we are interested in
    ?statement pq:P518 ?appliestopart.
    <$subject> (wdt:P131*) ?appliestopart.
  }

  # Filter out values only valid in certain periods of the year (DST vs normal time)
  FILTER NOT EXISTS {
    ?statement pq:P1264 ?validinperiod.
  }

  # store the identifier of the object the statement applies to
  BIND(IF(BOUND(?appliestopart),?appliestopart,?item) AS ?statementsubject).

  # Get the area, will be used to order by specificity
  OPTIONAL { ?statementsubject wdt:P2046 ?area. }

  # Require that ?timezone be an instance of...
  ?timezone (wdt:P31/wdt:P279*) <$tztype>.

  {
    # Get either an IANA timezone ID...
    ?timezone wdt:P6687 ?tzid.
  }
  UNION
  {
    # ... or an absolute UTC offset
    ?timezone p:P2907 ?utcoffset_statement.
    ?utcoffset_statement ps:P2907 ?utcoffset.

    # unless it is only valid in certain periods of the year (DST vs normal time)
    FILTER NOT EXISTS {
      ?utcoffset_statement pq:P1264 ?utcoffset_validinperiod.
    }
  }

  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}

# Deduplicate in case there is more than one ?area statement
GROUP BY ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid

# Get the smallest entities first. As they are more specific,
# they are more likely to be correct.
ORDER BY ?min_area DESC(?tzid)

LIMIT 1
"""
)

# Placeholder: $osmid is the OpenStreetMap id to look up.
OSMID_QUERY = string.Template(
    """
SELECT ?item WHERE {
  ?item wdt:P402 "$osmid".
}
LIMIT 1
"""
)


def _query_sparql(query):
    """Sends ``query`` to the Wikidata SPARQL endpoint and returns the
    decoded JSON response."""
    params = {"format": "json", "query": query}
    url = SPARQL_URL + "?" + urllib.parse.urlencode(params)

    content = utils.web.getUrlContent(url, headers=headers())
    return json.loads(content)


def _timezone_from_binding(result):
    """Builds a :class:`datetime.tzinfo` from one result binding of
    TIMEZONE_QUERY, or returns None if the binding has neither an IANA
    timezone id nor a UTC offset."""
    if "tzid" in result:
        return utils.time.iana_timezone(result["tzid"]["value"])

    if "utcoffset" in result:
        # The offset is interpreted as a (possibly fractional) number of
        # hours.
        utc_offset = float(result["utcoffset"]["value"])
        return datetime.timezone(datetime.timedelta(hours=utc_offset))

    return None


def timezone_from_uri(location_uri):
    """Returns a :class:`datetime.tzinfo` object, given the URI of a
    Wikidata entity, eg. ``"http://www.wikidata.org/entity/Q60"`` for
    New York City.

    Returns None if no timezone could be found for the entity."""
    for tztype in [
        "http://www.wikidata.org/entity/Q17272692",  # IANA timezones first
        "http://www.wikidata.org/entity/Q12143",  # any timezone as a fallback
    ]:
        query = TIMEZONE_QUERY.substitute(subject=location_uri, tztype=tztype)
        data = _query_sparql(query)
        for result in data["results"]["bindings"]:
            timezone = _timezone_from_binding(result)
            if timezone is not None:
                return timezone

    return None


def uri_from_osmid(location_osmid):
    """Returns the URI of the Wikidata entity associated with an
    OpenStreetMap ID, eg. ``"http://www.wikidata.org/entity/Q22690"``,
    or None if there is none."""
    data = _query_sparql(OSMID_QUERY.substitute(osmid=location_osmid))
    bindings = data["results"]["bindings"]
    if not bindings:
        return None

    return bindings[0]["item"]["value"]