Geography: Early draft, with buggy implementation of timezone_from_qid.

This commit is contained in:
Valentin Lorentz 2021-11-08 23:22:17 +01:00
parent 5aefd6395f
commit 27c513135c
7 changed files with 383 additions and 0 deletions

View File

@ -0,0 +1 @@
Provides geography facts, such as timezones.

View File

@ -0,0 +1,71 @@
###
# Copyright (c) 2021, Valentin Lorentz
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
"""
Geography: Provides geography facts, such as timezones.
"""
import sys
import supybot
from supybot import world
# Use this for the version of this plugin.
__version__ = ""
# XXX Replace this with an appropriate author or supybot.Author instance.
__author__ = supybot.authors.unknown
# This is a dictionary mapping supybot.Author instances to lists of
# contributions.
__contributors__ = {}
# This is a url where the most recent plugin package can be downloaded.
__url__ = ''
from . import config
from . import plugin
# Pick the module that provides reload() on the running interpreter:
# importlib from Python 3.4 onwards, the older imp module before that.
if sys.version_info >= (3, 4):
    from importlib import reload
else:
    from imp import reload
# In case we're being reloaded.
reload(config)
reload(plugin)
# Add more reloads here if you add third-party modules and want them to be
# reloaded when this plugin is reloaded. Don't forget to import them as well!
# The test module is only imported when world.testing is set.
if world.testing:
    from . import test
# Re-export the plugin class and the configuration hook at package level
# (presumably the names the plugin loader looks up -- confirm).
Class = plugin.Class
configure = config.configure
# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:

View File

@ -0,0 +1,51 @@
###
# Copyright (c) 2021, Valentin Lorentz
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
from supybot import conf, registry
from supybot.i18n import PluginInternationalization
_ = PluginInternationalization('Geography')
def configure(advanced):
    """Register the Geography plugin in the configuration registry.

    This will be called by supybot to configure this module.  ``advanced``
    is a bool that specifies whether the user identified themself as an
    advanced user or not.  Configuration is effected by manipulating the
    registry as appropriate.
    """
    from supybot.questions import (
        expect,
        anything,
        something,
        yn,
    )

    conf.registerPlugin('Geography', True)
# Value returned by registering the plugin; the example below passes it as
# the first argument of conf.registerGlobalValue.
Geography = conf.registerPlugin('Geography')
# This is where your configuration variables (if any) should go. For example:
# conf.registerGlobalValue(Geography, 'someConfigVariableName',
#     registry.Boolean(False, _("""Help for someConfigVariableName.""")))
# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:

View File

@ -0,0 +1 @@
# Stub so local is a module, used for third-party modules

View File

@ -0,0 +1,46 @@
###
# Copyright (c) 2021, Valentin Lorentz
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
from supybot import utils, plugins, ircutils, callbacks
from supybot.commands import *
from supybot.i18n import PluginInternationalization
_ = PluginInternationalization('Geography')
class Geography(callbacks.Plugin):
    """Provides geography facts, such as timezones."""
    # NOTE(review): presumably asks the framework to run this plugin's
    # commands in their own thread -- confirm against callbacks.Plugin.
    threaded = True
# Module-level alias for the plugin class; the package __init__ re-exports
# it as plugin.Class.
Class = Geography
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:

86
plugins/Geography/test.py Normal file
View File

@ -0,0 +1,86 @@
###
# Copyright (c) 2021, Valentin Lorentz
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
import datetime
from unittest import skipIf
from supybot.test import *
from supybot import utils
from . import wikidata
class GeographyTestCase(PluginTestCase):
    # Names of the plugins this test case declares (presumably loaded by
    # PluginTestCase before each test -- confirm).
    plugins = ('Geography',)
class GeographyWikidataTestCase(SupyTestCase):
    # Every test below calls wikidata.timezone_from_qid and is skipped
    # when ``network`` is falsy.

    @skipIf(not network, "Network test")
    def testDirect(self):
        """The queried object directly has a timezone property"""
        actual = wikidata.timezone_from_qid("Q1384")  # New York
        expected = utils.time.iana_timezone("America/New_York")
        self.assertEqual(actual, expected)

    @skipIf(not network, "Network test")
    def testParent(self):
        """The queried object does not have a TZ property
        but it is part of an object that does"""
        actual = wikidata.timezone_from_qid("Q22690")  # Metz, France
        expected = utils.time.iana_timezone("Europe/Paris")
        self.assertEqual(actual, expected)

    @skipIf(not network, "Network test")
    def testParentAndIgnoreSelf(self):
        """The queried object has a TZ property, but it's useless to us;
        however it is part of an object that has a useful one."""
        actual = wikidata.timezone_from_qid("Q60")  # New York City, NY
        expected = utils.time.iana_timezone("America/New_York")
        self.assertEqual(actual, expected)

    @skipIf(not network, "Network test")
    def testParentQualifiedIgnorePreferred(self):
        """The queried object does not have a TZ property,
        and is part of an object that does.
        However, this parent's 'preferred' timezone is not the
        right one, so we must make sure to select the right one
        based on P518 ('applies to part')."""
        # La Réunion is a French region, but in UTC+4.
        # France has a bunch of timezone statements, and 'Europe/Paris'
        # is marked as Preferred because it is the time of metropolitan
        # France. However, it is not valid for La Réunion.
        actual = wikidata.timezone_from_qid("Q17070")  # La Réunion
        four_hours_ahead = datetime.timedelta(hours=4)
        expected = datetime.timezone(four_hours_ahead)
        self.assertEqual(actual, expected)
# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:

View File

@ -0,0 +1,127 @@
###
# Copyright (c) 2021, Valentin Lorentz
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
import json
import string
import datetime
import urllib.parse
import supybot.utils as utils
# Endpoint that SPARQL queries are sent to.
SPARQL_URL = "https://query.wikidata.org/sparql"

# Template of the SPARQL query used to look up the timezone of an entity.
# Its only placeholder is ``subject``, the Q-ID of the entity.  The query
# yields at most one row (LIMIT 1) carrying either ?tzid (an IANA timezone
# identifier) or ?utcoffset (a fixed offset from UTC).
#
# Qualifiers of a statement node are reached through the ``pq:`` prefix;
# ``wdt:`` only exists on entities.  Both "valid in period" (P1264) filters
# therefore use ``pq:P1264`` -- with ``wdt:`` the filter can never match and
# excludes nothing.
TIMEZONE_QUERY = string.Template("""
SELECT ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid (MIN(?area) AS ?min_area) WHERE {
    # find all entities that the subject is part of, recursively
    wd:$subject (wdt:P131*) ?item.

    # Get all timezones (returns a superset of "?item wdt:P421 ?timezone", as it does not filter on rank)
    ?item p:P421 ?statement.
    ?statement ps:P421 ?timezone.
    ?statement wikibase:rank ?rank.

    # fetch the end of validity of the given statement (TODO: check it)
    OPTIONAL { ?statement pq:P582 ?endtime. }

    # filter out statements that applies only to a part of ?item...
    {
        FILTER NOT EXISTS {
            ?statement pq:P518 ?appliestopart.
        }
    }
    UNION {
        # ... unless it applies to a part that contains what we are interested in
        ?statement pq:P518 ?appliestopart.
        wd:$subject (wdt:P131*) ?appliestopart.
    }

    # Filter out values only valid in certain periods of the year (DST vs normal time)
    FILTER NOT EXISTS {
        ?statement pq:P1264 ?validinperiod.
    }

    # store the identifier of the object the statement applies to
    BIND(IF(BOUND(?appliestopart),?appliestopart,?item) AS ?statementsubject).

    #OPTIONAL { ?statementsubject wdt:P1082 ?population. }
    OPTIONAL { ?statementsubject wdt:P2046 ?area. }

    # ?timezone wdt:P31 ?timezonetype.
    # ?timezone wdt:P31 wd:Q17272692. # only keep IANA timezones
    # ?timezone wdt:P31 wd:Q17272482. # only keep UTC offsets
    {
        # Get either an IANA timezone ID...
        ?timezone wdt:P6687 ?tzid.
    }
    UNION
    {
        # ... or an absolute UTC offset that is not subject to DST
        ?timezone p:P2907 ?utcoffset_statement.
        ?utcoffset_statement ps:P2907 ?utcoffset.

        FILTER NOT EXISTS {
            ?utcoffset_statement pq:P1264 ?utcoffset_validinperiod.
        }
    }

    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}

# Deduplicate in case there is more than one ?area statement
GROUP BY ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid

# Get the smallest entities first. As they are more specific,
# they are more likely to be correct.
ORDER BY ?min_area DESC(?tzid)

LIMIT 1
""")
def _query_sparql(query):
    """Send ``query`` to the endpoint at ``SPARQL_URL``, asking for JSON,
    and return the decoded response."""
    query_string = urllib.parse.urlencode({"format": "json", "query": query})
    raw_response = utils.web.getUrlContent(SPARQL_URL + "?" + query_string)
    return json.loads(raw_response)
def timezone_from_qid(location_qid):
    """Returns a :class:`datetime.tzinfo` object, given a Wikidata Q-ID.

    eg. ``"Q60"`` for New York City.

    ``location_qid`` is substituted into ``TIMEZONE_QUERY`` and the result
    is sent through ``_query_sparql``.  Only the first row of the response
    is used:

    * if it has a ``tzid`` binding, its value is passed to
      ``utils.time.iana_timezone`` and the result is returned;
    * otherwise its ``utcoffset`` binding is read as a number of hours and
      returned as a fixed-offset :class:`datetime.timezone`.

    Returns ``None`` when the response contains no row.

    Raises :class:`AssertionError` if the first row has neither a ``tzid``
    nor a ``utcoffset`` binding.
    """
    data = _query_sparql(TIMEZONE_QUERY.substitute(subject=location_qid))
    results = data["results"]["bindings"]
    if not results:
        return None

    # Every path below returns or raises, so only the first row matters.
    result = results[0]
    if "tzid" in result:
        return utils.time.iana_timezone(result["tzid"]["value"])

    # Raised explicitly rather than with an ``assert`` statement, so the
    # check is still performed when Python runs with -O.
    if "utcoffset" not in result:
        raise AssertionError(
            "timezone query result has neither 'tzid' nor 'utcoffset'"
        )
    utc_offset = float(result["utcoffset"]["value"])
    return datetime.timezone(datetime.timedelta(hours=utc_offset))