From d2091e8f71ce2dda90c86e3b3a4edce3ec723981 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Thu, 11 Nov 2021 19:07:31 +0100 Subject: [PATCH] Geography: Always prefer IANA timezones over UTC offsets Even if the UTC offsets are given on a more specific entity, they are usually worthless to us (eg. DST). --- plugins/Geography/test.py | 6 ++++++ plugins/Geography/wikidata.py | 25 ++++++++++++++++--------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/plugins/Geography/test.py b/plugins/Geography/test.py index 7accfa44b..7cbb7a77b 100644 --- a/plugins/Geography/test.py +++ b/plugins/Geography/test.py @@ -180,6 +180,12 @@ class GeographyWikidataTestCase(SupyTestCase): utils.time.iana_timezone("America/New_York"), ) + self.assertEqual( + # Paris, France + wikidata.timezone_from_uri("http://www.wikidata.org/entity/Q90"), + utils.time.iana_timezone("Europe/Paris"), + ) + @skipIf(not network, "Network test") def testParentQualifiedIgnorePreferred(self): # The queried object does not have a TZ property, diff --git a/plugins/Geography/wikidata.py b/plugins/Geography/wikidata.py index df7ff7ced..712fe01a1 100644 --- a/plugins/Geography/wikidata.py +++ b/plugins/Geography/wikidata.py @@ -79,6 +79,9 @@ SELECT ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid (MIN(?are # Get the area, will be used to order by specificity OPTIONAL { ?statementsubject wdt:P2046 ?area. } + # Require that ?timezone be an instance of... + ?timezone (wdt:P31/wdt:P279*) <$tztype>. + { # Get either an IANA timezone ID... ?timezone wdt:P6687 ?tzid. @@ -130,15 +133,19 @@ def _query_sparql(query): def timezone_from_uri(location_uri): """Returns a :class:datetime.tzinfo object, given a Wikidata Q-ID. eg. ``"Q60"`` for New York City.""" - data = _query_sparql(TIMEZONE_QUERY.substitute(subject=location_uri)) - results = data["results"]["bindings"] - for result in results: - if "tzid" in result: - return utils.time.iana_timezone(result["tzid"]["value"]) - else: - assert "utcoffset" in result - utc_offset = float(result["utcoffset"]["value"]) - return datetime.timezone(datetime.timedelta(hours=utc_offset)) + for tztype in [ + "http://www.wikidata.org/entity/Q17272692", # IANA timezones first + "http://www.wikidata.org/entity/Q12143", # any timezone as a fallback + ]: + data = _query_sparql(TIMEZONE_QUERY.substitute(subject=location_uri, tztype=tztype)) + results = data["results"]["bindings"] + for result in results: + if "tzid" in result: + return utils.time.iana_timezone(result["tzid"]["value"]) + else: + assert "utcoffset" in result + utc_offset = float(result["utcoffset"]["value"]) + return datetime.timezone(datetime.timedelta(hours=utc_offset)) return None