Geography: Add support for querying Wikidata from an OSM ID.

This commit is contained in:
Valentin Lorentz 2021-11-09 20:41:38 +01:00
parent 1a72a7d6d0
commit 35c1407779
2 changed files with 60 additions and 17 deletions

View File

@ -41,31 +41,47 @@ class GeographyTestCase(PluginTestCase):
class GeographyWikidataTestCase(SupyTestCase):
@skipIf(not network, "Network test")
def testOsmidToTimezone(self):
    """Each OSM ID below must resolve to the listed Wikidata entity URI."""
    expected_uris = (
        (450381, "http://www.wikidata.org/entity/Q22690"),
        (192468, "http://www.wikidata.org/entity/Q47045"),
    )
    for osmid, expected_uri in expected_uris:
        self.assertEqual(wikidata.uri_from_osmid(osmid), expected_uri)
@skipIf(not network, "Network test")
def testDirect(self):
"""The queried object directly has a timezone property"""
self.assertEqual(
wikidata.timezone_from_qid("Q1384"), # New York
utils.time.iana_timezone("America/New_York")
)
# New York
wikidata.timezone_from_uri("http://www.wikidata.org/entity/Q1384"),
utils.time.iana_timezone("America/New_York"),
)
@skipIf(not network, "Network test")
def testParent(self):
"""The queried object does not have a TZ property
but it is part of an object that does"""
self.assertEqual(
wikidata.timezone_from_qid("Q22690"), # Metz, France
utils.time.iana_timezone("Europe/Paris")
)
# Metz, France
wikidata.timezone_from_uri(
"http://www.wikidata.org/entity/Q22690"
),
utils.time.iana_timezone("Europe/Paris"),
)
@skipIf(not network, "Network test")
def testParentAndIgnoreSelf(self):
"""The queried object has a TZ property, but it's useless to us;
however it is part of an object that has a useful one."""
self.assertEqual(
wikidata.timezone_from_qid("Q60"), # New York City, NY
utils.time.iana_timezone("America/New_York")
)
# New York City, NY
wikidata.timezone_from_uri("http://www.wikidata.org/entity/Q60"),
utils.time.iana_timezone("America/New_York"),
)
@skipIf(not network, "Network test")
def testParentQualifiedIgnorePreferred(self):
@ -79,8 +95,13 @@ class GeographyWikidataTestCase(SupyTestCase):
# is marked as Preferred because it is the time of metropolitan
# France. However, it is not valid for La Réunion.
self.assertEqual(
wikidata.timezone_from_qid("Q17070"), # La Réunion
datetime.timezone(datetime.timedelta(hours=4))
)
# La Réunion
wikidata.timezone_from_uri(
"http://www.wikidata.org/entity/Q17070"
),
datetime.timezone(datetime.timedelta(hours=4)),
)
# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:

View File

@ -42,7 +42,7 @@ TIMEZONE_QUERY = string.Template(
SELECT ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid (MIN(?area) AS ?min_area) WHERE {
# find all ?item entities that the subject is part of, recursively;
wd:$subject (wdt:P131*) ?item.
<$subject> (wdt:P131*) ?item.
# Get all timezones (returns a superset of "?item wdt:P421 ?timezone", as it does not filter on rank)
?item p:P421 ?statement.
@ -63,7 +63,7 @@ SELECT ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid (MIN(?are
UNION {
# ... unless it applies to a part that contains what we are interested in
?statement pq:P518 ?appliestopart.
wd:$subject (wdt:P131*) ?appliestopart.
<$subject> (wdt:P131*) ?appliestopart.
}
# Filter out values only valid in certain periods of the year (DST vs normal time)
@ -104,7 +104,18 @@ GROUP BY ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid
ORDER BY ?min_area DESC(?tzid)
LIMIT 1
""")
"""
)
# SPARQL template selecting at most one ?item whose wdt:P402 value equals
# $osmid. The placeholder is substituted inside a quoted SPARQL string
# literal, without any escaping.
# NOTE(review): P402 is presumably Wikidata's "OpenStreetMap relation ID"
# property -- confirm against the Wikidata property page.
OSMID_QUERY = string.Template(
"""
SELECT ?item WHERE {
?item wdt:P402 "$osmid".
}
LIMIT 1
"""
)
def _query_sparql(query):
params = {"format": "json", "query": query}
@ -117,10 +128,11 @@ def _query_sparql(query):
content = utils.web.getUrlContent(url, headers=headers)
return json.loads(content)
def timezone_from_qid(location_qid):
def timezone_from_uri(location_uri):
"""Returns a :class:`datetime.tzinfo` object, given a Wikidata entity URI.
eg. ``"http://www.wikidata.org/entity/Q60"`` for New York City."""
data = _query_sparql(TIMEZONE_QUERY.substitute(subject=location_qid))
data = _query_sparql(TIMEZONE_QUERY.substitute(subject=location_uri))
results = data["results"]["bindings"]
for result in results:
if "tzid" in result:
@ -129,3 +141,13 @@ def timezone_from_qid(location_qid):
assert "utcoffset" in result
utc_offset = float(result["utcoffset"]["value"])
return datetime.timezone(datetime.timedelta(hours=utc_offset))
return None
def uri_from_osmid(location_osmid):
    """Returns the Wikidata entity URI matching an OpenStreetMap ID.

    eg. ``450381`` gives ``"http://www.wikidata.org/entity/Q22690"``.

    :param location_osmid: the OSM ID, substituted as-is into
        ``OSMID_QUERY``.
    :returns: the value of the first ``item`` binding of the SPARQL
        response (a URI string, not a bare Q-ID), or ``None`` if the
        query returned no binding.
    """
    data = _query_sparql(OSMID_QUERY.substitute(osmid=location_osmid))
    results = data["results"]["bindings"]
    if not results:
        # No Wikidata item matched; make the "not found" outcome explicit,
        # as timezone_from_uri does.
        return None
    # Only the first binding is ever used (the query itself has LIMIT 1).
    return results[0]["item"]["value"]