Geography: Add support for querying Wikidata from an OSM ID.

This commit is contained in:
Valentin Lorentz 2021-11-09 20:41:38 +01:00
parent 1a72a7d6d0
commit 35c1407779
2 changed files with 60 additions and 17 deletions

View File

@ -41,31 +41,47 @@ class GeographyTestCase(PluginTestCase):
class GeographyWikidataTestCase(SupyTestCase): class GeographyWikidataTestCase(SupyTestCase):
@skipIf(not network, "Network test")
def testOsmidToTimezone(self):
    """OSM IDs are resolved to the URI of the matching Wikidata entity"""
    # (OSM ID, expected Wikidata entity URI) pairs, checked in order.
    expected_uris = [
        (450381, "http://www.wikidata.org/entity/Q22690"),
        (192468, "http://www.wikidata.org/entity/Q47045"),
    ]
    for osmid, uri in expected_uris:
        self.assertEqual(wikidata.uri_from_osmid(osmid), uri)
@skipIf(not network, "Network test") @skipIf(not network, "Network test")
def testDirect(self): def testDirect(self):
"""The queried object directly has a timezone property""" """The queried object directly has a timezone property"""
self.assertEqual( self.assertEqual(
wikidata.timezone_from_qid("Q1384"), # New York # New York
utils.time.iana_timezone("America/New_York") wikidata.timezone_from_uri("http://www.wikidata.org/entity/Q1384"),
) utils.time.iana_timezone("America/New_York"),
)
@skipIf(not network, "Network test") @skipIf(not network, "Network test")
def testParent(self): def testParent(self):
"""The queried object does not have a TZ property """The queried object does not have a TZ property
but it is part of an object that does""" but it is part of an object that does"""
self.assertEqual( self.assertEqual(
wikidata.timezone_from_qid("Q22690"), # Metz, France # Metz, France
utils.time.iana_timezone("Europe/Paris") wikidata.timezone_from_uri(
) "http://www.wikidata.org/entity/Q22690"
),
utils.time.iana_timezone("Europe/Paris"),
)
@skipIf(not network, "Network test") @skipIf(not network, "Network test")
def testParentAndIgnoreSelf(self): def testParentAndIgnoreSelf(self):
"""The queried object has a TZ property, but it's useless to us; """The queried object has a TZ property, but it's useless to us;
however it is part of an object that has a useful one.""" however it is part of an object that has a useful one."""
self.assertEqual( self.assertEqual(
wikidata.timezone_from_qid("Q60"), # New York City, NY # New York City, NY
utils.time.iana_timezone("America/New_York") wikidata.timezone_from_uri("http://www.wikidata.org/entity/Q60"),
) utils.time.iana_timezone("America/New_York"),
)
@skipIf(not network, "Network test") @skipIf(not network, "Network test")
def testParentQualifiedIgnorePreferred(self): def testParentQualifiedIgnorePreferred(self):
@ -79,8 +95,13 @@ class GeographyWikidataTestCase(SupyTestCase):
# is marked as Preferred because it is the time of metropolitan # is marked as Preferred because it is the time of metropolitan
# France. However, it is not valid for La Réunion. # France. However, it is not valid for La Réunion.
self.assertEqual( self.assertEqual(
wikidata.timezone_from_qid("Q17070"), # La Réunion # La Réunion
datetime.timezone(datetime.timedelta(hours=4)) wikidata.timezone_from_uri(
) "http://www.wikidata.org/entity/Q17070"
),
datetime.timezone(datetime.timedelta(hours=4)),
)
# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: # vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:

View File

@ -42,7 +42,7 @@ TIMEZONE_QUERY = string.Template(
SELECT ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid (MIN(?area) AS ?min_area) WHERE { SELECT ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid (MIN(?area) AS ?min_area) WHERE {
# find all ?item entities that the subject is part of, recursively; # find all ?item entities that the subject is part of, recursively;
wd:$subject (wdt:P131*) ?item. <$subject> (wdt:P131*) ?item.
# Get all timezones (returns a superset of "?item wdt:P421 ?timezone", as it does not filter on rank) # Get all timezones (returns a superset of "?item wdt:P421 ?timezone", as it does not filter on rank)
?item p:P421 ?statement. ?item p:P421 ?statement.
@ -63,7 +63,7 @@ SELECT ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid (MIN(?are
UNION { UNION {
# ... unless it applies to a part that contains what we are interested in # ... unless it applies to a part that contains what we are interested in
?statement pq:P518 ?appliestopart. ?statement pq:P518 ?appliestopart.
wd:$subject (wdt:P131*) ?appliestopart. <$subject> (wdt:P131*) ?appliestopart.
} }
# Filter out values only valid in certain periods of the year (DST vs normal time) # Filter out values only valid in certain periods of the year (DST vs normal time)
@ -104,7 +104,18 @@ GROUP BY ?item ?itemLabel ?rank ?endtime ?appliestopart ?utcoffset ?tzid
ORDER BY ?min_area DESC(?tzid) ORDER BY ?min_area DESC(?tzid)
LIMIT 1 LIMIT 1
""") """
)
# SPARQL query template used by uri_from_osmid(): selects at most one ?item
# whose wdt:P402 value is the string "$osmid" (filled in via .substitute()).
# NOTE(review): P402 is presumably Wikidata's OpenStreetMap ID property;
# confirm against the Wikidata property documentation.
OSMID_QUERY = string.Template(
"""
SELECT ?item WHERE {
?item wdt:P402 "$osmid".
}
LIMIT 1
"""
)
def _query_sparql(query): def _query_sparql(query):
params = {"format": "json", "query": query} params = {"format": "json", "query": query}
@ -117,10 +128,11 @@ def _query_sparql(query):
content = utils.web.getUrlContent(url, headers=headers) content = utils.web.getUrlContent(url, headers=headers)
return json.loads(content) return json.loads(content)
def timezone_from_qid(location_qid):
def timezone_from_uri(location_uri):
"""Returns a :class:datetime.tzinfo object, given a Wikidata Q-ID. """Returns a :class:datetime.tzinfo object, given a Wikidata Q-ID.
eg. ``"Q60"`` for New York City.""" eg. ``"Q60"`` for New York City."""
data = _query_sparql(TIMEZONE_QUERY.substitute(subject=location_qid)) data = _query_sparql(TIMEZONE_QUERY.substitute(subject=location_uri))
results = data["results"]["bindings"] results = data["results"]["bindings"]
for result in results: for result in results:
if "tzid" in result: if "tzid" in result:
@ -129,3 +141,13 @@ def timezone_from_qid(location_qid):
assert "utcoffset" in result assert "utcoffset" in result
utc_offset = float(result["utcoffset"]["value"]) utc_offset = float(result["utcoffset"]["value"])
return datetime.timezone(datetime.timedelta(hours=utc_offset)) return datetime.timezone(datetime.timedelta(hours=utc_offset))
return None
def uri_from_osmid(location_osmid):
    """Returns the URI of the Wikidata entity matching an OpenStreetMap ID.

    eg. ``"http://www.wikidata.org/entity/Q22690"`` for OSM ID ``450381``.

    Returns ``None`` if the query yields no result."""
    data = _query_sparql(OSMID_QUERY.substitute(osmid=location_osmid))
    results = data["results"]["bindings"]
    if not results:
        # No Wikidata item is bound to this OSM ID.
        return None
    # OSMID_QUERY ends with LIMIT 1, so there is at most one binding.
    return results[0]["item"]["value"]