Geography: Implement search for OSM IDs using Nominatim.

This commit is contained in:
Valentin Lorentz 2021-11-09 20:42:42 +01:00
parent 35c1407779
commit 162e974aba
4 changed files with 137 additions and 5 deletions

View File

@ -0,0 +1,43 @@
###
# Copyright (c) 2021, Valentin Lorentz
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
import supybot.utils as utils
def headers():
    """Return the HTTP headers to send with the Geography plugin's requests.

    The result is a copy of ``utils.web.defaultHeaders`` whose
    "User-agent" value has a plugin-specific suffix appended, so the
    shared defaults are left untouched."""
    # Comply with https://meta.wikimedia.org/wiki/User-Agent_policy
    # and https://operations.osmfoundation.org/policies/nominatim/
    suffix = " https://github.com/progval/Limnoria/ - Geography plugin"

    result = utils.web.defaultHeaders.copy()
    result["User-agent"] += suffix
    return result

View File

@ -0,0 +1,80 @@
###
# Copyright (c) 2021, Valentin Lorentz
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
import json
import time
import string
import datetime
import threading
import urllib.parse
import supybot.utils as utils
from .common import headers
# Root of the Nominatim instance; endpoint paths are appended to it.
NOMINATIM_BASE_URL = "https://nominatim.openstreetmap.org"

# Held by _wait_before_query() while it checks and updates _LAST_QUERY_TIME.
_QUERY_LOCK = threading.Lock()

# time.time() value recorded by the latest _wait_before_query() call;
# 0 until the first call.
_LAST_QUERY_TIME = 0
def _wait_before_query():
    """Should be called before any API access. Blocks the current thread
    in order to follow the rate limit:
    https://operations.osmfoundation.org/policies/nominatim/

    Callers are serialized through ``_QUERY_LOCK``. Each one sleeps for
    whatever remains of the minimum interval since the previous call, then
    records the current time in ``_LAST_QUERY_TIME``."""
    global _LAST_QUERY_TIME

    min_time_between_queries = 1.0

    with _QUERY_LOCK:
        # Elapsed time is "now - last"; the previous query is in the past.
        time_since_last_query = time.time() - _LAST_QUERY_TIME
        remaining = min_time_between_queries - time_since_last_query
        if remaining > 0:
            # A backwards wall-clock jump makes the elapsed time negative;
            # never sleep for more than one full interval in that case.
            time.sleep(min(remaining, min_time_between_queries))
        _LAST_QUERY_TIME = time.time()
def _query_nominatim(path, params):
    """Fetch ``path`` from the Nominatim API and return the decoded JSON.

    ``params`` is URL-encoded into the query string. The request is
    preceded by a call to ``_wait_before_query()`` and sent with the
    headers returned by ``headers()``."""
    query_string = urllib.parse.urlencode(params)
    url = "".join([NOMINATIM_BASE_URL, path, "?", query_string])

    _wait_before_query()

    return json.loads(utils.web.getUrlContent(url, headers=headers()))
def search_osmids(query):
    """Queries nominatim's search endpoint and returns a list of OSM ids

    See https://nominatim.org/release-docs/develop/api/Search/ for details
    on the query format

    The ids are returned in the order of the API response. Entries
    without an "osm_id" key are skipped instead of raising KeyError."""
    data = _query_nominatim("/search", {"format": "json", "q": query})
    # NOTE(review): Nominatim can apparently return entries that are not
    # tied to an OSM object (e.g. postcodes) and carry no "osm_id" --
    # confirm against the output format documentation.
    return [item["osm_id"] for item in data if "osm_id" in item]

View File

@ -35,6 +35,8 @@ from supybot.test import *
from supybot import utils
from . import wikidata
from . import nominatim
class GeographyTestCase(PluginTestCase):
plugins = ("Geography",)
@ -103,5 +105,14 @@ class GeographyWikidataTestCase(SupyTestCase):
)
class GeographyNominatimTestCase(SupyTestCase):
    """Tests for the nominatim module's search helper."""

    @skipIf(not network, "Network test")
    def testSearch(self):
        # OSM id the searches below are expected to return for Metz.
        metz_osm_id = 450381

        # A bare city name only needs to list it somewhere in the results.
        self.assertIn(metz_osm_id, nominatim.search_osmids("Metz"))

        # The country-qualified query must rank it first.
        results = nominatim.search_osmids("Metz, France")
        self.assertEqual(results[0], metz_osm_id, results)
# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:

View File

@ -35,6 +35,8 @@ import urllib.parse
import supybot.utils as utils
from .common import headers
SPARQL_URL = "https://query.wikidata.org/sparql"
TIMEZONE_QUERY = string.Template(
@ -121,11 +123,7 @@ def _query_sparql(query):
params = {"format": "json", "query": query}
url = SPARQL_URL + "?" + urllib.parse.urlencode(params)
# Comply with https://meta.wikimedia.org/wiki/User-Agent_policy
headers = utils.web.defaultHeaders.copy()
headers["User-agent"] += " https://github.com/progval/Limnoria/ - Geography plugin"
content = utils.web.getUrlContent(url, headers=headers)
content = utils.web.getUrlContent(url, headers=headers())
return json.loads(content)