From 162e974aba9b6659f77a4d2201a59dcb84f7ac9e Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Tue, 9 Nov 2021 20:42:42 +0100 Subject: [PATCH] Geography: Implement search for OSM IDs using Nominatim. --- plugins/Geography/common.py | 43 ++++++++++++++++++ plugins/Geography/nominatim.py | 80 ++++++++++++++++++++++++++++++++++ plugins/Geography/test.py | 11 +++++ plugins/Geography/wikidata.py | 8 ++-- 4 files changed, 137 insertions(+), 5 deletions(-) create mode 100644 plugins/Geography/common.py create mode 100644 plugins/Geography/nominatim.py diff --git a/plugins/Geography/common.py b/plugins/Geography/common.py new file mode 100644 index 000000000..97f413013 --- /dev/null +++ b/plugins/Geography/common.py @@ -0,0 +1,43 @@ +### +# Copyright (c) 2021, Valentin Lorentz +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the author of this software nor the name of +# contributors to this software may be used to endorse or promote products +# derived from this software without specific prior written consent. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
def headers():
    """Build the HTTP headers used by the Geography plugin's web requests.

    Returns a copy of ``utils.web.defaultHeaders`` whose ``User-agent``
    value has the project URL and the plugin name appended; the shared
    default mapping itself is left untouched.
    """
    # Comply with https://meta.wikimedia.org/wiki/User-Agent_policy
    # and https://operations.osmfoundation.org/policies/nominatim/
    plugin_suffix = " https://github.com/progval/Limnoria/ - Geography plugin"

    request_headers = utils.web.defaultHeaders.copy()
    request_headers["User-agent"] += plugin_suffix
    return request_headers
def _wait_before_query():
    """Should be called before any API access. Blocks the current thread
    in order to follow the rate limit:
    https://operations.osmfoundation.org/policies/nominatim/

    The module-level ``_QUERY_LOCK`` is held while waiting, so concurrent
    callers are released one at a time, each at least
    ``min_time_between_queries`` seconds after the previous one.
    ``_LAST_QUERY_TIME`` is updated to the moment the caller is released."""

    global _LAST_QUERY_TIME

    min_time_between_queries = 1.0

    with _QUERY_LOCK:
        # Elapsed time is "now - last"; the reverse subtraction is always
        # negative and would make the rate limit a no-op.
        time_since_last_query = time.time() - _LAST_QUERY_TIME
        if time_since_last_query < min_time_between_queries:
            # Never sleep longer than the interval itself: if the wall clock
            # was stepped backwards, the elapsed time is negative and the
            # naive difference could block this thread (and the lock) for
            # an arbitrarily long time.
            time.sleep(
                min(
                    min_time_between_queries,
                    min_time_between_queries - time_since_last_query,
                )
            )
        _LAST_QUERY_TIME = time.time()
def search_osmids(query):
    """Queries nominatim's search endpoint and returns a list of OSM ids

    See https://nominatim.org/release-docs/develop/api/Search/ for details
    on the query format.

    :param query: free-form search string, sent as the ``q`` parameter.
    :returns: the ``osm_id`` value of every result object that has one,
        in the order the results were returned.
    """
    data = _query_nominatim("/search", {"format": "json", "q": query})

    # Skip result objects without an "osm_id" key instead of letting a single
    # such entry abort the whole search with a KeyError.
    # NOTE(review): presumably these are places that do not originate from an
    # OSM object (e.g. postcodes) -- confirm against the Nominatim output docs.
    return [item["osm_id"] for item in data if "osm_id" in item]
+ urllib.parse.urlencode(params) - # Comply with https://meta.wikimedia.org/wiki/User-Agent_policy - headers = utils.web.defaultHeaders.copy() - headers["User-agent"] += " https://github.com/progval/Limnoria/ - Geography plugin" - - content = utils.web.getUrlContent(url, headers=headers) + content = utils.web.getUrlContent(url, headers=headers()) return json.loads(content)