From e74f48a6d4420cd08af2504e8c78d0a62b9f28e9 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 30 Apr 2024 20:01:02 +0200 Subject: [PATCH 01/14] create subproject for embedded photon --- .gitignore | 2 + build.gradle => app/es_embedded/build.gradle | 75 +++++-------------- {es => app/es_embedded/es}/__init__.py | 0 .../es_embedded/es}/index_settings.json | 0 {es => app/es_embedded/es}/mappings.json | 0 .../plugin-descriptor.properties | 0 .../lang-painless/plugin-security.policy | 0 settings.gradle | 2 + 8 files changed, 21 insertions(+), 58 deletions(-) rename build.gradle => app/es_embedded/build.gradle (50%) rename {es => app/es_embedded/es}/__init__.py (100%) rename {es => app/es_embedded/es}/index_settings.json (100%) rename {es => app/es_embedded/es}/mappings.json (100%) rename {es => app/es_embedded/es}/modules/lang-painless/plugin-descriptor.properties (100%) rename {es => app/es_embedded/es}/modules/lang-painless/plugin-security.policy (100%) diff --git a/.gitignore b/.gitignore index 077758141..8dc69496f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ target/ photon_data/ dependency-reduced-pom.xml +.gradle/ +app/*/build .idea/ *.iml diff --git a/build.gradle b/app/es_embedded/build.gradle similarity index 50% rename from build.gradle rename to app/es_embedded/build.gradle index 1ac5f4adc..5b8ed7173 100644 --- a/build.gradle +++ b/app/es_embedded/build.gradle @@ -3,85 +3,42 @@ plugins { id 'application' } -group = 'de.komoot.photon' -version = '0.5.0' +apply from: rootProject.file('buildSrc/shared.gradle') -description = "Geocoder for OSM data" - -distZip.enabled = false -distTar.enabled = false -shadowDistZip.enabled = false -shadowDistTar.enabled = false - -application { - mainClass = 'de.komoot.photon.App'; -} - -java { - sourceCompatibility = JavaVersion.VERSION_11 - targetCompatibility = JavaVersion.VERSION_11 -} - -repositories { - maven { url "https://www.datanucleus.org/downloads/maven2/" } - mavenCentral() -} - -configurations { 
- runtimePlugins.extendsFrom runtimeOnly { - canBeResolved = true - } -} +description = "Geocoder for OSM data (ElasticSearch-based version)" sourceSets { main { resources { - srcDir 'build/es' srcDir 'es' + srcDir 'build/es' + } + } + test { + java { + srcDir 'src/test/java' } } } -dependencies { - implementation('org.elasticsearch:elasticsearch:5.6.16') { - exclude(module: 'log4j-api') +configurations { + runtimePlugins.extendsFrom runtimeOnly { + canBeResolved = true } +} + +dependencies { + implementation 'org.elasticsearch:elasticsearch:5.6.16' implementation 'org.elasticsearch.plugin:transport-netty4-client:5.6.16' - implementation 'org.apache.logging.log4j:log4j-core:2.23.1' - implementation 'org.apache.logging.log4j:log4j-api:2.23.1' implementation('org.elasticsearch.client:transport:5.6.16') { exclude(module: 'commons-logging') } - implementation 'org.postgresql:postgresql:42.7.2' - implementation 'org.slf4j:slf4j-api:2.0.13' - implementation 'org.apache.logging.log4j:log4j-slf4j2-impl:2.23.1' - implementation 'com.beust:jcommander:1.82' - implementation 'org.apache.commons:commons-lang3:3.14.0' - implementation 'org.springframework:spring-jdbc:5.3.32' - implementation('org.apache.commons:commons-dbcp2:2.12.0') { -exclude(module: 'commons-logging') - } - implementation 'org.locationtech.jts:jts-core:1.19.0' - implementation 'com.sparkjava:spark-core:2.9.4' - implementation 'net.postgis:postgis-jdbc:2023.1.0' - implementation 'org.json:json:20240303' - - testImplementation(platform("org.junit:junit-bom:5.10.2")) - testImplementation 'com.h2database:h2:2.2.224' - testImplementation 'org.junit.jupiter:junit-jupiter' - testImplementation 'org.mockito:mockito-core:5.11.0' - - testRuntimeOnly 'org.junit.platform:junit-platform-launcher' runtimePlugins 'org.codelibs.elasticsearch.module:lang-painless:5.6.16' runtimePlugins 'org.ow2.asm:asm-debug-all:5.1' runtimePlugins 'org.antlr:antlr4-runtime:4.5.1-1' } -tasks.named('test') { - useJUnitPlatform() -} - task 
copyLibs(type: Copy){ from configurations.runtimePlugins { into layout.buildDirectory.dir('es/modules/lang-painless') @@ -100,6 +57,8 @@ tasks.named('processResources') { shadowJar { mergeServiceFiles() + destinationDirectory.set(rootProject.file('target')) + archiveBaseName.set('photon') archiveClassifier.set('') exclude '**/module-info.class' diff --git a/es/__init__.py b/app/es_embedded/es/__init__.py similarity index 100% rename from es/__init__.py rename to app/es_embedded/es/__init__.py diff --git a/es/index_settings.json b/app/es_embedded/es/index_settings.json similarity index 100% rename from es/index_settings.json rename to app/es_embedded/es/index_settings.json diff --git a/es/mappings.json b/app/es_embedded/es/mappings.json similarity index 100% rename from es/mappings.json rename to app/es_embedded/es/mappings.json diff --git a/es/modules/lang-painless/plugin-descriptor.properties b/app/es_embedded/es/modules/lang-painless/plugin-descriptor.properties similarity index 100% rename from es/modules/lang-painless/plugin-descriptor.properties rename to app/es_embedded/es/modules/lang-painless/plugin-descriptor.properties diff --git a/es/modules/lang-painless/plugin-security.policy b/app/es_embedded/es/modules/lang-painless/plugin-security.policy similarity index 100% rename from es/modules/lang-painless/plugin-security.policy rename to app/es_embedded/es/modules/lang-painless/plugin-security.policy diff --git a/settings.gradle b/settings.gradle index 97b67da8a..60abdc6d4 100644 --- a/settings.gradle +++ b/settings.gradle @@ -1 +1,3 @@ rootProject.name = 'photon' + +include ':app:es_embedded' From 324b7bffce98caf36e7feeae553b2893e67d6a76 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 30 Apr 2024 20:11:47 +0200 Subject: [PATCH 02/14] move ES-specific code into app directory --- .../es_embedded/src}/main/java/de/komoot/photon/JsonDumper.java | 0 .../es_embedded/src}/main/java/de/komoot/photon/Utils.java | 0 
.../main/java/de/komoot/photon/elasticsearch/ElasticResult.java | 0 .../komoot/photon/elasticsearch/ElasticsearchReverseHandler.java | 0 .../komoot/photon/elasticsearch/ElasticsearchSearchHandler.java | 0 .../src}/main/java/de/komoot/photon/elasticsearch/Importer.java | 0 .../main/java/de/komoot/photon/elasticsearch/IndexMapping.java | 0 .../main/java/de/komoot/photon/elasticsearch/IndexSettings.java | 0 .../main/java/de/komoot/photon/elasticsearch/OsmTagFilter.java | 0 .../main/java/de/komoot/photon/elasticsearch/PhotonIndex.java | 0 .../java/de/komoot/photon/elasticsearch/PhotonQueryBuilder.java | 0 .../java/de/komoot/photon/elasticsearch/ReverseQueryBuilder.java | 0 .../src}/main/java/de/komoot/photon/elasticsearch/Server.java | 0 .../src}/main/java/de/komoot/photon/elasticsearch/Updater.java | 0 .../es_embedded/src}/test/java/de/komoot/photon/ESBaseTester.java | 0 .../de/komoot/photon/elasticsearch/DatabasePropertiesTest.java | 0 .../java/de/komoot/photon/elasticsearch/ElasticGetIdResult.java | 0 .../java/de/komoot/photon/elasticsearch/ElasticResultTest.java | 0 .../java/de/komoot/photon/elasticsearch/ElasticTestServer.java | 0 .../test/java/de/komoot/photon/elasticsearch/ImporterTest.java | 0 .../src}/test/java/de/komoot/photon/elasticsearch/ServerTest.java | 0 .../test/java/de/komoot/photon/elasticsearch/UpdaterTest.java | 0 .../src}/test/java/org/elasticsearch/bootstrap/JarHell.java | 0 23 files changed, 0 insertions(+), 0 deletions(-) rename {src => app/es_embedded/src}/main/java/de/komoot/photon/JsonDumper.java (100%) rename {src => app/es_embedded/src}/main/java/de/komoot/photon/Utils.java (100%) rename {src => app/es_embedded/src}/main/java/de/komoot/photon/elasticsearch/ElasticResult.java (100%) rename {src => app/es_embedded/src}/main/java/de/komoot/photon/elasticsearch/ElasticsearchReverseHandler.java (100%) rename {src => app/es_embedded/src}/main/java/de/komoot/photon/elasticsearch/ElasticsearchSearchHandler.java (100%) rename {src => 
app/es_embedded/src}/main/java/de/komoot/photon/elasticsearch/Importer.java (100%) rename {src => app/es_embedded/src}/main/java/de/komoot/photon/elasticsearch/IndexMapping.java (100%) rename {src => app/es_embedded/src}/main/java/de/komoot/photon/elasticsearch/IndexSettings.java (100%) rename {src => app/es_embedded/src}/main/java/de/komoot/photon/elasticsearch/OsmTagFilter.java (100%) rename {src => app/es_embedded/src}/main/java/de/komoot/photon/elasticsearch/PhotonIndex.java (100%) rename {src => app/es_embedded/src}/main/java/de/komoot/photon/elasticsearch/PhotonQueryBuilder.java (100%) rename {src => app/es_embedded/src}/main/java/de/komoot/photon/elasticsearch/ReverseQueryBuilder.java (100%) rename {src => app/es_embedded/src}/main/java/de/komoot/photon/elasticsearch/Server.java (100%) rename {src => app/es_embedded/src}/main/java/de/komoot/photon/elasticsearch/Updater.java (100%) rename {src => app/es_embedded/src}/test/java/de/komoot/photon/ESBaseTester.java (100%) rename {src => app/es_embedded/src}/test/java/de/komoot/photon/elasticsearch/DatabasePropertiesTest.java (100%) rename {src => app/es_embedded/src}/test/java/de/komoot/photon/elasticsearch/ElasticGetIdResult.java (100%) rename {src => app/es_embedded/src}/test/java/de/komoot/photon/elasticsearch/ElasticResultTest.java (100%) rename {src => app/es_embedded/src}/test/java/de/komoot/photon/elasticsearch/ElasticTestServer.java (100%) rename {src => app/es_embedded/src}/test/java/de/komoot/photon/elasticsearch/ImporterTest.java (100%) rename {src => app/es_embedded/src}/test/java/de/komoot/photon/elasticsearch/ServerTest.java (100%) rename {src => app/es_embedded/src}/test/java/de/komoot/photon/elasticsearch/UpdaterTest.java (100%) rename {src => app/es_embedded/src}/test/java/org/elasticsearch/bootstrap/JarHell.java (100%) diff --git a/src/main/java/de/komoot/photon/JsonDumper.java b/app/es_embedded/src/main/java/de/komoot/photon/JsonDumper.java similarity index 100% rename from 
src/main/java/de/komoot/photon/JsonDumper.java rename to app/es_embedded/src/main/java/de/komoot/photon/JsonDumper.java diff --git a/src/main/java/de/komoot/photon/Utils.java b/app/es_embedded/src/main/java/de/komoot/photon/Utils.java similarity index 100% rename from src/main/java/de/komoot/photon/Utils.java rename to app/es_embedded/src/main/java/de/komoot/photon/Utils.java diff --git a/src/main/java/de/komoot/photon/elasticsearch/ElasticResult.java b/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/ElasticResult.java similarity index 100% rename from src/main/java/de/komoot/photon/elasticsearch/ElasticResult.java rename to app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/ElasticResult.java diff --git a/src/main/java/de/komoot/photon/elasticsearch/ElasticsearchReverseHandler.java b/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/ElasticsearchReverseHandler.java similarity index 100% rename from src/main/java/de/komoot/photon/elasticsearch/ElasticsearchReverseHandler.java rename to app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/ElasticsearchReverseHandler.java diff --git a/src/main/java/de/komoot/photon/elasticsearch/ElasticsearchSearchHandler.java b/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/ElasticsearchSearchHandler.java similarity index 100% rename from src/main/java/de/komoot/photon/elasticsearch/ElasticsearchSearchHandler.java rename to app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/ElasticsearchSearchHandler.java diff --git a/src/main/java/de/komoot/photon/elasticsearch/Importer.java b/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/Importer.java similarity index 100% rename from src/main/java/de/komoot/photon/elasticsearch/Importer.java rename to app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/Importer.java diff --git a/src/main/java/de/komoot/photon/elasticsearch/IndexMapping.java 
b/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/IndexMapping.java similarity index 100% rename from src/main/java/de/komoot/photon/elasticsearch/IndexMapping.java rename to app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/IndexMapping.java diff --git a/src/main/java/de/komoot/photon/elasticsearch/IndexSettings.java b/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/IndexSettings.java similarity index 100% rename from src/main/java/de/komoot/photon/elasticsearch/IndexSettings.java rename to app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/IndexSettings.java diff --git a/src/main/java/de/komoot/photon/elasticsearch/OsmTagFilter.java b/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/OsmTagFilter.java similarity index 100% rename from src/main/java/de/komoot/photon/elasticsearch/OsmTagFilter.java rename to app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/OsmTagFilter.java diff --git a/src/main/java/de/komoot/photon/elasticsearch/PhotonIndex.java b/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/PhotonIndex.java similarity index 100% rename from src/main/java/de/komoot/photon/elasticsearch/PhotonIndex.java rename to app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/PhotonIndex.java diff --git a/src/main/java/de/komoot/photon/elasticsearch/PhotonQueryBuilder.java b/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/PhotonQueryBuilder.java similarity index 100% rename from src/main/java/de/komoot/photon/elasticsearch/PhotonQueryBuilder.java rename to app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/PhotonQueryBuilder.java diff --git a/src/main/java/de/komoot/photon/elasticsearch/ReverseQueryBuilder.java b/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/ReverseQueryBuilder.java similarity index 100% rename from src/main/java/de/komoot/photon/elasticsearch/ReverseQueryBuilder.java rename to 
app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/ReverseQueryBuilder.java diff --git a/src/main/java/de/komoot/photon/elasticsearch/Server.java b/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/Server.java similarity index 100% rename from src/main/java/de/komoot/photon/elasticsearch/Server.java rename to app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/Server.java diff --git a/src/main/java/de/komoot/photon/elasticsearch/Updater.java b/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/Updater.java similarity index 100% rename from src/main/java/de/komoot/photon/elasticsearch/Updater.java rename to app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/Updater.java diff --git a/src/test/java/de/komoot/photon/ESBaseTester.java b/app/es_embedded/src/test/java/de/komoot/photon/ESBaseTester.java similarity index 100% rename from src/test/java/de/komoot/photon/ESBaseTester.java rename to app/es_embedded/src/test/java/de/komoot/photon/ESBaseTester.java diff --git a/src/test/java/de/komoot/photon/elasticsearch/DatabasePropertiesTest.java b/app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/DatabasePropertiesTest.java similarity index 100% rename from src/test/java/de/komoot/photon/elasticsearch/DatabasePropertiesTest.java rename to app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/DatabasePropertiesTest.java diff --git a/src/test/java/de/komoot/photon/elasticsearch/ElasticGetIdResult.java b/app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ElasticGetIdResult.java similarity index 100% rename from src/test/java/de/komoot/photon/elasticsearch/ElasticGetIdResult.java rename to app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ElasticGetIdResult.java diff --git a/src/test/java/de/komoot/photon/elasticsearch/ElasticResultTest.java b/app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ElasticResultTest.java similarity index 100% rename from 
src/test/java/de/komoot/photon/elasticsearch/ElasticResultTest.java rename to app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ElasticResultTest.java diff --git a/src/test/java/de/komoot/photon/elasticsearch/ElasticTestServer.java b/app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ElasticTestServer.java similarity index 100% rename from src/test/java/de/komoot/photon/elasticsearch/ElasticTestServer.java rename to app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ElasticTestServer.java diff --git a/src/test/java/de/komoot/photon/elasticsearch/ImporterTest.java b/app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ImporterTest.java similarity index 100% rename from src/test/java/de/komoot/photon/elasticsearch/ImporterTest.java rename to app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ImporterTest.java diff --git a/src/test/java/de/komoot/photon/elasticsearch/ServerTest.java b/app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ServerTest.java similarity index 100% rename from src/test/java/de/komoot/photon/elasticsearch/ServerTest.java rename to app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ServerTest.java diff --git a/src/test/java/de/komoot/photon/elasticsearch/UpdaterTest.java b/app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/UpdaterTest.java similarity index 100% rename from src/test/java/de/komoot/photon/elasticsearch/UpdaterTest.java rename to app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/UpdaterTest.java diff --git a/src/test/java/org/elasticsearch/bootstrap/JarHell.java b/app/es_embedded/src/test/java/org/elasticsearch/bootstrap/JarHell.java similarity index 100% rename from src/test/java/org/elasticsearch/bootstrap/JarHell.java rename to app/es_embedded/src/test/java/org/elasticsearch/bootstrap/JarHell.java From d15895dba6886db99a0c0f94218607421872006a Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 30 Apr 2024 21:06:15 +0200 Subject: [PATCH 
03/14] add skeleton for OpenSearch port --- .../java/de/komoot/photon/JsonDumper.java | 3 +- .../photon/{elasticsearch => }/Server.java | 3 +- .../komoot/photon/elasticsearch/Importer.java | 2 +- .../PhotonDocConverter.java} | 56 +++----------- .../komoot/photon/elasticsearch/Updater.java | 2 +- .../java/de/komoot/photon/ESBaseTester.java | 1 - .../ElasticTestServer.java | 3 +- .../elasticsearch/ElasticGetIdResult.java | 2 +- app/opensearch/build.gradle | 47 ++++++++++++ .../java/de/komoot/photon/JsonDumper.java | 22 ++++++ .../main/java/de/komoot/photon/Server.java | 64 ++++++++++++++++ .../java/de/komoot/photon/ESBaseTester.java | 73 +++++++++++++++++++ settings.gradle | 2 +- src/main/java/de/komoot/photon/App.java | 1 - .../komoot/photon/StatusRequestHandler.java | 1 - src/main/java/de/komoot/photon/Utils.java | 43 +++++++++++ 16 files changed, 271 insertions(+), 54 deletions(-) rename app/es_embedded/src/main/java/de/komoot/photon/{elasticsearch => }/Server.java (99%) rename app/es_embedded/src/main/java/de/komoot/photon/{Utils.java => elasticsearch/PhotonDocConverter.java} (79%) rename app/es_embedded/src/test/java/de/komoot/photon/{elasticsearch => }/ElasticTestServer.java (92%) create mode 100644 app/opensearch/build.gradle create mode 100644 app/opensearch/src/main/java/de/komoot/photon/JsonDumper.java create mode 100644 app/opensearch/src/main/java/de/komoot/photon/Server.java create mode 100644 app/opensearch/src/test/java/de/komoot/photon/ESBaseTester.java create mode 100644 src/main/java/de/komoot/photon/Utils.java diff --git a/app/es_embedded/src/main/java/de/komoot/photon/JsonDumper.java b/app/es_embedded/src/main/java/de/komoot/photon/JsonDumper.java index 3e325a124..f93bcf9b9 100644 --- a/app/es_embedded/src/main/java/de/komoot/photon/JsonDumper.java +++ b/app/es_embedded/src/main/java/de/komoot/photon/JsonDumper.java @@ -1,5 +1,6 @@ package de.komoot.photon; +import de.komoot.photon.elasticsearch.PhotonDocConverter; import org.slf4j.Logger; import 
java.io.FileNotFoundException; @@ -26,7 +27,7 @@ public JsonDumper(String filename, String[] languages, String[] extraTags) throw public void add(PhotonDoc doc, int objectId) { try { writer.println("{\"index\": {}}"); - writer.println(Utils.convert(doc, languages, extraTags).string()); + writer.println(PhotonDocConverter.convert(doc, languages, extraTags).string()); } catch (IOException e) { LOGGER.error("Error writing json file", e); } diff --git a/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/Server.java b/app/es_embedded/src/main/java/de/komoot/photon/Server.java similarity index 99% rename from app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/Server.java rename to app/es_embedded/src/main/java/de/komoot/photon/Server.java index f7b5d0684..7f3805942 100644 --- a/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/Server.java +++ b/app/es_embedded/src/main/java/de/komoot/photon/Server.java @@ -1,10 +1,11 @@ -package de.komoot.photon.elasticsearch; +package de.komoot.photon; import de.komoot.photon.DatabaseProperties; import de.komoot.photon.Importer; import de.komoot.photon.Updater; import de.komoot.photon.searcher.ReverseHandler; import de.komoot.photon.searcher.SearchHandler; +import de.komoot.photon.elasticsearch.*; import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.client.Client; import org.elasticsearch.client.transport.TransportClient; diff --git a/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/Importer.java b/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/Importer.java index fd421f30e..a7a0852c4 100644 --- a/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/Importer.java +++ b/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/Importer.java @@ -34,7 +34,7 @@ public void add(PhotonDoc doc, int objectId) { String uid = doc.getUid(objectId); try { this.bulkRequest.add(this.esClient.prepareIndex(PhotonIndex.NAME, PhotonIndex.TYPE). 
- setSource(Utils.convert(doc, languages, extraTags)).setId(uid)); + setSource(PhotonDocConverter.convert(doc, languages, extraTags)).setId(uid)); } catch (IOException e) { LOGGER.error("Could not bulk add document {}", uid, e); return; diff --git a/app/es_embedded/src/main/java/de/komoot/photon/Utils.java b/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/PhotonDocConverter.java similarity index 79% rename from app/es_embedded/src/main/java/de/komoot/photon/Utils.java rename to app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/PhotonDocConverter.java index 13e8e32dd..cc2688bd5 100644 --- a/app/es_embedded/src/main/java/de/komoot/photon/Utils.java +++ b/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/PhotonDocConverter.java @@ -1,17 +1,21 @@ -package de.komoot.photon; +package de.komoot.photon.elasticsearch; -import org.locationtech.jts.geom.Envelope; +import de.komoot.photon.Constants; +import de.komoot.photon.PhotonDoc; import de.komoot.photon.nominatim.model.AddressType; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; +import org.locationtech.jts.geom.Envelope; import java.io.IOException; -import java.util.*; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; -/** - * Helper functions to convert a photon document to XContentBuilder object / JSON - */ -public class Utils { +import static de.komoot.photon.Utils.buildClassificationString; + +public class PhotonDocConverter { public static XContentBuilder convert(PhotonDoc doc, String[] languages, String[] extraTags) throws IOException { final AddressType atype = doc.getAddressType(); XContentBuilder builder = XContentFactory.jsonBuilder().startObject() @@ -72,7 +76,7 @@ public static XContentBuilder convert(PhotonDoc doc, String[] languages, String[ private static void writeExtraTags(XContentBuilder builder, Map docTags, String[] extraTags) throws IOException { 
boolean foundTag = false; - for (String tag: extraTags) { + for (String tag : extraTags) { String value = docTags.get(tag); if (value != null) { if (!foundTag) { @@ -157,40 +161,4 @@ protected static void writeContext(XContentBuilder builder, Set 47 && c < 58) { - sb.append(c); - } - } - return sb.toString(); - } - - public static String buildClassificationString(String key, String value) { - if ("place".equals(key) || "building".equals(key)) { - return null; - } - - if ("highway".equals(key) - && ("unclassified".equals(value) || "residential".equals(value))) { - return null; - } - - for (char c : value.toCharArray()) { - if (!(c == '_' - || ((c >= 'a') && (c <= 'z')) - || ((c >= 'A') && (c <= 'Z')) - || ((c >= '0') && (c <= '9')))) { - return null; - } - } - - return "tpfld" + value.replace("_", "").toLowerCase() + "clsfld" + key.replace("_", "").toLowerCase(); - } } diff --git a/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/Updater.java b/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/Updater.java index c0ea49a58..71464049f 100644 --- a/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/Updater.java +++ b/app/es_embedded/src/main/java/de/komoot/photon/elasticsearch/Updater.java @@ -36,7 +36,7 @@ public void finish() { public void create(PhotonDoc doc, int objectId) { String uid = doc.getUid(objectId); try { - bulkRequest.add(esClient.prepareIndex(PhotonIndex.NAME, PhotonIndex.TYPE).setSource(Utils.convert(doc, languages, extraTags)).setId(uid)); + bulkRequest.add(esClient.prepareIndex(PhotonIndex.NAME, PhotonIndex.TYPE).setSource(PhotonDocConverter.convert(doc, languages, extraTags)).setId(uid)); } catch (IOException e) { LOGGER.error("Creation of new doc {} failed", uid, e); } diff --git a/app/es_embedded/src/test/java/de/komoot/photon/ESBaseTester.java b/app/es_embedded/src/test/java/de/komoot/photon/ESBaseTester.java index b5c5be8fa..4d77220e9 100644 --- a/app/es_embedded/src/test/java/de/komoot/photon/ESBaseTester.java 
+++ b/app/es_embedded/src/test/java/de/komoot/photon/ESBaseTester.java @@ -4,7 +4,6 @@ import org.locationtech.jts.geom.GeometryFactory; import org.locationtech.jts.geom.Point; import org.locationtech.jts.geom.PrecisionModel; -import de.komoot.photon.elasticsearch.ElasticTestServer; import de.komoot.photon.searcher.PhotonResult; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.io.TempDir; diff --git a/app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ElasticTestServer.java b/app/es_embedded/src/test/java/de/komoot/photon/ElasticTestServer.java similarity index 92% rename from app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ElasticTestServer.java rename to app/es_embedded/src/test/java/de/komoot/photon/ElasticTestServer.java index 0b4a44d15..b71f8c75a 100644 --- a/app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ElasticTestServer.java +++ b/app/es_embedded/src/test/java/de/komoot/photon/ElasticTestServer.java @@ -1,5 +1,6 @@ -package de.komoot.photon.elasticsearch; +package de.komoot.photon; +import de.komoot.photon.elasticsearch.*; import de.komoot.photon.searcher.PhotonResult; import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; import org.elasticsearch.action.get.GetResponse; diff --git a/app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ElasticGetIdResult.java b/app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ElasticGetIdResult.java index 715146b9a..7b45aa8c2 100644 --- a/app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ElasticGetIdResult.java +++ b/app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ElasticGetIdResult.java @@ -10,7 +10,7 @@ public class ElasticGetIdResult implements PhotonResult { private final GetResponse result; - ElasticGetIdResult(GetResponse result) { + public ElasticGetIdResult(GetResponse result) { this.result = result; } @Override diff --git a/app/opensearch/build.gradle b/app/opensearch/build.gradle new file mode 100644 
index 000000000..12939dbda --- /dev/null +++ b/app/opensearch/build.gradle @@ -0,0 +1,47 @@ +plugins { + id 'com.github.johnrengelman.shadow' version '8.1.1' + id 'application' +} + +apply from: rootProject.file('buildSrc/shared.gradle') + +description = "Geocoder for OSM data (OpenSearch-based version)" + +sourceSets { + test { + java { + srcDir 'src/test/java' + } + } +} + +dependencies { + implementation 'org.opensearch.client:opensearch-java:2.10.1' + implementation 'org.apache.httpcomponents.client5:httpclient5:5.3.1' + implementation 'com.fasterxml.jackson.core:jackson-databind:2.17.0' + + testImplementation 'org.codelibs.opensearch:opensearch-runner:2.13.0.0' +} + +tasks.named('jar') { + archiveBaseName.set('original-photon-opensearch') + manifest.attributes('Multi-Release': 'true') +} + +shadowJar { + mergeServiceFiles() + destinationDirectory.set(rootProject.file('target')) + archiveBaseName.set('photon-opensearch') + archiveClassifier.set('') + + exclude '**/module-info.class' + + // This mitigates against the log4j JNDI lookup vulnerability: + // https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-44228 + // Since we are using an old version of elastic search that is incompatible + // with a patched, newer version of log4j we have to remove the class + // JndiLookup from the fat jar. 
This is the recommended course of action + // when you cannot upgrade as per https://logging.apache.org/log4j/2.x/security.html + exclude 'org/apache/logging/log4j/core/lookup/JndiLookup.class' +} + diff --git a/app/opensearch/src/main/java/de/komoot/photon/JsonDumper.java b/app/opensearch/src/main/java/de/komoot/photon/JsonDumper.java new file mode 100644 index 000000000..9cb66f760 --- /dev/null +++ b/app/opensearch/src/main/java/de/komoot/photon/JsonDumper.java @@ -0,0 +1,22 @@ +package de.komoot.photon; + +import org.apache.commons.lang3.NotImplementedException; + +import java.io.FileNotFoundException; + +public class JsonDumper implements Importer { + + public JsonDumper(String filename, String[] languages, String[] extraTags) throws FileNotFoundException { + throw new NotImplementedException(); + } + + @Override + public void add(PhotonDoc doc, int objectId) { + throw new NotImplementedException(); + } + + @Override + public void finish() { + throw new NotImplementedException(); + } +} diff --git a/app/opensearch/src/main/java/de/komoot/photon/Server.java b/app/opensearch/src/main/java/de/komoot/photon/Server.java new file mode 100644 index 000000000..46d6f7ddc --- /dev/null +++ b/app/opensearch/src/main/java/de/komoot/photon/Server.java @@ -0,0 +1,64 @@ +package de.komoot.photon; + +import de.komoot.photon.searcher.ReverseHandler; +import de.komoot.photon.searcher.SearchHandler; +import org.slf4j.Logger; + +import java.io.IOException; +import java.util.Date; + +public class Server { + private static final Logger LOGGER = org.slf4j.LoggerFactory.getLogger(Server.class); + + public Server(String mainDirectory) { + } + + public Server start(String clusterName, String[] transportAddresses) { + return this; + } + + public void waitForReady() { + + } + + public void refreshIndexes() { + + } + + public void shutdown() { + + } + + public DatabaseProperties recreateIndex(String[] languages, Date importDate) throws IOException { + return null; + } + + public void 
updateIndexSettings(String synonymFile) throws IOException { + + } + + public void saveToDatabase(DatabaseProperties dbProperties) throws IOException { + + } + + public void loadFromDatabase(DatabaseProperties dbProperties) { + + } + + public Importer createImporter(String[] languages, String[] extraTags) { + return null; + } + + public Updater createUpdater(String[] languages, String[] extraTags) { + return null; + } + + public SearchHandler createSearchHandler(String[] languages, int queryTimeoutSec) { + return null; + } + + public ReverseHandler createReverseHandler(int queryTimeoutSec) { + return null; + } + +} diff --git a/app/opensearch/src/test/java/de/komoot/photon/ESBaseTester.java b/app/opensearch/src/test/java/de/komoot/photon/ESBaseTester.java new file mode 100644 index 000000000..fc06cd718 --- /dev/null +++ b/app/opensearch/src/test/java/de/komoot/photon/ESBaseTester.java @@ -0,0 +1,73 @@ +package de.komoot.photon; + +import de.komoot.photon.searcher.PhotonResult; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.io.TempDir; +import org.locationtech.jts.geom.GeometryFactory; +import org.locationtech.jts.geom.PrecisionModel; + +import java.io.IOException; +import java.nio.file.Path; + +public class ESBaseTester { + @TempDir + protected Path dataDirectory; + + public static final String TEST_CLUSTER_NAME = "photon-test"; + protected static GeometryFactory FACTORY = new GeometryFactory(new PrecisionModel(), 4326); + + protected PhotonDoc createDoc(double lon, double lat, int id, int osmId, String key, String value) { + return null; + } + + @AfterEach + public void tearDown() { + shutdownES(); + } + + + protected PhotonResult getById(int id) { + return null; + } + + public void setUpES() throws IOException { + setUpES(dataDirectory, "en"); + } + + public void setUpES(Path test_directory, String... 
languages) throws IOException { + } + + protected Importer makeImporter() { + return null; + } + + protected Importer makeImporterWithExtra(String... extraTags) { + return null; + } + + protected Importer makeImporterWithLanguages(String... languages) { + return null; + } + + protected Updater makeUpdater() { + return null; + } + + protected Updater makeUpdaterWithExtra(String... extraTags) { + return null; + } + + protected Server getServer() { + return null; + } + + protected void refresh() { + } + + /** + * Shutdown the ES node + */ + public void shutdownES() { + } + +} diff --git a/settings.gradle b/settings.gradle index 60abdc6d4..e328c71c4 100644 --- a/settings.gradle +++ b/settings.gradle @@ -1,3 +1,3 @@ rootProject.name = 'photon' -include ':app:es_embedded' +include ':app:es_embedded', ':app:opensearch' diff --git a/src/main/java/de/komoot/photon/App.java b/src/main/java/de/komoot/photon/App.java index d966121ea..f10e6fd2c 100644 --- a/src/main/java/de/komoot/photon/App.java +++ b/src/main/java/de/komoot/photon/App.java @@ -2,7 +2,6 @@ import com.beust.jcommander.JCommander; import com.beust.jcommander.ParameterException; -import de.komoot.photon.elasticsearch.Server; import de.komoot.photon.nominatim.NominatimConnector; import de.komoot.photon.nominatim.NominatimUpdater; import de.komoot.photon.searcher.ReverseHandler; diff --git a/src/main/java/de/komoot/photon/StatusRequestHandler.java b/src/main/java/de/komoot/photon/StatusRequestHandler.java index 36adec715..1d5c1790b 100644 --- a/src/main/java/de/komoot/photon/StatusRequestHandler.java +++ b/src/main/java/de/komoot/photon/StatusRequestHandler.java @@ -4,7 +4,6 @@ import org.json.JSONObject; -import de.komoot.photon.elasticsearch.Server; import spark.Request; import spark.Response; import spark.RouteImpl; diff --git a/src/main/java/de/komoot/photon/Utils.java b/src/main/java/de/komoot/photon/Utils.java new file mode 100644 index 000000000..786e4929a --- /dev/null +++ 
b/src/main/java/de/komoot/photon/Utils.java @@ -0,0 +1,43 @@ +package de.komoot.photon; + +/** + * Helper functions to convert a photon document to XContentBuilder object / JSON + */ +public class Utils { + + // http://stackoverflow.com/a/4031040/1437096 + public static String stripNonDigits( + final CharSequence input /* inspired by seh's comment */) { + final StringBuilder sb = new StringBuilder( + input.length() /* also inspired by seh's comment */); + for (int i = 0; i < input.length(); i++) { + final char c = input.charAt(i); + if (c > 47 && c < 58) { + sb.append(c); + } + } + return sb.toString(); + } + + public static String buildClassificationString(String key, String value) { + if ("place".equals(key) || "building".equals(key)) { + return null; + } + + if ("highway".equals(key) + && ("unclassified".equals(value) || "residential".equals(value))) { + return null; + } + + for (char c : value.toCharArray()) { + if (!(c == '_' + || ((c >= 'a') && (c <= 'z')) + || ((c >= 'A') && (c <= 'Z')) + || ((c >= '0') && (c <= '9')))) { + return null; + } + } + + return "tpfld" + value.replace("_", "").toLowerCase() + "clsfld" + key.replace("_", "").toLowerCase(); + } +} From b4aca9cc0677f95839ed48dce85f87f8a74f1846 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 30 Apr 2024 22:44:05 +0200 Subject: [PATCH 04/14] port database setup to OpenSearch --- .../main/java/de/komoot/photon/Server.java | 16 +- .../java/de/komoot/photon/ESBaseTester.java | 2 +- .../elasticsearch/ElasticResultTest.java | 3 +- .../main/java/de/komoot/photon/Server.java | 89 ++++++++- .../photon/opensearch/DBPropertyEntry.java | 19 ++ .../photon/opensearch/IndexMapping.java | 99 +++++++++ .../opensearch/IndexSettingBuilder.java | 189 ++++++++++++++++++ .../komoot/photon/opensearch/PhotonIndex.java | 9 + .../java/de/komoot/photon/ESBaseTester.java | 28 ++- .../java/de/komoot/photon/ServerTest.java | 31 +++ .../opensearch/OpenSearchTestServer.java | 52 +++++ src/main/java/de/komoot/photon/App.java 
| 12 +- .../de/komoot/photon/DatabaseProperties.java | 11 + .../komoot/photon/StatusRequestHandler.java | 3 +- .../photon}/DatabasePropertiesTest.java | 2 +- .../photon/query/QueryBasicSearchTest.java | 14 +- .../query/QueryByClassificationTest.java | 10 +- .../photon/query/QueryFilterLayerTest.java | 3 +- .../photon/query/QueryFilterTagValueTest.java | 3 +- .../photon/query/QueryRelevanceTest.java | 10 +- .../query/QueryReverseFilterLayerTest.java | 2 +- .../query/QueryReverseFilterTagValueTest.java | 2 +- .../komoot/photon/query/QueryReverseTest.java | 2 +- 23 files changed, 551 insertions(+), 60 deletions(-) create mode 100644 app/opensearch/src/main/java/de/komoot/photon/opensearch/DBPropertyEntry.java create mode 100644 app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexMapping.java create mode 100644 app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexSettingBuilder.java create mode 100644 app/opensearch/src/main/java/de/komoot/photon/opensearch/PhotonIndex.java create mode 100644 app/opensearch/src/test/java/de/komoot/photon/ServerTest.java create mode 100644 app/opensearch/src/test/java/de/komoot/photon/opensearch/OpenSearchTestServer.java rename {app/es_embedded/src/test/java/de/komoot/photon/elasticsearch => src/test/java/de/komoot/photon}/DatabasePropertiesTest.java (97%) diff --git a/app/es_embedded/src/main/java/de/komoot/photon/Server.java b/app/es_embedded/src/main/java/de/komoot/photon/Server.java index 7f3805942..2738dcf28 100644 --- a/app/es_embedded/src/main/java/de/komoot/photon/Server.java +++ b/app/es_embedded/src/main/java/de/komoot/photon/Server.java @@ -41,16 +41,6 @@ public class Server { private static final Logger LOGGER = org.slf4j.LoggerFactory.getLogger(Server.class); - /** - * Database version created by new imports with the current code. - * - * Format must be: major.minor.patch-dev - * - * Increase to next to be released version when the database layout - * changes in an incompatible way. 
If it is already at the next released - * version, increase the dev version. - */ - private static final String DATABASE_VERSION = "0.3.6-1"; public static final String PROPERTY_DOCUMENT_ID = "DATABASE_PROPERTIES"; private static final String BASE_FIELD = "document_properties"; @@ -232,7 +222,7 @@ private void deleteIndex() { */ public void saveToDatabase(DatabaseProperties dbProperties) throws IOException { final XContentBuilder builder = XContentFactory.jsonBuilder().startObject().startObject(BASE_FIELD) - .field(FIELD_VERSION, DATABASE_VERSION) + .field(FIELD_VERSION, DatabaseProperties.DATABASE_VERSION) .field(FIELD_LANGUAGES, String.join(",", dbProperties.getLanguages())) .field(FIELD_IMPORT_DATE, dbProperties.getImportDate() instanceof Date ? dbProperties.getImportDate().toInstant() : null) .endObject().endObject(); @@ -265,8 +255,8 @@ public void loadFromDatabase(DatabaseProperties dbProperties) { } String version = properties.getOrDefault(FIELD_VERSION, ""); - if (!DATABASE_VERSION.equals(version)) { - LOGGER.error("Database has incompatible version '{}'. Expected: {}", version, DATABASE_VERSION); + if (!DatabaseProperties.DATABASE_VERSION.equals(version)) { + LOGGER.error("Database has incompatible version '{}'. 
Expected: {}", version, DatabaseProperties.DATABASE_VERSION); throw new RuntimeException("Incompatible database."); } diff --git a/app/es_embedded/src/test/java/de/komoot/photon/ESBaseTester.java b/app/es_embedded/src/test/java/de/komoot/photon/ESBaseTester.java index 4d77220e9..655ab5343 100644 --- a/app/es_embedded/src/test/java/de/komoot/photon/ESBaseTester.java +++ b/app/es_embedded/src/test/java/de/komoot/photon/ESBaseTester.java @@ -40,7 +40,7 @@ protected PhotonResult getById(String id) { @AfterEach - public void tearDown() { + public void tearDown() throws IOException { shutdownES(); } diff --git a/app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ElasticResultTest.java b/app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ElasticResultTest.java index 634bb1668..db51d985e 100644 --- a/app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ElasticResultTest.java +++ b/app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/ElasticResultTest.java @@ -14,6 +14,7 @@ import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.io.TempDir; +import java.io.IOException; import java.nio.file.Path; import java.util.Collections; import java.util.HashMap; @@ -68,7 +69,7 @@ void setUp() throws Exception { @AfterAll @Override - public void tearDown() { + public void tearDown() throws IOException { super.tearDown(); } diff --git a/app/opensearch/src/main/java/de/komoot/photon/Server.java b/app/opensearch/src/main/java/de/komoot/photon/Server.java index 46d6f7ddc..223b5073a 100644 --- a/app/opensearch/src/main/java/de/komoot/photon/Server.java +++ b/app/opensearch/src/main/java/de/komoot/photon/Server.java @@ -1,7 +1,17 @@ package de.komoot.photon; +import de.komoot.photon.opensearch.DBPropertyEntry; +import de.komoot.photon.opensearch.IndexMapping; +import de.komoot.photon.opensearch.IndexSettingBuilder; +import de.komoot.photon.opensearch.PhotonIndex; import de.komoot.photon.searcher.ReverseHandler; import 
de.komoot.photon.searcher.SearchHandler; +import org.apache.hc.core5.http.HttpHost; +import org.opensearch.client.json.jackson.JacksonJsonpMapper; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch._types.HealthStatus; +import org.opensearch.client.opensearch._types.OpenSearchException; +import org.opensearch.client.transport.httpclient5.ApacheHttpClient5TransportBuilder; import org.slf4j.Logger; import java.io.IOException; @@ -10,39 +20,102 @@ public class Server { private static final Logger LOGGER = org.slf4j.LoggerFactory.getLogger(Server.class); + protected OpenSearchClient esClient; + public Server(String mainDirectory) { } public Server start(String clusterName, String[] transportAddresses) { + if (transportAddresses.length == 0) { + throw new RuntimeException("OpenSearch-port neds an external OpeSearch instance. Use -transport-addresses."); + } + final HttpHost[] hosts = new HttpHost[transportAddresses.length]; + for (int i = 0; i < transportAddresses.length; ++i) { + final String[] parts = transportAddresses[i].split(":", 2); + hosts[i] = new HttpHost("http", parts[0], + parts.length > 1 ? 
Integer.parseInt(parts[1]) : 9200); + } + + final var transport = ApacheHttpClient5TransportBuilder + .builder(hosts) + .setMapper(new JacksonJsonpMapper()) + .build(); + + esClient = new OpenSearchClient(transport); + return this; } - public void waitForReady() { - + public void waitForReady() throws IOException{ + esClient.cluster().health(h -> h.waitForStatus(HealthStatus.Yellow)); } - public void refreshIndexes() { - + public void refreshIndexes() throws IOException { + waitForReady(); + esClient.indices().refresh(); } public void shutdown() { - + // external node only, do nothing } public DatabaseProperties recreateIndex(String[] languages, Date importDate) throws IOException { - return null; + // delete any existing data + if (esClient.indices().exists(e -> e.index(PhotonIndex.NAME)).value()) { + esClient.indices().delete(d -> d.index(PhotonIndex.NAME)); + } + + (new IndexSettingBuilder()).createIndex(esClient, PhotonIndex.NAME); + + (new IndexMapping()).addLanguages(languages).putMapping(esClient, PhotonIndex.NAME); + + var dbProperties = new DatabaseProperties() + .setLanguages(languages) + .setImportDate(importDate); + saveToDatabase(dbProperties); + + return dbProperties; } public void updateIndexSettings(String synonymFile) throws IOException { + var dbProperties = new DatabaseProperties(); + loadFromDatabase(dbProperties); + + (new IndexSettingBuilder()).setSynonymFile(synonymFile).updateIndex(esClient, PhotonIndex.NAME); + if (dbProperties.getLanguages() != null) { + (new IndexMapping()) + .addLanguages(dbProperties.getLanguages()) + .putMapping(esClient, PhotonIndex.NAME); + } } public void saveToDatabase(DatabaseProperties dbProperties) throws IOException { - + esClient.index(r -> r + .index(PhotonIndex.NAME) + .id(PhotonIndex.PROPERTY_DOCUMENT_ID) + .document(new DBPropertyEntry(dbProperties)) + ); } - public void loadFromDatabase(DatabaseProperties dbProperties) { + public void loadFromDatabase(DatabaseProperties dbProperties) throws IOException { 
+ var dbEntry = esClient.get(r -> r + .index(PhotonIndex.NAME) + .id(PhotonIndex.PROPERTY_DOCUMENT_ID), + DBPropertyEntry.class); + + if (!dbEntry.found()) { + throw new RuntimeException("Cannot access property record. Database too old?"); + } + + if (!DatabaseProperties.DATABASE_VERSION.equals(dbEntry.source().databaseVersion)) { + LOGGER.error("Database has incompatible version '{}'. Expected: {}", + dbEntry.source().databaseVersion, DatabaseProperties.DATABASE_VERSION); + throw new RuntimeException("Incompatible database."); + } + dbProperties.setLanguages(dbEntry.source().languages); + dbProperties.setImportDate(dbEntry.source().importDate); } public Importer createImporter(String[] languages, String[] extraTags) { diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/DBPropertyEntry.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/DBPropertyEntry.java new file mode 100644 index 000000000..4eb3e61fc --- /dev/null +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/DBPropertyEntry.java @@ -0,0 +1,19 @@ +package de.komoot.photon.opensearch; + +import de.komoot.photon.DatabaseProperties; + +import java.util.Date; + +public class DBPropertyEntry { + public String databaseVersion; + public Date importDate; + public String[] languages; + + public DBPropertyEntry() {} + + public DBPropertyEntry(DatabaseProperties props) { + databaseVersion = DatabaseProperties.DATABASE_VERSION; + importDate = props.getImportDate(); + languages = props.getLanguages(); + } +} diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexMapping.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexMapping.java new file mode 100644 index 000000000..ca11639e3 --- /dev/null +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexMapping.java @@ -0,0 +1,99 @@ +package de.komoot.photon.opensearch; + +import org.opensearch.client.opensearch.OpenSearchClient; +import 
org.opensearch.client.opensearch._types.mapping.DynamicMapping; +import org.opensearch.client.opensearch.indices.PutMappingRequest; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class IndexMapping { + private static final String[] ADDRESS_FIELDS = new String[]{"street", "city", "locality", "district", "county", "state", "country", "context"}; + + private PutMappingRequest.Builder mappings; + + public IndexMapping() { + setupBaseMappings(); + } + + public void putMapping(OpenSearchClient client, String indexName) throws IOException { + client.indices().putMapping(mappings.index(indexName).build()); + } + + public IndexMapping addLanguages(String[] languages) { + List name_collectors = new ArrayList<>(); + for (var lang: languages) { + mappings.properties("collector." + lang, + b -> b.text(p -> p.index(false) + .fields("ngrams", f -> f.text(pi -> pi.index(true).analyzer("index_ngram"))) + .fields("raw", f2 -> f2.text(pi2 -> pi2.index(true).analyzer("index_raw").searchAnalyzer("search_raw")) + ))); + + for (var field: ADDRESS_FIELDS) { + mappings.properties(String.format("%s.%s", field, lang), + b -> b.text(p -> p.index(false).copyTo("collector." + lang))); + } + + mappings.properties("name." + lang, + b -> b.text(p -> p.index(false) + .fields("ngrams", f -> f.text(pi -> pi.index(true).analyzer("index_ngram"))) + .fields("raw", f2 -> f2.text(pi2 -> pi2.index(true).analyzer("index_raw").searchAnalyzer("search_raw"))) + .copyTo("collector." + lang))); + + //add language-specific collector to default for name + name_collectors.add("name." 
+ lang); + } + + name_collectors.add("collector.default"); + mappings.properties("name.default", b -> b.text(p -> p.index(false).copyTo(name_collectors))); + + return this; + } + + private void setupBaseMappings() { + mappings = new PutMappingRequest.Builder(); + + mappings.dynamic(DynamicMapping.False) + .source(s -> s.excludes("context.*")); + + mappings.properties("osm_type", b -> b.text(p -> p.index(false))); + mappings.properties("osm_id", b -> b.unsignedLong(l -> l.index(false))); + + for (var field : new String[]{"osm_key", "osm_value", "type"}) { + mappings.properties(field, b -> b.keyword(p -> p.index(true))); + } + + mappings.properties("coordinate", b -> b.geoPoint(p -> p)); + mappings.properties("countrycode", b -> b.text(p -> p.index(true))); + mappings.properties("importance", b -> b.float_(p -> p.index(false))); + + mappings.properties("housenumber", b -> b.text(p -> p.index(true) + .analyzer("index_housenumber").searchAnalyzer("standard") + .copyTo("collector.default") + )); + + mappings.properties("classification", b -> b.text(p -> p.index(true) + .analyzer("keyword") + .searchAnalyzer("search_classification") + .copyTo("collector.default"))); + + mappings.properties("collector.default", + b -> b.text(p -> p.index(true) + .analyzer("index_ngram") + .fields("raw", f -> f.text(pi -> pi.index(true).analyzer("index_raw"))))); + + for (var field : ADDRESS_FIELDS) { + mappings.properties(field + ".default", b -> b.text(p -> p.index(false).copyTo("collector.default"))); + } + mappings.properties("postcode", b -> b.text(p -> p.index(false).copyTo("collector.default"))); + + mappings.properties("name.default", b -> b.text(p -> p.index(false).copyTo("collector.default"))); + + for (var suffix : new String[]{"alt", "int", "loc", "old", "reg", "housename"}) { + mappings.properties("name." 
+ suffix, b -> b.text(p -> p.index(false) + .fields("raw", bi -> bi.text(pi -> pi.index(true).analyzer("index_raw"))) + .copyTo("collector.default"))); + } + } +} diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexSettingBuilder.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexSettingBuilder.java new file mode 100644 index 000000000..987607915 --- /dev/null +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexSettingBuilder.java @@ -0,0 +1,189 @@ +package de.komoot.photon.opensearch; + +import de.komoot.photon.Utils; +import org.json.JSONArray; +import org.json.JSONObject; +import org.json.JSONTokener; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch._types.analysis.TokenChar; +import org.opensearch.client.opensearch.indices.IndexSettingsAnalysis; + +import java.io.FileReader; +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +public class IndexSettingBuilder { + private IndexSettingsAnalysis.Builder settings = new IndexSettingsAnalysis.Builder(); + private int numShards = 1; + private Set extra_filters = new HashSet<>(); + + public IndexSettingBuilder setShards(Integer numShards) { + this.numShards = numShards == null ? 
1 : numShards; + return this; + } + + public void createIndex(OpenSearchClient client, String indexName) throws IOException { + addDefaultSettings(); + + client.indices().create(r -> r + .index(indexName) + .settings(s -> s + .numberOfShards(Integer.toString(numShards)) + .analysis(settings.build()))); + } + + public void updateIndex(OpenSearchClient client, String indexName) throws IOException { + addDefaultSettings(); + + client.indices().close(req -> req.index(indexName)); + client.indices().putSettings(req -> req + .index(indexName) + .settings(s -> s.analysis(settings.build()))); + client.indices().open(req -> req.index(indexName)); + } + + public IndexSettingBuilder setSynonymFile(String synonymFile) throws IOException { + if (synonymFile != null) { + final var synonymConfig = new JSONObject(new JSONTokener(new FileReader(synonymFile))); + + setSearchTimeSynonyms(synonymConfig.optJSONArray("search_synonyms")); + setClassificationTerms(synonymConfig.optJSONArray("classification_terms")); + } + return this; + } + + private void setSearchTimeSynonyms(JSONArray synonyms) { + if (synonyms != null) { + insertSynonymFilter("extra_synonyms", synonyms); + } + } + + private void setClassificationTerms(JSONArray terms) { + if (terms == null) { + return; + } + + // Collect for each term in the list the possible classification expansions. 
+ Map> collector = new HashMap<>(); + for (int i = 0; i < terms.length(); i++) { + JSONObject descr = terms.getJSONObject(i); + + String classString = Utils.buildClassificationString(descr.getString("key"), descr.getString("value")).toLowerCase(); + + if (classString != null) { + JSONArray jsonTerms = descr.getJSONArray("terms"); + for (int j = 0; j < jsonTerms.length(); j++) { + String term = jsonTerms.getString(j).toLowerCase().trim(); + if (term.indexOf(' ') >= 0) { + throw new RuntimeException("Syntax error in synonym file: only single word classification terms allowed."); + } + + if (term.length() > 1) { + collector.computeIfAbsent(term, k -> new HashSet<>()).add(classString); + } + } + } + } + + // Create the final list of synonyms. A term can expand to any classificator or not at all. + JSONArray synonyms = new JSONArray(); + collector.forEach((term, classificators) -> + synonyms.put(term + " => " + term + "," + String.join(",", classificators))); + + insertSynonymFilter("classification_synonyms", synonyms); + + } + + private void insertSynonymFilter(String filterName, JSONArray synonyms) { + if (!synonyms.isEmpty()) { + settings.filter(filterName, + f -> f.definition(d -> d + .synonymGraph(s -> { + for (var ele : synonyms) { + s.synonyms(ele.toString()); + } + return s; + }))); + + extra_filters.add(filterName); + } + } + + private void addDefaultSettings() { + settings.filter("photonlength", + f -> f.definition(d -> d.length(l -> l.min(2).max(500)))); + settings.filter("preserving_word_delimiter", + f -> f.definition(d -> d.wordDelimiterGraph(w -> w.preserveOriginal(true)))); + + settings.charFilter("punctuationgreedy", + f -> f.definition(d -> d.patternReplace(p -> p.pattern("[\\.,']").replacement(" ")))); + settings.charFilter("remove_ws_hnr_suffix", + f -> f.definition(d -> d.patternReplace(p -> p.pattern("(\\d+)\\s(?=\\p{L}\\b)").replacement("$1")))); + + settings.tokenizer("edge_ngram", + f -> f.definition(d -> d.edgeNgram(e -> 
e.minGram(1).maxGram(100).tokenChars(TokenChar.Letter, TokenChar.Digit)))); + + settings.analyzer("index_ngram", + f -> f.custom(d -> d + .charFilter("punctuationgreedy", "remove_ws_hnr_suffix") + .tokenizer("edge_ngram") + .filter("preserving_word_delimiter", + "flatten_graph", + "lowercase", + "german_normalization", + "asciifolding", + "unique"))); + settings.analyzer("search_ngram", + f -> f.custom(d -> { + d.charFilter("punctuationgreedy") + .tokenizer("standard") + .filter("lowercase"); + for (var filter : extra_filters) { + d.filter(filter); + } + d.filter("german_normalization", "asciifolding"); + + return d; + })); + settings.analyzer("index_raw", + f -> f.custom(d -> d + .charFilter("punctuationgreedy") + .tokenizer("standard") + .filter("lowercase", + "german_normalization", + "asciifolding", + "unique"))); + settings.analyzer("search_raw", + f -> f.custom(d -> { + d.charFilter("punctuationgreedy") + .tokenizer("standard") + .filter("lowercase"); + for (var filter : extra_filters) { + d.filter(filter); + } + d.filter("german_normalization", + "asciifolding", + "unique"); + return d; + })); + settings.analyzer("index_housenumber", + f -> f.custom(d -> d + .charFilter("punctuationgreedy", "remove_ws_hnr_suffix") + .tokenizer("standard") + .filter("lowercase", + "preserving_word_delimiter"))); + settings.analyzer("search_classification", + f -> f.custom(d -> { + d.tokenizer("whitespace") + .filter("lowercase"); + if (extra_filters.contains("classification_synonyms")) { + d.filter("classification_synonyms"); + } + + return d; + })); + } +} diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/PhotonIndex.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/PhotonIndex.java new file mode 100644 index 000000000..e88ba38f3 --- /dev/null +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/PhotonIndex.java @@ -0,0 +1,9 @@ +package de.komoot.photon.opensearch; + +public class PhotonIndex { + public static final String NAME = 
"photon"; + public static final String PROPERTY_DOCUMENT_ID = "DATABASE_PROPERTIES"; + + private PhotonIndex() { + } +} diff --git a/app/opensearch/src/test/java/de/komoot/photon/ESBaseTester.java b/app/opensearch/src/test/java/de/komoot/photon/ESBaseTester.java index fc06cd718..6f3a2ba26 100644 --- a/app/opensearch/src/test/java/de/komoot/photon/ESBaseTester.java +++ b/app/opensearch/src/test/java/de/komoot/photon/ESBaseTester.java @@ -1,5 +1,6 @@ package de.komoot.photon; +import de.komoot.photon.opensearch.OpenSearchTestServer; import de.komoot.photon.searcher.PhotonResult; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.io.TempDir; @@ -8,24 +9,27 @@ import java.io.IOException; import java.nio.file.Path; +import java.util.Date; public class ESBaseTester { + public static final String TEST_CLUSTER_NAME = "photon-test"; + protected static GeometryFactory FACTORY = new GeometryFactory(new PrecisionModel(), 4326); + @TempDir protected Path dataDirectory; - public static final String TEST_CLUSTER_NAME = "photon-test"; - protected static GeometryFactory FACTORY = new GeometryFactory(new PrecisionModel(), 4326); + private OpenSearchTestServer server; + protected PhotonDoc createDoc(double lon, double lat, int id, int osmId, String key, String value) { return null; } @AfterEach - public void tearDown() { + public void tearDown() throws IOException { shutdownES(); } - protected PhotonResult getById(int id) { return null; } @@ -35,6 +39,10 @@ public void setUpES() throws IOException { } public void setUpES(Path test_directory, String... languages) throws IOException { + server = new OpenSearchTestServer(test_directory.toString()); + server.startTestServer(TEST_CLUSTER_NAME); + server.recreateIndex(languages, new Date()); + server.refreshIndexes(); } protected Importer makeImporter() { @@ -58,16 +66,22 @@ protected Updater makeUpdaterWithExtra(String... 
extraTags) { } protected Server getServer() { - return null; + assert server != null; + + return server; } - protected void refresh() { + protected void refresh() throws IOException { + server.refreshIndexes(); } /** * Shutdown the ES node */ - public void shutdownES() { + public void shutdownES() throws IOException { + if (server != null) { + server.stopTestServer(); + } } } diff --git a/app/opensearch/src/test/java/de/komoot/photon/ServerTest.java b/app/opensearch/src/test/java/de/komoot/photon/ServerTest.java new file mode 100644 index 000000000..9d784c56b --- /dev/null +++ b/app/opensearch/src/test/java/de/komoot/photon/ServerTest.java @@ -0,0 +1,31 @@ +package de.komoot.photon; + +import de.komoot.photon.DatabaseProperties; +import de.komoot.photon.ESBaseTester; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.Date; + +import static org.junit.jupiter.api.Assertions.*; + +class ServerTest extends ESBaseTester { + + @Test + void testSaveAndLoadFromDatabase() throws IOException { + setUpES(); + + DatabaseProperties prop = new DatabaseProperties(); + prop.setLanguages(new String[]{"en", "de", "fr"}); + Date now = new Date(); + prop.setImportDate(now); + getServer().saveToDatabase(prop); + + prop = new DatabaseProperties(); + getServer().loadFromDatabase(prop); + + assertArrayEquals(new String[]{"en", "de", "fr"}, prop.getLanguages()); + assertEquals(now, prop.getImportDate()); + + } +} \ No newline at end of file diff --git a/app/opensearch/src/test/java/de/komoot/photon/opensearch/OpenSearchTestServer.java b/app/opensearch/src/test/java/de/komoot/photon/opensearch/OpenSearchTestServer.java new file mode 100644 index 000000000..4a72a7518 --- /dev/null +++ b/app/opensearch/src/test/java/de/komoot/photon/opensearch/OpenSearchTestServer.java @@ -0,0 +1,52 @@ +package de.komoot.photon.opensearch; + +import de.komoot.photon.Server; +import de.komoot.photon.searcher.PhotonResult; +import 
org.codelibs.opensearch.runner.OpenSearchRunner; +import org.opensearch.common.settings.Settings; + +import java.io.IOException; + +public class OpenSearchTestServer extends Server { + private OpenSearchRunner runner; + private String instanceDir; + + public OpenSearchTestServer(String mainDirectory) { + super(mainDirectory); + + instanceDir = mainDirectory; + } + + public void startTestServer(String clusterName) { + runner = new OpenSearchRunner(); + runner.onBuild(new OpenSearchRunner.Builder() { + @Override + public void build(final int number, final Settings.Builder settingsBuilder) { + settingsBuilder.put("http.cors.enabled", true); + settingsBuilder.put("http.cors.allow-origin", "*"); + settingsBuilder.put("discovery.type", "single-node"); + settingsBuilder.putList("discovery.seed_hosts", "127.0.0.1:9201"); + settingsBuilder.put("logger.org.opensearch.cluster.metadata", "TRACE"); + settingsBuilder.put("cluster.search.request.slowlog.level", "TRACE"); + settingsBuilder.put("cluster.search.request.slowlog.threshold.warn", "0ms"); + settingsBuilder.put("cluster.search.request.slowlog.threshold.info", "0ms"); + settingsBuilder.put("cluster.search.request.slowlog.threshold.debug", "0ms"); + settingsBuilder.put("cluster.search.request.slowlog.threshold.trace", "0ms"); + + } + }).build(OpenSearchRunner.newConfigs().basePath(instanceDir).clusterName(clusterName).numOfNode(1).baseHttpPort(9200)); + + // wait for yellow status + runner.ensureYellow(); + + String[] transportAddresses = {"127.0.0.1:" + runner.node().settings().get("http.port")}; + start(clusterName, transportAddresses); + } + + public void stopTestServer() throws IOException { + shutdown(); + runner.close(); + runner.clean(); + } + +} diff --git a/src/main/java/de/komoot/photon/App.java b/src/main/java/de/komoot/photon/App.java index f10e6fd2c..a18daca18 100644 --- a/src/main/java/de/komoot/photon/App.java +++ b/src/main/java/de/komoot/photon/App.java @@ -136,14 +136,14 @@ private static void 
startNominatimUpdateInit(CommandLineArgs args) { nominatimUpdater.initUpdates(args.getNominatimUpdateInit()); } - private static void startNominatimUpdate(NominatimUpdater nominatimUpdater, Server esServer) { + private static void startNominatimUpdate(NominatimUpdater nominatimUpdater, Server esServer) { nominatimUpdater.update(); DatabaseProperties dbProperties = new DatabaseProperties(); - esServer.loadFromDatabase(dbProperties); - Date importDate = nominatimUpdater.getLastImportDate(); - dbProperties.setImportDate(importDate); try { + esServer.loadFromDatabase(dbProperties); + Date importDate = nominatimUpdater.getLastImportDate(); + dbProperties.setImportDate(importDate); esServer.saveToDatabase(dbProperties); } catch (IOException e) { throw new RuntimeException("Cannot setup index, elastic search config files not readable", e); @@ -154,7 +154,7 @@ private static void startNominatimUpdate(NominatimUpdater nominatimUpdater, Serv /** * Prepare Nominatim updater. */ - private static NominatimUpdater setupNominatimUpdater(CommandLineArgs args, Server server) { + private static NominatimUpdater setupNominatimUpdater(CommandLineArgs args, Server server)throws IOException { // Get database properties and ensure that the version is compatible. DatabaseProperties dbProperties = new DatabaseProperties(); server.loadFromDatabase(dbProperties); @@ -167,7 +167,7 @@ private static NominatimUpdater setupNominatimUpdater(CommandLineArgs args, Serv /** * Start API server to accept search requests via http. */ - private static void startApi(CommandLineArgs args, Server server) { + private static void startApi(CommandLineArgs args, Server server) throws IOException { // Get database properties and ensure that the version is compatible. 
DatabaseProperties dbProperties = new DatabaseProperties(); server.loadFromDatabase(dbProperties); diff --git a/src/main/java/de/komoot/photon/DatabaseProperties.java b/src/main/java/de/komoot/photon/DatabaseProperties.java index 773ee8332..73fb31af7 100644 --- a/src/main/java/de/komoot/photon/DatabaseProperties.java +++ b/src/main/java/de/komoot/photon/DatabaseProperties.java @@ -8,6 +8,17 @@ * The server is responsible for making the data persistent in the Photon database. */ public class DatabaseProperties { + /** + * Database version created by new imports with the current code. + * + * Format must be: major.minor.patch-dev + * + * Increase to next to be released version when the database layout + * changes in an incompatible way. If it is already at the next released + * version, increase the dev version. + */ + public static final String DATABASE_VERSION = "0.3.6-1"; + private String[] languages = null; /** diff --git a/src/main/java/de/komoot/photon/StatusRequestHandler.java b/src/main/java/de/komoot/photon/StatusRequestHandler.java index 1d5c1790b..d61891ca2 100644 --- a/src/main/java/de/komoot/photon/StatusRequestHandler.java +++ b/src/main/java/de/komoot/photon/StatusRequestHandler.java @@ -1,5 +1,6 @@ package de.komoot.photon; +import java.io.IOException; import java.util.Date; import org.json.JSONObject; @@ -17,7 +18,7 @@ protected StatusRequestHandler(String path, Server server) { } @Override - public String handle(Request request, Response response) { + public String handle(Request request, Response response) throws IOException { DatabaseProperties dbProperties = new DatabaseProperties(); server.loadFromDatabase(dbProperties); String importDateStr = null; diff --git a/app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/DatabasePropertiesTest.java b/src/test/java/de/komoot/photon/DatabasePropertiesTest.java similarity index 97% rename from app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/DatabasePropertiesTest.java rename to 
src/test/java/de/komoot/photon/DatabasePropertiesTest.java index ad51808eb..ea042247f 100644 --- a/app/es_embedded/src/test/java/de/komoot/photon/elasticsearch/DatabasePropertiesTest.java +++ b/src/test/java/de/komoot/photon/DatabasePropertiesTest.java @@ -1,4 +1,4 @@ -package de.komoot.photon.elasticsearch; +package de.komoot.photon; import de.komoot.photon.DatabaseProperties; import de.komoot.photon.ESBaseTester; diff --git a/src/test/java/de/komoot/photon/query/QueryBasicSearchTest.java b/src/test/java/de/komoot/photon/query/QueryBasicSearchTest.java index 6358ec30a..0844dd9d3 100644 --- a/src/test/java/de/komoot/photon/query/QueryBasicSearchTest.java +++ b/src/test/java/de/komoot/photon/query/QueryBasicSearchTest.java @@ -44,7 +44,7 @@ private List search(String query) { @Test - void testSearchByDefaultName() { + void testSearchByDefaultName() throws IOException { Importer instance = makeImporter(); instance.add(createDoc("name", "Muffle Flu"), 0); instance.finish(); @@ -61,7 +61,7 @@ void testSearchByDefaultName() { } @Test - void testSearchNameSkipTerms() { + void testSearchNameSkipTerms() throws IOException { Importer instance = makeImporter(); instance.add(createDoc("name", "Hunted House Hotel"), 0); instance.finish(); @@ -76,7 +76,7 @@ void testSearchNameSkipTerms() { ); } @Test - void testSearchByAlternativeNames() { + void testSearchByAlternativeNames() throws IOException { Importer instance = makeImporter(); instance.add(createDoc("name", "original", "alt_name", "alt", "old_name", "older", "int_name", "int", "loc_name", "local", "reg_name", "regional", "addr:housename", "house", @@ -97,7 +97,7 @@ void testSearchByAlternativeNames() { } @Test - void testSearchByNameAndAddress() { + void testSearchByNameAndAddress() throws IOException { Map address = new HashMap<>(); address.put("street", "Callino"); address.put("city", "Madrid"); @@ -122,7 +122,7 @@ void testSearchByNameAndAddress() { } @Test - void testSearchMustContainANameTerm() { + void 
testSearchMustContainANameTerm() throws IOException { Importer instance = makeImporter(); instance.add(createDoc("name", "Palermo").address(Collections.singletonMap("state", "Sicilia")), 0); instance.finish(); @@ -139,7 +139,7 @@ void testSearchMustContainANameTerm() { } @Test - void testSearchWithHousenumberNamed() { + void testSearchWithHousenumberNamed() throws IOException { Importer instance = makeImporter(); instance.add(createDoc("name", "Edeka").houseNumber("5").address(Collections.singletonMap("street", "Hauptstrasse")), 0); instance.finish(); @@ -154,7 +154,7 @@ void testSearchWithHousenumberNamed() { } @Test - void testSearchWithHousenumberUnnamed() { + void testSearchWithHousenumberUnnamed() throws IOException { Importer instance = makeImporter(); instance.add(createDoc().houseNumber("5").address(Collections.singletonMap("street", "Hauptstrasse")), 0); instance.finish(); diff --git a/src/test/java/de/komoot/photon/query/QueryByClassificationTest.java b/src/test/java/de/komoot/photon/query/QueryByClassificationTest.java index 5ffeb848b..8e137cfd5 100644 --- a/src/test/java/de/komoot/photon/query/QueryByClassificationTest.java +++ b/src/test/java/de/komoot/photon/query/QueryByClassificationTest.java @@ -36,7 +36,7 @@ private List search(String query) { return getServer().createSearchHandler(new String[]{"en"}, 1).search(new PhotonRequest(query, "en")); } - private void updateClassification(String key, String value, String... terms) { + private void updateClassification(String key, String value, String... terms) throws IOException { JSONArray jsonTerms = new JSONArray(); for (String term : terms) { jsonTerms.put(term); @@ -68,7 +68,7 @@ private void updateClassification(String key, String value, String... 
terms) { } @Test - void testQueryByClassificationString() { + void testQueryByClassificationString() throws IOException { Importer instance = makeImporter(); instance.add(createDoc("amenity", "restaurant", "curliflower"), 0); instance.finish(); @@ -89,7 +89,7 @@ void testQueryByClassificationString() { } @Test - void testQueryByClassificationSynonym() { + void testQueryByClassificationSynonym() throws IOException { Importer instance = makeImporter(); instance.add(createDoc("amenity", "restaurant", "curliflower"), 0); instance.finish(); @@ -109,7 +109,7 @@ void testQueryByClassificationSynonym() { @Test - void testSynonymDoNotInterfereWithWords() { + void testSynonymDoNotInterfereWithWords() throws IOException { Importer instance = makeImporter(); instance.add(createDoc("amenity", "restaurant", "airport"), 0); instance.add(createDoc("aeroway", "terminal", "Houston"), 0); @@ -129,7 +129,7 @@ void testSynonymDoNotInterfereWithWords() { } @Test - void testSameSynonymForDifferentTags() { + void testSameSynonymForDifferentTags() throws IOException { Importer instance = makeImporter(); instance.add(createDoc("railway", "halt", "Newtown"), 0); instance.add(createDoc("railway", "station", "King's Cross"), 0); diff --git a/src/test/java/de/komoot/photon/query/QueryFilterLayerTest.java b/src/test/java/de/komoot/photon/query/QueryFilterLayerTest.java index d66de64f7..f9badee7e 100644 --- a/src/test/java/de/komoot/photon/query/QueryFilterLayerTest.java +++ b/src/test/java/de/komoot/photon/query/QueryFilterLayerTest.java @@ -10,6 +10,7 @@ import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.io.TempDir; +import java.io.IOException; import java.nio.file.Path; import java.util.Arrays; import java.util.Collections; @@ -62,7 +63,7 @@ private List searchWithLayers(String... 
layers) { @AfterAll @Override - public void tearDown() { + public void tearDown() throws IOException { super.tearDown(); } } diff --git a/src/test/java/de/komoot/photon/query/QueryFilterTagValueTest.java b/src/test/java/de/komoot/photon/query/QueryFilterTagValueTest.java index 90294f610..eb95f9ffd 100644 --- a/src/test/java/de/komoot/photon/query/QueryFilterTagValueTest.java +++ b/src/test/java/de/komoot/photon/query/QueryFilterTagValueTest.java @@ -11,6 +11,7 @@ import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; +import java.io.IOException; import java.nio.file.Path; import java.util.List; import java.util.stream.Stream; @@ -57,7 +58,7 @@ void setUp() throws Exception { @AfterAll @Override - public void tearDown() { + public void tearDown() throws IOException { super.tearDown(); } diff --git a/src/test/java/de/komoot/photon/query/QueryRelevanceTest.java b/src/test/java/de/komoot/photon/query/QueryRelevanceTest.java index d27524106..5225806dc 100644 --- a/src/test/java/de/komoot/photon/query/QueryRelevanceTest.java +++ b/src/test/java/de/komoot/photon/query/QueryRelevanceTest.java @@ -47,7 +47,7 @@ private List search(PhotonRequest request) { } @Test - void testRelevanceByImportance() { + void testRelevanceByImportance() throws IOException { Importer instance = makeImporter(); instance.add(createDoc("amenity", "restuarant", 1001, "name", "New York").importance(0.0), 0); instance.add(createDoc("place", "city", 2000, "name", "New York").importance(0.5), 0); @@ -61,7 +61,7 @@ void testRelevanceByImportance() { } @Test - void testFullNameOverPartialName() { + void testFullNameOverPartialName() throws IOException { Importer instance = makeImporter(); instance.add(createDoc("place", "hamlet", 1000, "name", "Ham"), 0); instance.add(createDoc("place", "hamlet", 1001, "name", "Hamburg"), 0); @@ -75,7 +75,7 @@ void testFullNameOverPartialName() { } @Test - void testPartialNameWithImportanceOverFullName() { + void 
testPartialNameWithImportanceOverFullName() throws IOException { Importer instance = makeImporter(); instance.add(createDoc("place", "hamlet", 1000, "name", "Ham").importance(0.1), 0); instance.add(createDoc("place", "city", 1001, "name", "Hamburg").importance(0.5), 0); @@ -90,7 +90,7 @@ void testPartialNameWithImportanceOverFullName() { @ParameterizedTest @ValueSource(strings = {"Ham", "Hamm", "Hamburg"}) - void testLocationPreferenceForEqualImportance(String placeName) { + void testLocationPreferenceForEqualImportance(String placeName) throws IOException { Importer instance = makeImporter(); instance.add(createDoc("place", "hamlet", 1000, "name", "Ham") .centroid(FACTORY.createPoint(new Coordinate(10, 10))), 0); @@ -107,7 +107,7 @@ void testLocationPreferenceForEqualImportance(String placeName) { } @Test - void testLocationPreferenceForHigherImportance() { + void testLocationPreferenceForHigherImportance() throws IOException { Importer instance = makeImporter(); instance.add(createDoc("place", "hamlet", 1000, "name", "Ham") .importance(0.8) diff --git a/src/test/java/de/komoot/photon/query/QueryReverseFilterLayerTest.java b/src/test/java/de/komoot/photon/query/QueryReverseFilterLayerTest.java index a4904dda9..5efac0bbc 100644 --- a/src/test/java/de/komoot/photon/query/QueryReverseFilterLayerTest.java +++ b/src/test/java/de/komoot/photon/query/QueryReverseFilterLayerTest.java @@ -69,7 +69,7 @@ void testMultipleLayers() { @AfterAll @Override - public void tearDown() { + public void tearDown() throws IOException { super.tearDown(); } } diff --git a/src/test/java/de/komoot/photon/query/QueryReverseFilterTagValueTest.java b/src/test/java/de/komoot/photon/query/QueryReverseFilterTagValueTest.java index 456be5cee..4f4a46fe0 100644 --- a/src/test/java/de/komoot/photon/query/QueryReverseFilterTagValueTest.java +++ b/src/test/java/de/komoot/photon/query/QueryReverseFilterTagValueTest.java @@ -62,7 +62,7 @@ void setup() throws IOException { @AfterAll @Override - public void 
tearDown() { + public void tearDown() throws IOException { super.tearDown(); } diff --git a/src/test/java/de/komoot/photon/query/QueryReverseTest.java b/src/test/java/de/komoot/photon/query/QueryReverseTest.java index 9bacd9d7f..7d96ff781 100644 --- a/src/test/java/de/komoot/photon/query/QueryReverseTest.java +++ b/src/test/java/de/komoot/photon/query/QueryReverseTest.java @@ -40,7 +40,7 @@ void setup() throws IOException { @AfterAll @Override - public void tearDown() { + public void tearDown() throws IOException { super.tearDown(); } From 0f5e6ac17409bf7730270f4ca214db0cfb19fdb0 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 1 May 2024 15:55:02 +0200 Subject: [PATCH 05/14] port importer to OpenSearch --- .../main/java/de/komoot/photon/Server.java | 51 ++++-- .../de/komoot/photon/opensearch/Importer.java | 62 +++++++ .../photon/opensearch/OpenSearchResult.java | 68 +++++++ .../OpenSearchResultDeserializer.java | 82 +++++++++ .../opensearch/PhotonDocSerializer.java | 169 ++++++++++++++++++ .../java/de/komoot/photon/ESBaseTester.java | 20 ++- .../photon/opensearch/ImporterTest.java | 102 +++++++++++ .../opensearch/OpenSearchTestServer.java | 15 ++ 8 files changed, 544 insertions(+), 25 deletions(-) create mode 100644 app/opensearch/src/main/java/de/komoot/photon/opensearch/Importer.java create mode 100644 app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchResult.java create mode 100644 app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchResultDeserializer.java create mode 100644 app/opensearch/src/main/java/de/komoot/photon/opensearch/PhotonDocSerializer.java create mode 100644 app/opensearch/src/test/java/de/komoot/photon/opensearch/ImporterTest.java diff --git a/app/opensearch/src/main/java/de/komoot/photon/Server.java b/app/opensearch/src/main/java/de/komoot/photon/Server.java index 223b5073a..17c2472f0 100644 --- a/app/opensearch/src/main/java/de/komoot/photon/Server.java +++ 
b/app/opensearch/src/main/java/de/komoot/photon/Server.java @@ -1,16 +1,14 @@ package de.komoot.photon; -import de.komoot.photon.opensearch.DBPropertyEntry; -import de.komoot.photon.opensearch.IndexMapping; -import de.komoot.photon.opensearch.IndexSettingBuilder; -import de.komoot.photon.opensearch.PhotonIndex; +import com.fasterxml.jackson.core.Version; +import com.fasterxml.jackson.databind.module.SimpleModule; +import de.komoot.photon.opensearch.*; import de.komoot.photon.searcher.ReverseHandler; import de.komoot.photon.searcher.SearchHandler; import org.apache.hc.core5.http.HttpHost; import org.opensearch.client.json.jackson.JacksonJsonpMapper; import org.opensearch.client.opensearch.OpenSearchClient; import org.opensearch.client.opensearch._types.HealthStatus; -import org.opensearch.client.opensearch._types.OpenSearchException; import org.opensearch.client.transport.httpclient5.ApacheHttpClient5TransportBuilder; import org.slf4j.Logger; @@ -20,7 +18,7 @@ public class Server { private static final Logger LOGGER = org.slf4j.LoggerFactory.getLogger(Server.class); - protected OpenSearchClient esClient; + protected OpenSearchClient client; public Server(String mainDirectory) { } @@ -36,23 +34,30 @@ public Server start(String clusterName, String[] transportAddresses) { parts.length > 1 ? 
Integer.parseInt(parts[1]) : 9200); } + final var module = new SimpleModule("PhotonResultDeserializer", + new Version(1, 0, 0, null, null, null)); + module.addDeserializer(OpenSearchResult.class, new OpenSearchResultDeserializer()); + + final var mapper = new JacksonJsonpMapper(); + mapper.objectMapper().registerModule(module); + final var transport = ApacheHttpClient5TransportBuilder .builder(hosts) - .setMapper(new JacksonJsonpMapper()) + .setMapper(mapper) .build(); - esClient = new OpenSearchClient(transport); + client = new OpenSearchClient(transport); return this; } public void waitForReady() throws IOException{ - esClient.cluster().health(h -> h.waitForStatus(HealthStatus.Yellow)); + client.cluster().health(h -> h.waitForStatus(HealthStatus.Yellow)); } public void refreshIndexes() throws IOException { waitForReady(); - esClient.indices().refresh(); + client.indices().refresh(); } public void shutdown() { @@ -61,13 +66,13 @@ public void shutdown() { public DatabaseProperties recreateIndex(String[] languages, Date importDate) throws IOException { // delete any existing data - if (esClient.indices().exists(e -> e.index(PhotonIndex.NAME)).value()) { - esClient.indices().delete(d -> d.index(PhotonIndex.NAME)); + if (client.indices().exists(e -> e.index(PhotonIndex.NAME)).value()) { + client.indices().delete(d -> d.index(PhotonIndex.NAME)); } - (new IndexSettingBuilder()).createIndex(esClient, PhotonIndex.NAME); + (new IndexSettingBuilder()).createIndex(client, PhotonIndex.NAME); - (new IndexMapping()).addLanguages(languages).putMapping(esClient, PhotonIndex.NAME); + (new IndexMapping()).addLanguages(languages).putMapping(client, PhotonIndex.NAME); var dbProperties = new DatabaseProperties() .setLanguages(languages) @@ -81,17 +86,17 @@ public void updateIndexSettings(String synonymFile) throws IOException { var dbProperties = new DatabaseProperties(); loadFromDatabase(dbProperties); - (new IndexSettingBuilder()).setSynonymFile(synonymFile).updateIndex(esClient, 
PhotonIndex.NAME); + (new IndexSettingBuilder()).setSynonymFile(synonymFile).updateIndex(client, PhotonIndex.NAME); if (dbProperties.getLanguages() != null) { (new IndexMapping()) .addLanguages(dbProperties.getLanguages()) - .putMapping(esClient, PhotonIndex.NAME); + .putMapping(client, PhotonIndex.NAME); } } public void saveToDatabase(DatabaseProperties dbProperties) throws IOException { - esClient.index(r -> r + client.index(r -> r .index(PhotonIndex.NAME) .id(PhotonIndex.PROPERTY_DOCUMENT_ID) .document(new DBPropertyEntry(dbProperties)) @@ -99,7 +104,7 @@ public void saveToDatabase(DatabaseProperties dbProperties) throws IOException { } public void loadFromDatabase(DatabaseProperties dbProperties) throws IOException { - var dbEntry = esClient.get(r -> r + var dbEntry = client.get(r -> r .index(PhotonIndex.NAME) .id(PhotonIndex.PROPERTY_DOCUMENT_ID), DBPropertyEntry.class); @@ -119,7 +124,8 @@ public void loadFromDatabase(DatabaseProperties dbProperties) throws IOException } public Importer createImporter(String[] languages, String[] extraTags) { - return null; + registerPhotonDocSerializer(languages, extraTags); + return new de.komoot.photon.opensearch.Importer(client); } public Updater createUpdater(String[] languages, String[] extraTags) { @@ -134,4 +140,11 @@ public ReverseHandler createReverseHandler(int queryTimeoutSec) { return null; } + private void registerPhotonDocSerializer(String[] languages, String[] extraTags) { + final var module = new SimpleModule("PhotonDocSerializer", + new Version(1, 0, 0, null, null, null)); + module.addSerializer(PhotonDoc.class, new PhotonDocSerializer(languages, extraTags)); + + ((JacksonJsonpMapper) client._transport().jsonpMapper()).objectMapper().registerModule(module); + } } diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/Importer.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/Importer.java new file mode 100644 index 000000000..034c30a76 --- /dev/null +++ 
b/app/opensearch/src/main/java/de/komoot/photon/opensearch/Importer.java @@ -0,0 +1,62 @@ +package de.komoot.photon.opensearch; + +import de.komoot.photon.PhotonDoc; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch.core.BulkRequest; +import org.slf4j.Logger; + +import java.io.IOException; + +public class Importer implements de.komoot.photon.Importer { + private static final Logger LOGGER = org.slf4j.LoggerFactory.getLogger(Importer.class); + + private final OpenSearchClient client; + private BulkRequest.Builder bulkRequest = new BulkRequest.Builder(); + private int todoDocuments = 0; + + public Importer(OpenSearchClient client) { + this.client = client; + } + + @Override + public void add(PhotonDoc doc, int objectId) { + bulkRequest.operations(op -> op + .index(i -> i + .index(PhotonIndex.NAME) + .id(doc.getUid(objectId)) + .document(doc))); + ++todoDocuments; + + if (todoDocuments % 10000 == 0) { + saveDocuments(); + } + } + + @Override + public void finish() { + if (todoDocuments > 0) { + saveDocuments(); + } + + try { + client.indices().refresh(); + } catch (IOException e) { + LOGGER.warn("Refresh of database failed", e); + } + } + + private void saveDocuments() { + try { + var response = client.bulk(bulkRequest.build()); + + if (response.errors()) { + LOGGER.error("Error during bulk import."); + } + } catch (IOException e) { + LOGGER.error("Error during bulk import", e); + } + + bulkRequest = new BulkRequest.Builder(); + todoDocuments = 0; + } +} diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchResult.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchResult.java new file mode 100644 index 000000000..135cb05b7 --- /dev/null +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchResult.java @@ -0,0 +1,68 @@ +package de.komoot.photon.opensearch; + +import de.komoot.photon.searcher.PhotonResult; + +import java.util.Map; + +public class 
OpenSearchResult implements PhotonResult { + private static final String[] NAME_PRECEDENCE = {"default", "housename", "int", "loc", "reg", "alt", "old"}; + + private double score = 0.0; + private final double[] extent; + private final double[] coordinates; + private final Map infos; + private final Map> localeTags; + + OpenSearchResult(double extent[], double[] coordinates, Map infos, Map> localeTags) { + this.extent = extent; + this.coordinates = coordinates; + this.infos = infos; + this.localeTags = localeTags; + } + + + @Override + public Object get(String key) { + return infos.get(key); + } + + @Override + public String getLocalised(String key, String language) { + final var map = getMap(key); + if (map == null) return null; + + if (map.get(language) != null) { + // language specific field + return map.get(language); + } + + if ("name".equals(key)) { + for (String name : NAME_PRECEDENCE) { + if (map.containsKey(name)) + return map.get(name); + } + } + + return map.get("default"); + } + + @Override + public Map getMap(String key) { + return localeTags.get(key); + } + + @Override + public double[] getCoordinates() { + return coordinates; + } + + @Override + public double[] getExtent() { + return extent; + } + + @Override + public double getScore() { + return score; + } +} diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchResultDeserializer.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchResultDeserializer.java new file mode 100644 index 000000000..4fe86d066 --- /dev/null +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchResultDeserializer.java @@ -0,0 +1,82 @@ +package de.komoot.photon.opensearch; + +import com.fasterxml.jackson.core.JacksonException; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.deser.std.StdDeserializer; +import 
com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import de.komoot.photon.Constants; +import de.komoot.photon.searcher.PhotonResult; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public class OpenSearchResultDeserializer extends StdDeserializer { + + public OpenSearchResultDeserializer() { + super(OpenSearchResult.class); + } + + @Override + public OpenSearchResult deserialize(JsonParser p, DeserializationContext ctxt) throws IOException, JacksonException { + final var node = (ObjectNode) p.getCodec().readTree(p); + + final var importanceNode = node.get("importance"); + final double[] extent = extractExtent((ObjectNode) node.get("extent")); + final double[] coordinates = extractCoordinate((ObjectNode) node.get("coordinate")); + + final Map tags = new HashMap<>(); + final Map> localeTags = new HashMap<>(); + + var fields = node.fields(); + while (fields.hasNext()) { + final var entry = fields.next(); + final String key = entry.getKey(); + final JsonNode value = entry.getValue(); + if (value.isTextual()) { + tags.put(key, value.asText()); + } else if (value.isInt()) { + tags.put(entry.getKey(), value.asInt()); + } else if (value.isBigInteger()) { + tags.put(entry.getKey(), value.asLong()); + } else if (value.isObject()) { + Map vtags = new HashMap<>(); + var subfields = value.fields(); + while (subfields.hasNext()) { + final var subentry = subfields.next(); + if (subentry.getValue().isTextual()) { + vtags.put(subentry.getKey(), subentry.getValue().asText()); + } + localeTags.put(key, vtags); + } + } + } + + return new OpenSearchResult(extent, coordinates, tags, localeTags); + } + + private double[] extractExtent(ObjectNode node) { + if (node == null || !node.has("coordinates")) { + return null; + } + + final var coords = ((ArrayNode) node.get("coordinates")); + final var nw = ((ArrayNode) coords.get(0)); + final var se = ((ArrayNode) coords.get(1)); + + return new 
double[]{nw.get(0).doubleValue(), nw.get(1).doubleValue(), + se.get(0).doubleValue(), se.get(1).doubleValue()}; + } + + private double[] extractCoordinate(ObjectNode node) { + if (node == null) { + return PhotonResult.INVALID_COORDINATES; + } + + return new double[]{node.get(Constants.LON).doubleValue(), node.get(Constants.LAT).doubleValue()}; + } + +} diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/PhotonDocSerializer.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/PhotonDocSerializer.java new file mode 100644 index 000000000..2ed731620 --- /dev/null +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/PhotonDocSerializer.java @@ -0,0 +1,169 @@ +package de.komoot.photon.opensearch; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.ser.std.StdSerializer; +import de.komoot.photon.Constants; +import de.komoot.photon.PhotonDoc; +import de.komoot.photon.Utils; +import org.locationtech.jts.geom.Envelope; + +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +public class PhotonDocSerializer extends StdSerializer { + final private String[] languages; + final private String[] extraTags; + + public PhotonDocSerializer(String[] languages, String[] extraTags) { + super(PhotonDoc.class); + this.languages = languages; + this.extraTags = extraTags; + } + + @Override + public void serialize(PhotonDoc value, JsonGenerator gen, SerializerProvider provider) throws IOException { + final var atype = value.getAddressType(); + + gen.writeStartObject(); + gen.writeNumberField(Constants.OSM_ID, value.getOsmId()); + gen.writeStringField(Constants.OSM_TYPE, value.getOsmType()); + gen.writeStringField(Constants.OSM_KEY, value.getTagKey()); + gen.writeStringField(Constants.OSM_VALUE, value.getTagValue()); + gen.writeStringField(Constants.OBJECT_TYPE, atype == null ? 
"locality" : atype.getName()); + gen.writeNumberField(Constants.IMPORTANCE, value.getImportance()); + + String classification = Utils.buildClassificationString(value.getTagKey(), value.getTagValue()); + if (classification != null) { + gen.writeStringField(Constants.CLASSIFICATION, classification); + } + + if (value.getCentroid() != null) { + gen.writeObjectFieldStart("coordinate"); + gen.writeNumberField("lat", value.getCentroid().getY()); + gen.writeNumberField("lon", value.getCentroid().getX()); + gen.writeEndObject(); + } + + if (value.getHouseNumber() != null) { + gen.writeStringField("housenumber", value.getHouseNumber()); + } + + if (value.getPostcode() != null) { + gen.writeStringField("postcode", value.getPostcode()); + } + + writeName(gen, value, languages); + + for (var entry : value.getAddressParts().keySet()) { + Map fNames = new HashMap<>(); + + value.copyAddressName(fNames, "default", entry, "name"); + + for (String language : languages) { + value.copyAddressName(fNames, language, entry, "name:" + language); + } + + gen.writeObjectField(entry.getName(), fNames); + } + + String countryCode = value.getCountryCode(); + if (countryCode != null) { + gen.writeStringField(Constants.COUNTRYCODE, countryCode); + } + + writeContext(gen, value.getContext()); + writeExtraTags(gen, value.getExtratags()); + writeExtent(gen, value.getBbox()); + + gen.writeEndObject(); + } + + private void writeName(JsonGenerator gen, PhotonDoc doc, String[] languages) throws IOException { + Map fNames = new HashMap<>(); + + doc.copyName(fNames, "default", "name"); + + for (String language : languages) { + doc.copyName(fNames, language, "name:" + language); + } + + doc.copyName(fNames, "alt", "alt_name"); + doc.copyName(fNames, "int", "int_name"); + doc.copyName(fNames, "loc", "loc_name"); + doc.copyName(fNames, "old", "old_name"); + doc.copyName(fNames, "reg", "reg_name"); + doc.copyName(fNames, "housename", "addr:housename"); + + gen.writeObjectField("name", fNames); + } + + 
private void writeContext(JsonGenerator gen, Set> contexts) throws IOException { + final Map> multimap = new HashMap<>(); + + for (Map context : contexts) { + if (context.get("name") != null) { + multimap.computeIfAbsent("default", k -> new HashSet<>()).add(context.get("name")); + } + + for (String language : languages) { + if (context.get("name:" + language) != null) { + multimap.computeIfAbsent("default", k -> new HashSet<>()).add(context.get("name:" + language)); + } + } + } + + if (!multimap.isEmpty()) { + gen.writeObjectFieldStart("context"); + for (Map.Entry> entry : multimap.entrySet()) { + gen.writeStringField(entry.getKey(), String.join(", ", entry.getValue())); + } + gen.writeEndObject(); + } + } + + private void writeExtraTags(JsonGenerator gen, Map docTags) throws IOException { + boolean foundTag = false; + + for (String tag: extraTags) { + String value = docTags.get(tag); + if (value != null) { + if (!foundTag) { + gen.writeObjectFieldStart("extra"); + foundTag = true; + } + gen.writeStringField(tag, value); + } + } + + if (foundTag) { + gen.writeEndObject(); + } + } + + private static void writeExtent(JsonGenerator gen, Envelope bbox) throws IOException { + if (bbox == null || bbox.getArea() == 0.) 
return; + + //https://opensearch.org/docs/latest/field-types/supported-field-types/geo-shape/#envelope + gen.writeObjectFieldStart("extent"); + gen.writeStringField("type", "envelope"); + + gen.writeArrayFieldStart("coordinates"); + + gen.writeStartArray(); + gen.writeNumber(bbox.getMinX()); + gen.writeNumber(bbox.getMaxY()); + gen.writeEndArray(); + gen.writeStartArray(); + gen.writeNumber(bbox.getMaxX()); + gen.writeNumber(bbox.getMinY()); + gen.writeEndArray(); + + gen.writeEndArray(); + gen.writeEndObject(); + } + +} diff --git a/app/opensearch/src/test/java/de/komoot/photon/ESBaseTester.java b/app/opensearch/src/test/java/de/komoot/photon/ESBaseTester.java index 6f3a2ba26..ac7573db9 100644 --- a/app/opensearch/src/test/java/de/komoot/photon/ESBaseTester.java +++ b/app/opensearch/src/test/java/de/komoot/photon/ESBaseTester.java @@ -4,11 +4,13 @@ import de.komoot.photon.searcher.PhotonResult; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.io.TempDir; +import org.locationtech.jts.geom.Coordinate; import org.locationtech.jts.geom.GeometryFactory; import org.locationtech.jts.geom.PrecisionModel; import java.io.IOException; import java.nio.file.Path; +import java.util.Collections; import java.util.Date; public class ESBaseTester { @@ -20,9 +22,11 @@ public class ESBaseTester { private OpenSearchTestServer server; - protected PhotonDoc createDoc(double lon, double lat, int id, int osmId, String key, String value) { - return null; + final var location = FACTORY.createPoint(new Coordinate(lon, lat)); + return new PhotonDoc(id, "W", osmId, key, value) + .names(Collections.singletonMap("name", "berlin")) + .centroid(location); } @AfterEach @@ -31,7 +35,11 @@ public void tearDown() throws IOException { } protected PhotonResult getById(int id) { - return null; + return getById(Integer.toString(id)); + } + + protected PhotonResult getById(String id) { + return server.getByID(id); } public void setUpES() throws IOException { @@ -46,15 +54,15 @@ public 
void setUpES(Path test_directory, String... languages) throws IOException } protected Importer makeImporter() { - return null; + return server.createImporter(new String[]{"en"}, new String[]{}); } protected Importer makeImporterWithExtra(String... extraTags) { - return null; + return server.createImporter(new String[]{"en"}, extraTags); } protected Importer makeImporterWithLanguages(String... languages) { - return null; + return server.createImporter(languages, new String[]{}); } protected Updater makeUpdater() { diff --git a/app/opensearch/src/test/java/de/komoot/photon/opensearch/ImporterTest.java b/app/opensearch/src/test/java/de/komoot/photon/opensearch/ImporterTest.java new file mode 100644 index 000000000..f1b2a8687 --- /dev/null +++ b/app/opensearch/src/test/java/de/komoot/photon/opensearch/ImporterTest.java @@ -0,0 +1,102 @@ +package de.komoot.photon.opensearch; + +import de.komoot.photon.ESBaseTester; +import de.komoot.photon.Importer; +import de.komoot.photon.PhotonDoc; +import de.komoot.photon.searcher.PhotonResult; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; + +class ImporterTest extends ESBaseTester { + + @BeforeEach + public void setUp() throws IOException { + setUpES(); + } + + @Test + void testAddSimpleDoc() { + Importer instance = makeImporterWithExtra(""); + + instance.add(new PhotonDoc(1234, "N", 1000, "place", "city") + .extraTags(Collections.singletonMap("maxspeed", "100")), 0); + instance.finish(); + + PhotonResult response = getById(1234); + + assertNotNull(response); + + assertEquals("N", response.get("osm_type")); + assertEquals(1000, response.get("osm_id")); + assertEquals("place", response.get("osm_key")); + assertEquals("city", response.get("osm_value")); + + assertNull(response.get("extra")); + } + + @Test + void testAddHousenumberMultiDoc() 
{ + Importer instance = makeImporterWithExtra(""); + + instance.add(new PhotonDoc(4432, "N", 100, "building", "yes").houseNumber("34"), 0); + instance.add(new PhotonDoc(4432, "N", 100, "building", "yes").houseNumber("35"), 1); + instance.finish(); + + PhotonResult response = getById("4432"); + + assertNotNull(response); + + assertEquals("N", response.get("osm_type")); + assertEquals(100, response.get("osm_id")); + assertEquals("building", response.get("osm_key")); + assertEquals("yes", response.get("osm_value")); + assertEquals("34", response.get("housenumber")); + + response = getById("4432.1"); + + assertNotNull(response); + + assertEquals("N", response.get("osm_type")); + assertEquals(100, response.get("osm_id")); + assertEquals("building", response.get("osm_key")); + assertEquals("yes", response.get("osm_value")); + assertEquals("35", response.get("housenumber")); + } + + @Test + void testSelectedExtraTagsCanBeIncluded() { + Importer instance = makeImporterWithExtra("maxspeed", "website"); + + Map extratags = new HashMap<>(); + extratags.put("website", "foo"); + extratags.put("maxspeed", "100 mph"); + extratags.put("source", "survey"); + + instance.add(new PhotonDoc(1234, "N", 1000, "place", "city").extraTags(extratags), 0); + instance.add(new PhotonDoc(1235, "N", 1001, "place", "city") + .extraTags(Collections.singletonMap("wikidata", "100")), 0); + instance.finish(); + + PhotonResult response = getById(1234); + assertNotNull(response); + + Map extra = response.getMap("extra"); + assertNotNull(extra); + + assertEquals(2, extra.size()); + assertEquals("100 mph", extra.get("maxspeed")); + assertEquals("foo", extra.get("website")); + + response = getById(1235); + assertNotNull(response); + + assertNull(response.get("extra")); + } +} \ No newline at end of file diff --git a/app/opensearch/src/test/java/de/komoot/photon/opensearch/OpenSearchTestServer.java b/app/opensearch/src/test/java/de/komoot/photon/opensearch/OpenSearchTestServer.java index 
4a72a7518..462d7a29e 100644 --- a/app/opensearch/src/test/java/de/komoot/photon/opensearch/OpenSearchTestServer.java +++ b/app/opensearch/src/test/java/de/komoot/photon/opensearch/OpenSearchTestServer.java @@ -49,4 +49,19 @@ public void stopTestServer() throws IOException { runner.clean(); } + public PhotonResult getByID(String id) { + try { + final var response = client.get(fn -> fn + .index(PhotonIndex.NAME) + .id(id), OpenSearchResult.class); + + if (response.found()) { + return response.source(); + } + } catch (IOException e) { + // ignore + } + + return null; + } } From 59ddd179cba6122a3bc67c9cb1987d3fe5653497 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 1 May 2024 16:15:02 +0200 Subject: [PATCH 06/14] port updater to OpenSearch --- .../main/java/de/komoot/photon/Server.java | 3 +- .../de/komoot/photon/opensearch/Updater.java | 78 ++++++++++ .../java/de/komoot/photon/ESBaseTester.java | 4 +- .../komoot/photon/opensearch/UpdaterTest.java | 143 ++++++++++++++++++ src/main/java/de/komoot/photon/PhotonDoc.java | 4 + 5 files changed, 229 insertions(+), 3 deletions(-) create mode 100644 app/opensearch/src/main/java/de/komoot/photon/opensearch/Updater.java create mode 100644 app/opensearch/src/test/java/de/komoot/photon/opensearch/UpdaterTest.java diff --git a/app/opensearch/src/main/java/de/komoot/photon/Server.java b/app/opensearch/src/main/java/de/komoot/photon/Server.java index 17c2472f0..f34f67766 100644 --- a/app/opensearch/src/main/java/de/komoot/photon/Server.java +++ b/app/opensearch/src/main/java/de/komoot/photon/Server.java @@ -129,7 +129,8 @@ public Importer createImporter(String[] languages, String[] extraTags) { } public Updater createUpdater(String[] languages, String[] extraTags) { - return null; + registerPhotonDocSerializer(languages, extraTags); + return new de.komoot.photon.opensearch.Updater(client); } public SearchHandler createSearchHandler(String[] languages, int queryTimeoutSec) { diff --git 
a/app/opensearch/src/main/java/de/komoot/photon/opensearch/Updater.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/Updater.java
new file mode 100644
index 000000000..2324114c0
--- /dev/null
+++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/Updater.java
@@ -0,0 +1,78 @@
+package de.komoot.photon.opensearch;
+
+import de.komoot.photon.PhotonDoc;
+import org.opensearch.client.opensearch.OpenSearchClient;
+import org.opensearch.client.opensearch.core.BulkRequest;
+import org.slf4j.Logger;
+
+import java.io.IOException;
+
+public class Updater implements de.komoot.photon.Updater {
+    private static final Logger LOGGER = org.slf4j.LoggerFactory.getLogger(Updater.class);
+
+    private final OpenSearchClient client;
+    private BulkRequest.Builder bulkRequest = new BulkRequest.Builder(); // create()/delete() only queue here; sent by finish()
+    private int todoDocuments = 0; // number of queued operations
+
+    public Updater(OpenSearchClient client) {
+        this.client = client;
+    }
+
+    @Override
+    public void create(PhotonDoc doc, int objectId) {
+        bulkRequest.operations(op -> op
+                .index(i -> i
+                        .index(PhotonIndex.NAME)
+                        .id(doc.getUid(objectId))
+                        .document(doc)));
+        ++todoDocuments;
+    }
+
+    @Override
+    public void delete(long docId, int objectId) {
+        bulkRequest.operations(op -> op
+                .delete(d -> d
+                        .index(PhotonIndex.NAME)
+                        .id(PhotonDoc.makeUid(docId, objectId))));
+        ++todoDocuments;
+    }
+
+    @Override
+    public boolean exists(long docId, int objectId) {
+        try {
+            return client.exists(e -> e.index(PhotonIndex.NAME).id(PhotonDoc.makeUid(docId, objectId))).value();
+        } catch (IOException e) {
+            LOGGER.warn("IO error on exists operation", e);
+        }
+        return false; // an IO failure is reported as "does not exist"
+    }
+
+    @Override
+    public void finish() {
+        updateDocuments();
+        try {
+            client.indices().refresh();
+        } catch (IOException e) {
+            LOGGER.warn("IO error on refresh.", e); // pass the exception so the cause is logged, as exists() does
+        }
+    }
+
+    private void updateDocuments() {
+        if (todoDocuments == 0) {
+            return;
+        }
+
+        try {
+            var response = client.bulk(bulkRequest.build());
+
+            if (response.errors()) {
+                LOGGER.error("Errors during 
bulk update."); + } + } catch (IOException e) { + LOGGER.error("IO error during bulk update", e); + } + + bulkRequest = new BulkRequest.Builder(); + todoDocuments = 0; + } +} diff --git a/app/opensearch/src/test/java/de/komoot/photon/ESBaseTester.java b/app/opensearch/src/test/java/de/komoot/photon/ESBaseTester.java index ac7573db9..d55a6ad31 100644 --- a/app/opensearch/src/test/java/de/komoot/photon/ESBaseTester.java +++ b/app/opensearch/src/test/java/de/komoot/photon/ESBaseTester.java @@ -66,11 +66,11 @@ protected Importer makeImporterWithLanguages(String... languages) { } protected Updater makeUpdater() { - return null; + return server.createUpdater(new String[]{"en"}, new String[]{}); } protected Updater makeUpdaterWithExtra(String... extraTags) { - return null; + return server.createUpdater(new String[]{"en"}, extraTags); } protected Server getServer() { diff --git a/app/opensearch/src/test/java/de/komoot/photon/opensearch/UpdaterTest.java b/app/opensearch/src/test/java/de/komoot/photon/opensearch/UpdaterTest.java new file mode 100644 index 000000000..a70626219 --- /dev/null +++ b/app/opensearch/src/test/java/de/komoot/photon/opensearch/UpdaterTest.java @@ -0,0 +1,143 @@ +package de.komoot.photon.opensearch; + +import de.komoot.photon.ESBaseTester; +import de.komoot.photon.Importer; +import de.komoot.photon.PhotonDoc; +import de.komoot.photon.Updater; +import de.komoot.photon.searcher.PhotonResult; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; + +class UpdaterTest extends ESBaseTester { + + @Test + void addNameToDoc() throws IOException { + Map names = new HashMap<>(); + names.put("name", "Foo"); + PhotonDoc doc = new PhotonDoc(1234, "N", 1000, "place", "city").names(names); + + setUpES(); + Importer instance = makeImporter(); + instance.add(doc, 0); + instance.finish(); + refresh(); + + names.put("name:en", 
"Enfoo"); + Updater updater = makeUpdater(); + updater.create(doc, 0); + updater.finish(); + refresh(); + + PhotonResult response = getById(1234); + assertNotNull(response); + + Map out_names = response.getMap("name"); + assertEquals("Foo", out_names.get("default")); + assertEquals("Enfoo", out_names.get("en")); + } + + @Test + void removeNameFromDoc() throws IOException { + Map names = new HashMap<>(); + names.put("name", "Foo"); + names.put("name:en", "Enfoo"); + PhotonDoc doc = new PhotonDoc(1234, "N", 1000, "place", "city").names(names); + + setUpES(); + Importer instance = makeImporter(); + instance.add(doc, 0); + instance.finish(); + refresh(); + + names.remove("name"); + Updater updater = makeUpdater(); + updater.create(doc, 0); + updater.finish(); + refresh(); + + PhotonResult response = getById(1234); + assertNotNull(response); + + Map out_names = response.getMap("name"); + assertFalse(out_names.containsKey("default")); + assertEquals("Enfoo", out_names.get("en")); + } + + @Test + void addExtraTagsToDoc() throws IOException { + Map names = new HashMap<>(); + names.put("name", "Foo"); + PhotonDoc doc = new PhotonDoc(1234, "N", 1000, "place", "city").names(names); + + setUpES(); + Importer instance = makeImporterWithExtra("website"); + instance.add(doc, 0); + instance.finish(); + refresh(); + + PhotonResult response = getById(1234); + assertNotNull(response); + + assertNull(response.get("extra")); + + doc.extraTags(Collections.singletonMap("website", "http://site.foo")); + Updater updater = makeUpdaterWithExtra("website"); + updater.create(doc, 0); + updater.finish(); + refresh(); + + response = getById(1234); + assertNotNull(response); + + Map extra = response.getMap("extra"); + + assertNotNull(extra); + assertEquals(Collections.singletonMap("website", "http://site.foo"), extra); + } + + @Test + void deleteDoc() throws IOException { + setUpES(); + Importer instance = makeImporterWithExtra("website"); + instance.add(new PhotonDoc(4432, "N", 100, "building", 
"yes").houseNumber("34"), 0); + instance.add(new PhotonDoc(4432, "N", 100, "building", "yes").houseNumber("35"), 1); + instance.finish(); + refresh(); + + assertNotNull(getById("4432")); + assertNotNull(getById("4432.1")); + + Updater updater = makeUpdaterWithExtra("website"); + updater.delete(4432L, 1); + updater.finish(); + refresh(); + + assertNotNull(getById("4432")); + assertNull(getById("4432.1")); + } + + @Test + void checkExistence() throws IOException { + setUpES(); + Importer instance = makeImporterWithExtra("website"); + instance.add(new PhotonDoc(4432, "N", 100, "building", "yes").houseNumber("34"), 0); + instance.add(new PhotonDoc(4432, "N", 100, "building", "yes").houseNumber("35"), 1); + instance.finish(); + refresh(); + + Updater updater = makeUpdaterWithExtra("website"); + assertTrue(updater.exists(4432L, 0)); + assertTrue(updater.exists(4432L, 1)); + assertFalse(updater.exists(4432L, 2)); + assertFalse(updater.exists(4433L, 0)); + assertFalse(updater.exists(4433L, 1)); + updater.finish(); + refresh(); + } +} \ No newline at end of file diff --git a/src/main/java/de/komoot/photon/PhotonDoc.java b/src/main/java/de/komoot/photon/PhotonDoc.java index 6c82fbc84..719c42617 100644 --- a/src/main/java/de/komoot/photon/PhotonDoc.java +++ b/src/main/java/de/komoot/photon/PhotonDoc.java @@ -157,6 +157,10 @@ public PhotonDoc postcode(String postcode) { } public String getUid(int objectId) { + return makeUid(placeId, objectId); + } + + static public String makeUid(long placeId, int objectId) { if (objectId <= 0) return String.valueOf(placeId); From 2beccc3bee402cfa850498462a9d2d7f6952b27f Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 1 May 2024 22:28:16 +0200 Subject: [PATCH 07/14] port reverse geocoding to OpenSearch --- .../main/java/de/komoot/photon/Server.java | 2 +- .../opensearch/OpenSearchReverseHandler.java | 71 +++++++++++++++ .../photon/opensearch/OsmTagFilter.java | 86 +++++++++++++++++++ .../opensearch/ReverseQueryBuilder.java | 71 
+++++++++++++++ 4 files changed, 229 insertions(+), 1 deletion(-) create mode 100644 app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchReverseHandler.java create mode 100644 app/opensearch/src/main/java/de/komoot/photon/opensearch/OsmTagFilter.java create mode 100644 app/opensearch/src/main/java/de/komoot/photon/opensearch/ReverseQueryBuilder.java diff --git a/app/opensearch/src/main/java/de/komoot/photon/Server.java b/app/opensearch/src/main/java/de/komoot/photon/Server.java index f34f67766..11daf814e 100644 --- a/app/opensearch/src/main/java/de/komoot/photon/Server.java +++ b/app/opensearch/src/main/java/de/komoot/photon/Server.java @@ -138,7 +138,7 @@ public SearchHandler createSearchHandler(String[] languages, int queryTimeoutSec } public ReverseHandler createReverseHandler(int queryTimeoutSec) { - return null; + return new OpenSearchReverseHandler(client, queryTimeoutSec); } private void registerPhotonDocSerializer(String[] languages, String[] extraTags) { diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchReverseHandler.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchReverseHandler.java new file mode 100644 index 000000000..344e141b2 --- /dev/null +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchReverseHandler.java @@ -0,0 +1,71 @@ +package de.komoot.photon.opensearch; + +import de.komoot.photon.query.ReverseRequest; +import de.komoot.photon.searcher.PhotonResult; +import de.komoot.photon.searcher.ReverseHandler; +import org.locationtech.jts.geom.Point; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch._types.SearchType; +import org.opensearch.client.opensearch._types.SortOrder; +import org.opensearch.client.opensearch._types.query_dsl.Query; +import org.opensearch.client.opensearch.core.SearchResponse; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class 
OpenSearchReverseHandler implements ReverseHandler {
+    final private OpenSearchClient client;
+    final private String queryTimeout; // timeout string passed to the search request, e.g. "7s"
+
+    public OpenSearchReverseHandler(OpenSearchClient client, int queryTimeoutSec) {
+        this.client = client;
+        queryTimeout = queryTimeoutSec + "s";
+    }
+
+    @Override
+    public List<PhotonResult> reverse(ReverseRequest request) {
+        final var queryBuilder = new ReverseQueryBuilder(request.getLocation(), request.getRadius(), request.getQueryStringFilter(), request.getLayerFilters())
+                .withOsmTagFilters(request.getOsmTagFilters());
+
+        final var results = search(queryBuilder.buildQuery(),
+                request.getLimit(),
+                request.getLocationDistanceSort() ? request.getLocation() : null); // null location = no distance sorting
+
+        final List<PhotonResult> ret = new ArrayList<>();
+        for (var hit : results.hits().hits()) {
+            ret.add(hit.source());
+        }
+
+        return ret;
+    }
+
+    @Override
+    public String dumpQuery(ReverseRequest photonRequest) {
+        return "{}";
+    }
+
+    private SearchResponse<OpenSearchResult> search(Query query, int limit, Point location) {
+        try {
+            return client.search(s -> {
+                s.index(PhotonIndex.NAME)
+                        .searchType(SearchType.QueryThenFetch)
+                        .query(query)
+                        .size(limit)
+                        .timeout(queryTimeout); // apply the configured timeout, as OpenSearchSearchHandler.sendQuery does
+
+                if (location != null) {
+                    s.sort(sq -> sq
+                            .geoDistance(gd -> gd
+                                    .field("coordinate")
+                                    .location(l -> l.latlon(ll -> ll.lat(location.getY()).lon(location.getX())))
+                                    .order(SortOrder.Asc)));
+                }
+                return s;
+            }, OpenSearchResult.class);
+        } catch (IOException e) {
+            throw new RuntimeException("IO error during search", e);
+        }
+    }
+
+}
diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/OsmTagFilter.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/OsmTagFilter.java
new file mode 100644
index 000000000..ce2b80db8
--- /dev/null
+++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/OsmTagFilter.java
@@ -0,0 +1,86 @@
+package de.komoot.photon.opensearch;
+
+import de.komoot.photon.searcher.TagFilter;
+import de.komoot.photon.searcher.TagFilterKind;
+import 
org.opensearch.client.opensearch._types.FieldValue; +import org.opensearch.client.opensearch._types.query_dsl.BoolQuery; +import org.opensearch.client.opensearch._types.query_dsl.Query; +import org.opensearch.client.opensearch._types.query_dsl.TermsQuery; + +import java.util.Collections; +import java.util.List; + +public class OsmTagFilter { + private BoolQuery.Builder includeTagQueryBuilder = null; + private BoolQuery.Builder excludeTagQueryBuilder = null; + + public OsmTagFilter withOsmTagFilters(List filters) { + for (var filter : filters) { + addOsmTagFilter(filter); + } + return this; + } + + public Query build() { + if (includeTagQueryBuilder != null || excludeTagQueryBuilder != null) { + return BoolQuery.of(q -> { + if (includeTagQueryBuilder != null) { + q.must(includeTagQueryBuilder.build().toQuery()); + } + if (excludeTagQueryBuilder != null) { + q.mustNot(excludeTagQueryBuilder.build().toQuery()); + } + return q; + }).toQuery(); + } + + return null; + } + + private void addOsmTagFilter(TagFilter filter) { + if (filter.getKind() == TagFilterKind.EXCLUDE_VALUE) { + appendIncludeTerm(BoolQuery.of(q -> q + .must(makeTermsQuery("osm_key", filter.getKey())) + .mustNot(makeTermsQuery("osm_value", filter.getValue()))).toQuery()); + } else { + Query query; + if (filter.isKeyOnly()) { + query = makeTermsQuery("osm_key", filter.getKey()); + } else if (filter.isValueOnly()) { + query = makeTermsQuery("osm_value", filter.getValue()); + } else { + query = BoolQuery.of(q -> q + .must(makeTermsQuery("osm_key", filter.getKey())) + .must(makeTermsQuery("osm_value", filter.getValue()))).toQuery(); + } + + if (filter.getKind() == TagFilterKind.INCLUDE) { + appendIncludeTerm(query); + } else { + appendExcludeTerm(query); + } + } + } + + private void appendIncludeTerm(Query query) { + if (includeTagQueryBuilder == null) { + includeTagQueryBuilder = new BoolQuery.Builder(); + } + + includeTagQueryBuilder.should(query); + } + + private void appendExcludeTerm(Query query) { + if 
(excludeTagQueryBuilder == null) { + excludeTagQueryBuilder = new BoolQuery.Builder(); + } + + excludeTagQueryBuilder.should(query); + } + + static private Query makeTermsQuery(String field, String term) { + return TermsQuery.of(q -> q + .field(field) + .terms(t -> t.value(Collections.singletonList(FieldValue.of(term))))).toQuery(); + } +} diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/ReverseQueryBuilder.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/ReverseQueryBuilder.java new file mode 100644 index 000000000..4ef122e9c --- /dev/null +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/ReverseQueryBuilder.java @@ -0,0 +1,71 @@ +package de.komoot.photon.opensearch; + +import de.komoot.photon.searcher.TagFilter; +import org.locationtech.jts.geom.Point; +import org.opensearch.client.opensearch._types.FieldValue; +import org.opensearch.client.opensearch._types.query_dsl.BoolQuery; +import org.opensearch.client.opensearch._types.query_dsl.Query; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +public class ReverseQueryBuilder { + final private double radius; + final private Point location; + final private String queryStringFilter; + final private Set layerFilter; + + final private OsmTagFilter osmTagFilter = new OsmTagFilter(); + + public ReverseQueryBuilder(Point location, double radius, String queryStringFilter, Set layerFilter) { + this.radius = radius; + this.location = location; + this.queryStringFilter = queryStringFilter != null && queryStringFilter.trim().length() > 0 ? 
queryStringFilter.trim() : null; + this.layerFilter = layerFilter; + } + + public Query buildQuery() { + return BoolQuery.of(q -> { + q.filter(fq -> fq + .geoDistance(gd -> gd + .field("coordinate") + .location(l -> l.latlon(ll -> ll.lat(location.getY()).lon(location.getX()))) + .distance(radius + "km"))); + + boolean hasQuery = false; + + if (queryStringFilter != null) { + q.must(qst -> qst.queryString(qs -> qs.query(queryStringFilter))); + hasQuery = true; + } + + if (!layerFilter.isEmpty()) { + q.must(ftq -> ftq.terms(tq -> { + List terms = new ArrayList<>(); + for (var filter : layerFilter) { + terms.add(FieldValue.of(filter)); + } + return tq.field("type").terms(tt -> tt.value(terms)); + })); + hasQuery = true; + } + + if (!hasQuery) { + q.must(mq -> mq.matchAll(ma -> ma)); + } + + final var tagFilters = osmTagFilter.build(); + if (tagFilters != null) { + q.filter(tagFilters); + } + + return q; + }).toQuery(); + } + + public ReverseQueryBuilder withOsmTagFilters(List filters) { + osmTagFilter.withOsmTagFilters(filters); + return this; + } +} From 87741ead759234930e53642bfb572f516a7de145 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 1 May 2024 23:47:29 +0200 Subject: [PATCH 08/14] port forward search to OpenSearch --- .../main/java/de/komoot/photon/Server.java | 2 +- .../photon/opensearch/OpenSearchResult.java | 4 + .../opensearch/OpenSearchSearchHandler.java | 70 ++++++ .../photon/opensearch/SearchQueryBuilder.java | 233 ++++++++++++++++++ 4 files changed, 308 insertions(+), 1 deletion(-) create mode 100644 app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchSearchHandler.java create mode 100644 app/opensearch/src/main/java/de/komoot/photon/opensearch/SearchQueryBuilder.java diff --git a/app/opensearch/src/main/java/de/komoot/photon/Server.java b/app/opensearch/src/main/java/de/komoot/photon/Server.java index 11daf814e..baa1d81f2 100644 --- a/app/opensearch/src/main/java/de/komoot/photon/Server.java +++ 
b/app/opensearch/src/main/java/de/komoot/photon/Server.java @@ -134,7 +134,7 @@ public Updater createUpdater(String[] languages, String[] extraTags) { } public SearchHandler createSearchHandler(String[] languages, int queryTimeoutSec) { - return null; + return new OpenSearchSearchHandler(client, languages, queryTimeoutSec); } public ReverseHandler createReverseHandler(int queryTimeoutSec) { diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchResult.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchResult.java index 135cb05b7..1862e0671 100644 --- a/app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchResult.java +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchResult.java @@ -20,6 +20,10 @@ public class OpenSearchResult implements PhotonResult { this.localeTags = localeTags; } + public OpenSearchResult setScore(double score) { + this.score = score; + return this; + } @Override public Object get(String key) { diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchSearchHandler.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchSearchHandler.java new file mode 100644 index 000000000..22465ba97 --- /dev/null +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchSearchHandler.java @@ -0,0 +1,70 @@ +package de.komoot.photon.opensearch; + +import de.komoot.photon.query.PhotonRequest; +import de.komoot.photon.searcher.PhotonResult; +import de.komoot.photon.searcher.SearchHandler; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch._types.SearchType; +import org.opensearch.client.opensearch._types.query_dsl.Query; +import org.opensearch.client.opensearch.core.SearchResponse; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class OpenSearchSearchHandler implements SearchHandler { + final private OpenSearchClient client; + final 
private String[] supportedLanguages; + final private String queryTimeout; + + public OpenSearchSearchHandler(OpenSearchClient client, String[] supportedLanguages, int queryTimeout) { + this.client = client; + this.supportedLanguages = supportedLanguages; + this.queryTimeout = queryTimeout + "s"; + } + + @Override + public List search(PhotonRequest request) { + final int limit = request.getLimit(); + final int extLimit = limit > 1 ? (int) Math.round(limit * 1.5) : 1; + + var results = sendQuery(buildQuery(request, false).buildQuery(), extLimit); + + if (results.hits().hits().isEmpty()) { + results = sendQuery(buildQuery(request, true).buildQuery(), extLimit); + } + + List ret = new ArrayList<>(); + for (var hit : results.hits().hits()) { + ret.add(hit.source().setScore(hit.score())); + } + + return ret; + } + + @Override + public String dumpQuery(PhotonRequest photonRequest) { + return "{}"; + } + + private SearchQueryBuilder buildQuery(PhotonRequest request, boolean lenient) { + return new SearchQueryBuilder(request.getQuery(), request.getLanguage(), supportedLanguages, lenient). + withOsmTagFilters(request.getOsmTagFilters()). + withLayerFilters(request.getLayerFilters()). + withLocationBias(request.getLocationForBias(), request.getScaleForBias(), request.getZoomForBias()). 
+ withBoundingBox(request.getBbox()); + } + + private SearchResponse sendQuery(Query query, int limit) { + try { + return client.search(s -> s + .index(PhotonIndex.NAME) + .searchType(SearchType.QueryThenFetch) + .query(query) + .size(limit) + .timeout(queryTimeout), OpenSearchResult.class); + } catch (IOException e) { + throw new RuntimeException("IO error during search", e); + } + } +} diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/SearchQueryBuilder.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/SearchQueryBuilder.java new file mode 100644 index 000000000..8eb8ac026 --- /dev/null +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/SearchQueryBuilder.java @@ -0,0 +1,233 @@ +package de.komoot.photon.opensearch; + +import de.komoot.photon.searcher.TagFilter; +import org.locationtech.jts.geom.Envelope; +import org.locationtech.jts.geom.Point; +import org.opensearch.client.json.JsonData; +import org.opensearch.client.opensearch._types.FieldValue; +import org.opensearch.client.opensearch._types.query_dsl.*; +import org.opensearch.client.util.ObjectBuilder; + +import java.util.*; + +public class SearchQueryBuilder { + private static final String[] ALT_NAMES = new String[]{"alt", "int", "loc", "old", "reg", "housename"}; + + private ObjectBuilder finalQueryWithoutTagFilterBuilder; + private BoolQuery.Builder queryBuilderForTopLevelFilter; + private OsmTagFilter osmTagFilter = new OsmTagFilter(); + private GeoBoundingBoxQuery.Builder bboxQueryBuilder; + private TermsQuery.Builder layerQueryBuilder; + private Query finalQuery = null; + + public SearchQueryBuilder(String query, String language, String[] languages, boolean lenient) { + var query4QueryBuilder = QueryBuilders.bool(); + + // 1. All terms of the query must be contained in the place record somehow. Be more lenient on second try. 
+        query4QueryBuilder.must(base -> base.multiMatch(q -> {
+            q.query(query)
+                    .fields("collector.default^1.0")
+                    .prefixLength(2)
+                    .analyzer("search_ngram")
+                    .tieBreaker(0.4)
+                    .minimumShouldMatch(lenient ? "-34%" : "100%");
+
+            if (lenient) {
+                q.type(TextQueryType.BestFields).fuzziness("auto");
+            } else {
+                q.type(TextQueryType.CrossFields);
+            }
+            for (String lang : languages) { // Locale.ROOT below: '%f' must print '.' as decimal separator regardless of the default locale
+                q.fields(String.format(Locale.ROOT, "collector.%s.ngrams^%f", lang, lang.equals(language) ? 1.0f : 0.6f));
+            }
+            return q;
+        }));
+
+        // 2. Prefer records that have the full names in. For address records with house numbers this is the main
+        //    filter criterion because they have no name. Boost the score in this case.
+        query4QueryBuilder.should(shd -> shd.functionScore(fs -> fs
+                .query(q -> q.multiMatch(mm -> {
+                    mm.query(query).type(TextQueryType.BestFields);
+                    mm.fields(String.format(Locale.ROOT, "%s^%f", "collector.default.raw", 1.0f));
+
+                    for (String lang : languages) {
+                        mm.fields(String.format(Locale.ROOT, "collector.%s.raw^%f", lang, lang.equals(language) ? 1.0f : 0.6f));
+                    }
+
+                    return mm.boost(0.3f);
+                }))
+                .functions(fn -> fn
+                        .filter(flt -> flt
+                                .match(m -> m
+                                        .query(q -> q.stringValue(query))
+                                        .field("housenumber")))
+                        .weight(10.0)
+                )
+        ));
+
+        // 3. Either the name or house number must be in the query terms.
+        final String defLang = "default".equals(language) ? languages[0] : language;
+        var nameNgramQuery = MultiMatchQuery.of(q -> {
+            q.query(query).type(TextQueryType.BestFields).fuzziness(lenient ? "1" : "0").analyzer("search_ngram");
+
+            for (String lang : languages) {
+                q.fields(String.format(Locale.ROOT, "name.%s.ngrams^%f", lang, lang.equals(defLang) ? 
1.0f : 0.4f)); + } + + for (String alt : ALT_NAMES) { + q.fields(String.format("name.%s.raw^0.4", alt)); + } + + if (query.indexOf(',') < 0 && query.indexOf(' ') < 0) { + q.boost(2f); + } + + return q; + }); + + if (query.indexOf(',') < 0 && query.indexOf(' ') < 0) { + query4QueryBuilder.must(nameNgramQuery.toQuery()); + } else { + query4QueryBuilder.must(m -> m.bool(q -> q + .should(nameNgramQuery.toQuery()) + .should(shd1 -> shd1 + .match(m1 -> m1 + .query(q1 -> q1.stringValue(query)) + .field("housenumber") + .analyzer("standard"))) + .should(shd2 -> shd2 + .match(m2 -> m2 + .query(q2 -> q2.stringValue(query)) + .field("classification") + .boost(0.1f))) + .minimumShouldMatch("1") + )); + } + + // 4. Rerank results for having the full name in the default language. + query4QueryBuilder.should(m -> m.match(inner -> inner + .query(q -> q.stringValue(query)) + .field(String.format("name.%s.raw", language)) + .fuzziness(lenient ? "auto" : "0") + )); + + // Weigh the resulting score by importance. Use a linear scale function that ensures that the weight + // never drops to 0 and cancels out the ES score. + finalQueryWithoutTagFilterBuilder = new Query.Builder().functionScore(fs -> fs + .query(query4QueryBuilder.build().toQuery()) + .functions(fn1 -> fn1 + .linear(df1 -> df1 + .field("importance") + .placement(p1 -> p1 + .origin(JsonData.of(1.0)) + .scale(JsonData.of(0.6)) + .decay(0.5)))) + .functions(fn2 -> fn2 + .filter(flt -> flt + .match(m -> m + .query(q -> q.stringValue(query)) + .field("classification"))) + .weight(0.1)) + .scoreMode(FunctionScoreMode.Sum) + ); + + // Filter for later: records that have a house number and no name must only appear when the house number matches. 
+ queryBuilderForTopLevelFilter = QueryBuilders.bool() + .should(q1 -> q1.bool(qin -> qin + .mustNot(mn -> mn.exists(ex -> ex.field("housenumber"))))) + .should(q2 -> q2.match(m2 -> m2 + .query(iq -> iq.stringValue(query)) + .field("housenumber") + .analyzer("standard"))) + .should(q3 -> q3.exists(ex2 -> ex2 + .field(String.format("name.%s.raw", language)))); + } + + public SearchQueryBuilder withLocationBias(Point point, double scale, int zoom) { + if (point == null || zoom < 4) return this; + + if (zoom > 18) { + zoom = 18; + } + double radius = (1 << (18 - zoom)) * 0.25; + final double fnscale = (scale <= 0.0) ? 0.0000001 : scale; + + Map params = new HashMap<>(); + params.put("lon", point.getX()); + params.put("lat", point.getY()); + + finalQueryWithoutTagFilterBuilder = new Query.Builder().functionScore(fs -> fs + .query(finalQueryWithoutTagFilterBuilder.build()) + .functions(fn1 -> fn1.exp(ex -> ex + .field("coordinate") + .placement(p -> p.origin(JsonData.of(params)).scale(JsonData.of(radius + "km"))))) + .functions(fn2 -> fn2.linear(lin -> lin + .field("importance") + .placement(p -> p.origin(JsonData.of(1.0)).scale(JsonData.of(fnscale)).decay(0.5)))) + .boostMode(FunctionBoostMode.Multiply) + .scoreMode(FunctionScoreMode.Max) + ); + + return this; + } + + public SearchQueryBuilder withBoundingBox(Envelope bbox) { + if (bbox != null) { + bboxQueryBuilder = QueryBuilders.geoBoundingBox() + .field("coordinate") + .boundingBox(b -> b.coords(c -> c + .top(bbox.getMaxY()) + .bottom(bbox.getMinY()) + .left(bbox.getMinX()) + .right(bbox.getMaxX()))); + } + + return this; + } + + public SearchQueryBuilder withOsmTagFilters(List filters) { + osmTagFilter.withOsmTagFilters(filters); + return this; + } + + public SearchQueryBuilder withLayerFilters(Set filters) { + if (!filters.isEmpty()) { + List terms = new ArrayList<>(); + for (var filter : filters) { + terms.add(FieldValue.of(filter)); + } + layerQueryBuilder = QueryBuilders.terms().field("type").terms(t -> 
t.value(terms)); + } + + return this; + } + + public Query buildQuery() { + if (finalQuery == null) { + finalQuery = BoolQuery.of(q -> { + q.must(finalQueryWithoutTagFilterBuilder.build()); + q.filter(queryBuilderForTopLevelFilter.build().toQuery()); + q.filter(f -> f.bool(fb -> fb + .mustNot(n -> n.ids(i -> i.values(PhotonIndex.PROPERTY_DOCUMENT_ID))) + )); + + final var tagFilters = osmTagFilter.build(); + if (tagFilters != null) { + q.filter(tagFilters); + } + + if (bboxQueryBuilder != null) { + q.filter(bboxQueryBuilder.build().toQuery()); + } + + if (layerQueryBuilder != null) { + q.filter(layerQueryBuilder.build().toQuery()); + } + + return q; + }).toQuery(); + } + + return finalQuery; + } +} From fca7f98c73056a3ca6a78cf441695d1a8998550e Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 1 May 2024 23:56:26 +0200 Subject: [PATCH 09/14] switch default port to match test server --- app/opensearch/src/main/java/de/komoot/photon/Server.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/opensearch/src/main/java/de/komoot/photon/Server.java b/app/opensearch/src/main/java/de/komoot/photon/Server.java index baa1d81f2..66a593fc5 100644 --- a/app/opensearch/src/main/java/de/komoot/photon/Server.java +++ b/app/opensearch/src/main/java/de/komoot/photon/Server.java @@ -31,7 +31,7 @@ public Server start(String clusterName, String[] transportAddresses) { for (int i = 0; i < transportAddresses.length; ++i) { final String[] parts = transportAddresses[i].split(":", 2); hosts[i] = new HttpHost("http", parts[0], - parts.length > 1 ? Integer.parseInt(parts[1]) : 9200); + parts.length > 1 ? 
Integer.parseInt(parts[1]) : 9201); } final var module = new SimpleModule("PhotonResultDeserializer", From f89ba7799db8dff03f48b2b3e753ee13f80bccce Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 2 May 2024 16:43:29 +0200 Subject: [PATCH 10/14] add embedded mode and tweak search for new version Also slightly changes the index structure to reduce the number of fields that need to be looked up in multi-match queries. The new version seems to perform slightly worse on that. --- app/opensearch/build.gradle | 4 +- .../main/java/de/komoot/photon/Server.java | 53 +++++++++++++++---- .../de/komoot/photon/opensearch/Importer.java | 17 ++++++ .../photon/opensearch/IndexMapping.java | 51 +++++++++++------- .../opensearch/IndexSettingBuilder.java | 15 +----- .../photon/opensearch/SearchQueryBuilder.java | 37 +++++++------ 6 files changed, 115 insertions(+), 62 deletions(-) diff --git a/app/opensearch/build.gradle b/app/opensearch/build.gradle index 12939dbda..1f87039e7 100644 --- a/app/opensearch/build.gradle +++ b/app/opensearch/build.gradle @@ -20,7 +20,7 @@ dependencies { implementation 'org.apache.httpcomponents.client5:httpclient5:5.3.1' implementation 'com.fasterxml.jackson.core:jackson-databind:2.17.0' - testImplementation 'org.codelibs.opensearch:opensearch-runner:2.13.0.0' + implementation 'org.codelibs.opensearch:opensearch-runner:2.13.0.0' } tasks.named('jar') { @@ -34,6 +34,8 @@ shadowJar { archiveBaseName.set('photon-opensearch') archiveClassifier.set('') + transform(com.github.jengelman.gradle.plugins.shadow.transformers.Log4j2PluginsCacheFileTransformer) + exclude '**/module-info.class' // This mitigates against the log4j JNDI lookup vulnerability: diff --git a/app/opensearch/src/main/java/de/komoot/photon/Server.java b/app/opensearch/src/main/java/de/komoot/photon/Server.java index 66a593fc5..481fc9d08 100644 --- a/app/opensearch/src/main/java/de/komoot/photon/Server.java +++ b/app/opensearch/src/main/java/de/komoot/photon/Server.java @@ -6,12 +6,14 @@ 
import de.komoot.photon.searcher.ReverseHandler; import de.komoot.photon.searcher.SearchHandler; import org.apache.hc.core5.http.HttpHost; +import org.codelibs.opensearch.runner.OpenSearchRunner; import org.opensearch.client.json.jackson.JacksonJsonpMapper; import org.opensearch.client.opensearch.OpenSearchClient; import org.opensearch.client.opensearch._types.HealthStatus; import org.opensearch.client.transport.httpclient5.ApacheHttpClient5TransportBuilder; import org.slf4j.Logger; +import java.io.File; import java.io.IOException; import java.util.Date; @@ -19,19 +21,24 @@ public class Server { private static final Logger LOGGER = org.slf4j.LoggerFactory.getLogger(Server.class); protected OpenSearchClient client; + private OpenSearchRunner runner = null; + final protected String dataDirectory; public Server(String mainDirectory) { + dataDirectory = new File(mainDirectory, "photon_data").getAbsolutePath(); } public Server start(String clusterName, String[] transportAddresses) { + HttpHost[] hosts; if (transportAddresses.length == 0) { - throw new RuntimeException("OpenSearch-port neds an external OpeSearch instance. Use -transport-addresses."); - } - final HttpHost[] hosts = new HttpHost[transportAddresses.length]; - for (int i = 0; i < transportAddresses.length; ++i) { - final String[] parts = transportAddresses[i].split(":", 2); - hosts[i] = new HttpHost("http", parts[0], - parts.length > 1 ? Integer.parseInt(parts[1]) : 9201); + hosts = startInternal(clusterName); + } else { + hosts = new HttpHost[transportAddresses.length]; + for (int i = 0; i < transportAddresses.length; ++i) { + final String[] parts = transportAddresses[i].split(":", 2); + hosts[i] = new HttpHost("http", parts[0], + parts.length > 1 ? 
Integer.parseInt(parts[1]) : 9201); + } } final var module = new SimpleModule("PhotonResultDeserializer", @@ -51,7 +58,27 @@ public Server start(String clusterName, String[] transportAddresses) { return this; } - public void waitForReady() throws IOException{ + private HttpHost[] startInternal(String clusterName) { + runner = new OpenSearchRunner(); + runner.onBuild((number, settingsBuilder) -> { + settingsBuilder.put("http.cors.enabled", false); + settingsBuilder.put("discovery.type", "single-node"); + settingsBuilder.putList("discovery.seed_hosts", "127.0.0.1:9201"); + settingsBuilder.put("indices.query.bool.max_clause_count", "30000"); + }).build(OpenSearchRunner.newConfigs().basePath(dataDirectory).clusterName(clusterName).numOfNode(1)); + + runner.ensureYellow(); + + HttpHost[] hosts = new HttpHost[runner.getNodeSize()]; + + for (int i = 0; i < runner.getNodeSize(); ++i) { + hosts[i] = new HttpHost("http", "127.0.0.1", Integer.parseInt(runner.getNode(i).settings().get("http.port"))); + } + + return hosts; + } + + public void waitForReady() throws IOException { client.cluster().health(h -> h.waitForStatus(HealthStatus.Yellow)); } @@ -61,7 +88,13 @@ public void refreshIndexes() throws IOException { } public void shutdown() { - // external node only, do nothing + if (runner != null) { + try { + runner.close(); + } catch (IOException e) { + LOGGER.error("IO error on closing database", e); + } + } } public DatabaseProperties recreateIndex(String[] languages, Date importDate) throws IOException { @@ -70,7 +103,7 @@ public DatabaseProperties recreateIndex(String[] languages, Date importDate) thr client.indices().delete(d -> d.index(PhotonIndex.NAME)); } - (new IndexSettingBuilder()).createIndex(client, PhotonIndex.NAME); + (new IndexSettingBuilder()).setShards(5).createIndex(client, PhotonIndex.NAME); (new IndexMapping()).addLanguages(languages).putMapping(client, PhotonIndex.NAME); diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/Importer.java 
b/app/opensearch/src/main/java/de/komoot/photon/opensearch/Importer.java index 034c30a76..c3e7b31bf 100644 --- a/app/opensearch/src/main/java/de/komoot/photon/opensearch/Importer.java +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/Importer.java @@ -2,6 +2,7 @@ import de.komoot.photon.PhotonDoc; import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch._types.Time; import org.opensearch.client.opensearch.core.BulkRequest; import org.slf4j.Logger; @@ -16,6 +17,7 @@ public class Importer implements de.komoot.photon.Importer { public Importer(OpenSearchClient client) { this.client = client; + enableImportSettings(true); } @Override @@ -38,6 +40,8 @@ public void finish() { saveDocuments(); } + enableImportSettings(false); + try { client.indices().refresh(); } catch (IOException e) { @@ -59,4 +63,17 @@ private void saveDocuments() { bulkRequest = new BulkRequest.Builder(); todoDocuments = 0; } + + private void enableImportSettings(boolean enable) { + try { + client.indices().putSettings(s -> s + .index(PhotonIndex.NAME) + .settings(is -> is + .refreshInterval(Time.of(t -> t.time(enable ? "-1" : "15s"))) + .numberOfReplicas(enable ? "0" : "1"))); + } catch (IOException e) { + LOGGER.warn("IO error while setting refresh interval", e); + } + + } } diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexMapping.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexMapping.java index ca11639e3..6abf42f3b 100644 --- a/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexMapping.java +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexMapping.java @@ -25,27 +25,29 @@ public IndexMapping addLanguages(String[] languages) { List name_collectors = new ArrayList<>(); for (var lang: languages) { mappings.properties("collector." 
+ lang, - b -> b.text(p -> p.index(false) - .fields("ngrams", f -> f.text(pi -> pi.index(true).analyzer("index_ngram"))) - .fields("raw", f2 -> f2.text(pi2 -> pi2.index(true).analyzer("index_raw").searchAnalyzer("search_raw")) - ))); + b -> b.text(p -> p.index(true) + .analyzer("index_raw")) + ); for (var field: ADDRESS_FIELDS) { mappings.properties(String.format("%s.%s", field, lang), - b -> b.text(p -> p.index(false).copyTo("collector." + lang))); + b -> b.text(p -> p + .index(false) + .copyTo("collector.base", "collector." + lang))); } mappings.properties("name." + lang, b -> b.text(p -> p.index(false) .fields("ngrams", f -> f.text(pi -> pi.index(true).analyzer("index_ngram"))) - .fields("raw", f2 -> f2.text(pi2 -> pi2.index(true).analyzer("index_raw").searchAnalyzer("search_raw"))) - .copyTo("collector." + lang))); + .fields("raw", f2 -> f2.text(pi2 -> pi2.index(true).analyzer("index_raw"))) + .copyTo("collector." + lang, "collector.base"))); //add language-specific collector to default for name name_collectors.add("name." + lang); } name_collectors.add("collector.default"); + name_collectors.add("collector.base"); mappings.properties("name.default", b -> b.text(p -> p.index(false).copyTo(name_collectors))); return this; @@ -70,30 +72,43 @@ private void setupBaseMappings() { mappings.properties("housenumber", b -> b.text(p -> p.index(true) .analyzer("index_housenumber").searchAnalyzer("standard") - .copyTo("collector.default") + .copyTo("collector.default", "collector.base") )); mappings.properties("classification", b -> b.text(p -> p.index(true) .analyzer("keyword") .searchAnalyzer("search_classification") - .copyTo("collector.default"))); + .copyTo("collector.default", "collector.base"))); - mappings.properties("collector.default", - b -> b.text(p -> p.index(true) - .analyzer("index_ngram") - .fields("raw", f -> f.text(pi -> pi.index(true).analyzer("index_raw"))))); + // The catch-all collector used to find overall matches. 
+ mappings.properties("collector.base", b -> b.text(p -> p + .index(true) + .analyzer("index_ngram"))); + + // Collector for all address parts in the default language. + mappings.properties("collector.default", b -> b.text(p -> p + .index(true) + .analyzer("index_raw"))); for (var field : ADDRESS_FIELDS) { - mappings.properties(field + ".default", b -> b.text(p -> p.index(false).copyTo("collector.default"))); + mappings.properties(field + ".default", b -> b.text(p -> p + .index(false) + .copyTo("collector.default", "collector.base"))); } - mappings.properties("postcode", b -> b.text(p -> p.index(false).copyTo("collector.default"))); + mappings.properties("postcode", b -> b.text(p -> p + .index(false) + .copyTo("collector.default", "collector.base"))); + + mappings.properties("name.default", b -> b.text(p -> p + .index(false) + .copyTo("collector.default", "collector.base"))); - mappings.properties("name.default", b -> b.text(p -> p.index(false).copyTo("collector.default"))); + // Collector for all name parts. + mappings.properties("name.other", b -> b.text(pi -> pi.index(true).analyzer("index_raw"))); for (var suffix : new String[]{"alt", "int", "loc", "old", "reg", "housename"}) { mappings.properties("name." 
+ suffix, b -> b.text(p -> p.index(false) - .fields("raw", bi -> bi.text(pi -> pi.index(true).analyzer("index_raw"))) - .copyTo("collector.default"))); + .copyTo("collector.default", "name.other", "collector.base"))); } } } diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexSettingBuilder.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexSettingBuilder.java index 987607915..dcae0d6f2 100644 --- a/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexSettingBuilder.java +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexSettingBuilder.java @@ -136,7 +136,7 @@ private void addDefaultSettings() { "german_normalization", "asciifolding", "unique"))); - settings.analyzer("search_ngram", + settings.analyzer("search", f -> f.custom(d -> { d.charFilter("punctuationgreedy") .tokenizer("standard") @@ -156,19 +156,6 @@ private void addDefaultSettings() { "german_normalization", "asciifolding", "unique"))); - settings.analyzer("search_raw", - f -> f.custom(d -> { - d.charFilter("punctuationgreedy") - .tokenizer("standard") - .filter("lowercase"); - for (var filter : extra_filters) { - d.filter(filter); - } - d.filter("german_normalization", - "asciifolding", - "unique"); - return d; - })); settings.analyzer("index_housenumber", f -> f.custom(d -> d .charFilter("punctuationgreedy", "remove_ws_hnr_suffix") diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/SearchQueryBuilder.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/SearchQueryBuilder.java index 8eb8ac026..6c463514d 100644 --- a/app/opensearch/src/main/java/de/komoot/photon/opensearch/SearchQueryBuilder.java +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/SearchQueryBuilder.java @@ -24,21 +24,17 @@ public SearchQueryBuilder(String query, String language, String[] languages, boo var query4QueryBuilder = QueryBuilders.bool(); // 1. All terms of the query must be contained in the place record somehow. 
Be more lenient on second try. - query4QueryBuilder.must(base -> base.multiMatch(q -> { - q.query(query) - .fields("collector.default^1.0") - .prefixLength(2) - .analyzer("search_ngram") - .tieBreaker(0.4) - .minimumShouldMatch(lenient ? "-34%" : "100%"); + query4QueryBuilder.must(base -> base.match(q -> { + q.query(fn -> fn.stringValue(query)); + q.analyzer("search"); + q.field("collector.base"); if (lenient) { - q.type(TextQueryType.BestFields).fuzziness("auto"); + q.fuzziness("AUTO"); + q.prefixLength(2); + q.minimumShouldMatch("-34%"); } else { - q.type(TextQueryType.CrossFields); - } - for (String lang : languages) { - q.fields(String.format("collector.%s.ngrams^%f", lang, lang.equals(language) ? 1.0f : 0.6f)); + q.operator(Operator.And); } return q; })); @@ -47,11 +43,11 @@ public SearchQueryBuilder(String query, String language, String[] languages, boo // filter criterion because they have no name. Boost the score in this case. query4QueryBuilder.should(shd -> shd.functionScore(fs -> fs .query(q -> q.multiMatch(mm -> { - mm.query(query).type(TextQueryType.BestFields); - mm.fields(String.format("%s^%f", "collector.default.raw", 1.0f)); + mm.query(query).type(TextQueryType.BestFields).analyzer("search"); + mm.fields(String.format("%s^%f", "collector.default", 1.0f)); for (String lang : languages) { - mm.fields(String.format("collector.%s.raw^%f", lang, lang.equals(language) ? 1.0f : 0.6f)); + mm.fields(String.format("collector.%s^%f", lang, lang.equals(language) ? 1.0f : 0.6f)); } return mm.boost(0.3f); @@ -68,15 +64,17 @@ public SearchQueryBuilder(String query, String language, String[] languages, boo // 3. Either the name or house number must be in the query terms. final String defLang = "default".equals(language) ? languages[0] : language; var nameNgramQuery = MultiMatchQuery.of(q -> { - q.query(query).type(TextQueryType.BestFields).fuzziness(lenient ? 
"1" : "0").analyzer("search_ngram"); + q.query(query).type(TextQueryType.BestFields).analyzer("search"); + + if (lenient) { + q.fuzziness("AUTO").prefixLength(2); + } for (String lang : languages) { q.fields(String.format("name.%s.ngrams^%f", lang, lang.equals(defLang) ? 1.0f : 0.4f)); } - for (String alt : ALT_NAMES) { - q.fields(String.format("name.%s.raw^0.4", alt)); - } + q.fields("name.other^0.4"); if (query.indexOf(',') < 0 && query.indexOf(' ') < 0) { q.boost(2f); @@ -108,6 +106,7 @@ public SearchQueryBuilder(String query, String language, String[] languages, boo query4QueryBuilder.should(m -> m.match(inner -> inner .query(q -> q.stringValue(query)) .field(String.format("name.%s.raw", language)) + .analyzer("search") .fuzziness(lenient ? "auto" : "0") )); From 92d88464c2600f4439b01fd650959203d5c73e55 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 6 May 2024 14:26:21 +0200 Subject: [PATCH 11/14] add note in README about new OpenSearch version --- README.md | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 904d1c543..b5ae0458e 100644 --- a/README.md +++ b/README.md @@ -52,10 +52,26 @@ photon uses [gradle](https://gradle.org) for building. To build the package from source make sure you have a JDK installed. Then run: ``` -./gradlew build +./gradlew app:es_embedded:build ``` -This will build and test photon. The final jar cn be found in `build/libs`. +This will build and test photon. The final jar can be found in `target`. + +#### Experimental OpenSearch version + +The repository also contains a version that runs against the latest +version of [OpenSearch](https://opensearch.org/). This version is still +experimental. To build the OpenSearch version run: + +``` +./gradlew app:opensearch:build +``` + +The final jar can be found in `target/photon-opensearch-<VERSION>.jar`. + +Indexes produced by this version are not compatible with the ElasticSearch +version. 
There are no prebuilt indexes available. You need to create your +own export from a Nominatim database. See 'Customized Search Data' below. ### Usage @@ -71,7 +87,7 @@ Check the URL `http://localhost:2322/api?q=berlin` to see if photon is running w To enable CORS (cross-site requests), use `-cors-any` to allow any origin or `-cors-origin` with a specific origin as the argument. By default, CORS support is disabled. -Discover more of photon's featurse with its usage `java -jar photon-*.jar -h`. The available options are as follows: +Discover more of photon's features with its usage `java -jar photon-*.jar -h`. The available options are as follows: ``` -h Show help / usage From 00cee43ad6ef4926ee0dd5a5c74bacde4e2db9e5 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 6 May 2024 14:44:47 +0200 Subject: [PATCH 12/14] add shared build script --- buildSrc/shared.gradle | 69 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 buildSrc/shared.gradle diff --git a/buildSrc/shared.gradle b/buildSrc/shared.gradle new file mode 100644 index 000000000..f91beac22 --- /dev/null +++ b/buildSrc/shared.gradle @@ -0,0 +1,69 @@ +group = 'de.komoot.photon' +version = '0.5.0' + +distZip.enabled = false +distTar.enabled = false +shadowDistZip.enabled = false +shadowDistTar.enabled = false + +application { + mainClass = 'de.komoot.photon.App'; +} + +java { + sourceCompatibility = JavaVersion.VERSION_11 + targetCompatibility = JavaVersion.VERSION_11 +} + +repositories { + maven { url "https://www.datanucleus.org/downloads/maven2/" } + mavenCentral() +} + +sourceSets { + main { + java { + srcDir rootProject.file('src/main/java') + } + resources { + srcDir rootProject.file('src/main/resources') + } + } + test { + java { + srcDir rootProject.file('src/test/java/') + } + resources { + srcDir rootProject.file('src/test/resources') + } + } +} + +dependencies { + implementation 'org.apache.logging.log4j:log4j-core:2.23.1' + implementation 
'org.apache.logging.log4j:log4j-api:2.23.1' + implementation 'org.postgresql:postgresql:42.7.2' + implementation 'org.slf4j:slf4j-api:2.0.13' + implementation 'org.apache.logging.log4j:log4j-slf4j2-impl:2.23.1' + implementation 'com.beust:jcommander:1.82' + implementation 'org.apache.commons:commons-lang3:3.14.0' + implementation 'org.springframework:spring-jdbc:5.3.32' + implementation ('org.apache.commons:commons-dbcp2:2.12.0') { + exclude(module: 'commons-logging') + } + implementation 'org.locationtech.jts:jts-core:1.19.0' + implementation 'com.sparkjava:spark-core:2.9.4' + implementation 'net.postgis:postgis-jdbc:2023.1.0' + implementation 'org.json:json:20240303' + + testImplementation(platform("org.junit:junit-bom:5.10.2")) + testImplementation 'com.h2database:h2:2.2.224' + testImplementation 'org.junit.jupiter:junit-jupiter' + testImplementation 'org.mockito:mockito-core:5.11.0' + + testRuntimeOnly 'org.junit.platform:junit-platform-launcher' +} + +tasks.named('test') { + useJUnitPlatform() +} From 2da40231f4366477f03851ab4d16605aeb5c56e7 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 6 May 2024 14:32:05 +0200 Subject: [PATCH 13/14] actions: adapt to new directory structure --- .github/workflows/ci.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1eb5c6b99..bc000526e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -45,7 +45,7 @@ jobs: cache: 'gradle' - name: Compile project - run: ./gradlew assemble --no-daemon + run: ./gradlew app:es_embedded:assemble --no-daemon - uses: actions/checkout@v4 with: @@ -114,8 +114,9 @@ jobs: - name: Import Photon run: | - java -jar build/libs/photon-*.jar -nominatim-import -database nominatim -user runner -password foobar - java -jar build/libs/photon-*.jar -nominatim-update-init-for runner -database nominatim -user runner -password foobar + PHOTON_VERSION=`grep 'version =' buildSrc/shared.gradle | head 
-n 1 | sed "s:.*= '::;s:'.*::"` + java -jar target/photon-${PHOTON_VERSION}.jar -nominatim-import -database nominatim -user runner -password foobar + java -jar target/photon-${PHOTON_VERSION}.jar -nominatim-update-init-for runner -database nominatim -user runner -password foobar - name: Update Nominatim run: | @@ -128,4 +129,5 @@ jobs: - name: Update Photon run: | - java -jar build/libs/photon-*.jar -nominatim-update -database nominatim -user runner -password foobar + PHOTON_VERSION=`grep 'version =' buildSrc/shared.gradle | head -n 1 | sed "s:.*= '::;s:'.*::"` + java -jar target/photon-${PHOTON_VERSION}.jar -nominatim-update -database nominatim -user runner -password foobar From c428004f3a84ef4c1370faaf27728772040fe892 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 8 May 2024 16:29:41 +0200 Subject: [PATCH 14/14] update for indirect dependencies with vulnerabilities --- app/opensearch/build.gradle | 4 ++++ buildSrc/shared.gradle | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/app/opensearch/build.gradle b/app/opensearch/build.gradle index 1f87039e7..bf015fe5f 100644 --- a/app/opensearch/build.gradle +++ b/app/opensearch/build.gradle @@ -21,6 +21,10 @@ dependencies { implementation 'com.fasterxml.jackson.core:jackson-databind:2.17.0' implementation 'org.codelibs.opensearch:opensearch-runner:2.13.0.0' + + // updates for indirect dependencies + implementation 'io.netty:netty-codec:4.1.109.Final' + implementation 'io.netty:netty-codec-http:4.1.109.Final' } tasks.named('jar') { diff --git a/buildSrc/shared.gradle b/buildSrc/shared.gradle index f91beac22..f0b4dd6cf 100644 --- a/buildSrc/shared.gradle +++ b/buildSrc/shared.gradle @@ -62,6 +62,12 @@ dependencies { testImplementation 'org.mockito:mockito-core:5.11.0' testRuntimeOnly 'org.junit.platform:junit-platform-launcher' + + // updates for indirect dependencies + implementation 'org.eclipse.jetty:jetty-server:9.4.54.v20240208' + implementation 'org.eclipse.jetty:jetty-webapp:9.4.54.v20240208' + 
implementation 'org.eclipse.jetty.websocket:websocket-server:9.4.54.v20240208' + implementation 'org.eclipse.jetty.websocket:websocket-servlet:9.4.54.v20240208' } tasks.named('test') {