From d9bc19f4e3e26e2e73a3e92541d936a64233a4dc Mon Sep 17 00:00:00 2001 From: tobiass Date: Thu, 13 Jun 2024 17:06:14 +0200 Subject: [PATCH 1/3] add support for structured queries (opensearch only) --- .../main/java/de/komoot/photon/Server.java | 23 +- .../java/de/komoot/photon/ESBaseTester.java | 2 +- .../main/java/de/komoot/photon/Server.java | 13 +- .../opensearch/AddressQueryBuilder.java | 275 ++++++++++++++++++ .../photon/opensearch/DBPropertyEntry.java | 2 + .../photon/opensearch/IndexMapping.java | 49 +++- .../OpenSearchStructuredSearchHandler.java | 85 ++++++ .../photon/opensearch/SearchQueryBuilder.java | 45 ++- .../java/de/komoot/photon/ESBaseTester.java | 2 +- .../opensearch/StructuredQueryTest.java | 237 +++++++++++++++ src/main/java/de/komoot/photon/App.java | 9 +- .../de/komoot/photon/CommandLineArgs.java | 5 + .../de/komoot/photon/DatabaseProperties.java | 11 + .../StructuredSearchRequestHandler.java | 56 ++++ .../de/komoot/photon/query/PhotonRequest.java | 103 +------ .../photon/query/PhotonRequestBase.java | 103 +++++++ .../photon/query/PhotonRequestFactory.java | 45 ++- .../photon/query/StructuredPhotonRequest.java | 98 +++++++ .../searcher/StructuredSearchHandler.java | 9 + .../photon/query/QueryFilterLayerTest.java | 3 +- .../photon/query/QueryFilterTagValueTest.java | 3 +- .../photon/query/QueryRelevanceTest.java | 13 +- 22 files changed, 1057 insertions(+), 134 deletions(-) create mode 100644 app/opensearch/src/main/java/de/komoot/photon/opensearch/AddressQueryBuilder.java create mode 100644 app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchStructuredSearchHandler.java create mode 100644 app/opensearch/src/test/java/de/komoot/photon/opensearch/StructuredQueryTest.java create mode 100644 src/main/java/de/komoot/photon/StructuredSearchRequestHandler.java create mode 100644 src/main/java/de/komoot/photon/query/PhotonRequestBase.java create mode 100644 src/main/java/de/komoot/photon/query/StructuredPhotonRequest.java create mode 100644 src/main/java/de/komoot/photon/searcher/StructuredSearchHandler.java diff --git a/app/es_embedded/src/main/java/de/komoot/photon/Server.java b/app/es_embedded/src/main/java/de/komoot/photon/Server.java index 2738dcf2..c0e5c335 100644 --- a/app/es_embedded/src/main/java/de/komoot/photon/Server.java +++ b/app/es_embedded/src/main/java/de/komoot/photon/Server.java @@ -3,6 +3,7 @@ import de.komoot.photon.DatabaseProperties; import de.komoot.photon.Importer; import de.komoot.photon.Updater; +import de.komoot.photon.searcher.StructuredSearchHandler; import de.komoot.photon.searcher.ReverseHandler; import de.komoot.photon.searcher.SearchHandler; import de.komoot.photon.elasticsearch.*; @@ -168,12 +169,12 @@ private void setupDirectories(URL directoryName) throws IOException, URISyntaxEx } - public DatabaseProperties recreateIndex(String[] languages, Date importDate) throws IOException { + public DatabaseProperties recreateIndex(String[] languages, Date importDate, boolean supportStructuredQueries) throws IOException { deleteIndex(); loadIndexSettings().createIndex(esClient, PhotonIndex.NAME); - new IndexMapping().addLanguages(languages).putMapping(esClient, PhotonIndex.NAME, PhotonIndex.TYPE); + createAndPutIndexMapping(languages, supportStructuredQueries); DatabaseProperties dbProperties = new DatabaseProperties() .setLanguages(languages) @@ -183,6 +184,16 @@ public DatabaseProperties recreateIndex(String[] languages, Date importDate) thr return dbProperties; } + private void createAndPutIndexMapping(String[] languages, boolean supportStructuredQueries) + { + if (supportStructuredQueries) { + throw new UnsupportedOperationException("Structured queries are not supported for elasticsearch-based Photon. Consider to use OpenSearch."); + } + + new IndexMapping().addLanguages(languages) + .putMapping(esClient, PhotonIndex.NAME, PhotonIndex.TYPE); + } + public void updateIndexSettings(String synonymFile) throws IOException { // Load the settings from the database to make sure it is at the right // version. If the version is wrong, we should not be messing with the @@ -195,9 +206,7 @@ public void updateIndexSettings(String synonymFile) throws IOException { // Sanity check: legacy databases don't save the languages, so there is no way to update // the mappings consistently. if (dbProperties.getLanguages() != null) { - new IndexMapping() - .addLanguages(dbProperties.getLanguages()) - .putMapping(esClient, PhotonIndex.NAME, PhotonIndex.TYPE); + this.createAndPutIndexMapping(dbProperties.getLanguages(), false); } } @@ -279,6 +288,10 @@ public SearchHandler createSearchHandler(String[] languages, int queryTimeoutSec return new ElasticsearchSearchHandler(esClient, languages, queryTimeoutSec); } + public StructuredSearchHandler createStructuredSearchHandler(String[] languages, int queryTimeoutSec) { + throw new UnsupportedOperationException("Structured queries are not supported for elasticsearch-based Photon. Consider to use OpenSearch."); + } + public ReverseHandler createReverseHandler(int queryTimeoutSec) { return new ElasticsearchReverseHandler(esClient, queryTimeoutSec); } diff --git a/app/es_embedded/src/test/java/de/komoot/photon/ESBaseTester.java b/app/es_embedded/src/test/java/de/komoot/photon/ESBaseTester.java index 655ab534..5b2e5008 100644 --- a/app/es_embedded/src/test/java/de/komoot/photon/ESBaseTester.java +++ b/app/es_embedded/src/test/java/de/komoot/photon/ESBaseTester.java @@ -55,7 +55,7 @@ public void setUpES() throws IOException { public void setUpES(Path test_directory, String... languages) throws IOException { server = new ElasticTestServer(test_directory.toString()); server.start(TEST_CLUSTER_NAME, new String[]{}); - server.recreateIndex(languages, new Date()); + server.recreateIndex(languages, new Date(), false); refresh(); } diff --git a/app/opensearch/src/main/java/de/komoot/photon/Server.java b/app/opensearch/src/main/java/de/komoot/photon/Server.java index 481fc9d0..ab745626 100644 --- a/app/opensearch/src/main/java/de/komoot/photon/Server.java +++ b/app/opensearch/src/main/java/de/komoot/photon/Server.java @@ -5,6 +5,7 @@ import de.komoot.photon.opensearch.*; import de.komoot.photon.searcher.ReverseHandler; import de.komoot.photon.searcher.SearchHandler; +import de.komoot.photon.searcher.StructuredSearchHandler; import org.apache.hc.core5.http.HttpHost; import org.codelibs.opensearch.runner.OpenSearchRunner; import org.opensearch.client.json.jackson.JacksonJsonpMapper; @@ -97,7 +98,7 @@ public void shutdown() { } } - public DatabaseProperties recreateIndex(String[] languages, Date importDate) throws IOException { + public DatabaseProperties recreateIndex(String[] languages, Date importDate, boolean supportStructuredQueries) throws IOException { // delete any existing data if (client.indices().exists(e -> e.index(PhotonIndex.NAME)).value()) { client.indices().delete(d -> d.index(PhotonIndex.NAME)); @@ -105,10 +106,11 @@ public DatabaseProperties recreateIndex(String[] languages, Date importDate) thr (new IndexSettingBuilder()).setShards(5).createIndex(client, PhotonIndex.NAME); - (new IndexMapping()).addLanguages(languages).putMapping(client, PhotonIndex.NAME); + (new IndexMapping(supportStructuredQueries)).addLanguages(languages).putMapping(client, PhotonIndex.NAME); var dbProperties = new DatabaseProperties() .setLanguages(languages) + .setSupportStructuredQueries(supportStructuredQueries) .setImportDate(importDate); saveToDatabase(dbProperties); @@ -122,7 +124,7 @@ public void updateIndexSettings(String synonymFile) throws IOException { (new IndexSettingBuilder()).setSynonymFile(synonymFile).updateIndex(client, PhotonIndex.NAME); if (dbProperties.getLanguages() != null) { - (new IndexMapping()) + (new IndexMapping(dbProperties.getSupportStructuredQueries())) .addLanguages(dbProperties.getLanguages()) .putMapping(client, PhotonIndex.NAME); } @@ -154,6 +156,7 @@ public void loadFromDatabase(DatabaseProperties dbProperties) throws IOException dbProperties.setLanguages(dbEntry.source().languages); dbProperties.setImportDate(dbEntry.source().importDate); + dbProperties.setSupportStructuredQueries(dbEntry.source().supportStructuredQueries); } public Importer createImporter(String[] languages, String[] extraTags) { @@ -170,6 +173,10 @@ public SearchHandler createSearchHandler(String[] languages, int queryTimeoutSec return new OpenSearchSearchHandler(client, languages, queryTimeoutSec); } + public StructuredSearchHandler createStructuredSearchHandler(String[] languages, int queryTimeoutSec) { + return new OpenSearchStructuredSearchHandler(client, languages, queryTimeoutSec); + } + public ReverseHandler createReverseHandler(int queryTimeoutSec) { return new OpenSearchReverseHandler(client, queryTimeoutSec); } diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/AddressQueryBuilder.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/AddressQueryBuilder.java new file mode 100644 index 00000000..cb6a9b13 --- /dev/null +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/AddressQueryBuilder.java @@ -0,0 +1,275 @@ +package de.komoot.photon.opensearch; + +import de.komoot.photon.Constants; +import org.apache.commons.lang3.StringUtils; +import org.opensearch.client.opensearch._types.FieldValue; +import org.opensearch.client.opensearch._types.query_dsl.*; +import org.opensearch.common.unit.Fuzziness; + +import java.util.Objects; + +public class AddressQueryBuilder { + private static final float STATE_BOOST = 0.1f; // state is unreliable - some locations have e.g. "NY", some "New York". + private static final float COUNTY_BOOST = 4.0f; + private static final float CITY_BOOST = 3.0f; + private static final float POSTAL_CODE_BOOST = 7.0f; + private static final float DISTRICT_BOOST = 2.0f; + private static final float STREET_BOOST = 5.0f; // we filter streets in the wrong city / district / ... so we can use a high boost value + private static final float HOUSE_NUMBER_BOOST = 10.0f; + private static final float HOUSE_NUMBER_UNMATCHED_BOOST = 5f; + private static final float FACTOR_FOR_WRONG_LANGUAGE = 0.1f; + private final String[] languages; + private final String language; + + private BoolQuery.Builder query = QueryBuilders.bool(); + + private BoolQuery.Builder cityFilter; + + private boolean lenient; + + public AddressQueryBuilder(boolean lenient, String language, String[] languages) { + this.lenient = lenient; + this.language = language; + this.languages = languages; + } + + public Query getQuery() { + return query.build().toQuery(); + } + + public AddressQueryBuilder addCountryCode(String countryCode) { + if (countryCode == null) return this; + + query.filter(QueryBuilders.term().field(Constants.COUNTRYCODE).value(FieldValue.of(countryCode.toUpperCase())).build().toQuery()); + return this; + } + + public AddressQueryBuilder addState(String state, boolean hasMoreDetails) { + if (state == null) return this; + + var stateQuery = GetNameOrFieldQuery(Constants.STATE, state, STATE_BOOST, "state", hasMoreDetails); + query.should(stateQuery); + return this; + } + + public AddressQueryBuilder addCounty(String county, boolean hasMoreDetails) { + if (county == null) return this; + + AddNameOrFieldQuery(Constants.COUNTY, county, COUNTY_BOOST, "county", hasMoreDetails); + return this; + } + + public AddressQueryBuilder addCity(String city, boolean hasDistrict, boolean hasStreet, boolean hasPostCode) { + if (city == null) return this; + + Query combinedQuery; + Query nameQuery = GetFuzzyNameQueryBuilder(city, "city").boost(CITY_BOOST) + .build() + .toQuery(); + Query fieldQuery = GetFuzzyQueryBuilder(Constants.CITY, city).boost(CITY_BOOST).build().toQuery(); + + if (!hasDistrict) { + var districtNameQuery = GetFuzzyNameQueryBuilder(city, "district").boost(0.95f * CITY_BOOST) + .build() + .toQuery(); + nameQuery = QueryBuilders.bool() + .should(nameQuery) + .should(districtNameQuery) + .minimumShouldMatch("1") + .build() + .toQuery(); + + var districtFieldQuery = GetFuzzyQueryBuilder(Constants.DISTRICT, city).boost(0.95f * CITY_BOOST).build().toQuery(); + fieldQuery = QueryBuilders.bool() + .should(fieldQuery) + .should(districtFieldQuery) + .minimumShouldMatch("1") + .build() + .toQuery(); + } + + if (!hasStreet && !hasDistrict) { + // match only name + if (hasPostCode) { + // post code can implicitly specify a district that has the city in the address field (instead of the name) + combinedQuery = QueryBuilders.bool() + .should(nameQuery) + .should(fieldQuery) + .build() + .toQuery(); + } + else { + combinedQuery = nameQuery; + } + } else { + // match only address field + combinedQuery = fieldQuery; + } + + addToCityFilter(combinedQuery); + query.must(combinedQuery); + + return this; + } + + private void addToCityFilter(Query query) { + if (cityFilter == null) { + cityFilter = QueryBuilders.bool(); + } + + cityFilter.should(query); + } + + public AddressQueryBuilder addPostalCode(String postalCode) { + if (postalCode == null) return this; + + Fuzziness fuzziness = lenient ? Fuzziness.AUTO : Fuzziness.ZERO; + + Query query; + if (StringUtils.containsWhitespace(postalCode)) { + query = QueryBuilders.match() + .field(Constants.POSTCODE) + .query(FieldValue.of(postalCode)) + .fuzziness(fuzziness.asString()) + .boost(POSTAL_CODE_BOOST) + .build() + .toQuery(); + } else { + query = QueryBuilders.fuzzy() + .field(Constants.POSTCODE) + .value(FieldValue.of(postalCode)) + .fuzziness(fuzziness.asString()) + .boost(POSTAL_CODE_BOOST) + .build() + .toQuery(); + } + + addToCityFilter(query); + this.query.must(query); + + return this; + } + + public AddressQueryBuilder addDistrict(String district, boolean hasMoreDetails) { + if (district == null) return this; + + AddNameOrFieldQuery(Constants.DISTRICT, district, DISTRICT_BOOST, "district", hasMoreDetails); + return this; + } + + public AddressQueryBuilder addStreetAndHouseNumber(String street, String houseNumber) { + if (street == null) { + if (houseNumber != null) { + // some hamlets have no street name and only number the buildings + var houseNumberQuery = QueryBuilders.bool() + .mustNot(QueryBuilders.exists().field(Constants.STREET).build().toQuery()) + .must(QueryBuilders.matchPhrase() + .field(Constants.HOUSENUMBER) + .query(houseNumber) + .build() + .toQuery()); + + query.must(houseNumberQuery.build().toQuery()); + } + + return this; + } + + Query streetQuery; + + if (lenient) { + var nameFieldQuery = GetFuzzyNameQueryBuilder(street, "street"); + if (houseNumber == null) { + streetQuery = nameFieldQuery.boost(STREET_BOOST).build().toQuery(); + } else { + streetQuery = QueryBuilders.bool() + .should(GetFuzzyQuery(Constants.STREET, street)) + .should(nameFieldQuery.build().toQuery()) + .minimumShouldMatch("1") + .boost(STREET_BOOST).build().toQuery(); + } + } else { + streetQuery = GetFuzzyQueryBuilder(Constants.STREET, street).boost(STREET_BOOST).build().toQuery(); + } + + if (houseNumber != null) { + var houseNumberMatchQuery = QueryBuilders.bool().must(QueryBuilders.matchPhrase() + .field(Constants.HOUSENUMBER) + .query(houseNumber) + .build() + .toQuery()); + + houseNumberMatchQuery.filter(GetFuzzyQuery(Constants.STREET, street)); + if (cityFilter != null) { + houseNumberMatchQuery.filter(cityFilter.build().toQuery()); + } + + BoolQuery.Builder houseNumberQuery = QueryBuilders.bool() + .should(houseNumberMatchQuery.build().toQuery()) + .should(QueryBuilders.bool().mustNot(QueryBuilders.exists().field(Constants.HOUSENUMBER).build().toQuery()).build().toQuery()) + .boost(HOUSE_NUMBER_BOOST); + + query.must(houseNumberQuery.build().toQuery()); + } + + query.must(streetQuery); + + return this; + } + + private Query GetFuzzyQuery(String name, String value) { + return GetFuzzyQueryBuilder(name, value).build().toQuery(); + } + + private MatchPhraseQuery.Builder GetFuzzyQueryBuilder(String name, String value) { + return QueryBuilders.matchPhrase() + .field(name + "_collector") + .query(value); + } + + private BoolQuery.Builder GetFuzzyNameQueryBuilder(String value, String objectType) { + var or = QueryBuilders.bool(); + for (String lang : languages) { + float boost = lang.equals(language) ? 1.0f : FACTOR_FOR_WRONG_LANGUAGE; + var fieldName = Constants.NAME + '.' + lang + ".raw"; + + or.should(QueryBuilders.matchPhrase() + .field(fieldName) + .query(value) + .boost(boost) + .build() + .toQuery()); + } + + return or.minimumShouldMatch("1") + .filter(QueryBuilders.term() + .field(Constants.OBJECT_TYPE) + .value(FieldValue.of(objectType)) + .build() + .toQuery()); + } + + private static boolean isCityRelatedField(String name) { + return Objects.equals(name, Constants.POSTCODE) || Objects.equals(name, Constants.CITY) || Objects.equals(name, Constants.DISTRICT); + } + + private void AddNameOrFieldQuery(String field, String value, float boost, String objectType, boolean hasMoreDetails) { + var query = GetNameOrFieldQuery(field, value, boost, objectType, hasMoreDetails); + if (isCityRelatedField(field)) { + addToCityFilter(query); + } + + this.query.must(query); + } + + private Query GetNameOrFieldQuery(String field, String value, float boost, String objectType, boolean hasMoreDetails) { + if (hasMoreDetails) { + return GetFuzzyQuery(field, value); + } + + return GetFuzzyNameQueryBuilder(value, objectType) + .boost(boost) + .build() + .toQuery(); + } +} diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/DBPropertyEntry.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/DBPropertyEntry.java index 4eb3e61f..fa96a1d3 100644 --- a/app/opensearch/src/main/java/de/komoot/photon/opensearch/DBPropertyEntry.java +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/DBPropertyEntry.java @@ -8,6 +8,7 @@ public class DBPropertyEntry { public String databaseVersion; public Date importDate; public String[] languages; + public boolean supportStructuredQueries; public DBPropertyEntry() {} @@ -15,5 +16,6 @@ public DBPropertyEntry(DatabaseProperties props) { databaseVersion = DatabaseProperties.DATABASE_VERSION; importDate = props.getImportDate(); languages = props.getLanguages(); + supportStructuredQueries = props.getSupportStructuredQueries(); } } diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexMapping.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexMapping.java index 7be54a96..219089b2 100644 --- a/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexMapping.java +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexMapping.java @@ -3,17 +3,23 @@ import org.opensearch.client.opensearch.OpenSearchClient; import org.opensearch.client.opensearch._types.mapping.DynamicMapping; import org.opensearch.client.opensearch.indices.PutMappingRequest; +import de.komoot.photon.Constants; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import java.util.Objects; public class IndexMapping { private static final String[] ADDRESS_FIELDS = new String[]{"street", "city", "locality", "district", "county", "state", "country", "context"}; private PutMappingRequest.Builder mappings; - public IndexMapping() { + private boolean supportStructuredQueries; + + public IndexMapping(boolean supportStructuredQueries) { + this.supportStructuredQueries = supportStructuredQueries; setupBaseMappings(); } @@ -30,10 +36,14 @@ public IndexMapping addLanguages(String[] languages) { ); for (var field: ADDRESS_FIELDS) { - mappings.properties(String.format("%s.%s", field, lang), - b -> b.text(p -> p - .index(false) - .copyTo("collector.base", "collector." + lang))); + var propertyName = String.format("%s.%s", field, lang); + var collectors = new ArrayList<>(Arrays.asList("collector.base", "collector." + lang)); + if (shouldIndexAddressField(field)) { + collectors.add(getAddressFieldCollector(field)); + } + + mappings.properties(propertyName, + b -> b.text(p -> p.copyTo(collectors))); } mappings.properties("name." + lang, @@ -67,7 +77,7 @@ private void setupBaseMappings() { } mappings.properties("coordinate", b -> b.geoPoint(p -> p)); - mappings.properties("countrycode", b -> b.text(p -> p.index(true))); + mappings.properties("countrycode", b -> b.keyword(p -> p.index(true))); mappings.properties("importance", b -> b.float_(p -> p.index(false))); mappings.properties("housenumber", b -> b.text(p -> p.index(true) @@ -91,12 +101,25 @@ private void setupBaseMappings() { .analyzer("index_raw"))); for (var field : ADDRESS_FIELDS) { + var collectors = new ArrayList<>(Arrays.asList("collector.default", "collector.base")); + + if (shouldIndexAddressField(field)) { + var collectorName = getAddressFieldCollector(field); + mappings.properties(collectorName, + b -> b.text(p -> p.index(true) + .searchAnalyzer("search") + .analyzer("index_raw")) + ); + + collectors.add(collectorName); + } + mappings.properties(field + ".default", b -> b.text(p -> p - .index(false) - .copyTo("collector.default", "collector.base"))); + .index(shouldIndexAddressField(field)) + .copyTo(collectors))); } mappings.properties("postcode", b -> b.text(p -> p - .index(false) + .index(supportStructuredQueries) .copyTo("collector.default", "collector.base"))); mappings.properties("name.default", b -> b.text(p -> p @@ -111,4 +134,12 @@ private void setupBaseMappings() { .copyTo("collector.default", "name.other", "collector.base"))); } } + + private String getAddressFieldCollector(String field) { + return field + "_collector"; + } + + private boolean shouldIndexAddressField(String field) { + return supportStructuredQueries && !Objects.equals(field, "locality") && !Objects.equals(field, "context"); + } } diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchStructuredSearchHandler.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchStructuredSearchHandler.java new file mode 100644 index 00000000..847d5fa4 --- /dev/null +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/OpenSearchStructuredSearchHandler.java @@ -0,0 +1,85 @@ +package de.komoot.photon.opensearch; + +import de.komoot.photon.*; +import de.komoot.photon.searcher.StructuredSearchHandler; +import de.komoot.photon.searcher.PhotonResult; +import de.komoot.photon.query.StructuredPhotonRequest; +import org.opensearch.client.json.jackson.JacksonJsonpGenerator; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch._types.SearchType; +import org.opensearch.client.opensearch._types.query_dsl.Query; +import org.opensearch.client.opensearch.core.SearchResponse; +import org.opensearch.client.opensearch.core.explain.ExplanationDetail; +import org.opensearch.common.util.iterable.Iterables; + +import java.util.ArrayList; +import java.util.List; +import java.io.IOException; + +/** + * Execute a structured forward lookup on an Elasticsearch database. + */ +public class OpenSearchStructuredSearchHandler implements StructuredSearchHandler { + private final OpenSearchClient client; + private final String[] supportedLanguages; + private final String queryTimeout; + + public OpenSearchStructuredSearchHandler(OpenSearchClient client, String[] languages, int queryTimeoutSec) { + this.client = client; + this.supportedLanguages = languages; + queryTimeout = queryTimeoutSec + "s"; + } + + @Override + public List search(StructuredPhotonRequest photonRequest) { + var queryBuilder = buildQuery(photonRequest, false); + + // for the case of deduplication we need a bit more results, #300 + int limit = photonRequest.getLimit(); + int extLimit = limit > 1 ? (int) Math.round(photonRequest.getLimit() * 1.5) : 1; + + var results = sendQuery(queryBuilder.buildQuery(), extLimit); + + if (results.hits().total().value() == 0) { + results = sendQuery(buildQuery(photonRequest, true).buildQuery(), extLimit); + + if (results.hits().total().value() == 0 && photonRequest.hasStreet()) { + var street = photonRequest.getStreet(); + var houseNumber = photonRequest.getHouseNumber(); + photonRequest.setStreet(null); + photonRequest.setHouseNumber(null); + results = sendQuery(buildQuery(photonRequest, true).buildQuery(), extLimit); + photonRequest.setStreet(street); + photonRequest.setHouseNumber(houseNumber); + } + } + + List ret = new ArrayList<>(); + for (var hit : results.hits().hits()) { + ret.add(hit.source().setScore(hit.score())); + } + + return ret; + } + + public SearchQueryBuilder buildQuery(StructuredPhotonRequest photonRequest, boolean lenient) { + return new SearchQueryBuilder(photonRequest, photonRequest.getLanguage(), supportedLanguages, lenient). + withOsmTagFilters(photonRequest.getOsmTagFilters()). + withLayerFilters(photonRequest.getLayerFilters()). + withLocationBias(photonRequest.getLocationForBias(), photonRequest.getScaleForBias(), photonRequest.getZoomForBias()). + withBoundingBox(photonRequest.getBbox()); + } + + private SearchResponse sendQuery(Query query, Integer limit) { + try { + return client.search(s -> s + .index(PhotonIndex.NAME) + .searchType(SearchType.QueryThenFetch) + .query(query) + .size(limit) + .timeout(queryTimeout), OpenSearchResult.class); + } catch (IOException e) { + throw new RuntimeException("IO error during search", e); + } + } +} diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/SearchQueryBuilder.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/SearchQueryBuilder.java index 6c463514..50cef05e 100644 --- a/app/opensearch/src/main/java/de/komoot/photon/opensearch/SearchQueryBuilder.java +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/SearchQueryBuilder.java @@ -1,6 +1,9 @@ package de.komoot.photon.opensearch; import de.komoot.photon.searcher.TagFilter; +import de.komoot.photon.StructuredSearchRequestHandler; +import de.komoot.photon.query.StructuredPhotonRequest; +import de.komoot.photon.Constants; import org.locationtech.jts.geom.Envelope; import org.locationtech.jts.geom.Point; import org.opensearch.client.json.JsonData; @@ -44,10 +47,10 @@ public SearchQueryBuilder(String query, String language, String[] languages, boo query4QueryBuilder.should(shd -> shd.functionScore(fs -> fs .query(q -> q.multiMatch(mm -> { mm.query(query).type(TextQueryType.BestFields).analyzer("search"); - mm.fields(String.format("%s^%f", "collector.default", 1.0f)); + mm.fields(String.format(Locale.ROOT, "%s^%f", "collector.default", 1.0f)); for (String lang : languages) { - mm.fields(String.format("collector.%s^%f", lang, lang.equals(language) ? 1.0f : 0.6f)); + mm.fields(String.format(Locale.ROOT, "collector.%s^%f", lang, lang.equals(language) ? 1.0f : 0.6f)); } return mm.boost(0.3f); @@ -71,7 +74,7 @@ public SearchQueryBuilder(String query, String language, String[] languages, boo } for (String lang : languages) { - q.fields(String.format("name.%s.ngrams^%f", lang, lang.equals(defLang) ? 1.0f : 0.4f)); + q.fields(String.format(Locale.ROOT, "name.%s.ngrams^%f", lang, lang.equals(defLang) ? 1.0f : 0.4f)); } q.fields("name.other^0.4"); @@ -142,6 +145,37 @@ public SearchQueryBuilder(String query, String language, String[] languages, boo .field(String.format("name.%s.raw", language)))); } + public SearchQueryBuilder(StructuredPhotonRequest request, String language, String[] languages, boolean lenient) + { + var query4QueryBuilder = new AddressQueryBuilder(lenient, language, languages) + .addCountryCode(request.getCountryCode()) + .addState(request.getState(), request.hasCounty() || request.hasCityOrPostCode() || request.hasDistrict() || request.hasStreet()) + .addCounty(request.getCounty(), request.hasCityOrPostCode() || request.hasDistrict() || request.hasStreet()) + .addCity(request.getCity(), request.hasDistrict(), request.hasStreet(), request.hasPostCode()) + .addPostalCode(request.getPostCode()) + .addDistrict(request.getDistrict(), request.hasStreet()) + .addStreetAndHouseNumber(request.getStreet(), request.getHouseNumber()) + .getQuery(); + + finalQueryWithoutTagFilterBuilder = new Query.Builder().functionScore(fs -> fs + .query(query4QueryBuilder) + .functions(fn1 -> fn1 + .linear(df1 -> df1 + .field("importance") + .placement(p1 -> p1 + .origin(JsonData.of(1.0)) + .scale(JsonData.of(1.0)) + .decay(0.5)))) + .scoreMode(FunctionScoreMode.Sum)); + + if (!request.hasHouseNumber()) + { + queryBuilderForTopLevelFilter = QueryBuilders.bool().mustNot(QueryBuilders.exists().field(Constants.HOUSENUMBER).build().toQuery()); + } + + osmTagFilter = new OsmTagFilter(); + } + public SearchQueryBuilder withLocationBias(Point point, double scale, int zoom) { if (point == null || zoom < 4) return this; @@ -205,7 +239,10 @@ public Query buildQuery() { if (finalQuery == null) { finalQuery = BoolQuery.of(q -> { q.must(finalQueryWithoutTagFilterBuilder.build()); - q.filter(queryBuilderForTopLevelFilter.build().toQuery()); + if (queryBuilderForTopLevelFilter != null) { + q.filter(queryBuilderForTopLevelFilter.build().toQuery()); + } + q.filter(f -> f.bool(fb -> fb .mustNot(n -> n.ids(i -> i.values(PhotonIndex.PROPERTY_DOCUMENT_ID))) )); diff --git a/app/opensearch/src/test/java/de/komoot/photon/ESBaseTester.java b/app/opensearch/src/test/java/de/komoot/photon/ESBaseTester.java index d55a6ad3..e8d9eec9 100644 --- a/app/opensearch/src/test/java/de/komoot/photon/ESBaseTester.java +++ b/app/opensearch/src/test/java/de/komoot/photon/ESBaseTester.java @@ -49,7 +49,7 @@ public void setUpES() throws IOException { public void setUpES(Path test_directory, String... languages) throws IOException { server = new OpenSearchTestServer(test_directory.toString()); server.startTestServer(TEST_CLUSTER_NAME); - server.recreateIndex(languages, new Date()); + server.recreateIndex(languages, new Date(), true); server.refreshIndexes(); } diff --git a/app/opensearch/src/test/java/de/komoot/photon/opensearch/StructuredQueryTest.java b/app/opensearch/src/test/java/de/komoot/photon/opensearch/StructuredQueryTest.java new file mode 100644 index 00000000..a5fba35c --- /dev/null +++ b/app/opensearch/src/test/java/de/komoot/photon/opensearch/StructuredQueryTest.java @@ -0,0 +1,237 @@ +package de.komoot.photon.opensearch; + +import de.komoot.photon.query.StructuredPhotonRequest; +import de.komoot.photon.Constants; +import de.komoot.photon.ESBaseTester; +import de.komoot.photon.Importer; +import de.komoot.photon.PhotonDoc; +import de.komoot.photon.nominatim.model.AddressType; +import de.komoot.photon.searcher.PhotonResult; +import de.komoot.photon.searcher.StructuredSearchHandler; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.io.TempDir; + +import java.nio.file.Path; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +public class StructuredQueryTest extends ESBaseTester { + + private static final String CountryCode = "DE"; + private static final String Language = "en"; + private static final String District = "MajorSuburb"; + private static final String HouseNumber = "42"; + private static final String City = "Some City"; + private static final String Hamlet = "Hamlet"; + private static final String Street = "Some street"; + public static final String DistrictPostCode = "12346"; + + @TempDir + private static Path instanceTestDirectory; + + private static int getRank(AddressType type) + { + for(int i = 0; i < 50; ++i) + { + if (type.coversRank(i)){ + return i; + } + } + + return 99; + } + + @BeforeEach + void setUp() throws Exception { + setUpES(instanceTestDirectory, Language, "de", "fr"); + Importer instance = makeImporter(); + + var country = new PhotonDoc(0, "R", 0, "place", "country") + .names(Collections.singletonMap("name", "Germany")) + .countryCode(CountryCode) + .importance(1.0) + .rankAddress(getRank(AddressType.COUNTRY)); + + var city = new PhotonDoc(1, "R", 1, "place", "city") + .names(Collections.singletonMap("name", City)) + .countryCode(CountryCode) + .postcode("12345") + .importance(1.0) + .rankAddress(getRank(AddressType.CITY)); + + Map address = new HashMap<>(); + address.put("city", City); + var suburb = new PhotonDoc(2, "N", 2, "place", "suburb") + .names(Collections.singletonMap("name", District)) + .countryCode(CountryCode) + .postcode(DistrictPostCode) + .address(address) + .importance(1.0) + .rankAddress(getRank(AddressType.DISTRICT)); + + var street = new PhotonDoc(3, "W", 3, "place", "street") + .names(Collections.singletonMap("name", Street)) + .countryCode(CountryCode) + .postcode("12345") + .address(address) + .importance(1.0) + .rankAddress(getRank(AddressType.STREET)); + + address.put("street", Street); + var house = new PhotonDoc(4, "R", 4, "place", "house") + .countryCode(CountryCode) + .postcode("12345") + .address(address) + .houseNumber(HouseNumber) + .importance(1.0) + .rankAddress(getRank(AddressType.HOUSE)); + + instance.add(country, 0); + instance.add(city, 1); + instance.add(suburb, 2); + instance.add(street, 3); + instance.add(house, 4); + addHamletHouse(instance, 5, "1"); + addHamletHouse(instance, 6, "2"); + addHamletHouse(instance, 7, "3"); + instance.finish(); + refresh(); + } + + @Test + void doesFindDistrictByPostcode() + { + var request = new StructuredPhotonRequest(Language); + request.setCountryCode(CountryCode); + request.setCity(City); + request.setPostCode(DistrictPostCode); + + var result = search(request); + Assertions.assertEquals(request.getPostCode(), result.get(Constants.POSTCODE)); + } + + @Test + void doesFindHouseNumberInHamletWithoutStreetName() { + var request = new StructuredPhotonRequest(Language); + request.setDistrict(Hamlet); + request.setHouseNumber("2"); + + StructuredSearchHandler queryHandler = getServer().createStructuredSearchHandler(new String[]{Language}, 1); + var results = queryHandler.search(request); + assertEquals(1, results.size()); + var result = results.get(0); + assertEquals(request.getHouseNumber(), result.get(Constants.HOUSENUMBER)); + } + + @Test + void doesNotReturnHousesForCityRequest() + { + var request = new StructuredPhotonRequest(Language); + request.setCountryCode(CountryCode); + request.setCity(City); + + StructuredSearchHandler queryHandler = getServer().createStructuredSearchHandler(new String[]{Language}, 1); + var results = queryHandler.search(request); + + for(var result : results) + { + assertNull(result.getLocalised(Constants.STREET, Language)); + assertNull(result.get(Constants.HOUSENUMBER)); + } + } + + @Test + void testWrongStreet() + { + var request = new StructuredPhotonRequest(Language); + request.setCountryCode(CountryCode); + request.setCity(City); + request.setStreet("totally wrong"); + request.setHouseNumber(HouseNumber); + + var result = search(request); + assertNull(result.getLocalised(Constants.STREET, Language)); + Assertions.assertEquals(request.getCity(), result.getLocalised(Constants.NAME, Language)); + } + + @Test + void testDistrictAsCity() + { + var request = new StructuredPhotonRequest(Language); + request.setCountryCode(CountryCode); + request.setCity(District); + var result = search(request); + Assertions.assertEquals(City, result.getLocalised(Constants.CITY, Language)); + Assertions.assertEquals(request.getCity(), result.getLocalised(Constants.NAME, Language)); + } + + @Test + void testWrongHouseNumber() + { + var request = new StructuredPhotonRequest(Language); + request.setCountryCode(CountryCode); + request.setCity(City); + request.setStreet(Street); + request.setHouseNumber("1"); + var result = search(request); + assertNull(result.getLocalised(Constants.HOUSENUMBER, Language)); + Assertions.assertEquals(request.getStreet(), result.getLocalised(Constants.NAME, Language)); + Assertions.assertEquals(request.getCity(), result.getLocalised(Constants.CITY, Language)); + } + + @Test + void testWrongHouseNumberAndWrongStreet() + { + var request = new StructuredPhotonRequest(Language); + request.setCountryCode(CountryCode); + request.setCity(City); + request.setStreet("does not exist"); + request.setHouseNumber("1"); + var result = search(request); + assertNull(result.getLocalised(Constants.HOUSENUMBER, Language)); + assertNull(result.getLocalised(Constants.STREET, Language)); + Assertions.assertEquals(request.getCity(), result.getLocalised(Constants.NAME, Language)); + } + + @Test + void testHouse() + { + var request = new StructuredPhotonRequest(Language); + request.setCountryCode(CountryCode); + request.setCity(City); + request.setStreet(Street); + request.setHouseNumber(HouseNumber); + + var result = search(request); + Assertions.assertEquals(request.getCity(), result.getLocalised(Constants.CITY, Language)); + Assertions.assertEquals(request.getStreet(), result.getLocalised(Constants.STREET, Language)); + Assertions.assertEquals(request.getHouseNumber(), result.get(Constants.HOUSENUMBER)); + } + + private PhotonResult search(StructuredPhotonRequest request) { + StructuredSearchHandler queryHandler = getServer().createStructuredSearchHandler(new String[]{Language}, 1); + var results = queryHandler.search(request); + + return results.get(0); + } + + private void addHamletHouse(Importer instance, int id, String houseNumber) { + var hamletAddress = new HashMap(); + hamletAddress.put("city", City); + hamletAddress.put("suburb", Hamlet); + + var doc = new PhotonDoc(id, "R", id, "place", "house") + .countryCode(CountryCode) + .address(hamletAddress) + .houseNumber(houseNumber) + .importance(1.0) + .rankAddress(getRank(AddressType.HOUSE)); + + instance.add(doc, id); + } +} diff --git a/src/main/java/de/komoot/photon/App.java b/src/main/java/de/komoot/photon/App.java index af7815be..5eabf086 100644 --- a/src/main/java/de/komoot/photon/App.java +++ b/src/main/java/de/komoot/photon/App.java @@ -6,6 +6,7 @@ import de.komoot.photon.nominatim.NominatimUpdater; import de.komoot.photon.searcher.ReverseHandler; import de.komoot.photon.searcher.SearchHandler; +import de.komoot.photon.searcher.StructuredSearchHandler; import de.komoot.photon.utils.CorsFilter; import org.slf4j.Logger; import spark.Request; @@ -119,7 +120,7 @@ private static void startNominatimImport(CommandLineArgs args, Server esServer) NominatimConnector nominatimConnector = new NominatimConnector(args.getHost(), args.getPort(), args.getDatabase(), args.getUser(), args.getPassword()); Date importDate = nominatimConnector.getLastImportDate(); try { - dbProperties = esServer.recreateIndex(args.getLanguages(), importDate); // clear out previous data + dbProperties = esServer.recreateIndex(args.getLanguages(), importDate, args.getSupportStructuredQueries()); // clear out previous data } catch (IOException e) { throw new RuntimeException("Cannot setup index, elastic search config files not readable", e); } @@ -193,6 +194,12 @@ private static void startApi(CommandLineArgs args, Server server) throws IOExcep get("api", new SearchRequestHandler("api", searchHandler, langs, args.getDefaultLanguage(), args.getMaxResults())); get("api/", new SearchRequestHandler("api/", searchHandler, langs, args.getDefaultLanguage(), args.getMaxResults())); + if (dbProperties.getSupportStructuredQueries()) { + StructuredSearchHandler structured = server.createStructuredSearchHandler(langs, args.getQueryTimeout()); + get("structured", new StructuredSearchRequestHandler("structured", structured, langs, args.getDefaultLanguage(), args.getMaxResults())); + get("structured/", new StructuredSearchRequestHandler("structured/", structured, langs, args.getDefaultLanguage(), args.getMaxResults())); + } + ReverseHandler reverseHandler = server.createReverseHandler(args.getQueryTimeout()); get("reverse", new ReverseSearchRequestHandler("reverse", reverseHandler, dbProperties.getLanguages(), args.getDefaultLanguage(), args.getMaxReverseResults())); diff --git a/src/main/java/de/komoot/photon/CommandLineArgs.java b/src/main/java/de/komoot/photon/CommandLineArgs.java index 3a05d46a..726c6fb9 100644 --- a/src/main/java/de/komoot/photon/CommandLineArgs.java +++ b/src/main/java/de/komoot/photon/CommandLineArgs.java @@ -10,6 +10,9 @@ */ public class CommandLineArgs { + @Parameter(names = "-structured", description = "Enable support for structured queries") + private boolean supportStructuredQueries = false; + @Parameter(names = "-cluster", description = "Name of ElasticSearch cluster to put the server into") private String cluster = "photon"; @@ -191,6 +194,8 @@ public boolean isEnableUpdateApi() { public boolean isUsage() { return this.usage; } + + public boolean getSupportStructuredQueries() { return supportStructuredQueries; } public int getMaxReverseResults() { return maxReverseResults; diff --git a/src/main/java/de/komoot/photon/DatabaseProperties.java b/src/main/java/de/komoot/photon/DatabaseProperties.java index 73fb31af..5ebfd4ee 100644 --- a/src/main/java/de/komoot/photon/DatabaseProperties.java +++ b/src/main/java/de/komoot/photon/DatabaseProperties.java @@ -26,6 +26,8 @@ public class DatabaseProperties { */ private Date importDate; + private boolean supportStructuredQueries; + /** * Return the list of languages for which the database is configured. * @return @@ -89,4 +91,13 @@ public DatabaseProperties setImportDate(Date importDate) { this.importDate = importDate; return this; } + + public boolean getSupportStructuredQueries() { + return supportStructuredQueries; + } + + public DatabaseProperties setSupportStructuredQueries(boolean supportStructuredQueries) { + this.supportStructuredQueries = supportStructuredQueries; + return this; + } } diff --git a/src/main/java/de/komoot/photon/StructuredSearchRequestHandler.java b/src/main/java/de/komoot/photon/StructuredSearchRequestHandler.java new file mode 100644 index 00000000..805c2556 --- /dev/null +++ b/src/main/java/de/komoot/photon/StructuredSearchRequestHandler.java @@ -0,0 +1,56 @@ +package de.komoot.photon; + +import de.komoot.photon.query.BadRequestException; +import de.komoot.photon.query.PhotonRequestFactory; +import de.komoot.photon.query.StructuredPhotonRequest; +import de.komoot.photon.searcher.*; +import org.json.JSONObject; +import spark.Request; +import spark.Response; +import spark.RouteImpl; + +import java.util.Arrays; +import java.util.List; + +import static spark.Spark.halt; + +public class StructuredSearchRequestHandler extends RouteImpl { + private final PhotonRequestFactory photonRequestFactory; + private final StructuredSearchHandler requestHandler; + + StructuredSearchRequestHandler(String path, StructuredSearchHandler dbHandler, String[] languages, String defaultLanguage, int maxResults) { + super(path); + List supportedLanguages = Arrays.asList(languages); + this.photonRequestFactory = new PhotonRequestFactory(supportedLanguages, defaultLanguage, maxResults); + this.requestHandler = dbHandler; + } + + @Override + public String handle(Request request, Response response) { + StructuredPhotonRequest photonRequest = null; + try { + photonRequest = photonRequestFactory.createStructured(request); + } catch (BadRequestException e) { + JSONObject json = new JSONObject(); + json.put("message", e.getMessage()); + throw halt(e.getHttpStatus(), json.toString()); + } + + List results = requestHandler.search(photonRequest); + + // Further filtering + results = new StreetDupesRemover(photonRequest.getLanguage()).execute(results); + + // Restrict to the requested limit. + if (results.size() > photonRequest.getLimit()) { + results = results.subList(0, photonRequest.getLimit()); + } + + String debugInfo = null; + /* if (photonRequest.getDebug()) { + debugInfo = requestHandler.dumpQuery(photonRequest); + } + */ + return new GeocodeJsonFormatter(photonRequest.getDebug(), photonRequest.getLanguage()).convert(results, debugInfo); + } +} \ No newline at end of file diff --git a/src/main/java/de/komoot/photon/query/PhotonRequest.java b/src/main/java/de/komoot/photon/query/PhotonRequest.java index 2150218e..310640a6 100644 --- a/src/main/java/de/komoot/photon/query/PhotonRequest.java +++ b/src/main/java/de/komoot/photon/query/PhotonRequest.java @@ -1,116 +1,17 @@ package de.komoot.photon.query; -import org.locationtech.jts.geom.Envelope; -import org.locationtech.jts.geom.Point; -import de.komoot.photon.searcher.TagFilter; - -import java.util.*; - /** * Collection of query parameters for a search request. */ -public class PhotonRequest { +public class PhotonRequest extends PhotonRequestBase { private final String query; - private final String language; - private int limit = 15; - private Point locationForBias = null; - private double scale = 0.2; - private int zoom = 14; - private Envelope bbox = null; - private boolean debug = false; - - private final List osmTagFilters = new ArrayList<>(1); - private Set layerFilters = new HashSet<>(1); - public PhotonRequest(String query, String language) { + super(language); this.query = query; - this.language = language; } public String getQuery() { return query; } - - public int getLimit() { - return limit; - } - - public Envelope getBbox() { - return bbox; - } - - public Point getLocationForBias() { - return locationForBias; - } - - public double getScaleForBias() { - return scale; - } - - public int getZoomForBias() { - return zoom; - } - - public String getLanguage() { - return language; - } - - public boolean getDebug() { return debug; } - - public List getOsmTagFilters() { - return osmTagFilters; - } - - public Set getLayerFilters() { - return layerFilters; - } - - PhotonRequest addOsmTagFilter(TagFilter filter) { - osmTagFilters.add(filter); - return this; - } - - PhotonRequest setLayerFilter(Set filters) { - layerFilters = filters; - return this; - } - - PhotonRequest setLimit(Integer limit) { - this.limit = limit; - return this; - } - - PhotonRequest setLocationForBias(Point locationForBias) { - if (locationForBias != null) { - this.locationForBias = locationForBias; - } - return this; - } - - PhotonRequest setScale(Double scale) { - if (scale != null) { - this.scale = Double.max(Double.min(scale, 1.0), 0.0); - } - return this; - } - - PhotonRequest setZoom(Integer zoom) { - if (zoom != null) { - this.zoom = Integer.max(Integer.min(zoom, 18), 0); - } - return this; - } - - PhotonRequest setBbox(Envelope bbox) { - if (bbox != null) { - this.bbox = bbox; - } - return this; - } - - PhotonRequest enableDebug() { - this.debug = true; - return this; - } } diff --git a/src/main/java/de/komoot/photon/query/PhotonRequestBase.java b/src/main/java/de/komoot/photon/query/PhotonRequestBase.java new file mode 100644 index 00000000..720686e4 --- /dev/null +++ b/src/main/java/de/komoot/photon/query/PhotonRequestBase.java @@ -0,0 +1,103 @@ +package de.komoot.photon.query; + +import org.locationtech.jts.geom.Envelope; +import org.locationtech.jts.geom.Point; +import de.komoot.photon.searcher.TagFilter; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public class PhotonRequestBase +{ + private final String language; + private int limit = 15; + private Point locationForBias = null; + private double scale = 0.2; + private int zoom = 14; + private Envelope bbox = null; + private boolean debug = false; + + private final List osmTagFilters = new ArrayList<>(1); + private Set layerFilters = new HashSet<>(1); + + protected PhotonRequestBase(String language) + { + this.language = language; + } + + public int getLimit() { + return limit; + } + + public Envelope getBbox() { + return bbox; + } + + public Point getLocationForBias() { + return locationForBias; + } + + public double getScaleForBias() { + return scale; + } + + public int getZoomForBias() { + return zoom; + } + + public String getLanguage() { + return language; + } + + public boolean getDebug() { return debug; } + + public List getOsmTagFilters() { + return osmTagFilters; + } + + public Set getLayerFilters() { + return layerFilters; + } + + void addOsmTagFilter(TagFilter filter) { + osmTagFilters.add(filter); + } + + void setLayerFilter(Set filters) { + layerFilters = filters; + } + + void setLimit(Integer limit) { + this.limit = limit; + } + + void setLocationForBias(Point locationForBias) { + if (locationForBias != null) { + this.locationForBias = locationForBias; + } + } + + void setScale(Double scale) { + if (scale != null) { + this.scale = Double.max(Double.min(scale, 1.0), 0.0); + } + } + + void setZoom(Integer zoom) { + if (zoom != null) { + this.zoom = Integer.max(Integer.min(zoom, 18), 0); + } + } + + void setBbox(Envelope bbox) { + if (bbox != null) { + this.bbox = bbox; + } + } + + void enableDebug() { + this.debug = true; + } +} diff --git a/src/main/java/de/komoot/photon/query/PhotonRequestFactory.java b/src/main/java/de/komoot/photon/query/PhotonRequestFactory.java index 147db043..081244d9 100644 --- a/src/main/java/de/komoot/photon/query/PhotonRequestFactory.java +++ b/src/main/java/de/komoot/photon/query/PhotonRequestFactory.java @@ -21,6 +21,13 @@ public class PhotonRequestFactory { private static final HashSet REQUEST_QUERY_PARAMS = new HashSet<>(Arrays.asList("lang", "q", "lon", "lat", "limit", "osm_tag", "location_bias_scale", "bbox", "debug", "zoom", "layer")); + private static final HashSet STRUCTURED_ADDRESS_FIELDS = new HashSet<>(Arrays.asList("countrycode", "state", "county", "city", + "postcode", "district", "housenumber", "street")); + private static final HashSet STRUCTURED_REQUEST_QUERY_PARAMS = new HashSet<>(Arrays.asList("lang", "limit", + "lon", "lat", "osm_tag", "location_bias_scale", "bbox", "debug", "zoom", "layer", + "countrycode", "state", "county", "city", "postcode", "district", "housenumber", "street")); + + public PhotonRequestFactory(List supportedLanguages, String defaultLanguage, int maxResults) { this.languageResolver = new RequestLanguageResolver(supportedLanguages, defaultLanguage); this.bboxParamConverter = new BoundingBoxParamConverter(); @@ -28,6 +35,35 @@ public PhotonRequestFactory(List supportedLanguages, String defaultLangu this.maxResults = maxResults; } + public StructuredPhotonRequest createStructured(Request webRequest) throws BadRequestException { + boolean hasAddressQueryParam = false; + for (String queryParam : webRequest.queryParams()) { + if (!STRUCTURED_REQUEST_QUERY_PARAMS.contains(queryParam)) + throw new BadRequestException(400, "unknown query parameter '" + queryParam + "'. Allowed parameters are: " + STRUCTURED_REQUEST_QUERY_PARAMS); + + if (STRUCTURED_ADDRESS_FIELDS.contains(queryParam)) { + hasAddressQueryParam = true; + } + } + + if (!hasAddressQueryParam) + throw new BadRequestException(400, "at least one of the parameters " + STRUCTURED_ADDRESS_FIELDS + " is required."); + + StructuredPhotonRequest result = new StructuredPhotonRequest(languageResolver.resolveRequestedLanguage(webRequest)); + result.setCountryCode(webRequest.queryParams("countrycode")); + result.setState(webRequest.queryParams("state")); + result.setCounty(webRequest.queryParams("county")); + result.setCity(webRequest.queryParams("city")); + result.setPostCode(webRequest.queryParams("postcode")); + result.setDistrict(webRequest.queryParams("district")); + result.setStreet(webRequest.queryParams("street")); + result.setHouseNumber(webRequest.queryParams("housenumber")); + + addCommonParameters(webRequest, result); + + return result; + } + public PhotonRequest create(Request webRequest) throws BadRequestException { for (String queryParam : webRequest.queryParams()) if (!REQUEST_QUERY_PARAMS.contains(queryParam)) @@ -40,11 +76,16 @@ public PhotonRequest create(Request webRequest) throws BadRequestException { PhotonRequest request = new PhotonRequest(query, languageResolver.resolveRequestedLanguage(webRequest)); + addCommonParameters(webRequest, request); + + return request; + } + + private void addCommonParameters(Request webRequest, PhotonRequestBase request) throws BadRequestException { Integer limit = parseInt(webRequest, "limit"); if (limit != null) { request.setLimit(Integer.max(Integer.min(limit, maxResults), 1)); } - request.setLocationForBias(optionalLocationParamConverter.apply(webRequest)); request.setBbox(bboxParamConverter.apply(webRequest)); request.setScale(parseDouble(webRequest, "location_bias_scale")); @@ -69,8 +110,6 @@ public PhotonRequest create(Request webRequest) throws BadRequestException { if (layerFiltersQueryMap.hasValue()) { request.setLayerFilter(layerParamValidator.validate(layerFiltersQueryMap.values())); } - - return request; } private Integer parseInt(Request webRequest, String param) throws BadRequestException { diff --git a/src/main/java/de/komoot/photon/query/StructuredPhotonRequest.java b/src/main/java/de/komoot/photon/query/StructuredPhotonRequest.java new file mode 100644 index 00000000..01a8e67e --- /dev/null +++ b/src/main/java/de/komoot/photon/query/StructuredPhotonRequest.java @@ -0,0 +1,98 @@ +package de.komoot.photon.query; + +import org.apache.commons.lang3.StringUtils; + +public class StructuredPhotonRequest extends PhotonRequestBase { + private String countryCode; + private String state; + private String county; + private String city; + private String postCode; + private String district; + private String street; + private String houseNumber; + + public StructuredPhotonRequest(String language) { + super(language); + } + + public boolean getDebug() { + return false; + } + + public String getCounty() { + return county; + } + + public void setCounty(String county) { + this.county = county; + } + + public String getCity() { + return city; + } + + public void setCity(String city) { + this.city = city; + } + + public String getPostCode() { + return postCode; + } + + public void setPostCode(String postCode) { + this.postCode = postCode; + } + + public String getDistrict() { + return district; + } + + public void setDistrict(String district) { + this.district = district; + } + + public String getStreet() { + return street; + } + + public void setStreet(String street) { + this.street = street; + } + + public String getHouseNumber() { + return houseNumber; + } + + public void setHouseNumber(String houseNumber) { + this.houseNumber = houseNumber; + } + + public String getCountryCode() { + return countryCode; + } + + public void setCountryCode(String countryCode) { + this.countryCode = countryCode; + } + + public String getState() { + return state; + } + + public void setState(String state) { + this.state = state; + } + + public boolean hasDistrict() { return !StringUtils.isEmpty(this.district); } + + public boolean hasPostCode() { return !StringUtils.isEmpty(this.postCode); } + + public boolean hasCityOrPostCode() { return !StringUtils.isEmpty(this.city) || hasPostCode(); } + + public boolean hasCounty() { return !StringUtils.isEmpty(this.county); } + + public boolean hasStreet() { return !StringUtils.isEmpty(this.street) || hasHouseNumber(); } + + public boolean hasHouseNumber() { return !StringUtils.isEmpty(this.houseNumber); } +} diff --git a/src/main/java/de/komoot/photon/searcher/StructuredSearchHandler.java b/src/main/java/de/komoot/photon/searcher/StructuredSearchHandler.java new file mode 100644 index 00000000..e0365afc --- /dev/null +++ b/src/main/java/de/komoot/photon/searcher/StructuredSearchHandler.java @@ -0,0 +1,9 @@ +package de.komoot.photon.searcher; + +import de.komoot.photon.query.StructuredPhotonRequest; + +import java.util.List; + +public interface StructuredSearchHandler { + List search(StructuredPhotonRequest photonRequest); +} diff --git a/src/test/java/de/komoot/photon/query/QueryFilterLayerTest.java b/src/test/java/de/komoot/photon/query/QueryFilterLayerTest.java index f9badee7..bff0a337 100644 --- a/src/test/java/de/komoot/photon/query/QueryFilterLayerTest.java +++ b/src/test/java/de/komoot/photon/query/QueryFilterLayerTest.java @@ -55,7 +55,8 @@ void testMultipleLayers() { } private List searchWithLayers(String... layers) { - PhotonRequest request = new PhotonRequest("berlin", "en").setLimit(50); + PhotonRequest request = new PhotonRequest("berlin", "en"); + request.setLimit(50); request.setLayerFilter(Arrays.stream(layers).collect(Collectors.toSet())); return getServer().createSearchHandler(new String[]{"en"}, 1).search(request); diff --git a/src/test/java/de/komoot/photon/query/QueryFilterTagValueTest.java b/src/test/java/de/komoot/photon/query/QueryFilterTagValueTest.java index eb95f9ff..ea3859d4 100644 --- a/src/test/java/de/komoot/photon/query/QueryFilterTagValueTest.java +++ b/src/test/java/de/komoot/photon/query/QueryFilterTagValueTest.java @@ -63,7 +63,8 @@ public void tearDown() throws IOException { } private List searchWithTags(String[] params) { - PhotonRequest request = new PhotonRequest("berlin", "en").setLimit(50); + PhotonRequest request = new PhotonRequest("berlin", "en"); + request.setLimit(50); for (String param : params) { request.addOsmTagFilter(TagFilter.buildOsmTagFilter(param)); } diff --git a/src/test/java/de/komoot/photon/query/QueryRelevanceTest.java b/src/test/java/de/komoot/photon/query/QueryRelevanceTest.java index 5225806d..8621a9c2 100644 --- a/src/test/java/de/komoot/photon/query/QueryRelevanceTest.java +++ b/src/test/java/de/komoot/photon/query/QueryRelevanceTest.java @@ -99,8 +99,7 @@ void testLocationPreferenceForEqualImportance(String placeName) throws IOExcepti instance.finish(); refresh(); - List results = search(new PhotonRequest("ham", "en") - .setLocationForBias(FACTORY.createPoint(new Coordinate(-9.9, -10)))); + List results = search(createBiasedRequest()); assertEquals(2, results.size()); assertEquals(1001, results.get(0).get("osm_id")); @@ -118,10 +117,16 @@ void testLocationPreferenceForHigherImportance() throws IOException { instance.finish(); refresh(); - List results = search(new PhotonRequest("ham", "en") - .setLocationForBias(FACTORY.createPoint(new Coordinate(-9.9, -10)))); + List results = search(createBiasedRequest()); assertEquals(2, results.size()); assertEquals(1000, results.get(0).get("osm_id")); } + + private PhotonRequest createBiasedRequest() + { + PhotonRequest result = new PhotonRequest("ham", "en"); + result.setLocationForBias(FACTORY.createPoint(new Coordinate(-9.9, -10))); + return result; + } } From 6e48f883df609f3a7aeafe438b4d620c957a9783 Mon Sep 17 00:00:00 2001 From: tobiass Date: Mon, 24 Jun 2024 17:31:26 +0200 Subject: [PATCH 2/3] review comments / bug fixes --- .../opensearch/AddressQueryBuilder.java | 80 +++++++++++------- .../photon/opensearch/IndexMapping.java | 6 +- .../photon/opensearch/SearchQueryBuilder.java | 8 +- .../opensearch/StructuredQueryTest.java | 82 +++++++++++++------ .../photon/query/StructuredPhotonRequest.java | 2 + 5 files changed, 119 insertions(+), 59 deletions(-) diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/AddressQueryBuilder.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/AddressQueryBuilder.java index cb6a9b13..6b57996b 100644 --- a/app/opensearch/src/main/java/de/komoot/photon/opensearch/AddressQueryBuilder.java +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/AddressQueryBuilder.java @@ -16,7 +16,6 @@ public class AddressQueryBuilder { private static final float DISTRICT_BOOST = 2.0f; private static final float STREET_BOOST = 5.0f; // we filter streets in the wrong city / district / ... so we can use a high boost value private static final float HOUSE_NUMBER_BOOST = 10.0f; - private static final float HOUSE_NUMBER_UNMATCHED_BOOST = 5f; private static final float FACTOR_FOR_WRONG_LANGUAGE = 0.1f; private final String[] languages; private final String language; @@ -37,17 +36,26 @@ public Query getQuery() { return query.build().toQuery(); } - public AddressQueryBuilder addCountryCode(String countryCode) { + public AddressQueryBuilder addCountryCode(String countryCode, boolean hasMoreDetails) { if (countryCode == null) return this; query.filter(QueryBuilders.term().field(Constants.COUNTRYCODE).value(FieldValue.of(countryCode.toUpperCase())).build().toQuery()); + if(!hasMoreDetails) + { + query.filter(QueryBuilders.term() + .field(Constants.OBJECT_TYPE) + .value(FieldValue.of("country")) + .build() + .toQuery()); + } + return this; } public AddressQueryBuilder addState(String state, boolean hasMoreDetails) { if (state == null) return this; - var stateQuery = GetNameOrFieldQuery(Constants.STATE, state, STATE_BOOST, "state", hasMoreDetails); + var stateQuery = getNameOrFieldQuery(Constants.STATE, state, STATE_BOOST, "state", hasMoreDetails); query.should(stateQuery); return this; } @@ -55,7 +63,7 @@ public AddressQueryBuilder addState(String state, boolean hasMoreDetails) { public AddressQueryBuilder addCounty(String county, boolean hasMoreDetails) { if (county == null) return this; - AddNameOrFieldQuery(Constants.COUNTY, county, COUNTY_BOOST, "county", hasMoreDetails); + addNameOrFieldQuery(Constants.COUNTY, county, COUNTY_BOOST, "county", hasMoreDetails); return this; } @@ -63,13 +71,13 @@ public AddressQueryBuilder addCity(String city, boolean hasDistrict, boolean has if (city == null) return this; Query combinedQuery; - Query nameQuery = GetFuzzyNameQueryBuilder(city, "city").boost(CITY_BOOST) + Query nameQuery = getFuzzyNameQueryBuilder(city, "city").boost(CITY_BOOST) .build() .toQuery(); - Query fieldQuery = GetFuzzyQueryBuilder(Constants.CITY, city).boost(CITY_BOOST).build().toQuery(); + Query fieldQuery = getFuzzyQuery(Constants.CITY, city, CITY_BOOST); if (!hasDistrict) { - var districtNameQuery = GetFuzzyNameQueryBuilder(city, "district").boost(0.95f * CITY_BOOST) + var districtNameQuery = getFuzzyNameQueryBuilder(city, "district").boost(0.95f * CITY_BOOST) .build() .toQuery(); nameQuery = QueryBuilders.bool() @@ -79,7 +87,7 @@ public AddressQueryBuilder addCity(String city, boolean hasDistrict, boolean has .build() .toQuery(); - var districtFieldQuery = GetFuzzyQueryBuilder(Constants.DISTRICT, city).boost(0.95f * CITY_BOOST).build().toQuery(); + var districtFieldQuery = getFuzzyQuery(Constants.DISTRICT, city, 0.95f * CITY_BOOST); fieldQuery = QueryBuilders.bool() .should(fieldQuery) .should(districtFieldQuery) @@ -153,7 +161,7 @@ public AddressQueryBuilder addPostalCode(String postalCode) { public AddressQueryBuilder addDistrict(String district, boolean hasMoreDetails) { if (district == null) return this; - AddNameOrFieldQuery(Constants.DISTRICT, district, DISTRICT_BOOST, "district", hasMoreDetails); + addNameOrFieldQuery(Constants.DISTRICT, district, DISTRICT_BOOST, "district", hasMoreDetails); return this; } @@ -178,18 +186,18 @@ public AddressQueryBuilder addStreetAndHouseNumber(String street, String houseNu Query streetQuery; if (lenient) { - var nameFieldQuery = GetFuzzyNameQueryBuilder(street, "street"); + var nameFieldQuery = getFuzzyNameQueryBuilder(street, "street"); if (houseNumber == null) { streetQuery = nameFieldQuery.boost(STREET_BOOST).build().toQuery(); } else { streetQuery = QueryBuilders.bool() - .should(GetFuzzyQuery(Constants.STREET, street)) + .should(getFuzzyQuery(Constants.STREET, street)) .should(nameFieldQuery.build().toQuery()) .minimumShouldMatch("1") .boost(STREET_BOOST).build().toQuery(); } } else { - streetQuery = GetFuzzyQueryBuilder(Constants.STREET, street).boost(STREET_BOOST).build().toQuery(); + streetQuery = getFuzzyQuery(Constants.STREET, street, STREET_BOOST); } if (houseNumber != null) { @@ -199,7 +207,7 @@ public AddressQueryBuilder addStreetAndHouseNumber(String street, String houseNu .build() .toQuery()); - houseNumberMatchQuery.filter(GetFuzzyQuery(Constants.STREET, street)); + houseNumberMatchQuery.filter(getFuzzyQuery(Constants.STREET, street)); if (cityFilter != null) { houseNumberMatchQuery.filter(cityFilter.build().toQuery()); } @@ -217,28 +225,40 @@ public AddressQueryBuilder addStreetAndHouseNumber(String street, String houseNu return this; } - private Query GetFuzzyQuery(String name, String value) { - return GetFuzzyQueryBuilder(name, value).build().toQuery(); + private Query getFuzzyQuery(String name, String value) { + return getFuzzyQuery(name, value, 1.0f); } - private MatchPhraseQuery.Builder GetFuzzyQueryBuilder(String name, String value) { + private Query getFuzzyQuery(String name, String value, float boost) { + return getFuzzyQueryCore(name + "_collector", value, boost); + } + + private Query getFuzzyQueryCore(String field, String value, float boost) { + if (lenient) { + return QueryBuilders.match() + .field(field) + .query(FieldValue.of(value)) + .fuzziness(Fuzziness.AUTO.asString()) + .boost(boost) + .build() + .toQuery(); + } + return QueryBuilders.matchPhrase() - .field(name + "_collector") - .query(value); + .field(field) + .query(value) + .boost(boost) + .build() + .toQuery(); } - private BoolQuery.Builder GetFuzzyNameQueryBuilder(String value, String objectType) { + private BoolQuery.Builder getFuzzyNameQueryBuilder(String value, String objectType) { var or = QueryBuilders.bool(); for (String lang : languages) { float boost = lang.equals(language) ? 1.0f : FACTOR_FOR_WRONG_LANGUAGE; var fieldName = Constants.NAME + '.' + lang + ".raw"; - or.should(QueryBuilders.matchPhrase() - .field(fieldName) - .query(value) - .boost(boost) - .build() - .toQuery()); + or.should(getFuzzyQueryCore(fieldName, value, boost)); } return or.minimumShouldMatch("1") @@ -253,8 +273,8 @@ private static boolean isCityRelatedField(String name) { return Objects.equals(name, Constants.POSTCODE) || Objects.equals(name, Constants.CITY) || Objects.equals(name, Constants.DISTRICT); } - private void AddNameOrFieldQuery(String field, String value, float boost, String objectType, boolean hasMoreDetails) { - var query = GetNameOrFieldQuery(field, value, boost, objectType, hasMoreDetails); + private void addNameOrFieldQuery(String field, String value, float boost, String objectType, boolean hasMoreDetails) { + var query = getNameOrFieldQuery(field, value, boost, objectType, hasMoreDetails); if (isCityRelatedField(field)) { addToCityFilter(query); } @@ -262,12 +282,12 @@ private void AddNameOrFieldQuery(String field, String value, float boost, String this.query.must(query); } - private Query GetNameOrFieldQuery(String field, String value, float boost, String objectType, boolean hasMoreDetails) { + private Query getNameOrFieldQuery(String field, String value, float boost, String objectType, boolean hasMoreDetails) { if (hasMoreDetails) { - return GetFuzzyQuery(field, value); + return getFuzzyQuery(field, value); } - return GetFuzzyNameQueryBuilder(value, objectType) + return getFuzzyNameQueryBuilder(value, objectType) .boost(boost) .build() .toQuery(); diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexMapping.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexMapping.java index 219089b2..2c81d76a 100644 --- a/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexMapping.java +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/IndexMapping.java @@ -43,7 +43,9 @@ public IndexMapping addLanguages(String[] languages) { } mappings.properties(propertyName, - b -> b.text(p -> p.copyTo(collectors))); + b -> b.text(p -> p + .index(false) + .copyTo(collectors))); } mappings.properties("name." + lang, @@ -115,7 +117,7 @@ private void setupBaseMappings() { } mappings.properties(field + ".default", b -> b.text(p -> p - .index(shouldIndexAddressField(field)) + .index(false) .copyTo(collectors))); } mappings.properties("postcode", b -> b.text(p -> p diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/SearchQueryBuilder.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/SearchQueryBuilder.java index 50cef05e..4eeb1d88 100644 --- a/app/opensearch/src/main/java/de/komoot/photon/opensearch/SearchQueryBuilder.java +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/SearchQueryBuilder.java @@ -147,9 +147,10 @@ public SearchQueryBuilder(String query, String language, String[] languages, boo public SearchQueryBuilder(StructuredPhotonRequest request, String language, String[] languages, boolean lenient) { + var hasSubStateField = request.hasCounty() || request.hasCityOrPostCode() || request.hasDistrict() || request.hasStreet(); var query4QueryBuilder = new AddressQueryBuilder(lenient, language, languages) - .addCountryCode(request.getCountryCode()) - .addState(request.getState(), request.hasCounty() || request.hasCityOrPostCode() || request.hasDistrict() || request.hasStreet()) + .addCountryCode(request.getCountryCode(), request.hasState() || hasSubStateField) + .addState(request.getState(), hasSubStateField) .addCounty(request.getCounty(), request.hasCityOrPostCode() || request.hasDistrict() || request.hasStreet()) .addCity(request.getCity(), request.hasDistrict(), request.hasStreet(), request.hasPostCode()) .addPostalCode(request.getPostCode()) @@ -170,7 +171,8 @@ public SearchQueryBuilder(StructuredPhotonRequest request, String language, Stri if (!request.hasHouseNumber()) { - queryBuilderForTopLevelFilter = QueryBuilders.bool().mustNot(QueryBuilders.exists().field(Constants.HOUSENUMBER).build().toQuery()); + queryBuilderForTopLevelFilter = QueryBuilders.bool().mustNot(QueryBuilders.exists().field(Constants.HOUSENUMBER).build().toQuery()) + .mustNot(QueryBuilders.term().field(Constants.OBJECT_TYPE).value(FieldValue.of("house")).build().toQuery()); } osmTagFilter = new OsmTagFilter(); diff --git a/app/opensearch/src/test/java/de/komoot/photon/opensearch/StructuredQueryTest.java b/app/opensearch/src/test/java/de/komoot/photon/opensearch/StructuredQueryTest.java index a5fba35c..99b7b6a7 100644 --- a/app/opensearch/src/test/java/de/komoot/photon/opensearch/StructuredQueryTest.java +++ b/app/opensearch/src/test/java/de/komoot/photon/opensearch/StructuredQueryTest.java @@ -16,8 +16,7 @@ import java.util.HashMap; import java.util.Map; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.*; @TestInstance(TestInstance.Lifecycle.PER_CLASS) public class StructuredQueryTest extends ESBaseTester { @@ -34,11 +33,9 @@ public class StructuredQueryTest extends ESBaseTester { @TempDir private static Path instanceTestDirectory; - private static int getRank(AddressType type) - { - for(int i = 0; i < 50; ++i) - { - if (type.coversRank(i)){ + private static int getRank(AddressType type) { + for (int i = 0; i < 50; ++i) { + if (type.coversRank(i)) { return i; } } @@ -91,6 +88,15 @@ void setUp() throws Exception { .importance(1.0) .rankAddress(getRank(AddressType.HOUSE)); + var busStop = new PhotonDoc(8, "N", 8, "highway", "house") + .names(Collections.singletonMap("name", City + ' ' + Street)) + .countryCode(CountryCode) + .postcode("12345") + .address(address) + .houseNumber(HouseNumber) + .importance(1.0) + .rankAddress(getRank(AddressType.HOUSE)); + instance.add(country, 0); instance.add(city, 1); instance.add(suburb, 2); @@ -99,13 +105,23 @@ void setUp() throws Exception { addHamletHouse(instance, 5, "1"); addHamletHouse(instance, 6, "2"); addHamletHouse(instance, 7, "3"); + instance.add(busStop, 8); instance.finish(); refresh(); } @Test - void doesFindDistrictByPostcode() - { + void findsDistrictFuzzy() { + var request = new StructuredPhotonRequest(Language); + request.setCountryCode(CountryCode); + request.setDistrict(District + District.charAt(District.length() - 1)); + + var result = search(request); + Assertions.assertEquals(2, result.get(Constants.OSM_ID)); + } + + @Test + void findsDistrictByPostcode() { var request = new StructuredPhotonRequest(Language); request.setCountryCode(CountryCode); request.setCity(City); @@ -116,7 +132,7 @@ void doesFindDistrictByPostcode() } @Test - void doesFindHouseNumberInHamletWithoutStreetName() { + void findsHouseNumberInHamletWithoutStreetName() { var request = new StructuredPhotonRequest(Language); request.setDistrict(Hamlet); request.setHouseNumber("2"); @@ -129,8 +145,32 @@ void doesFindHouseNumberInHamletWithoutStreetName() { } @Test - void doesNotReturnHousesForCityRequest() - { + void doesNotReturnBusStops() { + var request = new StructuredPhotonRequest(Language); + request.setCountryCode(CountryCode); + request.setCity(City); + request.setStreet(Street); + StructuredSearchHandler queryHandler = getServer().createStructuredSearchHandler(new String[]{Language}, 1); + var results = queryHandler.search(request); + for (var result : results) + { + assertNotEquals(5, result.get(Constants.OSM_ID)); + } + } + + @Test + void returnsOnlyCountryForCountryRequests() { + var request = new StructuredPhotonRequest(Language); + request.setCountryCode(CountryCode); + StructuredSearchHandler queryHandler = getServer().createStructuredSearchHandler(new String[]{Language}, 1); + var results = queryHandler.search(request); + assertEquals(1, results.size()); + var result = results.get(0); + assertEquals(0, result.get(Constants.OSM_ID)); + } + + @Test + void doesNotReturnHousesForCityRequest() { var request = new StructuredPhotonRequest(Language); request.setCountryCode(CountryCode); request.setCity(City); @@ -138,16 +178,14 @@ void doesNotReturnHousesForCityRequest() StructuredSearchHandler queryHandler = getServer().createStructuredSearchHandler(new String[]{Language}, 1); var results = queryHandler.search(request); - for(var result : results) - { + for (var result : results) { assertNull(result.getLocalised(Constants.STREET, Language)); assertNull(result.get(Constants.HOUSENUMBER)); } } @Test - void testWrongStreet() - { + void testWrongStreet() { var request = new StructuredPhotonRequest(Language); request.setCountryCode(CountryCode); request.setCity(City); @@ -160,8 +198,7 @@ void testWrongStreet() } @Test - void testDistrictAsCity() - { + void testDistrictAsCity() { var request = new StructuredPhotonRequest(Language); request.setCountryCode(CountryCode); request.setCity(District); @@ -171,8 +208,7 @@ void testDistrictAsCity() } @Test - void testWrongHouseNumber() - { + void testWrongHouseNumber() { var request = new StructuredPhotonRequest(Language); request.setCountryCode(CountryCode); request.setCity(City); @@ -185,8 +221,7 @@ void testWrongHouseNumber() } @Test - void testWrongHouseNumberAndWrongStreet() - { + void testWrongHouseNumberAndWrongStreet() { var request = new StructuredPhotonRequest(Language); request.setCountryCode(CountryCode); request.setCity(City); @@ -199,8 +234,7 @@ void testWrongHouseNumberAndWrongStreet() } @Test - void testHouse() - { + void testHouse() { var request = new StructuredPhotonRequest(Language); request.setCountryCode(CountryCode); request.setCity(City); diff --git a/src/main/java/de/komoot/photon/query/StructuredPhotonRequest.java b/src/main/java/de/komoot/photon/query/StructuredPhotonRequest.java index 01a8e67e..49bbd63e 100644 --- a/src/main/java/de/komoot/photon/query/StructuredPhotonRequest.java +++ b/src/main/java/de/komoot/photon/query/StructuredPhotonRequest.java @@ -84,6 +84,8 @@ public void setState(String state) { this.state = state; } + public boolean hasState() { return !StringUtils.isEmpty(this.state); } + public boolean hasDistrict() { return !StringUtils.isEmpty(this.district); } public boolean hasPostCode() { return !StringUtils.isEmpty(this.postCode); } From bc230d2909ac7dd3476de55f743b53ddfbfe559a Mon Sep 17 00:00:00 2001 From: tobiass Date: Fri, 5 Jul 2024 12:59:55 +0200 Subject: [PATCH 3/3] stricter filter for houses without house number and objects that are not address-like. --- .../photon/opensearch/SearchQueryBuilder.java | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/app/opensearch/src/main/java/de/komoot/photon/opensearch/SearchQueryBuilder.java b/app/opensearch/src/main/java/de/komoot/photon/opensearch/SearchQueryBuilder.java index 4eeb1d88..cae44c8f 100644 --- a/app/opensearch/src/main/java/de/komoot/photon/opensearch/SearchQueryBuilder.java +++ b/app/opensearch/src/main/java/de/komoot/photon/opensearch/SearchQueryBuilder.java @@ -169,10 +169,22 @@ public SearchQueryBuilder(StructuredPhotonRequest request, String language, Stri .decay(0.5)))) .scoreMode(FunctionScoreMode.Sum)); + var hasHouseNumberQuery = QueryBuilders.exists().field(Constants.HOUSENUMBER).build().toQuery(); + var isHouseQuery = QueryBuilders.term().field(Constants.OBJECT_TYPE).value(FieldValue.of("house")).build().toQuery(); + var typeOtherQuery = QueryBuilders.term().field(Constants.OBJECT_TYPE).value(FieldValue.of("other")).build().toQuery(); if (!request.hasHouseNumber()) { - queryBuilderForTopLevelFilter = QueryBuilders.bool().mustNot(QueryBuilders.exists().field(Constants.HOUSENUMBER).build().toQuery()) - .mustNot(QueryBuilders.term().field(Constants.OBJECT_TYPE).value(FieldValue.of("house")).build().toQuery()); + queryBuilderForTopLevelFilter = QueryBuilders.bool().mustNot(hasHouseNumberQuery) + .mustNot(isHouseQuery) + .mustNot(typeOtherQuery); + } + else { + var noHouseOrHouseNumber = QueryBuilders.bool().should(hasHouseNumberQuery) + .should(QueryBuilders.bool().mustNot(isHouseQuery).build().toQuery()) + .build() + .toQuery(); + queryBuilderForTopLevelFilter = QueryBuilders.bool().must(noHouseOrHouseNumber) + .mustNot(typeOtherQuery); } osmTagFilter = new OsmTagFilter();