From e0f730087719bfb2cb83577e7c2cf9aff6ef8e15 Mon Sep 17 00:00:00 2001 From: Jeff Reiffers Date: Wed, 9 Oct 2024 16:47:54 +0200 Subject: [PATCH] fix: sort on all languages for anbefaltTerm, case-insensitive --- .../concept_catalog/elastic/ElasticUpdater.kt | 8 +- .../concept_catalog/model/CurrentConcept.kt | 4 + .../no/fdk/concept_catalog/model/SortField.kt | 12 +- .../service/ConceptSearchService.kt | 120 ++++++++++++++---- .../concept_catalog/service/ConceptService.kt | 1 - .../current-concept-mappings.json | 68 ++++++++++ .../current-concept-settings.json | 11 ++ .../contract/SearchConcepts.kt | 25 +++- .../no/fdk/concept_catalog/utils/TestData.kt | 2 +- 9 files changed, 218 insertions(+), 33 deletions(-) create mode 100644 src/main/resources/elasticsearch/current-concept-mappings.json create mode 100644 src/main/resources/elasticsearch/current-concept-settings.json diff --git a/src/main/kotlin/no/fdk/concept_catalog/elastic/ElasticUpdater.kt b/src/main/kotlin/no/fdk/concept_catalog/elastic/ElasticUpdater.kt index a4c378c9..79a87c72 100644 --- a/src/main/kotlin/no/fdk/concept_catalog/elastic/ElasticUpdater.kt +++ b/src/main/kotlin/no/fdk/concept_catalog/elastic/ElasticUpdater.kt @@ -21,12 +21,18 @@ class ElasticUpdater( fun reindexElastic() = runBlocking { launch { try { + logger.info("deleting all current concepts") currentConceptRepository.deleteAll() } catch (_: Exception) { } conceptRepository.findAll() .forEach { - if (it.shouldBeCurrent(currentConceptRepository.findByIdOrNull(it.originaltBegrep))) currentConceptRepository.save(CurrentConcept(it)) + if (it.shouldBeCurrent(currentConceptRepository.findByIdOrNull(it.originaltBegrep))) { + logger.info("reindexing ${it.id}, ${it.ansvarligVirksomhet.id}") + currentConceptRepository.save(CurrentConcept(it)) + } else { + logger.info("skipping not current ${it.id}, ${it.ansvarligVirksomhet.id}") + } } logger.info("finished reindexing elastic") diff --git 
a/src/main/kotlin/no/fdk/concept_catalog/model/CurrentConcept.kt b/src/main/kotlin/no/fdk/concept_catalog/model/CurrentConcept.kt index cd435849..f68af14c 100644 --- a/src/main/kotlin/no/fdk/concept_catalog/model/CurrentConcept.kt +++ b/src/main/kotlin/no/fdk/concept_catalog/model/CurrentConcept.kt @@ -6,10 +6,14 @@ import org.springframework.data.annotation.Id import org.springframework.data.elasticsearch.annotations.Document import org.springframework.data.elasticsearch.annotations.Field import org.springframework.data.elasticsearch.annotations.FieldType +import org.springframework.data.elasticsearch.annotations.Mapping +import org.springframework.data.elasticsearch.annotations.Setting import java.time.Instant import java.time.LocalDate @Document(indexName = "concepts-current") +@Setting(settingPath = "/elasticsearch/current-concept-settings.json") +@Mapping(mappingPath = "/elasticsearch/current-concept-mappings.json") @JsonInclude(JsonInclude.Include.NON_NULL) data class CurrentConcept( val idOfThisVersion: String, diff --git a/src/main/kotlin/no/fdk/concept_catalog/model/SortField.kt b/src/main/kotlin/no/fdk/concept_catalog/model/SortField.kt index e7f37ad5..faa987e4 100644 --- a/src/main/kotlin/no/fdk/concept_catalog/model/SortField.kt +++ b/src/main/kotlin/no/fdk/concept_catalog/model/SortField.kt @@ -1,13 +1,13 @@ package no.fdk.concept_catalog.model -enum class SortFieldEnum(val label: String) { - SIST_ENDRET("sistEndret"), - ANBEFALT_TERM_NB("anbefaltTerm.nb"), +enum class SortFieldEnum { + SIST_ENDRET, + ANBEFALT_TERM, } -enum class SortDirection(val label: String) { - ASC("ASC"), - DESC("DESC"), +enum class SortDirection { + ASC, + DESC, } class SortField( diff --git a/src/main/kotlin/no/fdk/concept_catalog/service/ConceptSearchService.kt b/src/main/kotlin/no/fdk/concept_catalog/service/ConceptSearchService.kt index 29a75908..3f7492e7 100644 --- a/src/main/kotlin/no/fdk/concept_catalog/service/ConceptSearchService.kt +++ 
b/src/main/kotlin/no/fdk/concept_catalog/service/ConceptSearchService.kt @@ -1,9 +1,11 @@ package no.fdk.concept_catalog.service +import co.elastic.clients.elasticsearch._types.ScriptSortType import co.elastic.clients.elasticsearch._types.SortOrder import co.elastic.clients.elasticsearch._types.query_dsl.Operator import co.elastic.clients.elasticsearch._types.query_dsl.TextQueryType import no.fdk.concept_catalog.model.* +import org.slf4j.LoggerFactory import org.springframework.data.domain.Pageable import org.springframework.data.elasticsearch.client.elc.NativeQuery import org.springframework.data.elasticsearch.client.elc.NativeQueryBuilder @@ -18,6 +20,8 @@ class ConceptSearchService( private val elasticsearchOperations: ElasticsearchOperations ) { + private val logger = LoggerFactory.getLogger(ConceptSearchService::class.java) + fun suggestConcepts(orgNumber: String, published: Boolean?, query: String): SearchHits = elasticsearchOperations.search( suggestionQuery(orgNumber, published, query), @@ -25,12 +29,19 @@ class ConceptSearchService( IndexCoordinates.of("concepts-current") ) - fun searchCurrentConcepts(orgNumber: String, search: SearchOperation): SearchHits = - elasticsearchOperations.search( - search.toElasticQuery(orgNumber), - CurrentConcept::class.java, - IndexCoordinates.of("concepts-current") - ) + fun searchCurrentConcepts(orgNumber: String, search: SearchOperation): SearchHits { + try { + val query = search.toElasticQuery(orgNumber) + return elasticsearchOperations.search( + query, + CurrentConcept::class.java, + IndexCoordinates.of("concepts-current") + ) + } catch (e: Exception) { + logger.error("Failed to search for concepts", e) + throw RuntimeException("Failed to search for concepts", e) + } + } private fun suggestionQuery(orgNumber: String, published: Boolean?, query: String): Query { val builder = NativeQuery.builder() @@ -40,14 +51,89 @@ class ConceptSearchService( } } builder.withQuery { queryBuilder -> - queryBuilder.matchPhrasePrefix { 
matchBuilder -> - matchBuilder.query(query) - .field("anbefaltTerm.navn.nb") + queryBuilder.bool { boolBuilder -> + boolBuilder.should { shouldBuilder1 -> + shouldBuilder1.matchPhrasePrefix { matchBuilder -> + matchBuilder.query(query) + .field("anbefaltTerm.navn.nb") + } + } + boolBuilder.should { shouldBuilder2 -> + shouldBuilder2.bool { nnFieldCheck -> + nnFieldCheck.mustNot { nbPresent -> /* nn is only a fallback: exclude docs whose nb name exists and is non-empty */ + nbPresent.bool { nbPresentBool -> /* a Query.Builder holds one variant, so both conditions are combined in a single bool clause */ + nbPresentBool.filter { existsQuery -> + existsQuery.exists { existsBuilder -> existsBuilder.field("anbefaltTerm.navn.nb") } + } + nbPresentBool.mustNot { emptyQuery -> + emptyQuery.term { termBuilder -> termBuilder.field("anbefaltTerm.navn.nb.keyword").value("") } + } + } + } + nnFieldCheck.should { shouldBuilder -> + shouldBuilder.matchPhrasePrefix { matchBuilder -> + matchBuilder.query(query) + .field("anbefaltTerm.navn.nn") + } + } + } + } + boolBuilder.should { shouldBuilder3 -> + shouldBuilder3.bool { enFieldCheck -> + enFieldCheck.mustNot { nbPresent -> /* en is the last fallback: exclude docs that have a non-empty nb name */ + nbPresent.bool { nbPresentBool -> + nbPresentBool.filter { existsQuery -> + existsQuery.exists { existsBuilder -> existsBuilder.field("anbefaltTerm.navn.nb") } + } + nbPresentBool.mustNot { emptyQuery -> + emptyQuery.term { termBuilder -> termBuilder.field("anbefaltTerm.navn.nb.keyword").value("") } + } + } + } + enFieldCheck.mustNot { nnPresent -> /* likewise exclude docs that have a non-empty nn name */ + nnPresent.bool { nnPresentBool -> + nnPresentBool.filter { existsQuery -> + existsQuery.exists { existsBuilder -> existsBuilder.field("anbefaltTerm.navn.nn") } + } + nnPresentBool.mustNot { emptyQuery -> + emptyQuery.term { termBuilder -> termBuilder.field("anbefaltTerm.navn.nn.keyword").value("") } + } + } + } + enFieldCheck.should { shouldBuilder -> + shouldBuilder.matchPhrasePrefix { matchBuilder -> + matchBuilder.query(query) + .field("anbefaltTerm.navn.en") + } + } + } + } + } + } + builder.withSort { sortBuilder -> + sortBuilder.field { fieldBuilder -> + fieldBuilder.field("anbefaltTerm_sort").order(SortOrder.Desc) } } return builder.build() } + private fun SortField.buildSort(builder: NativeQueryBuilder) { + if (field == SortFieldEnum.ANBEFALT_TERM) { + builder.withSort { sortBuilder -> + sortBuilder.field { fieldBuilder -> + fieldBuilder.field("anbefaltTerm_sort").order(sortDirection()) + } + } + } else { + builder.withSort { sortBuilder -> +
sortBuilder.field { fieldBuilder -> + fieldBuilder.field("endringslogelement.endringstidspunkt").order(sortDirection()) + } + } + } + } + private fun SearchOperation.toElasticQuery(orgNumber: String): Query { val builder = NativeQuery.builder() builder.withFilter { queryBuilder -> @@ -59,13 +145,7 @@ class ConceptSearchService( ) } } - if (sort != null) { - builder.withSort { sortBuilder -> - sortBuilder.field { fieldBuilder -> - fieldBuilder.field(sort.sortField()).order(sort.sortDirection()) - } - } - } + sort?.buildSort(builder) if (!query.isNullOrBlank()) builder.addFieldsQuery(fields, query) builder.withPageable(Pageable.ofSize(pagination.getSize()).withPage(pagination.getPage())) @@ -105,12 +185,6 @@ class ConceptSearchService( else -> SortOrder.Desc } - private fun SortField.sortField(): String = - when (field) { - SortFieldEnum.ANBEFALT_TERM_NB -> "anbefaltTerm.navn.nb.keyword" - else -> "endringslogelement.endringstidspunkt" - } - private fun QueryFields.exactPaths(): List = listOf( if (anbefaltTerm) languagePaths("anbefaltTerm.navn", 30) @@ -153,3 +227,5 @@ class ConceptSearchService( "$basePath.en${if (boost != null) "^$boost" else ""}") } + + diff --git a/src/main/kotlin/no/fdk/concept_catalog/service/ConceptService.kt b/src/main/kotlin/no/fdk/concept_catalog/service/ConceptService.kt index ab243d4e..799e3edf 100644 --- a/src/main/kotlin/no/fdk/concept_catalog/service/ConceptService.kt +++ b/src/main/kotlin/no/fdk/concept_catalog/service/ConceptService.kt @@ -266,7 +266,6 @@ class ConceptService( fun searchConcepts(orgNumber: String, search: SearchOperation): Paginated { val hits = conceptSearchService.searchCurrentConcepts(orgNumber, search) - return hits.map { it.content } .map { it.toDBO() } .map { it.withHighestVersionDTO() } diff --git a/src/main/resources/elasticsearch/current-concept-mappings.json b/src/main/resources/elasticsearch/current-concept-mappings.json new file mode 100644 index 00000000..c648b70d --- /dev/null +++ 
b/src/main/resources/elasticsearch/current-concept-mappings.json @@ -0,0 +1,68 @@ +{ + "runtime": { + "anbefaltTerm_sort": { + "type": "keyword", + "script": { + "source": "if (doc.containsKey('anbefaltTerm.navn.nb.keyword_lower') && doc['anbefaltTerm.navn.nb.keyword_lower'].size() > 0 && doc['anbefaltTerm.navn.nb.keyword_lower'].value != null && !doc['anbefaltTerm.navn.nb.keyword_lower'].value.isEmpty()) {emit(doc['anbefaltTerm.navn.nb.keyword_lower'].value);} else if (doc.containsKey('anbefaltTerm.navn.nn.keyword_lower') && doc['anbefaltTerm.navn.nn.keyword_lower'].size() > 0 && doc['anbefaltTerm.navn.nn.keyword_lower'].value != null && !doc['anbefaltTerm.navn.nn.keyword_lower'].value.isEmpty()) {emit(doc['anbefaltTerm.navn.nn.keyword_lower'].value);} else if (doc.containsKey('anbefaltTerm.navn.en.keyword_lower') && doc['anbefaltTerm.navn.en.keyword_lower'].size() > 0 && doc['anbefaltTerm.navn.en.keyword_lower'].value != null && !doc['anbefaltTerm.navn.en.keyword_lower'].value.isEmpty()) {emit(doc['anbefaltTerm.navn.en.keyword_lower'].value);}" + } + } + }, + "properties": { + "anbefaltTerm": { + "properties": { + "navn": { + "properties": { + "en": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256, + "doc_values": true + }, + "keyword_lower": { + "type": "keyword", + "ignore_above": 256, + "normalizer": "lowercase_normalizer", + "doc_values": true + } + } + }, + "nb": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256, + "doc_values": true + }, + "keyword_lower": { + "type": "keyword", + "ignore_above": 256, + "normalizer": "lowercase_normalizer", + "doc_values": true + } + } + }, + "nn": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256, + "doc_values": true + }, + "keyword_lower": { + "type": "keyword", + "ignore_above": 256, + "normalizer": "lowercase_normalizer", + "doc_values": true + } + } + } + } + } + } + } + }
+} diff --git a/src/main/resources/elasticsearch/current-concept-settings.json b/src/main/resources/elasticsearch/current-concept-settings.json new file mode 100644 index 00000000..13cddf06 --- /dev/null +++ b/src/main/resources/elasticsearch/current-concept-settings.json @@ -0,0 +1,11 @@ +{ + "analysis": { + "normalizer": { + "lowercase_normalizer": { + "type": "custom", + "char_filter": [], + "filter": ["lowercase"] + } + } + } +} diff --git a/src/test/kotlin/no/fdk/concept_catalog/contract/SearchConcepts.kt b/src/test/kotlin/no/fdk/concept_catalog/contract/SearchConcepts.kt index 405e6249..41751251 100644 --- a/src/test/kotlin/no/fdk/concept_catalog/contract/SearchConcepts.kt +++ b/src/test/kotlin/no/fdk/concept_catalog/contract/SearchConcepts.kt @@ -572,11 +572,30 @@ class SearchConcepts : ApiTestContext() { assertEquals(listOf(BEGREP_2, BEGREP_0, BEGREP_1), result.hits) } + @Test + fun `Query returns sorted results ordered by anbefaltTerm ascending`() { + val searchOp = SearchOperation( + query = "", + sort = SortField(field = SortFieldEnum.ANBEFALT_TERM, direction = SortDirection.ASC) + ) + val rsp = authorizedRequest( + "/begreper/search?orgNummer=123456789", + port, mapper.writeValueAsString(searchOp), JwtToken(Access.ORG_WRITE).toString(), + HttpMethod.POST + ) + assertEquals(HttpStatus.OK.value(), rsp["status"]) + + val result: Paginated = mapper.readValue(rsp["body"] as String) + assertEquals(BEGREP_0.id, result.hits[0].id) + assertEquals(BEGREP_1.id, result.hits[1].id) + assertEquals(BEGREP_2.id, result.hits[2].id) + } + @Test fun `Query returns sorted results ordered by anbefaltTerm descending`() { val searchOp = SearchOperation( query = "", - sort = SortField(field = SortFieldEnum.ANBEFALT_TERM_NB, direction = SortDirection.DESC) + sort = SortField(field = SortFieldEnum.ANBEFALT_TERM, direction = SortDirection.DESC) ) val rsp = authorizedRequest( "/begreper/search?orgNummer=123456789", @@ -586,7 +605,9 @@ class SearchConcepts : ApiTestContext() { 
assertEquals(HttpStatus.OK.value(), rsp["status"]) val result: Paginated = mapper.readValue(rsp["body"] as String) - assertEquals(listOf(BEGREP_0, BEGREP_2, BEGREP_1), result.hits) + assertEquals(BEGREP_2.id, result.hits[0].id) + assertEquals(BEGREP_1.id, result.hits[1].id) + assertEquals(BEGREP_0.id, result.hits[2].id) } @Test diff --git a/src/test/kotlin/no/fdk/concept_catalog/utils/TestData.kt b/src/test/kotlin/no/fdk/concept_catalog/utils/TestData.kt index 6a9a480d..19ac5721 100644 --- a/src/test/kotlin/no/fdk/concept_catalog/utils/TestData.kt +++ b/src/test/kotlin/no/fdk/concept_catalog/utils/TestData.kt @@ -160,7 +160,7 @@ val BEGREP_2 = Begrep( gjeldendeRevisjon = null, status = Status.HOERING, statusURI = "http://publications.europa.eu/resource/authority/concept-status/CANDIDATE", - anbefaltTerm = Term(navn = mapOf(Pair("nb", "Begrep 2"))), + anbefaltTerm = Term(navn = mapOf(Pair("nb", ""), Pair("nn", "begrep 2"))), tillattTerm = mapOf(Pair("nb", listOf("Lorem ipsum"))), ansvarligVirksomhet = Virksomhet( id = "123456789"