From c7f9d90ae18b84f9ce4608ed28e0a63ab914ae44 Mon Sep 17 00:00:00 2001 From: erik-a-e <65284304+erik-a-e@users.noreply.github.com> Date: Mon, 4 Dec 2023 12:34:37 +0100 Subject: [PATCH] backend: Refactor Nom related code --- .../java/no/nav/data/team/org/OrgService.java | 10 ++ .../no/nav/data/team/resource/NomClient.java | 125 +++++++++++++++-- .../nav/data/team/resource/ResourceState.java | 129 ------------------ .../nav/data/team/resource/NomClientTest.java | 3 +- 4 files changed, 128 insertions(+), 139 deletions(-) create mode 100644 apps/backend/src/main/java/no/nav/data/team/org/OrgService.java delete mode 100644 apps/backend/src/main/java/no/nav/data/team/resource/ResourceState.java diff --git a/apps/backend/src/main/java/no/nav/data/team/org/OrgService.java b/apps/backend/src/main/java/no/nav/data/team/org/OrgService.java new file mode 100644 index 000000000..fd27535bf --- /dev/null +++ b/apps/backend/src/main/java/no/nav/data/team/org/OrgService.java @@ -0,0 +1,10 @@ +package no.nav.data.team.org; + +import org.springframework.stereotype.Service; + +@Service +public class OrgService { + + + +} diff --git a/apps/backend/src/main/java/no/nav/data/team/resource/NomClient.java b/apps/backend/src/main/java/no/nav/data/team/resource/NomClient.java index 92f90307d..8c480dfdc 100644 --- a/apps/backend/src/main/java/no/nav/data/team/resource/NomClient.java +++ b/apps/backend/src/main/java/no/nav/data/team/resource/NomClient.java @@ -18,10 +18,20 @@ import no.nav.data.team.settings.SettingsService; import no.nav.data.team.settings.dto.Settings; import org.apache.commons.codec.language.DoubleMetaphone; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.LowerCaseFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; +import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; +import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; +import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter; +import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.TextField; -import org.apache.lucene.index.Term; +import org.apache.lucene.index.*; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; @@ -31,24 +41,21 @@ import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.ByteBuffersDirectory; +import org.apache.lucene.store.Directory; import org.jetbrains.annotations.NotNull; import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Service; import java.io.IOException; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Random; +import java.util.*; import java.util.stream.Collectors; import java.util.stream.Stream; import static java.util.Comparator.comparing; import static java.util.stream.Collectors.groupingBy; import static no.nav.data.common.utils.StreamUtils.convert; -import static no.nav.data.team.resource.ResourceState.*; +import static no.nav.data.team.resource.NomClient.ResourceState.*; import static org.apache.lucene.queryparser.classic.QueryParserBase.escape; @Slf4j @@ -337,4 +344,106 @@ record ResourceStatus(boolean shouldSave, Resource previous) { } + private static class ResourceState { + + static final String FIELD_IDENT = "ident"; + static final String FIELD_NAME_VERBATIM = "name_verbatim"; + static final String FIELD_NAME_NGRAMS = "name_ngrams"; + static final String FIELD_NAME_PHONETIC = "name_phonetic"; + + private static final Map allResources = new HashMap<>(1 << 15); + private static final Map allResourcesByMail = new HashMap<>(1 << 15); + private static Directory index = new ByteBuffersDirectory(); + private static final PerFieldAnalyzerWrapper analyzer; + + static { + var analyzerPerField = new HashMap(); + analyzerPerField.put(FIELD_NAME_NGRAMS, createNGramAnalyzer()); + analyzerPerField.put(FIELD_NAME_PHONETIC, createMetaphoneAnalyzer()); + analyzer = new PerFieldAnalyzerWrapper(createSimpleIgnoreCaseAnalyzer(), analyzerPerField); + } + + static Optional get(String ident) { + return Optional.ofNullable(allResources.get(ident.toUpperCase())); + } + + static List findAll(List idents) { + return allResources.values().stream().filter(r -> idents.contains(r.getNavIdent())).toList(); + } + + static Optional getByEmail(String email) { + return Optional.ofNullable(allResourcesByMail.get(email.toLowerCase())); + } + + static void put(Resource resource) { + allResources.put(resource.getNavIdent().toUpperCase(), resource); + if (resource.getEmail() != null) { + allResourcesByMail.put(resource.getEmail().toLowerCase(), resource); + } + } + + static int count() { + return allResources.size(); + } + + static void clear() { + index = new ByteBuffersDirectory(); + allResources.clear(); + allResourcesByMail.clear(); + } + + @SneakyThrows + static IndexReader createReader() { + return DirectoryReader.open(index); + } + + @SneakyThrows + static IndexWriter createWriter() { + IndexWriterConfig writerConfig = new IndexWriterConfig(getAnalyzer()); + return new IndexWriter(index, writerConfig); + } + + static Analyzer getAnalyzer() { + return analyzer; + } + + @SneakyThrows + private static Analyzer createNGramAnalyzer(){ + return new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer source = new StandardTokenizer(); + TokenStream result = new LowerCaseFilter(source); + result = new EdgeNGramTokenFilter(result ,3,40,false); + return new TokenStreamComponents(source, result); + } + }; + } + + @SneakyThrows + private static Analyzer createMetaphoneAnalyzer(){ + return new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer source = new StandardTokenizer(); + TokenStream result = new LowerCaseFilter(source); + result = new DoubleMetaphoneFilter(result ,10,false); + return new TokenStreamComponents(source, result); + } + }; + } + + @SneakyThrows + private static Analyzer createSimpleIgnoreCaseAnalyzer(){ + return new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer source = new WhitespaceTokenizer(); + TokenStream result = new LowerCaseFilter(source); + result = new ASCIIFoldingFilter(result); + return new TokenStreamComponents(source, result); + } + }; + } + } } diff --git a/apps/backend/src/main/java/no/nav/data/team/resource/ResourceState.java b/apps/backend/src/main/java/no/nav/data/team/resource/ResourceState.java deleted file mode 100644 index d965867be..000000000 --- a/apps/backend/src/main/java/no/nav/data/team/resource/ResourceState.java +++ /dev/null @@ -1,129 +0,0 @@ -package no.nav.data.team.resource; - -import lombok.SneakyThrows; -import no.nav.data.team.resource.domain.Resource; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.LowerCaseFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.core.WhitespaceTokenizer; -import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; -import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; -import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; -import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter; -import org.apache.lucene.analysis.standard.StandardTokenizer; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.store.ByteBuffersDirectory; -import org.apache.lucene.store.Directory; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.stream.Stream; - -class ResourceState { - - static final String FIELD_IDENT = "ident"; - static final String FIELD_NAME_VERBATIM = "name_verbatim"; - static final String FIELD_NAME_NGRAMS = "name_ngrams"; - static final String FIELD_NAME_PHONETIC = "name_phonetic"; - - private static final Map allResources = new HashMap<>(1 << 15); - private static final Map allResourcesByMail = new HashMap<>(1 << 15); - private static Directory index = new ByteBuffersDirectory(); - private static final PerFieldAnalyzerWrapper analyzer; - - static { - var analyzerPerField = new HashMap(); - analyzerPerField.put(FIELD_NAME_NGRAMS, createNGramAnalyzer()); - analyzerPerField.put(FIELD_NAME_PHONETIC, createMetaphoneAnalyzer()); - analyzer = new PerFieldAnalyzerWrapper(createSimpleIgnoreCaseAnalyzer(), analyzerPerField); - } - - static Optional get(String ident) { - return Optional.ofNullable(allResources.get(ident.toUpperCase())); - } - - static List findAll(List idents) { - return allResources.values().stream().filter(r -> idents.contains(r.getNavIdent())).toList(); - } - - static Optional getByEmail(String email) { - return Optional.ofNullable(allResourcesByMail.get(email.toLowerCase())); - } - - static void put(Resource resource) { - allResources.put(resource.getNavIdent().toUpperCase(), resource); - if (resource.getEmail() != null) { - allResourcesByMail.put(resource.getEmail().toLowerCase(), resource); - } - } - - static int count() { - return allResources.size(); - } - - static void clear() { - index = new ByteBuffersDirectory(); - allResources.clear(); - allResourcesByMail.clear(); - } - - @SneakyThrows - static IndexReader createReader() { - return DirectoryReader.open(index); - } - - @SneakyThrows - static IndexWriter createWriter() { - IndexWriterConfig writerConfig = new IndexWriterConfig(getAnalyzer()); - return new IndexWriter(index, writerConfig); - } - - static Analyzer getAnalyzer() { - return analyzer; - } - - @SneakyThrows - private static Analyzer createNGramAnalyzer(){ - return new Analyzer() { - @Override - protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer source = new StandardTokenizer(); - TokenStream result = new LowerCaseFilter(source); - result = new EdgeNGramTokenFilter(result ,3,40,false); - return new TokenStreamComponents(source, result); - } - }; - } - - @SneakyThrows - private static Analyzer createMetaphoneAnalyzer(){ - return new Analyzer() { - @Override - protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer source = new StandardTokenizer(); - TokenStream result = new LowerCaseFilter(source); - result = new DoubleMetaphoneFilter(result ,10,false); - return new TokenStreamComponents(source, result); - } - }; - } - - @SneakyThrows - private static Analyzer createSimpleIgnoreCaseAnalyzer(){ - return new Analyzer() { - @Override - protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer source = new WhitespaceTokenizer(); - TokenStream result = new LowerCaseFilter(source); - result = new ASCIIFoldingFilter(result); - return new TokenStreamComponents(source, result); - } - }; - } -} diff --git a/apps/backend/src/test/java/no/nav/data/team/resource/NomClientTest.java b/apps/backend/src/test/java/no/nav/data/team/resource/NomClientTest.java index 317cd6c40..854c7b7df 100644 --- a/apps/backend/src/test/java/no/nav/data/team/resource/NomClientTest.java +++ b/apps/backend/src/test/java/no/nav/data/team/resource/NomClientTest.java @@ -15,7 +15,6 @@ import static no.nav.data.team.TestDataHelper.createResource; import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.ArgumentMatchers.anyList; import static org.mockito.Mockito.when; @ExtendWith(MockitoExtension.class) @@ -32,7 +31,7 @@ class NomClientTest { @BeforeEach void setup() { - ResourceState.clear(); + client.clear(); client = new NomClient(storage, settingsService, resourceRepository); }