Merge pull request #4529 from gchq/gh-4526-dict-de-dup

PR for #4526 - stroom:dictionary() is dropping { and } when on a separate line
gchq · Oct 10, 2024 · 3d45bba · 3d45bba
2 parents 399efd0 + 6d17f51
commit 3d45bba
Show file tree

Hide file tree

Showing 9 changed files with 152 additions and 50 deletions.
diff --git a/stroom-core-client/src/main/java/stroom/dictionary/client/presenter/DictionaryPresenter.java b/stroom-core-client/src/main/java/stroom/dictionary/client/presenter/DictionaryPresenter.java
@@ -82,7 +82,8 @@ public DictionaryPresenter(final EventBus eventBus,
         this.restFactory = restFactory;
         this.locationManager = locationManager;
 
-        downloadButton = SvgButton.create(SvgPresets.DOWNLOAD);
+        downloadButton = SvgButton.create(SvgPresets.DOWNLOAD.title("Download Dictionary words"));
+
         toolbar.addButton(downloadButton);
 
         addTab(WORDS, new AbstractTabProvider<DictionaryDoc, EditorPresenter>(eventBus) {

diff --git a/stroom-core-client/src/main/java/stroom/dictionary/client/presenter/WordListPresenter.java b/stroom-core-client/src/main/java/stroom/dictionary/client/presenter/WordListPresenter.java
@@ -145,7 +145,7 @@ private void initTableColumns() {
         dataGrid.addResizableColumn(
                 nodeNameColumn,
                 DataGridUtil.headingBuilder("Source Dictionary")
-                        .withToolTip("The first Dictionary that contains the word.")
+                        .withToolTip("The dictionary that contains the word.")
                         .build(),
                 300);
 

diff --git a/stroom-core-shared/src/main/java/stroom/dictionary/shared/WordList.java b/stroom-core-shared/src/main/java/stroom/dictionary/shared/WordList.java
@@ -160,12 +160,9 @@ public static Builder builder(final boolean deDup) {
 
     public static class Builder {
 
-        private final List<String> wordList = new ArrayList<>();
-        // Needed if we are not de-duping. Essentially the primary source
-        private final List<String> sourceList = new ArrayList<>();
+        private final List<SourcedWord> wordList = new ArrayList<>();
         private final Set<String> wordSet = new HashSet<>();
         private final Map<String, DocRef> sourceUuidToDocRefMap = new HashMap<>();
-        private final Map<String, String> wordToPrimarySourceUuidMap = new HashMap<>();
         private final Map<String, List<String>> wordToSourceUuidsMap = new HashMap<>();
         private final boolean deDup;
 
@@ -179,9 +176,6 @@ public Builder addWord(final String word, final DocRef source) {
                 final String sourceUuid = Objects.requireNonNull(
                         source,
                         "Source DocRef required for word").getUuid();
-//                final Word wordObj = new Word(
-//                        word, Objects.requireNonNull(source, "Source DocRef required for word").getUuid(),
-//                        secondarySourceUuids);
                 boolean doAdd = true;
                 // Assumes that words are added in precedence order, i.e. most important source first
                 if (deDup) {
@@ -190,18 +184,16 @@ public Builder addWord(final String word, final DocRef source) {
                     }
                 }
                 if (doAdd) {
-                    wordList.add(word);
-                    sourceList.add(sourceUuid);
+                    wordList.add(new SourcedWord(word, sourceUuid));
+//                    sourceList.add(sourceUuid);
                     wordSet.add(word);
                 }
 
-                if (deDup) {
-                    // If not de-duping, then each word only has one source so we don't need this list
-                    final List<String> sourceUuids = wordToSourceUuidsMap.computeIfAbsent(
-                            word, k -> new ArrayList<>());
-                    if (!sourceUuids.contains(sourceUuid)) {
-                        sourceUuids.add(sourceUuid);
-                    }
+                // If not de-duping, then each word only has one source so we don't need this list
+                final List<String> sourceUuids = wordToSourceUuidsMap.computeIfAbsent(
+                        word, k -> new ArrayList<>());
+                if (!sourceUuids.contains(sourceUuid)) {
+                    sourceUuids.add(sourceUuid);
                 }
 
                 // Our reference lookup of uuid -> DocRef
@@ -215,18 +207,22 @@ public WordList build() {
                 return EMPTY;
             } else {
                 final List<Word> wordObjList = new ArrayList<>(wordList.size());
-                for (int i = 0; i < wordList.size(); i++) {
-                    final String word = wordList.get(i);
-                    final String sourceUuid = Objects.requireNonNull(sourceList.get(i),
-                            () -> "Null source for word " + word);
+                for (final SourcedWord sourcedWord : wordList) {
+                    final String word = sourcedWord.word;
+                    final String sourceUuid = sourcedWord.sourceUuid;
 
                     List<String> additionalSources = null;
+                    final List<String> allSources = GwtNullSafe.list(wordToSourceUuidsMap.get(word));
                     if (deDup) {
-                        final List<String> allSources = GwtNullSafe.list(wordToSourceUuidsMap.get(word));
                         // First one is the primary source, so ignore it
                         if (allSources.size() > 1) {
                             additionalSources = allSources.subList(1, allSources.size());
                         }
+                    } else {
+                        // Not de-duping so get all sources except ours
+                        additionalSources = allSources.stream()
+                                .filter(uuid -> !Objects.equals(sourceUuid, uuid))
+                                .collect(Collectors.toList());
                     }
                     final Word wordObj = new Word(word, sourceUuid, additionalSources);
                     wordObjList.add(wordObj);
@@ -235,4 +231,37 @@ public WordList build() {
             }
         }
     }
+
+
+    // --------------------------------------------------------------------------------
+
+
+    @SuppressWarnings("ClassCanBeRecord") // Not in GWT
+    private static final class SourcedWord {
+
+        private final String word;
+        private final String sourceUuid;
+
+        private SourcedWord(final String word, final String sourceUuid) {
+            Objects.requireNonNull(word);
+            Objects.requireNonNull(sourceUuid);
+            this.word = word;
+            this.sourceUuid = sourceUuid;
+        }
+
+        public String word() {
+            return word;
+        }
+
+        public String sourceUuid() {
+            return sourceUuid;
+        }
+
+        @Override
+        public String toString() {
+            return "SourcedWord[" +
+                    "word=" + word + ", " +
+                    "sourceUuid=" + sourceUuid + ']';
+        }
+    }
 }
diff --git a/stroom-core-shared/src/test/java/stroom/dictionary/shared/TestWordList.java b/stroom-core-shared/src/test/java/stroom/dictionary/shared/TestWordList.java
@@ -17,6 +17,7 @@
 package stroom.dictionary.shared;
 
 import stroom.docref.DocRef;
+import stroom.util.NullSafe;
 
 import org.junit.jupiter.api.Test;
 
@@ -64,8 +65,7 @@ void testAdd_deDup() {
                         new Word("cat", petsRef.getUuid(), null),
                         new Word("dog", petsRef.getUuid(), null),
                         new Word("hamster", petsRef.getUuid(), null),
-                        new Word("pig", petsRef.getUuid(),
-                                List.of(farmAnimalsRef.getUuid(), wildAnimalsRef.getUuid())),
+                        new Word("pig", petsRef.getUuid(), sources(farmAnimalsRef, wildAnimalsRef)),
                         new Word("cow", farmAnimalsRef.getUuid(), null),
                         new Word("goat", farmAnimalsRef.getUuid(), null)));
         assertThat(wordList.getSortedList())
@@ -75,8 +75,7 @@ void testAdd_deDup() {
                         new Word("dog", petsRef.getUuid(), null),
                         new Word("goat", farmAnimalsRef.getUuid(), null),
                         new Word("hamster", petsRef.getUuid(), null),
-                        new Word("pig", petsRef.getUuid(),
-                                List.of(farmAnimalsRef.getUuid(), wildAnimalsRef.getUuid()))));
+                        new Word("pig", petsRef.getUuid(), sources(farmAnimalsRef, wildAnimalsRef))));
 
         assertThat(wordList.getSource(new Word("cat", petsRef.getUuid(), null)))
                 .hasValue(petsRef);
@@ -113,15 +112,15 @@ void testAdd_deDup() {
     void testAdd_noDeDup() {
 
         final DocRef petsRef = DictionaryDoc.buildDocRef()
-                .uuid("200")
+                .uuid("pets-uuid")
                 .name("Pets")
                 .build();
         final DocRef farmAnimalsRef = DictionaryDoc.buildDocRef()
-                .uuid("100")
+                .uuid("farm-uuid")
                 .name("Farm Animals")
                 .build();
         final DocRef wildAnimalsRef = DictionaryDoc.buildDocRef()
-                .uuid("300")
+                .uuid("wild-uuid")
                 .name("Wild Animals")
                 .build();
 
@@ -147,21 +146,21 @@ void testAdd_noDeDup() {
                         new Word("cat", petsRef.getUuid()),
                         new Word("dog", petsRef.getUuid()),
                         new Word("hamster", petsRef.getUuid()),
-                        new Word("pig", petsRef.getUuid()),
+                        new Word("pig", petsRef.getUuid(), sources(farmAnimalsRef, wildAnimalsRef)),
                         new Word("cow", farmAnimalsRef.getUuid()),
                         new Word("goat", farmAnimalsRef.getUuid()),
-                        new Word("pig", farmAnimalsRef.getUuid()),
-                        new Word("pig", wildAnimalsRef.getUuid())));
+                        new Word("pig", farmAnimalsRef.getUuid(), sources(petsRef, wildAnimalsRef)),
+                        new Word("pig", wildAnimalsRef.getUuid(), sources(petsRef, farmAnimalsRef))));
         assertThat(wordList.getSortedList())
                 .containsExactlyElementsOf(List.of(
                         new Word("cat", petsRef.getUuid()),
                         new Word("cow", farmAnimalsRef.getUuid()),
                         new Word("dog", petsRef.getUuid()),
                         new Word("goat", farmAnimalsRef.getUuid()),
                         new Word("hamster", petsRef.getUuid()),
-                        new Word("pig", farmAnimalsRef.getUuid()),
-                        new Word("pig", petsRef.getUuid()),
-                        new Word("pig", wildAnimalsRef.getUuid())));
+                        new Word("pig", farmAnimalsRef.getUuid(), sources(petsRef, wildAnimalsRef)),
+                        new Word("pig", petsRef.getUuid(), sources(farmAnimalsRef, wildAnimalsRef)),
+                        new Word("pig", wildAnimalsRef.getUuid(), sources(petsRef, farmAnimalsRef))));
 
         assertThat(wordList.getSource(new Word("cat", petsRef.getUuid())))
                 .hasValue(petsRef);
@@ -237,4 +236,10 @@ void testNullsAndBlanks() {
         assertThat(wordList.size())
                 .isEqualTo(1);
     }
+
+    private List<String> sources(final DocRef... docRefs) {
+        return NullSafe.stream(docRefs)
+                .map(DocRef::getUuid)
+                .toList();
+    }
 }
diff --git a/...nary/stroom-dictionary-impl/src/main/java/stroom/dictionary/impl/DictionaryStoreImpl.java b/...nary/stroom-dictionary-impl/src/main/java/stroom/dictionary/impl/DictionaryStoreImpl.java
@@ -64,6 +64,7 @@ class DictionaryStoreImpl implements DictionaryStore, WordListProvider {
             DictionaryDoc.DOCUMENT_TYPE,
             DictionaryDoc.DOCUMENT_TYPE,
             DictionaryDoc.ICON);
+    public static final boolean IS_DE_DUP_DEFAULT = false;
     private final Store<DictionaryDoc> store;
 
     @Inject
@@ -288,17 +289,28 @@ public List<DocRef> list() {
 
     @Override
     public String getCombinedData(final DocRef docRef) {
-        return getCombinedWordList(docRef, null, true).asString();
+        return getCombinedWordList(docRef, null, IS_DE_DUP_DEFAULT).asString();
     }
 
-    public WordList getCombinedWordList(final DocRef docRef,
+    @Override
+    public String[] getWords(final DocRef dictionaryRef) {
+        return getCombinedWordList(dictionaryRef, null, IS_DE_DUP_DEFAULT).asWordArray();
+    }
+
+    @Override
+    public WordList getCombinedWordList(final DocRef dictionaryRef,
+                                        final DocRefDecorator docRefDecorator) {
+        return getCombinedWordList(dictionaryRef, docRefDecorator, IS_DE_DUP_DEFAULT);
+    }
+
+    public WordList getCombinedWordList(final DocRef dictionaryRef,
                                         final DocRefDecorator docRefDecorator,
                                         final boolean deDup) {
         final Builder builder = WordList.builder(deDup);
         final Set<DocRef> visited = new HashSet<>();
         final Stack<DocRef> visitPath = new Stack<>();
-        doGetCombinedWordList(docRefDecorator, builder, docRef, visited, visitPath);
 
+        doGetCombinedWordList(docRefDecorator, builder, dictionaryRef, visited, visitPath);
 
         final WordList wordList = builder.build();
 
@@ -308,17 +320,6 @@ public WordList getCombinedWordList(final DocRef docRef,
         return wordList;
     }
 
-    @Override
-    public String[] getWords(final DocRef dictionaryRef) {
-        return getCombinedWordList(dictionaryRef, null, true).asWordArray();
-    }
-
-    @Override
-    public WordList getCombinedWordList(final DocRef dictionaryRef,
-                                        final DocRefDecorator docRefDecorator) {
-        return getCombinedWordList(dictionaryRef, docRefDecorator, true);
-    }
-
     private void doGetCombinedWordList(final DocRefDecorator docRefDecorator,
                                        final WordList.Builder wordListBuilder,
                                        final DocRef docRef,

diff --git a/stroom-docref/src/main/java/stroom/docref/DocRef.java b/stroom-docref/src/main/java/stroom/docref/DocRef.java
@@ -167,6 +167,7 @@ public String toInfoString() {
             sb.append(name);
         }
         if (uuid != null) {
+            //noinspection SizeReplaceableByIsEmpty // Not in GWT
             if (sb.length() > 0) {
                 sb.append(" ");
             }
@@ -175,6 +176,7 @@ public String toInfoString() {
             sb.append("}");
         }
 
+        //noinspection SizeReplaceableByIsEmpty // Not in GWT
         if (sb.length() > 0) {
             return sb.toString();
         }
@@ -190,6 +192,7 @@ public boolean equals(final Object o) {
         if (!(o instanceof DocRef)) {
             return false;
         }
+        //noinspection PatternVariableCanBeUsed // Not in GWT
         final DocRef docRef = (DocRef) o;
         return Objects.equals(type, docRef.type) &&
                 Objects.equals(uuid, docRef.uuid);

diff --git a/stroom-util/src/main/java/stroom/util/string/StringUtil.java b/stroom-util/src/main/java/stroom/util/string/StringUtil.java
@@ -16,6 +16,7 @@
 
 package stroom.util.string;
 
+import stroom.util.NullSafe;
 import stroom.util.logging.LogUtil;
 
 import com.google.common.base.Preconditions;
@@ -72,6 +73,18 @@ public static Stream<String> splitToLines(final String text,
         }
     }
 
+    /**
+     * Trims all lines and removes any blank lines after trimming
+     */
+    public static String trimLines(final String text) {
+        if (NullSafe.isEmptyString(text)) {
+            return text;
+        } else {
+            return splitToLines(text, true)
+                    .collect(Collectors.joining("\n"));
+        }
+    }
+
     /**
      * Splits text into words where delimiters are taken to be \n, \r\n or the regex \s.
      * Ignores blank words.

diff --git a/stroom-util/src/test/java/stroom/util/string/TestStringUtil.java b/stroom-util/src/test/java/stroom/util/string/TestStringUtil.java
@@ -106,6 +106,32 @@ Stream<DynamicTest> splitToLines_trim() {
                 .build();
     }
 
+
+    @TestFactory
+    Stream<DynamicTest> testTrimLines() {
+        return TestUtil.buildDynamicTestStream()
+                .withInputAndOutputType(String.class)
+                .withSingleArgTestFunction(StringUtil::trimLines)
+                .withSimpleEqualityAssertion()
+                .addCase(null, null)
+                .addCase("", "")
+                .addCase(" ", "")
+                .addCase(" foo ", "foo")
+                .addCase("""
+                        \sfoo\s
+                        \s bar \s""", """
+                        foo
+                        bar""")
+                .addCase("""
+                        \sfoo\s
+                        """, "foo")
+                .addCase("""
+
+                        \s \s""", "")
+                .build();
+    }
+
+
     @TestFactory
     Stream<DynamicTest> splitToWords() {
         return TestUtil.buildDynamicTestStream()

diff --git a/unreleased_changes/20241009_152941_767__4526.md b/unreleased_changes/20241009_152941_767__4526.md
@@ -0,0 +1,24 @@
+* Issue **#4526** : Change Dictionary to not de-duplicate words as this is breaking JSON when used for holding SSL config in JSON form.
+
+
+```sh
+# ********************************************************************************
+# Issue title: stroom:dictionary() is dropping { and } when on a separate line
+# Issue link:  https://github.com/gchq/stroom/issues/4526
+# ********************************************************************************
+
+# ONLY the top line will be included as a change entry in the CHANGELOG.
+# The entry should be in GitHub flavour markdown and should be written on a SINGLE
+# line with no hard breaks. You can have multiple change files for a single GitHub issue.
+# The  entry should be written in the imperative mood, i.e. 'Fix nasty bug' rather than
+# 'Fixed nasty bug'.
+#
+# Examples of acceptable entries are:
+#
+#
+# * Issue **123** : Fix bug with an associated GitHub issue in this repository
+#
+# * Issue **namespace/other-repo#456** : Fix bug with an associated GitHub issue in another repository
+#
+# * Fix bug with no associated GitHub issue.
+```