Skip to content

Commit

Permalink
Merge pull request #4529 from gchq/gh-4526-dict-de-dup
Browse files Browse the repository at this point in the history
PR for #4526 - stroom:dictionary() is dropping { and } when on a separate line
  • Loading branch information
at055612 authored Oct 10, 2024
2 parents 399efd0 + 6d17f51 commit 3d45bba
Show file tree
Hide file tree
Showing 9 changed files with 152 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ public DictionaryPresenter(final EventBus eventBus,
this.restFactory = restFactory;
this.locationManager = locationManager;

downloadButton = SvgButton.create(SvgPresets.DOWNLOAD);
downloadButton = SvgButton.create(SvgPresets.DOWNLOAD.title("Download Dictionary words"));

toolbar.addButton(downloadButton);

addTab(WORDS, new AbstractTabProvider<DictionaryDoc, EditorPresenter>(eventBus) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ private void initTableColumns() {
dataGrid.addResizableColumn(
nodeNameColumn,
DataGridUtil.headingBuilder("Source Dictionary")
.withToolTip("The first Dictionary that contains the word.")
.withToolTip("The dictionary that contains the word.")
.build(),
300);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,12 +160,9 @@ public static Builder builder(final boolean deDup) {

public static class Builder {

private final List<String> wordList = new ArrayList<>();
// Needed if we are not de-duping. Essentially the primary source
private final List<String> sourceList = new ArrayList<>();
private final List<SourcedWord> wordList = new ArrayList<>();
private final Set<String> wordSet = new HashSet<>();
private final Map<String, DocRef> sourceUuidToDocRefMap = new HashMap<>();
private final Map<String, String> wordToPrimarySourceUuidMap = new HashMap<>();
private final Map<String, List<String>> wordToSourceUuidsMap = new HashMap<>();
private final boolean deDup;

Expand All @@ -179,9 +176,6 @@ public Builder addWord(final String word, final DocRef source) {
final String sourceUuid = Objects.requireNonNull(
source,
"Source DocRef required for word").getUuid();
// final Word wordObj = new Word(
// word, Objects.requireNonNull(source, "Source DocRef required for word").getUuid(),
// secondarySourceUuids);
boolean doAdd = true;
// Assumes that words are added in precedence order, i.e. most important source first
if (deDup) {
Expand All @@ -190,18 +184,16 @@ public Builder addWord(final String word, final DocRef source) {
}
}
if (doAdd) {
wordList.add(word);
sourceList.add(sourceUuid);
wordList.add(new SourcedWord(word, sourceUuid));
// sourceList.add(sourceUuid);
wordSet.add(word);
}

if (deDup) {
// If not de-duping, then each word only has one source so we don't need this list
final List<String> sourceUuids = wordToSourceUuidsMap.computeIfAbsent(
word, k -> new ArrayList<>());
if (!sourceUuids.contains(sourceUuid)) {
sourceUuids.add(sourceUuid);
}
// If not de-duping, then each word only has one source so we don't need this list
final List<String> sourceUuids = wordToSourceUuidsMap.computeIfAbsent(
word, k -> new ArrayList<>());
if (!sourceUuids.contains(sourceUuid)) {
sourceUuids.add(sourceUuid);
}

// Our reference lookup of uuid -> DocRef
Expand All @@ -215,18 +207,22 @@ public WordList build() {
return EMPTY;
} else {
final List<Word> wordObjList = new ArrayList<>(wordList.size());
for (int i = 0; i < wordList.size(); i++) {
final String word = wordList.get(i);
final String sourceUuid = Objects.requireNonNull(sourceList.get(i),
() -> "Null source for word " + word);
for (final SourcedWord sourcedWord : wordList) {
final String word = sourcedWord.word;
final String sourceUuid = sourcedWord.sourceUuid;

List<String> additionalSources = null;
final List<String> allSources = GwtNullSafe.list(wordToSourceUuidsMap.get(word));
if (deDup) {
final List<String> allSources = GwtNullSafe.list(wordToSourceUuidsMap.get(word));
// First one is the primary source, so ignore it
if (allSources.size() > 1) {
additionalSources = allSources.subList(1, allSources.size());
}
} else {
// Not de-duping so get all sources except ours
additionalSources = allSources.stream()
.filter(uuid -> !Objects.equals(sourceUuid, uuid))
.collect(Collectors.toList());
}
final Word wordObj = new Word(word, sourceUuid, additionalSources);
wordObjList.add(wordObj);
Expand All @@ -235,4 +231,37 @@ public WordList build() {
}
}
}


// --------------------------------------------------------------------------------


@SuppressWarnings("ClassCanBeRecord") // Not in GWT
private static final class SourcedWord {

private final String word;
private final String sourceUuid;

private SourcedWord(final String word, final String sourceUuid) {
Objects.requireNonNull(word);
Objects.requireNonNull(sourceUuid);
this.word = word;
this.sourceUuid = sourceUuid;
}

public String word() {
return word;
}

public String sourceUuid() {
return sourceUuid;
}

@Override
public String toString() {
return "SourcedWord[" +
"word=" + word + ", " +
"sourceUuid=" + sourceUuid + ']';
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package stroom.dictionary.shared;

import stroom.docref.DocRef;
import stroom.util.NullSafe;

import org.junit.jupiter.api.Test;

Expand Down Expand Up @@ -64,8 +65,7 @@ void testAdd_deDup() {
new Word("cat", petsRef.getUuid(), null),
new Word("dog", petsRef.getUuid(), null),
new Word("hamster", petsRef.getUuid(), null),
new Word("pig", petsRef.getUuid(),
List.of(farmAnimalsRef.getUuid(), wildAnimalsRef.getUuid())),
new Word("pig", petsRef.getUuid(), sources(farmAnimalsRef, wildAnimalsRef)),
new Word("cow", farmAnimalsRef.getUuid(), null),
new Word("goat", farmAnimalsRef.getUuid(), null)));
assertThat(wordList.getSortedList())
Expand All @@ -75,8 +75,7 @@ void testAdd_deDup() {
new Word("dog", petsRef.getUuid(), null),
new Word("goat", farmAnimalsRef.getUuid(), null),
new Word("hamster", petsRef.getUuid(), null),
new Word("pig", petsRef.getUuid(),
List.of(farmAnimalsRef.getUuid(), wildAnimalsRef.getUuid()))));
new Word("pig", petsRef.getUuid(), sources(farmAnimalsRef, wildAnimalsRef))));

assertThat(wordList.getSource(new Word("cat", petsRef.getUuid(), null)))
.hasValue(petsRef);
Expand Down Expand Up @@ -113,15 +112,15 @@ void testAdd_deDup() {
void testAdd_noDeDup() {

final DocRef petsRef = DictionaryDoc.buildDocRef()
.uuid("200")
.uuid("pets-uuid")
.name("Pets")
.build();
final DocRef farmAnimalsRef = DictionaryDoc.buildDocRef()
.uuid("100")
.uuid("farm-uuid")
.name("Farm Animals")
.build();
final DocRef wildAnimalsRef = DictionaryDoc.buildDocRef()
.uuid("300")
.uuid("wild-uuid")
.name("Wild Animals")
.build();

Expand All @@ -147,21 +146,21 @@ void testAdd_noDeDup() {
new Word("cat", petsRef.getUuid()),
new Word("dog", petsRef.getUuid()),
new Word("hamster", petsRef.getUuid()),
new Word("pig", petsRef.getUuid()),
new Word("pig", petsRef.getUuid(), sources(farmAnimalsRef, wildAnimalsRef)),
new Word("cow", farmAnimalsRef.getUuid()),
new Word("goat", farmAnimalsRef.getUuid()),
new Word("pig", farmAnimalsRef.getUuid()),
new Word("pig", wildAnimalsRef.getUuid())));
new Word("pig", farmAnimalsRef.getUuid(), sources(petsRef, wildAnimalsRef)),
new Word("pig", wildAnimalsRef.getUuid(), sources(petsRef, farmAnimalsRef))));
assertThat(wordList.getSortedList())
.containsExactlyElementsOf(List.of(
new Word("cat", petsRef.getUuid()),
new Word("cow", farmAnimalsRef.getUuid()),
new Word("dog", petsRef.getUuid()),
new Word("goat", farmAnimalsRef.getUuid()),
new Word("hamster", petsRef.getUuid()),
new Word("pig", farmAnimalsRef.getUuid()),
new Word("pig", petsRef.getUuid()),
new Word("pig", wildAnimalsRef.getUuid())));
new Word("pig", farmAnimalsRef.getUuid(), sources(petsRef, wildAnimalsRef)),
new Word("pig", petsRef.getUuid(), sources(farmAnimalsRef, wildAnimalsRef)),
new Word("pig", wildAnimalsRef.getUuid(), sources(petsRef, farmAnimalsRef))));

assertThat(wordList.getSource(new Word("cat", petsRef.getUuid())))
.hasValue(petsRef);
Expand Down Expand Up @@ -237,4 +236,10 @@ void testNullsAndBlanks() {
assertThat(wordList.size())
.isEqualTo(1);
}

private List<String> sources(final DocRef... docRefs) {
return NullSafe.stream(docRefs)
.map(DocRef::getUuid)
.toList();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ class DictionaryStoreImpl implements DictionaryStore, WordListProvider {
DictionaryDoc.DOCUMENT_TYPE,
DictionaryDoc.DOCUMENT_TYPE,
DictionaryDoc.ICON);
public static final boolean IS_DE_DUP_DEFAULT = false;
private final Store<DictionaryDoc> store;

@Inject
Expand Down Expand Up @@ -288,17 +289,28 @@ public List<DocRef> list() {

@Override
public String getCombinedData(final DocRef docRef) {
return getCombinedWordList(docRef, null, true).asString();
return getCombinedWordList(docRef, null, IS_DE_DUP_DEFAULT).asString();
}

public WordList getCombinedWordList(final DocRef docRef,
@Override
public String[] getWords(final DocRef dictionaryRef) {
return getCombinedWordList(dictionaryRef, null, IS_DE_DUP_DEFAULT).asWordArray();
}

@Override
public WordList getCombinedWordList(final DocRef dictionaryRef,
final DocRefDecorator docRefDecorator) {
return getCombinedWordList(dictionaryRef, docRefDecorator, IS_DE_DUP_DEFAULT);
}

public WordList getCombinedWordList(final DocRef dictionaryRef,
final DocRefDecorator docRefDecorator,
final boolean deDup) {
final Builder builder = WordList.builder(deDup);
final Set<DocRef> visited = new HashSet<>();
final Stack<DocRef> visitPath = new Stack<>();
doGetCombinedWordList(docRefDecorator, builder, docRef, visited, visitPath);

doGetCombinedWordList(docRefDecorator, builder, dictionaryRef, visited, visitPath);

final WordList wordList = builder.build();

Expand All @@ -308,17 +320,6 @@ public WordList getCombinedWordList(final DocRef docRef,
return wordList;
}

@Override
public String[] getWords(final DocRef dictionaryRef) {
return getCombinedWordList(dictionaryRef, null, true).asWordArray();
}

@Override
public WordList getCombinedWordList(final DocRef dictionaryRef,
final DocRefDecorator docRefDecorator) {
return getCombinedWordList(dictionaryRef, docRefDecorator, true);
}

private void doGetCombinedWordList(final DocRefDecorator docRefDecorator,
final WordList.Builder wordListBuilder,
final DocRef docRef,
Expand Down
3 changes: 3 additions & 0 deletions stroom-docref/src/main/java/stroom/docref/DocRef.java
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ public String toInfoString() {
sb.append(name);
}
if (uuid != null) {
//noinspection SizeReplaceableByIsEmpty // Not in GWT
if (sb.length() > 0) {
sb.append(" ");
}
Expand All @@ -175,6 +176,7 @@ public String toInfoString() {
sb.append("}");
}

//noinspection SizeReplaceableByIsEmpty // Not in GWT
if (sb.length() > 0) {
return sb.toString();
}
Expand All @@ -190,6 +192,7 @@ public boolean equals(final Object o) {
if (!(o instanceof DocRef)) {
return false;
}
//noinspection PatternVariableCanBeUsed // Not in GWT
final DocRef docRef = (DocRef) o;
return Objects.equals(type, docRef.type) &&
Objects.equals(uuid, docRef.uuid);
Expand Down
13 changes: 13 additions & 0 deletions stroom-util/src/main/java/stroom/util/string/StringUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package stroom.util.string;

import stroom.util.NullSafe;
import stroom.util.logging.LogUtil;

import com.google.common.base.Preconditions;
Expand Down Expand Up @@ -72,6 +73,18 @@ public static Stream<String> splitToLines(final String text,
}
}

/**
* Trims all lines and removes any blank lines after trimming
*/
public static String trimLines(final String text) {
if (NullSafe.isEmptyString(text)) {
return text;
} else {
return splitToLines(text, true)
.collect(Collectors.joining("\n"));
}
}

/**
* Splits text into words where delimiters are taken to be \n, \r\n or the regex \s.
* Ignores blank words.
Expand Down
26 changes: 26 additions & 0 deletions stroom-util/src/test/java/stroom/util/string/TestStringUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,32 @@ Stream<DynamicTest> splitToLines_trim() {
.build();
}


@TestFactory
Stream<DynamicTest> testTrimLines() {
return TestUtil.buildDynamicTestStream()
.withInputAndOutputType(String.class)
.withSingleArgTestFunction(StringUtil::trimLines)
.withSimpleEqualityAssertion()
.addCase(null, null)
.addCase("", "")
.addCase(" ", "")
.addCase(" foo ", "foo")
.addCase("""
\sfoo\s
\s bar \s""", """
foo
bar""")
.addCase("""
\sfoo\s
""", "foo")
.addCase("""
\s \s""", "")
.build();
}


@TestFactory
Stream<DynamicTest> splitToWords() {
return TestUtil.buildDynamicTestStream()
Expand Down
24 changes: 24 additions & 0 deletions unreleased_changes/20241009_152941_767__4526.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
* Issue **#4526** : Change Dictionary to not de-duplicate words as this is breaking JSON when used for holding SSL config in JSON form.


```sh
# ********************************************************************************
# Issue title: stroom:dictionary() is dropping { and } when on a separate line
# Issue link: https://github.com/gchq/stroom/issues/4526
# ********************************************************************************

# ONLY the top line will be included as a change entry in the CHANGELOG.
# The entry should be in GitHub flavour markdown and should be written on a SINGLE
# line with no hard breaks. You can have multiple change files for a single GitHub issue.
# The entry should be written in the imperative mood, i.e. 'Fix nasty bug' rather than
# 'Fixed nasty bug'.
#
# Examples of acceptable entries are:
#
#
# * Issue **123** : Fix bug with an associated GitHub issue in this repository
#
# * Issue **namespace/other-repo#456** : Fix bug with an associated GitHub issue in another repository
#
# * Fix bug with no associated GitHub issue.
```

0 comments on commit 3d45bba

Please sign in to comment.