diff --git a/CHANGELOG.md b/CHANGELOG.md index 67deb76ec92..6f069f7dd8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv - The export formats `listrefs`, `tablerefs`, `tablerefsabsbib`, now use the ISO date format in the footer [#10383](https://github.com/JabRef/jabref/pull/10383). - When searching for an identifier in the "Web search", the title of the search window is now "Identifier-based Web Search". [#10391](https://github.com/JabRef/jabref/pull/10391) +- The ampersand checker now skips verbatim fields (`file`, `url`, ...). [#10419](https://github.com/JabRef/jabref/pull/10419) ### Fixed diff --git a/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java b/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java index 9da189890c0..243fbbddea4 100644 --- a/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java +++ b/src/main/java/org/jabref/logic/integrity/AmpersandChecker.java @@ -1,19 +1,23 @@ package org.jabref.logic.integrity; -import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.regex.MatchResult; import java.util.regex.Pattern; +import java.util.stream.Stream; + +import javafx.util.Pair; import org.jabref.logic.l10n.Localization; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.Field; +import org.jabref.model.entry.field.FieldProperty; import com.google.common.base.CharMatcher; /** * Checks if the BibEntry contains unescaped ampersands. + * This is done in nonverbatim fields. 
Similar to {@link HTMLCharacterChecker} */ public class AmpersandChecker implements EntryChecker { // matches for an & preceded by any number of \ @@ -21,21 +25,24 @@ public class AmpersandChecker implements EntryChecker { @Override public List<IntegrityMessage> check(BibEntry entry) { - List<IntegrityMessage> results = new ArrayList<>(); - - for (Map.Entry<Field, String> field : entry.getFieldMap().entrySet()) { - // counts the number of even \ occurrences preceding an & - long unescapedAmpersands = BACKSLASH_PRECEDED_AMPERSAND.matcher(field.getValue()) - .results() - .map(MatchResult::group) - .filter(m -> CharMatcher.is('\\').countIn(m) % 2 == 0) - .count(); + return entry.getFieldMap().entrySet().stream() + .filter(field -> !field.getKey().getProperties().contains(FieldProperty.VERBATIM)) + // We use "flatMap" instead of filtering later, because we assume there won't be that many error messages - and construction of Stream.empty() is faster than construction of a new Pair (including lifting long to Long) + .flatMap(AmpersandChecker::getUnescapedAmpersandsWithCount) + .map(pair -> new IntegrityMessage(Localization.lang("Found %0 unescaped '&'", pair.getValue()), entry, pair.getKey())) + .toList(); + } - if (unescapedAmpersands > 0) { - results.add(new IntegrityMessage(Localization.lang("Found %0 unescaped '&'", unescapedAmpersands), entry, field.getKey())); - // note: when changing the message - also do so in tests - } + private static Stream<Pair<Field, Long>> getUnescapedAmpersandsWithCount(Map.Entry<Field, String> entry) { + // counts the & occurrences preceded by an even number of \ (i.e., the unescaped ones) + long unescapedAmpersands = BACKSLASH_PRECEDED_AMPERSAND.matcher(entry.getValue()) + .results() + .map(MatchResult::group) + .filter(m -> CharMatcher.is('\\').countIn(m) % 2 == 0) + .count(); + if (unescapedAmpersands == 0) { + return Stream.empty(); } - return results; + return Stream.of(new Pair<>(entry.getKey(), unescapedAmpersands)); } } diff --git a/src/main/java/org/jabref/logic/integrity/HTMLCharacterChecker.java 
b/src/main/java/org/jabref/logic/integrity/HTMLCharacterChecker.java index 267cf2435b4..874a9c79c8d 100644 --- a/src/main/java/org/jabref/logic/integrity/HTMLCharacterChecker.java +++ b/src/main/java/org/jabref/logic/integrity/HTMLCharacterChecker.java @@ -1,14 +1,10 @@ package org.jabref.logic.integrity; -import java.util.ArrayList; import java.util.List; -import java.util.Map; -import java.util.regex.Matcher; import java.util.regex.Pattern; import org.jabref.logic.l10n.Localization; import org.jabref.model.entry.BibEntry; -import org.jabref.model.entry.field.Field; import org.jabref.model.entry.field.FieldProperty; /** @@ -20,18 +16,10 @@ public class HTMLCharacterChecker implements EntryChecker { @Override public List<IntegrityMessage> check(BibEntry entry) { - List<IntegrityMessage> results = new ArrayList<>(); - for (Map.Entry<Field, String> field : entry.getFieldMap().entrySet()) { - if (field.getKey().getProperties().contains(FieldProperty.VERBATIM)) { - continue; - } - - Matcher characterMatcher = HTML_CHARACTER_PATTERN.matcher(field.getValue()); - if (characterMatcher.find()) { - results.add( - new IntegrityMessage(Localization.lang("HTML encoded character found"), entry, field.getKey())); - } - } - return results; + return entry.getFieldMap().entrySet().stream() + .filter(field -> !field.getKey().getProperties().contains(FieldProperty.VERBATIM)) + .filter(field -> HTML_CHARACTER_PATTERN.matcher(field.getValue()).find()) + .map(field -> new IntegrityMessage(Localization.lang("HTML encoded character found"), entry, field.getKey())) + .toList(); } } diff --git a/src/test/java/org/jabref/logic/integrity/AmpersandCheckerTest.java b/src/test/java/org/jabref/logic/integrity/AmpersandCheckerTest.java index 81d9942c162..1efdd5ae82d 100644 --- a/src/test/java/org/jabref/logic/integrity/AmpersandCheckerTest.java +++ b/src/test/java/org/jabref/logic/integrity/AmpersandCheckerTest.java @@ -65,4 +65,18 @@ void entryWithMultipleEscapedAndUnescapedAmpersands() { entry.setField(StandardField.AFTERWORD, "May the force be 
with you & live long \\\\& prosper \\& to infinity \\\\\\& beyond & assemble \\\\\\\\& excelsior!"); assertEquals(List.of(new IntegrityMessage("Found 4 unescaped '&'", entry, StandardField.AFTERWORD)), checker.check(entry)); } + + static Stream<Arguments> entryWithVerabitmFieldsNotCausingMessages() { + return Stream.of( + Arguments.of(StandardField.FILE, "one & another.pdf"), + Arguments.of(StandardField.URL, "https://example.org?key=value&key2=value2") + ); + } + + @ParameterizedTest + @MethodSource + void entryWithVerabitmFieldsNotCausingMessages(Field field, String value) { + entry.setField(field, value); + assertEquals(List.of(), checker.check(entry)); + } } diff --git a/src/test/java/org/jabref/logic/integrity/HTMLCharacterCheckerTest.java b/src/test/java/org/jabref/logic/integrity/HTMLCharacterCheckerTest.java index 3a08d49af93..f185132a29c 100644 --- a/src/test/java/org/jabref/logic/integrity/HTMLCharacterCheckerTest.java +++ b/src/test/java/org/jabref/logic/integrity/HTMLCharacterCheckerTest.java @@ -2,11 +2,16 @@ import java.util.Collections; import java.util.List; +import java.util.stream.Stream; import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.field.Field; import org.jabref.model.entry.field.StandardField; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -59,4 +64,18 @@ void journalDoesNotAcceptHTMLEncodedCharacters() { entry.setField(StandardField.JOURNAL, "&Auml;rling Str&ouml;m for &#8211; &#x2031;"); assertEquals(List.of(new IntegrityMessage("HTML encoded character found", entry, StandardField.JOURNAL)), checker.check(entry)); } + + static Stream<Arguments> entryWithVerabitmFieldsNotCausingMessages() { + return Stream.of( + Arguments.of(StandardField.FILE, "one & another.pdf"), + 
Arguments.of(StandardField.URL, "https://example.org?key=value&key2=value2") + ); + } + + @ParameterizedTest + @MethodSource + void entryWithVerabitmFieldsNotCausingMessages(Field field, String value) { + entry.setField(field, value); + assertEquals(List.of(), checker.check(entry)); + } }