From a36114c654ba6d7606d0da6e0f4e67c9faace0bf Mon Sep 17 00:00:00 2001 From: Thilo Bangert Date: Wed, 25 Oct 2023 16:24:47 +0200 Subject: [PATCH 1/2] re-add an implementation of Whitelist for backwards compatibility --- src/main/java/org/jsoup/Jsoup.java | 17 ++++ src/main/java/org/jsoup/safety/Cleaner.java | 4 + src/main/java/org/jsoup/safety/Safelist.java | 11 ++- src/main/java/org/jsoup/safety/Whitelist.java | 96 +++++++++++++++++++ .../java/org/jsoup/safety/WhitelistTest.java | 77 +++++++++++++++ 5 files changed, 204 insertions(+), 1 deletion(-) create mode 100644 src/main/java/org/jsoup/safety/Whitelist.java create mode 100644 src/test/java/org/jsoup/safety/WhitelistTest.java diff --git a/src/main/java/org/jsoup/Jsoup.java b/src/main/java/org/jsoup/Jsoup.java index b4a99ebcb1..3c4e2b4aca 100644 --- a/src/main/java/org/jsoup/Jsoup.java +++ b/src/main/java/org/jsoup/Jsoup.java @@ -7,6 +7,7 @@ import org.jsoup.parser.Parser; import org.jsoup.safety.Cleaner; import org.jsoup.safety.Safelist; +import org.jsoup.safety.Whitelist; import javax.annotation.Nullable; import javax.annotation.WillClose; @@ -363,4 +364,20 @@

Assumes the HTML is a body fragment (i.e. will be used in an existing HTML do public static boolean isValid(String bodyHtml, Safelist safelist) { return new Cleaner(safelist).isValidBodyHtml(bodyHtml); } + + public static String clean(String bodyHtml, String baseUri, Whitelist whitelist, Document.OutputSettings outputSettings) { + return clean(bodyHtml, baseUri, whitelist.getSafelist(), outputSettings); + } + + public static boolean isValid(String bodyHtml, Whitelist whitelist) { + return isValid(bodyHtml, whitelist.getSafelist()); + } + + public static String clean(String bodyHtml, Whitelist whitelist) { + return clean(bodyHtml, whitelist.getSafelist()); + } + + public static String clean(String bodyHtml, String baseUri, Whitelist whitelist) { + return clean(bodyHtml, baseUri, whitelist.getSafelist()); + } } diff --git a/src/main/java/org/jsoup/safety/Cleaner.java b/src/main/java/org/jsoup/safety/Cleaner.java index 7b9317ec29..dc52dc030d 100644 --- a/src/main/java/org/jsoup/safety/Cleaner.java +++ b/src/main/java/org/jsoup/safety/Cleaner.java @@ -45,6 +45,10 @@ public Cleaner(Safelist safelist) { this.safelist = safelist; } + public Cleaner(Whitelist whitelist) { + this(whitelist.getSafelist()); + } + /** Creates a new, clean document, from the original dirty document, containing only elements allowed by the safelist. The original document is not modified. Only elements from the dirty document's body are used. 
The diff --git a/src/main/java/org/jsoup/safety/Safelist.java b/src/main/java/org/jsoup/safety/Safelist.java index d1f275df39..a33fea9df6 100644 --- a/src/main/java/org/jsoup/safety/Safelist.java +++ b/src/main/java/org/jsoup/safety/Safelist.java @@ -613,7 +613,16 @@ public Attributes getEnforcedAttributes(String tagName) { } return attrs; } - + + protected Set getTagNames() { + return this.tagNames; + } + + protected Safelist setPreserverRelativeLinks(boolean preserve) { + this.preserveRelativeLinks = preserve; + return this; + } + // named types for config. All just hold strings, but here for my sanity. static class TagName extends TypedValue { diff --git a/src/main/java/org/jsoup/safety/Whitelist.java b/src/main/java/org/jsoup/safety/Whitelist.java new file mode 100644 index 0000000000..f1ca8ccd7e --- /dev/null +++ b/src/main/java/org/jsoup/safety/Whitelist.java @@ -0,0 +1,96 @@ +package org.jsoup.safety; + +import org.jsoup.nodes.Attribute; +import org.jsoup.nodes.Attributes; +import org.jsoup.nodes.Element; +import org.jsoup.safety.Safelist.TagName; + +public class Whitelist { + + protected Safelist safelist; + + public Whitelist() { + this.safelist = new Safelist(); + } + + public Whitelist(Whitelist copy) { + this.safelist = new Safelist(copy.safelist); + } + + private Whitelist(Safelist embedded) { + this.safelist = embedded; + } + + public static Whitelist none() { + return new Whitelist(); + } + + public static Whitelist simpleText() { + return new Whitelist(Safelist.simpleText()); + } + + public static Whitelist basic() { + return new Whitelist(Safelist.basic()); + } + + public Safelist getSafelist() { + return this.safelist; + } + + public Whitelist addTags(String... tags) { + this.safelist.addTags(tags); + return this; + } + + public Whitelist removeTags(String... tags) { + this.safelist.removeTags(tags); + return this; + } + + public Whitelist addAttributes(String tag, String... 
attributes) { + this.safelist.addAttributes(tag, attributes); + return this; + } + + public Whitelist removeAttributes(String tag, String... attributes) { + this.safelist.removeAttributes(tag, attributes); + return this; + } + + public Whitelist addEnforcedAttribute(String tag, String attribute, String value) { + this.safelist.addEnforcedAttribute(tag, attribute, value); + return this; + } + + public Whitelist removeEnforcedAttribute(String tag, String attribute) { + this.safelist.removeEnforcedAttribute(tag, attribute); + return this; + } + + public Whitelist preserveRelativeLinks(boolean preserve) { + this.safelist.setPreserverRelativeLinks(preserve); + return this; + } + + public Whitelist addProtocols(String tag, String attribute, String... protocols) { + this.safelist.addProtocols(tag, attribute, protocols); + return this; + } + + public Whitelist removeProtocols(String tag, String attribute, String... removeProtocols) { + this.safelist.removeProtocols(tag, attribute, removeProtocols); + return this; + } + + public boolean isSafeTag(String tag) { + return this.safelist.getTagNames().contains(TagName.valueOf(tag)); + } + + public boolean isSafeAttribute(String tagName, Element el, Attribute attr) { + return this.safelist.isSafeAttribute(tagName, el, attr); + } + + public Attributes getEnforcedAttributes(String tagName) { + return this.safelist.getEnforcedAttributes(tagName); + } +} diff --git a/src/test/java/org/jsoup/safety/WhitelistTest.java b/src/test/java/org/jsoup/safety/WhitelistTest.java new file mode 100644 index 0000000000..5949ed548e --- /dev/null +++ b/src/test/java/org/jsoup/safety/WhitelistTest.java @@ -0,0 +1,77 @@ +package org.jsoup.safety; + +import org.jsoup.helper.ValidationException; +import org.jsoup.nodes.Attribute; +import org.jsoup.nodes.Attributes; +import org.jsoup.nodes.Element; +import org.jsoup.parser.Tag; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +public class WhitelistTest { + 
private static final String TEST_TAG = "testTag"; + private static final String TEST_ATTRIBUTE = "testAttribute"; + private static final String TEST_SCHEME = "valid-scheme"; + private static final String TEST_VALUE = TEST_SCHEME + "://testValue"; + + @Test + public void testCopyConstructor_noSideEffectOnTags() { + Whitelist whitelist1 = Whitelist.none().addTags(TEST_TAG); + Whitelist whitelist2 = new Whitelist(whitelist1); + whitelist1.addTags("invalidTag"); + + assertFalse(whitelist2.isSafeTag("invalidTag")); + } + + @Test + public void testCopyConstructor_noSideEffectOnAttributes() { + Whitelist whitelist1 = Whitelist.none().addAttributes(TEST_TAG, TEST_ATTRIBUTE); + Whitelist whitelist2 = new Whitelist(whitelist1); + whitelist1.addAttributes(TEST_TAG, "invalidAttribute"); + + assertFalse(whitelist2.isSafeAttribute(TEST_TAG, null, new Attribute("invalidAttribute", TEST_VALUE))); + } + + @Test + public void testCopyConstructor_noSideEffectOnEnforcedAttributes() { + Whitelist whitelist1 = Whitelist.none().addEnforcedAttribute(TEST_TAG, TEST_ATTRIBUTE, TEST_VALUE); + Whitelist whitelist2 = new Whitelist(whitelist1); + whitelist1.addEnforcedAttribute(TEST_TAG, TEST_ATTRIBUTE, "invalidValue"); + + for (Attribute enforcedAttribute : whitelist2.getEnforcedAttributes(TEST_TAG)) { + assertNotEquals("invalidValue", enforcedAttribute.getValue()); + } + } + + @Test + public void testCopyConstructor_noSideEffectOnProtocols() { + final String invalidScheme = "invalid-scheme"; + Whitelist whitelist1 = Whitelist.none() + .addAttributes(TEST_TAG, TEST_ATTRIBUTE) + .addProtocols(TEST_TAG, TEST_ATTRIBUTE, TEST_SCHEME); + Whitelist whitelist2 = new Whitelist(whitelist1); + whitelist1.addProtocols(TEST_TAG, TEST_ATTRIBUTE, invalidScheme); + + Attributes attributes = new Attributes(); + Attribute invalidAttribute = new Attribute(TEST_ATTRIBUTE, invalidScheme + "://someValue"); + attributes.put(invalidAttribute); + Element invalidElement = new Element(Tag.valueOf(TEST_TAG), "", 
attributes); + + assertFalse(whitelist2.isSafeAttribute(TEST_TAG, invalidElement, invalidAttribute)); + } + + @Test + void noscriptIsBlocked() { + boolean threw = false; + Whitelist whitelist = null; + try { + whitelist = Whitelist.none().addTags("NOSCRIPT"); + } catch (ValidationException validationException) { + threw = true; + assertTrue(validationException.getMessage().contains("unsupported")); + } + assertTrue(threw); + assertNull(whitelist); + } +} From 9fad7d3017e337b18094a87d2be5f9a2a64677b9 Mon Sep 17 00:00:00 2001 From: Thilo Bangert Date: Thu, 26 Oct 2023 08:37:01 +0200 Subject: [PATCH 2/2] add @Deprecated annotation --- src/main/java/org/jsoup/Jsoup.java | 4 ++++ src/main/java/org/jsoup/safety/Cleaner.java | 1 + src/main/java/org/jsoup/safety/Whitelist.java | 1 + 3 files changed, 6 insertions(+) diff --git a/src/main/java/org/jsoup/Jsoup.java b/src/main/java/org/jsoup/Jsoup.java index 3c4e2b4aca..8bacbfd1b2 100644 --- a/src/main/java/org/jsoup/Jsoup.java +++ b/src/main/java/org/jsoup/Jsoup.java @@ -365,18 +365,22 @@ public static boolean isValid(String bodyHtml, Safelist safelist) { return new Cleaner(safelist).isValidBodyHtml(bodyHtml); } + @Deprecated public static String clean(String bodyHtml, String baseUri, Whitelist whitelist, Document.OutputSettings outputSettings) { return clean(bodyHtml, baseUri, whitelist.getSafelist(), outputSettings); } + @Deprecated public static boolean isValid(String bodyHtml, Whitelist whitelist) { return isValid(bodyHtml, whitelist.getSafelist()); } + @Deprecated public static String clean(String bodyHtml, Whitelist whitelist) { return clean(bodyHtml, whitelist.getSafelist()); } + @Deprecated public static String clean(String bodyHtml, String baseUri, Whitelist whitelist) { return clean(bodyHtml, baseUri, whitelist.getSafelist()); } diff --git a/src/main/java/org/jsoup/safety/Cleaner.java b/src/main/java/org/jsoup/safety/Cleaner.java index dc52dc030d..f430d1db29 100644 --- a/src/main/java/org/jsoup/safety/Cleaner.java 
+++ b/src/main/java/org/jsoup/safety/Cleaner.java @@ -45,6 +45,7 @@ public Cleaner(Safelist safelist) { this.safelist = safelist; } + @Deprecated public Cleaner(Whitelist whitelist) { this(whitelist.getSafelist()); } diff --git a/src/main/java/org/jsoup/safety/Whitelist.java b/src/main/java/org/jsoup/safety/Whitelist.java index f1ca8ccd7e..acd2381b0a 100644 --- a/src/main/java/org/jsoup/safety/Whitelist.java +++ b/src/main/java/org/jsoup/safety/Whitelist.java @@ -5,6 +5,7 @@ import org.jsoup.nodes.Element; import org.jsoup.safety.Safelist.TagName; +@Deprecated public class Whitelist { protected Safelist safelist;