From b4751ad1c3e49acf06824ba2e3d9e046be159f0d Mon Sep 17 00:00:00 2001 From: Jonathan Hedley Date: Wed, 29 Nov 2023 13:47:44 +1100 Subject: [PATCH] Attribute accessor, and maintain source range after key change Fixes #2069 Fixes #2070 --- CHANGES.md | 6 +++ src/main/java/org/jsoup/nodes/Attribute.java | 12 ++++- src/main/java/org/jsoup/nodes/Attributes.java | 23 +++++++++- src/main/java/org/jsoup/nodes/Element.java | 11 +++++ .../java/org/jsoup/nodes/ElementTest.java | 19 ++++++++ .../java/org/jsoup/parser/PositionTest.java | 46 +++++++++++++++++++ 6 files changed, 113 insertions(+), 4 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index ec8ff4cf2e..a0852ec691 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,12 @@ ## 1.17.2 (Pending) +### Improvements +* Added `Element.attribute(String)` and `Attributes.attribute(String)` to more simply obtain an `Attribute` object. + [2069](https://github.com/jhy/jsoup/issues/2069) +* If source tracking is on, and an Attribute's key is changed (via `Attribute.setKey(String)`), the source range is + now still tracked in `Attribute.sourceRange()`. 
[2070](https://github.com/jhy/jsoup/issues/2070) + ### Bug Fixes * When tracking the source position of attributes, if source attribute name was mix-cased but the parser was diff --git a/src/main/java/org/jsoup/nodes/Attribute.java b/src/main/java/org/jsoup/nodes/Attribute.java index f77106db1d..5f0e2d356a 100644 --- a/src/main/java/org/jsoup/nodes/Attribute.java +++ b/src/main/java/org/jsoup/nodes/Attribute.java @@ -3,7 +3,6 @@ import org.jsoup.SerializationException; import org.jsoup.helper.Validate; import org.jsoup.internal.Normalizer; -import org.jsoup.internal.SharedConstants; import org.jsoup.internal.StringUtil; import org.jsoup.nodes.Document.OutputSettings.Syntax; import org.jspecify.annotations.Nullable; @@ -72,8 +71,17 @@ public void setKey(String key) { Validate.notEmpty(key); // trimming could potentially make empty, so validate here if (parent != null) { int i = parent.indexOfKey(this.key); - if (i != Attributes.NotFound) + if (i != Attributes.NotFound) { + String oldKey = parent.keys[i]; parent.keys[i] = key; + + // if tracking source positions, update the key in the range map + Map ranges = parent.getRanges(); + if (ranges != null) { + Range.AttributeRange range = ranges.remove(oldKey); + ranges.put(key, range); + } + } } this.key = key; } diff --git a/src/main/java/org/jsoup/nodes/Attributes.java b/src/main/java/org/jsoup/nodes/Attributes.java index ecbe9f4ead..ff2f0b995e 100644 --- a/src/main/java/org/jsoup/nodes/Attributes.java +++ b/src/main/java/org/jsoup/nodes/Attributes.java @@ -110,6 +110,19 @@ public String get(String key) { return i == NotFound ? EmptyString : checkNotNull(vals[i]); } + /** + Get an Attribute by key. The Attribute will remain connected to these Attributes, so changes made via + {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc will cascade back to these Attributes and + their owning Element. + @param key the (case-sensitive) attribute key + @return the Attribute for this key, or null if not present. 
+ @since 1.17.2 + */ + public Attribute attribute(String key) { + int i = indexOfKey(key); + return i == NotFound ? null : new Attribute(key, checkNotNull(vals[i]), this); + } + /** * Get an attribute's value by case-insensitive key * @param key the attribute name @@ -360,13 +373,19 @@ Get the source ranges (start to end position) in the original input source from */ public Range.AttributeRange sourceRange(String key) { if (!hasKey(key)) return UntrackedAttr; - //noinspection unchecked - Map ranges = (Map) userData(AttrRangeKey); + Map ranges = getRanges(); if (ranges == null) return Range.AttributeRange.UntrackedAttr; Range.AttributeRange range = ranges.get(key); return range != null ? range : Range.AttributeRange.UntrackedAttr; } + /** Get the Ranges, if tracking is enabled; null otherwise. */ + @Nullable Map getRanges() { + //noinspection unchecked + return (Map) userData(AttrRangeKey); + } + + @Override public Iterator iterator() { return new Iterator() { diff --git a/src/main/java/org/jsoup/nodes/Element.java b/src/main/java/org/jsoup/nodes/Element.java index cfb19113b1..4a2961788b 100644 --- a/src/main/java/org/jsoup/nodes/Element.java +++ b/src/main/java/org/jsoup/nodes/Element.java @@ -266,6 +266,17 @@ public Element attr(String attributeKey, boolean attributeValue) { return this; } + /** + Get an Attribute by key. Changes made via {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc + will cascade back to this Element. + @param key the (case-sensitive) attribute key + @return the Attribute for this key, or null if not present. + @since 1.17.2 + */ + public Attribute attribute(String key) { + return hasAttributes() ? attributes().attribute(key) : null; + } + /** * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key * starting with "data-" is included the dataset. 
diff --git a/src/test/java/org/jsoup/nodes/ElementTest.java b/src/test/java/org/jsoup/nodes/ElementTest.java index 12a51a653f..b4d3747754 100644 --- a/src/test/java/org/jsoup/nodes/ElementTest.java +++ b/src/test/java/org/jsoup/nodes/ElementTest.java @@ -2865,4 +2865,23 @@ void prettySerializationRoundTrips(Document.OutputSettings settings) { doc.outputSettings().escapeMode(Entities.EscapeMode.extended); assertEquals("Foo ≻", doc.body().html()); // succ is alias for Succeeds, and first hit in entities } + + @Test void attribute() { + String html = "

<p CLASS='yes'>One</p>

"; + Document doc = Jsoup.parse(html); + Element p = doc.expectFirst("p"); + Attribute attr = p.attribute("class"); // HTML parse lower-cases names + assertNotNull(attr); + assertEquals("class", attr.getKey()); + assertEquals("yes", attr.getValue()); + assertFalse(attr.sourceRange().nameRange().start().isTracked()); // tracking disabled + + assertNull(p.attribute("CLASS")); // no such key + + attr.setKey("CLASS"); // set preserves input case + attr.setValue("YES"); + + assertEquals("

<p CLASS=\"YES\">One</p>

", p.outerHtml()); + assertEquals("CLASS=\"YES\"", attr.html()); + } } diff --git a/src/test/java/org/jsoup/parser/PositionTest.java b/src/test/java/org/jsoup/parser/PositionTest.java index 35f1494b55..24beec8d5a 100644 --- a/src/test/java/org/jsoup/parser/PositionTest.java +++ b/src/test/java/org/jsoup/parser/PositionTest.java @@ -487,6 +487,52 @@ private void printRange(Node node) { assertEquals("h1:0-9~12-17; id:4-6=7-8; #text:9-12; #text:17-18; h2:18-27~30-35; id:22-24=25-26; #text:27-30; h10:35-40~43-49; #text:40-43; ", track.toString()); } + @Test void updateKeyMaintainsRangeLc() { + String html = "

<p xsi:CLASS=On>One</p>

"; + Document doc = Jsoup.parse(html, TrackingHtmlParser); + Element p = doc.expectFirst("p"); + Attribute attr = p.attribute("xsi:class"); + assertNotNull(attr); + + String expectedRange = "1,4:3-1,13:12=1,14:13-1,16:15"; + assertEquals(expectedRange, attr.sourceRange().toString()); + attr.setKey("class"); + assertEquals(expectedRange, attr.sourceRange().toString()); + assertEquals("class=\"On\"", attr.html()); + } + + @Test void updateKeyMaintainsRangeUc() { + String html = "

<p xsi:CLASS=On>One</p>

"; + Document doc = Jsoup.parse(html, TrackingXmlParser); + Element p = doc.expectFirst("p"); + Attribute attr = p.attribute("xsi:CLASS"); + assertNotNull(attr); + + String expectedRange = "1,4:3-1,13:12=1,14:13-1,16:15"; + assertEquals(expectedRange, attr.sourceRange().toString()); + attr.setKey("class"); + assertEquals(expectedRange, attr.sourceRange().toString()); + assertEquals("class=\"On\"", attr.html()); + + attr.setKey("CLASSY"); + assertEquals(expectedRange, attr.sourceRange().toString()); + assertEquals("CLASSY=\"On\"", attr.html()); + + attr.setValue("To"); + assertEquals(expectedRange, attr.sourceRange().toString()); + assertEquals("CLASSY=\"To\"", attr.html()); + + assertEquals("

<p CLASSY=\"To\">One</p>

", p.outerHtml()); + + p.attr("CLASSY", "Tree"); + assertEquals(expectedRange, attr.sourceRange().toString()); + assertEquals("CLASSY=\"To\"", attr.html()); // changes in this direction do not get to the attribute as it's not connected that way + + Attribute attr2 = p.attribute("CLASSY"); + assertEquals("CLASSY=\"Tree\"", attr2.html()); + assertEquals(expectedRange, attr2.sourceRange().toString()); + } + static void accumulateAttributePositions(Node node, StringBuilder sb) { if (node instanceof LeafNode) return; // leafnode pseudo attributes are not tracked for (Attribute attribute : node.attributes()) {