From 0979e1ccf258d07dfbff298322f30f565c38c62a Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Mon, 27 Nov 2023 14:51:04 +0100 Subject: [PATCH 1/6] Using jsoup 1.17.1 which can track attribute locations --- pom.xml | 2 +- src/org/rascalmpl/library/lang/xml/IO.java | 28 +++++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 1dee5f48e9..50cb003307 100644 --- a/pom.xml +++ b/pom.xml @@ -405,7 +405,7 @@ org.jsoup jsoup - 1.15.4 + 1.17.1 org.jdom diff --git a/src/org/rascalmpl/library/lang/xml/IO.java b/src/org/rascalmpl/library/lang/xml/IO.java index 867e5a114d..dd4f51b9c8 100644 --- a/src/org/rascalmpl/library/lang/xml/IO.java +++ b/src/org/rascalmpl/library/lang/xml/IO.java @@ -53,7 +53,8 @@ public class IO { private final IValueFactory vf; private static final String SRC_ATTR = "src"; private static final String QUALIFIED_SRC_ATTR = "rascal-src"; - + private static final String SRCS_ATTR = "srcs"; + private static final String QUALIFIED_SRCS_ATTR = "rascal-srcs"; public IO(IValueFactory vf) { this.vf = vf; @@ -147,6 +148,15 @@ else if (a.getKey().equals("xmlns")) { .map(a -> removeNamespace(a, elem.attributes(), fullyQualify)) .collect(Collectors.toMap(a -> normalizeAttr(a.getKey()), a -> vf.string(a.getValue()))); + // we traverse again to record the source positions of each attribute + IMap Srcs = file != null ? StreamSupport.stream(elem.attributes().spliterator(), false) + .filter(a -> !a.getKey().startsWith("xmlns")) + .map(a -> removeNamespace(a, elem.attributes(), fullyQualify)) + .map(a -> vf.tuple(vf.string(normalizeAttr(a.getKey())), attrToLoc(a, file))) + .collect(vf.mapWriter()) + : vf.map() + ; + if (fullyQualify) { IMap m = namespaces.done(); @@ -164,6 +174,9 @@ else if (a.getKey().equals("xmlns")) { if (file != null) { kws.put(kws.containsKey(SRC_ATTR) ? 
QUALIFIED_SRC_ATTR : SRC_ATTR, nodeToLoc((Element) node, file, includeEndTags)); + if (!Srcs.isEmpty()) { + kws.put(kws.containsKey(SRCS_ATTR) ? QUALIFIED_SRCS_ATTR : SRCS_ATTR, Srcs); + } } return vf.node(removeNamespace(node.nodeName(), fullyQualify), args).asWithKeywordParameters().setParameters(kws); @@ -209,6 +222,19 @@ private static Attribute removeNamespace(Attribute a, Attributes otherAttributes return new Attribute(newKey, a.getValue()); } + private ISourceLocation attrToLoc(Attribute a, ISourceLocation file) { + Range startRange = a.sourceRange().valueRange(); + + return vf.sourceLocation(file, + startRange.start().pos(), + startRange.end().pos() - startRange.start().pos(), + startRange.start().lineNumber(), + startRange.end().lineNumber(), + startRange.start().columnNumber() - 1, + startRange.end().columnNumber() - 1 + ); + } + private ISourceLocation nodeToLoc(Element node, ISourceLocation file, boolean includeEndTags) { Range startRange = node.sourceRange(); if (!startRange.isTracked()) { From 16fe28156c7c57305c7c4f4702cba5d9ce93cb58 Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Mon, 27 Nov 2023 15:09:47 +0100 Subject: [PATCH 2/6] minor refactoring for clarity --- src/org/rascalmpl/library/lang/xml/IO.java | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/org/rascalmpl/library/lang/xml/IO.java b/src/org/rascalmpl/library/lang/xml/IO.java index dd4f51b9c8..46435c217d 100644 --- a/src/org/rascalmpl/library/lang/xml/IO.java +++ b/src/org/rascalmpl/library/lang/xml/IO.java @@ -223,15 +223,20 @@ private static Attribute removeNamespace(Attribute a, Attributes otherAttributes } private ISourceLocation attrToLoc(Attribute a, ISourceLocation file) { - Range startRange = a.sourceRange().valueRange(); + Range range = a.sourceRange().valueRange(); + + if (range.start().pos() < 0) { + // this is strange + return file; + } return vf.sourceLocation(file, - startRange.start().pos(), - startRange.end().pos() - startRange.start().pos(), - startRange.start().lineNumber(), - startRange.end().lineNumber(), - startRange.start().columnNumber() - 1, - startRange.end().columnNumber() - 1 + range.start().pos(), + range.end().pos() - range.start().pos(), + range.start().lineNumber(), + range.end().lineNumber(), + range.start().columnNumber() - 1, + range.end().columnNumber() - 1 ); } From d4db728d810d0f81e89ce437538c6a1fd0f8c7cb Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Tue, 28 Nov 2023 13:38:41 +0100 Subject: [PATCH 3/6] make sure to lookup the attribute location with the right name, and clone the parent --- pom.xml | 2 +- src/org/rascalmpl/library/lang/html/IO.java | 3 +- src/org/rascalmpl/library/lang/xml/IO.java | 49 +++++++++++++-------- 3 files changed, 34 insertions(+), 20 deletions(-) diff --git a/pom.xml b/pom.xml index 50cb003307..56bdb47d61 100644 --- a/pom.xml +++ b/pom.xml @@ -405,7 +405,7 @@ org.jsoup jsoup - 1.17.1 + 1.17.2-SNAPSHOT org.jdom diff --git a/src/org/rascalmpl/library/lang/html/IO.java b/src/org/rascalmpl/library/lang/html/IO.java index 708fdf5ecf..280caa8a19 100644 --- a/src/org/rascalmpl/library/lang/html/IO.java +++ b/src/org/rascalmpl/library/lang/html/IO.java @@ -263,7 +263,8 @@ private Document createHTMLDocument(IConstructor cons, boolean dropOrigins) thro Node node = normalise(cons, createElement(cons, dropOrigins)); doc.appendChild(node); - return doc.normalise(); + + return doc; } private Node normalise(IConstructor cons, Node elem) { diff --git a/src/org/rascalmpl/library/lang/xml/IO.java b/src/org/rascalmpl/library/lang/xml/IO.java index 46435c217d..4c58cbb1d3 100644 --- a/src/org/rascalmpl/library/lang/xml/IO.java +++ b/src/org/rascalmpl/library/lang/xml/IO.java @@ -151,8 +151,9 @@ else if (a.getKey().equals("xmlns")) { // we traverse again to record the source positions of each attribute IMap Srcs = file != null ? 
StreamSupport.stream(elem.attributes().spliterator(), false) .filter(a -> !a.getKey().startsWith("xmlns")) - .map(a -> removeNamespace(a, elem.attributes(), fullyQualify)) - .map(a -> vf.tuple(vf.string(normalizeAttr(a.getKey())), attrToLoc(a, file))) + // .map(a -> removeNamespace(a, elem.attributes(), fullyQualify)) + .map(a -> vf.tuple(vf.string(normalizeAttr(removeNamespace(a, elem.attributes(), fullyQualify).getKey())), attrToLoc(a, file))) // key-value tuples for the map collector + .filter(t -> !t.get(1).equals(vf.tuple())) // remove the failed location lookups for robustness sake .collect(vf.mapWriter()) : vf.map() ; @@ -219,24 +220,36 @@ private static Attribute removeNamespace(Attribute a, Attributes otherAttributes return a; } - return new Attribute(newKey, a.getValue()); + Attribute newVersion = a.clone(); + newVersion.setKey(newKey); + return newVersion; } - private ISourceLocation attrToLoc(Attribute a, ISourceLocation file) { - Range range = a.sourceRange().valueRange(); - - if (range.start().pos() < 0) { - // this is strange - return file; - } - - return vf.sourceLocation(file, - range.start().pos(), - range.end().pos() - range.start().pos(), - range.start().lineNumber(), - range.end().lineNumber(), - range.start().columnNumber() - 1, - range.end().columnNumber() - 1 + private ITuple attrToLoc(Attribute a, ISourceLocation file) { + Range nameRange = a.sourceRange().nameRange(); + Range valueRange = a.sourceRange().valueRange(); + + if (valueRange.start().pos() < 0 || nameRange.start().pos() < 0) { + // this is strange, tagging an error here so it can be filtered. 
+ assert false; + return vf.tuple(); + } + + return vf.tuple( + vf.sourceLocation(file, + nameRange.start().pos(), + nameRange.end().pos() - nameRange.start().pos(), + nameRange.start().lineNumber(), + nameRange.end().lineNumber(), + nameRange.start().columnNumber() - 1, + nameRange.end().columnNumber() - 1) + , vf.sourceLocation(file, + valueRange.start().pos(), + valueRange.end().pos() - valueRange.start().pos(), + valueRange.start().lineNumber(), + valueRange.end().lineNumber(), + valueRange.start().columnNumber() - 1, + valueRange.end().columnNumber() - 1) ); } From 93164594329607f22715b852c77072279b649ba0 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Tue, 28 Nov 2023 13:45:21 +0100 Subject: [PATCH 4/6] fixed issue with Attribute keys and source locations --- src/org/rascalmpl/library/lang/xml/IO.java | 23 ++++++---------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/src/org/rascalmpl/library/lang/xml/IO.java b/src/org/rascalmpl/library/lang/xml/IO.java index 4c58cbb1d3..f6712d8e48 100644 --- a/src/org/rascalmpl/library/lang/xml/IO.java +++ b/src/org/rascalmpl/library/lang/xml/IO.java @@ -145,14 +145,12 @@ else if (a.getKey().equals("xmlns")) { // remove all the namespace attributes return !a.getKey().startsWith("xmlns"); }) - .map(a -> removeNamespace(a, elem.attributes(), fullyQualify)) - .collect(Collectors.toMap(a -> normalizeAttr(a.getKey()), a -> vf.string(a.getValue()))); + .collect(Collectors.toMap(a -> normalizeAttr(removeNamespace(a, elem.attributes(), fullyQualify)), a -> vf.string(a.getValue()))); // we traverse again to record the source positions of each attribute IMap Srcs = file != null ? 
StreamSupport.stream(elem.attributes().spliterator(), false) .filter(a -> !a.getKey().startsWith("xmlns")) - // .map(a -> removeNamespace(a, elem.attributes(), fullyQualify)) - .map(a -> vf.tuple(vf.string(normalizeAttr(removeNamespace(a, elem.attributes(), fullyQualify).getKey())), attrToLoc(a, file))) // key-value tuples for the map collector + .map(a -> vf.tuple(vf.string(normalizeAttr(removeNamespace(a, elem.attributes(), fullyQualify))), attrToLoc(a, file))) // key-value tuples for the map collector .filter(t -> !t.get(1).equals(vf.tuple())) // remove the failed location lookups for robustness sake .collect(vf.mapWriter()) : vf.map() @@ -201,28 +199,19 @@ private static String removeNamespace(String name, boolean fullyQualify) { return name.substring(index+1); } - private static Attribute removeNamespace(Attribute a, Attributes otherAttributes, boolean fullyQualify) { + private static String removeNamespace(Attribute a, Attributes otherAttributes, boolean fullyQualify) { if (fullyQualify) { - return a; + return a.getKey(); } String key = a.getKey(); int index = key.indexOf(":"); if (index == -1) { - return a; + return a.getKey(); } - String newKey = key.substring(index+1); - - if (otherAttributes.hasKey(newKey)) { - // keep disambiguation if necessary - return a; - } - - Attribute newVersion = a.clone(); - newVersion.setKey(newKey); - return newVersion; + return key.substring(index+1); } private ITuple attrToLoc(Attribute a, ISourceLocation file) { From adf9878831de63f84418c74a8a23e4c13662cd8f Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Tue, 28 Nov 2023 15:05:34 +0100 Subject: [PATCH 5/6] keep attribute and tag name capitalization --- pom.xml | 2 +- src/org/rascalmpl/library/lang/xml/IO.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 56bdb47d61..50cb003307 100644 --- a/pom.xml +++ b/pom.xml @@ -405,7 +405,7 @@ org.jsoup jsoup - 1.17.2-SNAPSHOT + 1.17.1 org.jdom diff --git a/src/org/rascalmpl/library/lang/xml/IO.java b/src/org/rascalmpl/library/lang/xml/IO.java index f6712d8e48..e8546fac52 100644 --- a/src/org/rascalmpl/library/lang/xml/IO.java +++ b/src/org/rascalmpl/library/lang/xml/IO.java @@ -67,7 +67,7 @@ public IValue readXML(ISourceLocation loc, IBool fullyQualify, IBool trackOrigin try (InputStream reader = URIResolverRegistry.getInstance().getInputStream(loc)) { Parser xmlParser = Parser.xmlParser() - .settings(new ParseSettings(false, false)) + .settings(new ParseSettings(true, true)) .setTrackPosition(trackOrigins.getValue()) ; From f5a673c829a12a76cb263ca95ef57054d511b48d Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Tue, 28 Nov 2023 16:04:05 +0100 Subject: [PATCH 6/6] really need this version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 50cb003307..56bdb47d61 100644 --- a/pom.xml +++ b/pom.xml @@ -405,7 +405,7 @@ org.jsoup jsoup - 1.17.1 + 1.17.2-SNAPSHOT org.jdom