From 0ee12bf7717e78d92fbddb8b54052f6a2ce2f4b0 Mon Sep 17 00:00:00 2001 From: Knut Wannheden Date: Thu, 5 Oct 2023 10:55:23 +0200 Subject: [PATCH 1/3] Fix parsing of unicode escape sequences in YAML A YAML scalar value can contain unicode escape sequences like `\u0051`. Currently, this throws off the parser. --- .../org/openrewrite/json/JsonParserTest.java | 14 +++++ .../yaml/FormatPreservingReader.java | 3 +- .../java/org/openrewrite/yaml/YamlParser.java | 51 +++++++++++++------ .../yaml/internal/YamlPrinter.java | 18 +------ .../org/openrewrite/yaml/YamlParserTest.java | 13 +++++ 5 files changed, 64 insertions(+), 35 deletions(-) diff --git a/rewrite-json/src/test/java/org/openrewrite/json/JsonParserTest.java b/rewrite-json/src/test/java/org/openrewrite/json/JsonParserTest.java index 0c355456338..2829a33cf93 100644 --- a/rewrite-json/src/test/java/org/openrewrite/json/JsonParserTest.java +++ b/rewrite-json/src/test/java/org/openrewrite/json/JsonParserTest.java @@ -162,4 +162,18 @@ void multiBytesUnicode() { ) ); } + + @Test + void unicodeEscapes() { + rewriteRun( + json( + """ + { + "nul": "\\u0000", + "reverse-solidus": "\\u005c", + } + """ + ) + ); + } } diff --git a/rewrite-yaml/src/main/java/org/openrewrite/yaml/FormatPreservingReader.java b/rewrite-yaml/src/main/java/org/openrewrite/yaml/FormatPreservingReader.java index 14c6dfa7621..187c914d58b 100755 --- a/rewrite-yaml/src/main/java/org/openrewrite/yaml/FormatPreservingReader.java +++ b/rewrite-yaml/src/main/java/org/openrewrite/yaml/FormatPreservingReader.java @@ -16,7 +16,6 @@ package org.openrewrite.yaml; import lombok.Getter; -import org.openrewrite.internal.lang.NonNull; import org.yaml.snakeyaml.events.Event; import java.io.IOException; @@ -115,7 +114,7 @@ public String readStringFromBuffer(int start, int end) { } @Override - public int read(@NonNull char[] cbuf, int off, int len) throws IOException { + public int read(char[] cbuf, int off, int len) throws IOException { int read = delegate.read(cbuf, 
off, len); if (read > 0) { buffer.ensureCapacity(buffer.size() + read); diff --git a/rewrite-yaml/src/main/java/org/openrewrite/yaml/YamlParser.java b/rewrite-yaml/src/main/java/org/openrewrite/yaml/YamlParser.java index 75a3ec1eec2..6245fcf1799 100644 --- a/rewrite-yaml/src/main/java/org/openrewrite/yaml/YamlParser.java +++ b/rewrite-yaml/src/main/java/org/openrewrite/yaml/YamlParser.java @@ -17,7 +17,10 @@ import lombok.Getter; import org.intellij.lang.annotations.Language; -import org.openrewrite.*; +import org.openrewrite.ExecutionContext; +import org.openrewrite.FileAttributes; +import org.openrewrite.InMemoryExecutionContext; +import org.openrewrite.SourceFile; import org.openrewrite.internal.EncodingDetectingInputStream; import org.openrewrite.internal.ListUtils; import org.openrewrite.internal.lang.Nullable; @@ -36,7 +39,6 @@ import org.yaml.snakeyaml.scanner.ScannerImpl; import java.io.IOException; -import java.io.StringReader; import java.io.UncheckedIOException; import java.nio.file.Path; import java.util.*; @@ -186,15 +188,40 @@ private Yaml.Documents parseFromInput(Path sourceFile, EncodingDetectingInputStr newLine = ""; ScalarEvent scalar = (ScalarEvent) event; - String scalarValue = scalar.getValue(); - if (variableByUuid.containsKey(scalarValue)) { - scalarValue = variableByUuid.get(scalarValue); - } Yaml.Anchor anchor = null; + int valueStart; if (scalar.getAnchor() != null) { anchor = buildYamlAnchor(reader, lastEnd, fmt, scalar.getAnchor(), event.getEndMark().getIndex(), true); anchors.put(scalar.getAnchor(), anchor); + valueStart = lastEnd + fmt.length() + scalar.getAnchor().length() + 1 + anchor.getPostfix().length(); + } else { + valueStart = lastEnd + fmt.length(); + } + + String scalarValue; + switch (scalar.getScalarStyle()) { + case DOUBLE_QUOTED: + case SINGLE_QUOTED: + scalarValue = reader.readStringFromBuffer(valueStart + 1, event.getEndMark().getIndex() - 2); + break; + case PLAIN: + scalarValue = 
reader.readStringFromBuffer(valueStart, event.getEndMark().getIndex() - 1); + break; + case LITERAL: + scalarValue = reader.readStringFromBuffer(valueStart + 1, event.getEndMark().getIndex() - 1); + if (scalarValue.endsWith("\n")) { + newLine = "\n"; + scalarValue = scalarValue.substring(0, scalarValue.length() - 1); + } + break; + case FOLDED: + default: + scalarValue = reader.readStringFromBuffer(valueStart + 1, event.getEndMark().getIndex() - 1); + break; + } + if (variableByUuid.containsKey(scalarValue)) { + scalarValue = variableByUuid.get(scalarValue); } Yaml.Scalar.Style style; @@ -207,22 +234,13 @@ private Yaml.Documents parseFromInput(Path sourceFile, EncodingDetectingInputStr break; case LITERAL: style = Yaml.Scalar.Style.LITERAL; - scalarValue = reader.readStringFromBuffer(event.getStartMark().getIndex() + 1, event.getEndMark().getIndex() - 1); - if (scalarValue.endsWith("\n")) { - newLine = "\n"; - scalarValue = scalarValue.substring(0, scalarValue.length() - 1); - } break; case FOLDED: style = Yaml.Scalar.Style.FOLDED; - scalarValue = reader.readStringFromBuffer(event.getStartMark().getIndex() + 1, event.getEndMark().getIndex() - 1); break; case PLAIN: default: style = Yaml.Scalar.Style.PLAIN; - if (!scalarValue.startsWith("@") && event.getStartMark().getIndex() >= reader.getBufferIndex()) { - scalarValue = reader.readStringFromBuffer(event.getStartMark().getIndex(), event.getEndMark().getIndex() - 1); - } break; } BlockBuilder builder = blockStack.isEmpty() ? 
null : blockStack.peek(); @@ -347,7 +365,8 @@ private Yaml.Anchor buildYamlAnchor(FormatPreservingReader reader, int lastEnd, lastEnd + eventPrefix.length() + anchorLength, eventEndIndex); StringBuilder postFix = new StringBuilder(); - for (char c : whitespaceAndScalar.toCharArray()) { + for (int i = 0; i < whitespaceAndScalar.length(); i++) { + char c = whitespaceAndScalar.charAt(i); if (c != ' ' && c != '\t') { break; } diff --git a/rewrite-yaml/src/main/java/org/openrewrite/yaml/internal/YamlPrinter.java b/rewrite-yaml/src/main/java/org/openrewrite/yaml/internal/YamlPrinter.java index 9a0426a9a2b..8c56c1752cd 100755 --- a/rewrite-yaml/src/main/java/org/openrewrite/yaml/internal/YamlPrinter.java +++ b/rewrite-yaml/src/main/java/org/openrewrite/yaml/internal/YamlPrinter.java @@ -118,23 +118,7 @@ public Yaml visitScalar(Yaml.Scalar scalar, PrintOutputCapture<P>
p) { switch (scalar.getStyle()) { case DOUBLE_QUOTED: p.append('"') - .append(scalar.getValue() - .replace("\\", "\\\\") - .replace("\0", "\\0") - .replace("\u0007", "\\a") - .replace("\b", "\\b") - .replace("\t", "\\t") - .replace("\n", "\\n") - .replace("\u000B", "\\v") - .replace("\f", "\\f") - .replace("\r", "\\r") - .replace("\u001B", "\\e") - .replace("\"", "\\\"") - .replace("\u0085", "\\N") - .replace("\u00A0", "\\_") - .replace("\u2028", "\\L") - .replace("\u2029", "\\P") - ) + .append(scalar.getValue()) .append('"'); break; case SINGLE_QUOTED: diff --git a/rewrite-yaml/src/test/java/org/openrewrite/yaml/YamlParserTest.java b/rewrite-yaml/src/test/java/org/openrewrite/yaml/YamlParserTest.java index 5a42250bd0b..1daf691cbc0 100644 --- a/rewrite-yaml/src/test/java/org/openrewrite/yaml/YamlParserTest.java +++ b/rewrite-yaml/src/test/java/org/openrewrite/yaml/YamlParserTest.java @@ -116,4 +116,17 @@ void newlinesCombinedWithUnniCode() { ) ); } + + @Test + void unicodeEscapes() { + rewriteRun( + yaml( + """ + root: + "nul": "\\u0000" + "reverse-solidus": "\\u005c" + """ + ) + ); + } } From 918662acc031e493c2c7838f02ca5e7505027615 Mon Sep 17 00:00:00 2001 From: Knut Wannheden Date: Thu, 5 Oct 2023 11:11:36 +0200 Subject: [PATCH 2/3] Fix `MappingTest#escapeSequences()` --- .../java/org/openrewrite/yaml/tree/MappingTest.java | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/rewrite-yaml/src/test/java/org/openrewrite/yaml/tree/MappingTest.java b/rewrite-yaml/src/test/java/org/openrewrite/yaml/tree/MappingTest.java index 03709c13f24..899e1c598e9 100644 --- a/rewrite-yaml/src/test/java/org/openrewrite/yaml/tree/MappingTest.java +++ b/rewrite-yaml/src/test/java/org/openrewrite/yaml/tree/MappingTest.java @@ -400,34 +400,30 @@ void mappingAnchor() { " '\\n' ", " '\n' ", " \n ", - " \"\\.\" ", " \"\\0\" ", " \"\\0\" ", " \"\\a\" ", " \"\\a\" ", " \"\\b\" ", - " \"\b\" ", " \"\\t\" ", " \"\t\" ", " \"\\n\" ", " \"\n\" ", " \"\\v\" ", " \"\\f\" 
", - " \"\f\" ", " \"\\r\" ", " \"\r\" ", " \"\\e\" ", " \"\\\\\" ", - " \"\\\" ", " \"\\\"\" ", - " \"\"\" ", " \"\\N\" ", " \"\\_\" ", " \"\\L\" ", " \"\\P\" ", }) - void escapeSequences() { + void escapeSequences(String str) { rewriteRun( - yaml("escaped-value: $string")); + yaml("escaped-value: $string".replace("$string", str)) + ); } } From 47a0489f0feb10e5a60068c4461c5046112502a3 Mon Sep 17 00:00:00 2001 From: Knut Wannheden Date: Thu, 5 Oct 2023 11:14:07 +0200 Subject: [PATCH 3/3] Polish test --- .../src/test/java/org/openrewrite/yaml/tree/MappingTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rewrite-yaml/src/test/java/org/openrewrite/yaml/tree/MappingTest.java b/rewrite-yaml/src/test/java/org/openrewrite/yaml/tree/MappingTest.java index 899e1c598e9..972e3fd2a36 100644 --- a/rewrite-yaml/src/test/java/org/openrewrite/yaml/tree/MappingTest.java +++ b/rewrite-yaml/src/test/java/org/openrewrite/yaml/tree/MappingTest.java @@ -423,7 +423,7 @@ void mappingAnchor() { }) void escapeSequences(String str) { rewriteRun( - yaml("escaped-value: $string".replace("$string", str)) + yaml("escaped-value: " + str) ); } }