From 212296420cd490706a23e1b11ee6c85a2813df61 Mon Sep 17 00:00:00 2001 From: Pascal <50587579+pdulich@users.noreply.github.com> Date: Sat, 11 May 2024 10:15:45 +0200 Subject: [PATCH] YAML: Support other cases of lists of lists (#4187) The commit 264e8e86 introduced support for list of lists of scalars. But this didn't fix lists of lists of other blocks (like mappings). This commit will also fix these cases. The underlying problem was that some tokens were interpreted twice. This was caused by YamlParser.parseFromInput. This method calculates the prefix of a token by using the substring yamlSource[lastEnd:tokenIdx]. Normally, the characters that were newly interpreted should be marked as used by incrementing the index `lastEnd`. But for sequences this index wasn't updated in every case. It was only updated if there was an anchor or an openingBracketIndex. Because of this, the prefix was used for multiple consecutive sequences in other cases. To fix this, update `lastEnd` in every case for sequences. The yaml-parser has inconsistent behaviour when parsing sequences with dashes. If a dash-sequence has indentation the start- and the event-mark point to the same character: the dash. If a dash-sequence has no indentation the event-mark points to the character AFTER the dash. Because of this, the `lastEnd` would get an offset and one which means the YamlParser would skip one character. Introduce a workaround to correct the `lastEnd` if this faulty case. With these changes, every dash is only interpreted once. That means that in the SequenceBuilder.push method the rawPrefix will contain at most one dash. Because of this, the old fix isn't needed anymore. Remove it. Fixes #4176 --- .../java/org/openrewrite/yaml/YamlParser.java | 39 +++++++++---------- .../org/openrewrite/yaml/YamlParserTest.java | 36 +++++++++++++---- 2 files changed, 46 insertions(+), 29 deletions(-) diff --git a/rewrite-yaml/src/main/java/org/openrewrite/yaml/YamlParser.java b/rewrite-yaml/src/main/java/org/openrewrite/yaml/YamlParser.java index c8d97be9b06..5647e18cbea 100644 --- a/rewrite-yaml/src/main/java/org/openrewrite/yaml/YamlParser.java +++ b/rewrite-yaml/src/main/java/org/openrewrite/yaml/YamlParser.java @@ -333,7 +333,10 @@ private Yaml.Documents parseFromInput(Path sourceFile, EncodingDetectingInputStr if (openingBracketIndex != -1) { int startIndex = commentAwareIndexOf(':', fullPrefix) + 1; startBracketPrefix = fullPrefix.substring(startIndex, openingBracketIndex); - lastEnd = event.getEndMark().getIndex(); + } + lastEnd = event.getEndMark().getIndex(); + if (shouldUseYamlParserBugWorkaround(sse)) { + lastEnd--; } blockStack.push(new SequenceBuilder(fmt, startBracketPrefix, anchor)); break; @@ -375,6 +378,17 @@ private Yaml.Documents parseFromInput(Path sourceFile, EncodingDetectingInputStr } } + /* + The yaml-parser library unfortunately returns inconsistent marks. + If the dashes of the sequence have an indentation, the end mark and the start mark point to the dash. + If the dashes of the sequence do not have an indentation, the end mark will point to the character AFTER the dash. + */ + private boolean shouldUseYamlParserBugWorkaround(SequenceStartEvent event) { + int startChar = event.getStartMark().getBuffer()[event.getStartMark().getIndex()]; + int endChar = event.getEndMark().getBuffer()[event.getEndMark().getIndex()]; + return startChar == '-' && endChar != '-'; + } + private Yaml.Anchor buildYamlAnchor(FormatPreservingReader reader, int lastEnd, String eventPrefix, String anchorKey, int eventEndIndex, boolean isForScalar) { int anchorLength = isForScalar ? anchorKey.length() + 1 : anchorKey.length(); String whitespaceAndScalar = reader.prefix( @@ -398,16 +412,7 @@ private Yaml.Anchor buildYamlAnchor(FormatPreservingReader reader, int lastEnd, } private static int commentAwareIndexOf(char target, String s) { - return commentAwareIndexOf(target, s, FindIndexStrategy.FIRST); - } - - /** - * Return the first or last index of the target character that appears in a non-comment portion of the String, - * or -1 if it does not appear. - */ - private static int commentAwareIndexOf(char target, String s, FindIndexStrategy strategy) { boolean inComment = false; - int lastFoundIndex = -1; for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); if (inComment) { @@ -416,21 +421,13 @@ private static int commentAwareIndexOf(char target, String s, FindIndexStrategy } } else { if (c == target) { - if (strategy == FindIndexStrategy.FIRST) { - return i; - } - lastFoundIndex = i; + return i; } else if (c == '#') { inComment = true; } } } - return lastFoundIndex; - } - - private enum FindIndexStrategy { - FIRST, - LAST + return -1; } @Override @@ -527,7 +524,7 @@ public void push(Yaml.Block block) { public void push(Yaml.Block block, @Nullable String commaPrefix) { String rawPrefix = block.getPrefix(); - int dashIndex = commentAwareIndexOf('-', rawPrefix, FindIndexStrategy.LAST); + int dashIndex = commentAwareIndexOf('-', rawPrefix); String entryPrefix; String blockPrefix; boolean hasDash = dashIndex != -1; diff --git a/rewrite-yaml/src/test/java/org/openrewrite/yaml/YamlParserTest.java b/rewrite-yaml/src/test/java/org/openrewrite/yaml/YamlParserTest.java index 72b173f4066..42a3ae292ad 100644 --- a/rewrite-yaml/src/test/java/org/openrewrite/yaml/YamlParserTest.java +++ b/rewrite-yaml/src/test/java/org/openrewrite/yaml/YamlParserTest.java @@ -67,18 +67,38 @@ void fourBytesUnicode() { @Test @Issue("https://github.com/openrewrite/rewrite/issues/4176") - void listOfLists() { + void listsAndListsOfLists() { rewriteRun( yaml( """ root: - listOfLists: - - - a - - b - - - c - - d - - - e - - f + normalListOfScalars: + - a + - b + normalListOfScalarsWithIndentation: + - a + - b + normalListOfMappings: + - a: b + c: d + - e: f + normalListOfSquareBracketLists: + - [ mno, pqr] + - [stu , vwx] + squareList: [x, y, z] + listOfListsOfScalars: + - - a + - b + listOfListsOfScalarsWithIndentation: + - - a + - b + listOfListsOfMappings: + - - a: b + c: d + - e: f + listOfListsOfSquareBracketLists: + - - [mno, pqr ] + - [stu , vwx] """ ) );