Skip to content

Commit

Permalink
yaml: block scalar multiline improvements.
Browse files (browse the repository at this point in the history)
  • Loading branch information
yaythomas authored and alecthomas committed Sep 10, 2020
1 parent d65f279 commit 290ff86
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 11 deletions.
49 changes: 49 additions & 0 deletions lexers/testdata/yaml.actual
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,55 @@ literal_block_with_keep_chomping: |+

The literal continues until de-dented, and the leading indentation is
stripped.

a: |
multiline literal
line 2
b: >
multiline: folded
line 2
c: |-
multiline # literal strip
line 2
d: >-
multiline folded strip
line 2: test

# not a comment
indented by 1
e: |+
multiline literal keep
line: 2
# this is a comment
f: >+
multiline folded keep one space
line 2
g: test

block_scalars_with_indent:
a: |
multiline literal
line 2
b: >
multiline: folded
line 2
c: |-
multiline # literal strip
line 2 6 leading spaces

d: >-
multiline folded strip
line 2: test
# not a comment
e: |+
multiline literal keep
line: 2
# this is a comment
f: >+
multiline folded keep
line 2
g: test

####################
# COLLECTION TYPES #
####################
Expand Down
117 changes: 107 additions & 10 deletions lexers/testdata/yaml.expected
Original file line number Diff line number Diff line change
Expand Up @@ -189,23 +189,116 @@
{"type":"NameTag","value":"literal_block"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralStringDoc","value":"|\n This entire block of text will be the value of the 'literal_block' key,\n with line breaks being preserved.\n\n The literal continues until de-dented, and the leading indentation is\n stripped.\n\n Any lines that are 'more-indented' keep the rest of their indentation -\n these lines will be indented by 4 spaces."},
{"type":"TextWhitespace","value":"\n"},
{"type":"Punctuation","value":"|"},
{"type":"LiteralStringDoc","value":"\n This entire block of text will be the value of the 'literal_block' key,\n with line breaks being preserved.\n\n The literal continues until de-dented, and the leading indentation is\n stripped.\n\n Any lines that are 'more-indented' keep the rest of their indentation -\n these lines will be indented by 4 spaces."},
{"type":"TextWhitespace","value":" \n"},
{"type":"NameTag","value":"folded_style"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralStringDoc","value":"\u003e\n This entire block of text will be the value of 'folded_style', but this\n time, all newlines will be replaced with a single space.\n\n Blank lines, like above, are converted to a newline character.\n\n 'More-indented' lines keep their newlines, too -\n this text will appear over two lines."},
{"type":"TextWhitespace","value":"\n"},
{"type":"Punctuation","value":"\u003e"},
{"type":"LiteralStringDoc","value":"\n This entire block of text will be the value of 'folded_style', but this\n time, all newlines will be replaced with a single space.\n\n Blank lines, like above, are converted to a newline character.\n\n 'More-indented' lines keep their newlines, too -\n this text will appear over two lines."},
{"type":"TextWhitespace","value":" \n"},
{"type":"NameTag","value":"literal_block_with_strip_chomping"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralStringDoc","value":"|-\n This entire block of text will be the value of the 'literal_block' key,\n with line breaks being preserved and the strip chomping indicator.\n\n The literal continues until de-dented, and the leading indentation is\n stripped.\n\n Any lines that are 'more-indented' keep the rest of their indentation -\n these lines will be indented by 4 spaces."},
{"type":"TextWhitespace","value":"\n"},
{"type":"Punctuation","value":"|-"},
{"type":"LiteralStringDoc","value":"\n This entire block of text will be the value of the 'literal_block' key,\n with line breaks being preserved and the strip chomping indicator.\n\n The literal continues until de-dented, and the leading indentation is\n stripped.\n\n Any lines that are 'more-indented' keep the rest of their indentation -\n these lines will be indented by 4 spaces."},
{"type":"TextWhitespace","value":" \n"},
{"type":"NameTag","value":"literal_block_with_keep_chomping"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralStringDoc","value":"|+\n This entire block of text will be the value of the 'literal_block' key,\n with line breaks being preserved and the keep chomping indicator.\n\n The literal continues until de-dented, and the leading indentation is\n stripped."},
{"type":"Punctuation","value":"|+"},
{"type":"LiteralStringDoc","value":"\n This entire block of text will be the value of the 'literal_block' key,\n with line breaks being preserved and the keep chomping indicator.\n\n The literal continues until de-dented, and the leading indentation is\n stripped."},
{"type":"TextWhitespace","value":" \n\n"},
{"type":"NameTag","value":"a"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"|"},
{"type":"LiteralStringDoc","value":"\n multiline literal\n line 2"},
{"type":"TextWhitespace","value":" \n"},
{"type":"NameTag","value":"b"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"\u003e"},
{"type":"LiteralStringDoc","value":"\n multiline: folded\n line 2"},
{"type":"TextWhitespace","value":" \n"},
{"type":"NameTag","value":"c"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"|-"},
{"type":"LiteralStringDoc","value":"\n multiline # literal strip\n line 2"},
{"type":"TextWhitespace","value":" \n"},
{"type":"NameTag","value":"d"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"\u003e-"},
{"type":"LiteralStringDoc","value":"\n multiline folded strip\n line 2: test\n\n # not a comment\n indented by 1"},
{"type":"TextWhitespace","value":" \n"},
{"type":"NameTag","value":"e"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"|+"},
{"type":"LiteralStringDoc","value":"\n multiline literal keep\n line: 2"},
{"type":"TextWhitespace","value":" \n"},
{"type":"Comment","value":"# this is a comment"},
{"type":"TextWhitespace","value":"\n"},
{"type":"NameTag","value":"f"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"\u003e+"},
{"type":"LiteralStringDoc","value":"\n multiline folded keep one space\n line 2"},
{"type":"TextWhitespace","value":" \n"},
{"type":"NameTag","value":"g"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Literal","value":"test"},
{"type":"TextWhitespace","value":"\n\n"},
{"type":"NameTag","value":"block_scalars_with_indent"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":"\n "},
{"type":"NameTag","value":"a"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"|"},
{"type":"LiteralStringDoc","value":"\n multiline literal\n line 2"},
{"type":"TextWhitespace","value":" \n "},
{"type":"NameTag","value":"b"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"\u003e"},
{"type":"LiteralStringDoc","value":"\n multiline: folded\n line 2"},
{"type":"TextWhitespace","value":" \n "},
{"type":"NameTag","value":"c"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"|-"},
{"type":"LiteralStringDoc","value":"\n multiline # literal strip\n line 2 6 leading spaces"},
{"type":"TextWhitespace","value":" \n\n "},
{"type":"NameTag","value":"d"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"\u003e-"},
{"type":"LiteralStringDoc","value":"\n multiline folded strip\n line 2: test\n # not a comment"},
{"type":"TextWhitespace","value":" \n "},
{"type":"NameTag","value":"e"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"|+"},
{"type":"LiteralStringDoc","value":"\n multiline literal keep\n line: 2"},
{"type":"TextWhitespace","value":" \n "},
{"type":"Comment","value":"# this is a comment"},
{"type":"TextWhitespace","value":"\n "},
{"type":"NameTag","value":"f"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"\u003e+"},
{"type":"LiteralStringDoc","value":"\n multiline folded keep\n line 2"},
{"type":"TextWhitespace","value":" \n "},
{"type":"NameTag","value":"g"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Literal","value":"test"},
{"type":"TextWhitespace","value":"\n\n"},
{"type":"Comment","value":"####################"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Comment","value":"# COLLECTION TYPES #"},
Expand Down Expand Up @@ -248,8 +341,9 @@
{"type":"TextWhitespace","value":"\n"},
{"type":"Punctuation","value":"?"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralStringDoc","value":"|\n This is a key\n that has multiple lines"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Punctuation","value":"|"},
{"type":"LiteralStringDoc","value":"\n This is a key\n that has multiple lines"},
{"type":"TextWhitespace","value":" \n"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Literal","value":"and this is its value"},
Expand Down Expand Up @@ -514,7 +608,10 @@
{"type":"TextWhitespace","value":" "},
{"type":"CommentPreproc","value":"!!binary"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralStringDoc","value":"|\n R0lGODlhDAAMAIQAAP//9/X17unp5WZmZgAAAOfn515eXvPz7Y6OjuDg4J+fn5\n OTk6enp56enmlpaWNjY6Ojo4SEhP/++f/++f/++f/++f/++f/++f/++f/++f/+\n +f/++f/++f/++f/++f/++SH+Dk1hZGUgd2l0aCBHSU1QACwAAAAADAAMAAAFLC\n AgjoEwnuNAFOhpEMTRiggcz4BNJHrv/zCFcLiwMWYNG84BwwEeECcgggoBADs=\n\n# YAML also has a set type, which looks like this:"},
{"type":"Punctuation","value":"|"},
{"type":"LiteralStringDoc","value":"\n R0lGODlhDAAMAIQAAP//9/X17unp5WZmZgAAAOfn515eXvPz7Y6OjuDg4J+fn5\n OTk6enp56enmlpaWNjY6Ojo4SEhP/++f/++f/++f/++f/++f/++f/++f/++f/+\n +f/++f/++f/++f/++f/++SH+Dk1hZGUgd2l0aCBHSU1QACwAAAAADAAMAAAFLC\n AgjoEwnuNAFOhpEMTRiggcz4BNJHrv/zCFcLiwMWYNG84BwwEeECcgggoBADs="},
{"type":"TextWhitespace","value":" \n\n"},
{"type":"Comment","value":"# YAML also has a set type, which looks like this:"},
{"type":"TextWhitespace","value":"\n"},
{"type":"NameTag","value":"set"},
{"type":"Punctuation","value":":"},
Expand Down
2 changes: 1 addition & 1 deletion lexers/y/yaml.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@ var YAML = internal.Register(MustNewLexer(
{`&[^\s]+`, CommentPreproc, nil},
{`\*[^\s]+`, CommentPreproc, nil},
{`^%include\s+[^\n\r]+`, CommentPreproc, nil},
{`[>|](?:[+-])?\s(?:^(?:[ \n]{1})+.*\n?)*$`, StringDoc, nil},
Include("key"),
Include("value"),
{`[?:,\[\]]`, Punctuation, nil},
{`.`, Text, nil},
},
"value": {
{`([>|](?:[+-])?)(\n(^ {1,})(?:.*\n*(?:^\3 *).*)*)`, ByGroups(Punctuation, StringDoc, Whitespace), nil},
{Words(``, `\b`, "true", "True", "TRUE", "false", "False", "FALSE", "null",
"y", "Y", "yes", "Yes", "YES", "n", "N", "no", "No", "NO",
"on", "On", "ON", "off", "Off", "OFF"), KeywordConstant, nil},
Expand Down

0 comments on commit 290ff86

Please sign in to comment.