diff --git a/src/parser/grammar.lalrpop b/src/parser/grammar.lalrpop index 7fcf877ed5..0631056627 100644 --- a/src/parser/grammar.lalrpop +++ b/src/parser/grammar.lalrpop @@ -474,10 +474,15 @@ Bool: bool = { "false" => false, }; -// Strings that support interpolation. +// String-like syntax which supports interpolation. +// Depending on the opening brace, these either parse as strings, or as "symbolic strings", +// which get desugared here to an array of terms. StrChunks: RichTerm = { => { - debug_assert_eq!(start, end); + debug_assert!( + start.is_closed_by(&end), + "Fatal parser error: a string starting with {start:?} should never be closed by {end:?}" + ); let chunks: Vec> = fst.into_iter() .map(StrChunk::Literal) @@ -490,26 +495,35 @@ StrChunks: RichTerm = { .chain(lasts.into_iter()) .collect(); - let mut chunks = if start == StringKind::Multiline { + let chunks = if start.needs_strip_indent() { strip_indent(chunks) - } - else { + } else { chunks }; - chunks.reverse(); - RichTerm::from(Term::StrChunks(chunks)) + if start == StringStartDelimiter::Symbolic { + let terms = chunks.into_iter().map(|chunk| match chunk { + StrChunk::Literal(l) => Term::Str(l).into(), + StrChunk::Expr(e, _) => e, + }).collect(); + RichTerm::from(Term::Array(terms, Default::default())) + } else { + let mut chunks = chunks; + chunks.reverse(); + RichTerm::from(Term::StrChunks(chunks)) + } }, }; -StringStart : StringKind = { - "\"" => StringKind::Standard, - "m%\"" => StringKind::Multiline, +StringStart : StringStartDelimiter = { + "\"" => StringStartDelimiter::Standard, + "m%\"" => StringStartDelimiter::Multiline, + "s%\"" => StringStartDelimiter::Symbolic, }; -StringEnd : StringKind = { - "\"" => StringKind::Standard, - "\"%" => StringKind::Multiline, +StringEnd : StringEndDelimiter = { + "\"" => StringEndDelimiter::Standard, + "\"%" => StringEndDelimiter::Special, }; ChunkLiteral : String = @@ -884,6 +898,7 @@ extern { "\"" => Token::Normal(NormalToken::DoubleQuote), "\"%" => Token::MultiStr(MultiStringToken::End), "m%\"" => Token::Normal(NormalToken::MultiStringStart()), + "s%\"" => Token::Normal(NormalToken::SymbolicStringStart()), "Num" => Token::Normal(NormalToken::Num), "Dyn" => Token::Normal(NormalToken::Dyn), diff --git a/src/parser/lexer.rs b/src/parser/lexer.rs index e0d021c49e..32fb85e82d 100644 --- a/src/parser/lexer.rs +++ b/src/parser/lexer.rs @@ -160,6 +160,8 @@ pub enum NormalToken<'input> { Underscore, #[regex("m(%+)\"", |lex| lex.slice().len())] MultiStringStart(usize), + #[regex("s(%+)\"", |lex| lex.slice().len())] + SymbolicStringStart(usize), #[token("%tag%")] Tag, @@ -605,7 +607,10 @@ impl<'input> Iterator for Lexer<'input> { Some(Normal(NormalToken::DoubleQuote | NormalToken::StrEnumTagBegin)) => { self.enter_str() } - Some(Normal(NormalToken::MultiStringStart(delim_size))) => { + Some(Normal( + NormalToken::MultiStringStart(delim_size) + | NormalToken::SymbolicStringStart(delim_size), + )) => { // for interpolation & closing delimeters we only care about // the number of `%`s (plus the opening `"` or `{`) so we // drop the "kind marker" size here (i.e. the `m` character). diff --git a/src/parser/tests.rs b/src/parser/tests.rs index 4590e330c2..f43acbc822 100644 --- a/src/parser/tests.rs +++ b/src/parser/tests.rs @@ -287,69 +287,100 @@ fn invalid_record_types() { #[test] fn string_lexing() { - assert_eq!( - lex_without_pos("\"Good\" \"strings\""), - Ok(vec![ - Token::Normal(NormalToken::DoubleQuote), - Token::Str(StringToken::Literal("Good")), - Token::Normal(NormalToken::DoubleQuote), - Token::Normal(NormalToken::DoubleQuote), - Token::Str(StringToken::Literal("strings")), - Token::Normal(NormalToken::DoubleQuote), - ]) - ); - - assert_eq!( - lex_without_pos("\"Good\\nEscape\\t\\\"\""), - Ok(vec![ - Token::Normal(NormalToken::DoubleQuote), - Token::Str(StringToken::Literal("Good")), - Token::Str(StringToken::EscapedChar('\n')), - Token::Str(StringToken::Literal("Escape")), - Token::Str(StringToken::EscapedChar('\t')), - Token::Str(StringToken::EscapedChar('\"')), - Token::Normal(NormalToken::DoubleQuote), - ]) - ); - - assert_eq!( - lex_without_pos("\"1 + %{ 1 } + 2\""), - Ok(vec![ - Token::Normal(NormalToken::DoubleQuote), - Token::Str(StringToken::Literal("1 + ")), - Token::Str(StringToken::Interpolation), - Token::Normal(NormalToken::NumLiteral(1.0)), - Token::Normal(NormalToken::RBrace), - Token::Str(StringToken::Literal(" + 2")), - Token::Normal(NormalToken::DoubleQuote), - ]) - ); - - assert_eq!( - lex_without_pos("\"1 + %{ \"%{ 1 }\" } + 2\""), - Ok(vec![ - Token::Normal(NormalToken::DoubleQuote), - Token::Str(StringToken::Literal("1 + ")), - Token::Str(StringToken::Interpolation), - Token::Normal(NormalToken::DoubleQuote), - Token::Str(StringToken::Interpolation), - Token::Normal(NormalToken::NumLiteral(1.0)), - Token::Normal(NormalToken::RBrace), - Token::Normal(NormalToken::DoubleQuote), - Token::Normal(NormalToken::RBrace), - Token::Str(StringToken::Literal(" + 2")), - Token::Normal(NormalToken::DoubleQuote), - ]) - ); - - assert_eq!( - lex_without_pos(r#"m%%""%"%%"#), - Ok(vec![ - Token::Normal(NormalToken::MultiStringStart(4)), - Token::MultiStr(MultiStringToken::Literal("\"%")), - Token::MultiStr(MultiStringToken::End), - ]) - ); + for (name, input, expected) in [ + ( + "simple strings", + r#""Good" "strings""#, + vec![ + Token::Normal(NormalToken::DoubleQuote), + Token::Str(StringToken::Literal("Good")), + Token::Normal(NormalToken::DoubleQuote), + Token::Normal(NormalToken::DoubleQuote), + Token::Str(StringToken::Literal("strings")), + Token::Normal(NormalToken::DoubleQuote), + ], + ), + ( + "valid escape sequence", + r#""Good\nEscape\t\"""#, + vec![ + Token::Normal(NormalToken::DoubleQuote), + Token::Str(StringToken::Literal("Good")), + Token::Str(StringToken::EscapedChar('\n')), + Token::Str(StringToken::Literal("Escape")), + Token::Str(StringToken::EscapedChar('\t')), + Token::Str(StringToken::EscapedChar('\"')), + Token::Normal(NormalToken::DoubleQuote), + ], + ), + ( + "simple interpolation", + r#""1 + %{ 1 } + 2""#, + vec![ + Token::Normal(NormalToken::DoubleQuote), + Token::Str(StringToken::Literal("1 + ")), + Token::Str(StringToken::Interpolation), + Token::Normal(NormalToken::NumLiteral(1.0)), + Token::Normal(NormalToken::RBrace), + Token::Str(StringToken::Literal(" + 2")), + Token::Normal(NormalToken::DoubleQuote), + ], + ), + ( + "nested interpolated strings", + r#""1 + %{ "%{ 1 }" } + 2""#, + vec![ + Token::Normal(NormalToken::DoubleQuote), + Token::Str(StringToken::Literal("1 + ")), + Token::Str(StringToken::Interpolation), + Token::Normal(NormalToken::DoubleQuote), + Token::Str(StringToken::Interpolation), + Token::Normal(NormalToken::NumLiteral(1.0)), + Token::Normal(NormalToken::RBrace), + Token::Normal(NormalToken::DoubleQuote), + Token::Normal(NormalToken::RBrace), + Token::Str(StringToken::Literal(" + 2")), + Token::Normal(NormalToken::DoubleQuote), + ], + ), + ( + "multiline strings only close on delmiter with correct number of %s", + r#"m%%""%"%%"#, + vec![ + Token::Normal(NormalToken::MultiStringStart(4)), + Token::MultiStr(MultiStringToken::Literal("\"%")), + Token::MultiStr(MultiStringToken::End), + ], + ), + ( + "empty symbolic string lexes like multi-line str", + r#"s%""%"#, + vec![ + Token::Normal(NormalToken::SymbolicStringStart(3)), + Token::MultiStr(MultiStringToken::End), + ], + ), + ( + "symbolic string with interpolation", + r#"s%"text %{ 1 } etc."%"#, + vec![ + Token::Normal(NormalToken::SymbolicStringStart(3)), + Token::MultiStr(MultiStringToken::Literal("text ")), + Token::MultiStr(MultiStringToken::Interpolation), + Token::Normal(NormalToken::NumLiteral(1.0)), + Token::Normal(NormalToken::RBrace), + Token::MultiStr(MultiStringToken::Literal(" etc.")), + Token::MultiStr(MultiStringToken::End), + ], + ), + ] { + assert_eq!( + lex_without_pos(input), + Ok(expected), + "Case failed: {}", + name + ) + } } #[test] diff --git a/src/parser/utils.rs b/src/parser/utils.rs index 46fc628f9e..ddbdd6fe4b 100644 --- a/src/parser/utils.rs +++ b/src/parser/utils.rs @@ -23,12 +23,39 @@ use crate::{ types::{TypeF, Types}, }; -/// Distinguish between the standard string separators `"`/`"` and the multi-line string separators -/// `m%"`/`"%` in the parser. +/// Distinguish between the standard string opening delimiter `"`, the multi-line string +/// opening delimter `m%"`, and the symbolic string opening delimiter `s%"`. #[derive(Copy, Clone, Eq, PartialEq, Debug)] -pub enum StringKind { +pub enum StringStartDelimiter { Standard, Multiline, + Symbolic, +} + +impl StringStartDelimiter { + pub fn is_closed_by(&self, close: &StringEndDelimiter) -> bool { + matches!( + (self, close), + (StringStartDelimiter::Standard, StringEndDelimiter::Standard) + | (StringStartDelimiter::Multiline, StringEndDelimiter::Special) + | (StringStartDelimiter::Symbolic, StringEndDelimiter::Special) + ) + } + + pub fn needs_strip_indent(&self) -> bool { + match self { + StringStartDelimiter::Standard => false, + StringStartDelimiter::Multiline | StringStartDelimiter::Symbolic => true, + } + } +} + +/// Distinguish between the standard string closing delimter `"` and the "special" string +/// closing delimeter `"%`. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum StringEndDelimiter { + Standard, + Special, } /// Distinguish between a normal case `id => exp` and a default case `_ => exp`. diff --git a/tests/integration/pass/symbolic-strings.ncl b/tests/integration/pass/symbolic-strings.ncl new file mode 100644 index 0000000000..9e4d7e7ab6 --- /dev/null +++ b/tests/integration/pass/symbolic-strings.ncl @@ -0,0 +1,44 @@ +let {check, ..} = import "lib/assert.ncl" in + +[ + # Static symbolic string + s%"hello, world"% == ["hello, world"], + # Interpolating a string + let s = "test" in + s%"This is a %{s}"% == ["This is a ", "test"], + # Interpolating an interpolated string + let f = "f" in + s%"abc %{"de%{f}"}"% == ["abc ", "def"], + # Interpolating a number + s%"num: %{100}"% == ["num: ", 100], + # Interpolating a bool + s%"bool: %{true}"% == ["bool: ", true], + # Interpolating an array + s%"array: %{[true, 1, "yes"]}"% == ["array: ", [true, 1, "yes"]], + # Interpolating a record + let r = { a = 1, b = false } in + s%"record: %{r}"% == ["record: ", r], + # Interpolating multiple values + let str = "some string" in + let num = 999.999 in + let bool = false in + let array = ["an", "array", 100] in + let record = { a = 1, simple = "yes", record = true } in + let actual = s%" + 1. %{str} + 2. %{num} + 3. %{bool} + 4. %{array} + 5. %{record}"% + in + let expected = [ + "1. ", str, + "\n2. ", num, + "\n3. ", bool, + "\n4. ", array, + "\n5. ", record + ] + in + actual == expected, +] +|> check