Symbolic strings (#994)

* Decouple string start & end delimiters in parser logic

Since both multiline and symbolic strings will end with `"%`, it will no
longer be true that the start & end delimiters of a string must match. It
will also no longer be the case that we can infer whether or not
indentation must be stripped from the starting delimiter.

This commit separates the opening and closing delimiters into two
different types, and provides a more explicit API for working with
them, which will be resilient to the addition of a new
`StringStartDelimiter` in a later commit.
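
A minimal standalone Rust sketch of the idea (abbreviated names; the full enums land in src/parser/utils.rs below): matching a closing delimiter against an opening one becomes a relation between two types rather than an equality check on one.

    // Sketch only: `"%` is shared by multiline and symbolic strings, so an
    // end delimiter no longer identifies a unique start delimiter.
    enum Start { Standard, Multiline, Symbolic }
    enum End { Standard, Special }

    fn is_closed_by(start: &Start, end: &End) -> bool {
        matches!(
            (start, end),
            (Start::Standard, End::Standard)
                | (Start::Multiline | Start::Symbolic, End::Special)
        )
    }

    fn main() {
        // Both `m%"` and `s%"` are closed by the same `"%` token.
        assert!(is_closed_by(&Start::Multiline, &End::Special));
        assert!(is_closed_by(&Start::Symbolic, &End::Special));
        assert!(!is_closed_by(&Start::Standard, &End::Special));
    }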

* Desugar symbolic strings to arrays

This commit implements the symbolic string syntax `s%"..."%`.

These strings use the same parsing rules as multiline strings, but
are then desugared to `Term::Array`s rather than `Term::StrChunks`.
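
For illustration, a hedged sketch of that desugaring with simplified stand-ins for `StrChunk<RichTerm>` and `Term` (not the actual grammar action, which appears in src/parser/grammar.lalrpop below):

    #[derive(Debug, PartialEq)]
    enum Term { Str(String), Num(f64), Array(Vec<Term>) }

    // A parsed string body is a sequence of literal and interpolated chunks.
    enum Chunk { Literal(String), Expr(Term) }

    // Symbolic strings keep every chunk as an array element instead of
    // concatenating the chunks into a single string.
    fn desugar_symbolic(chunks: Vec<Chunk>) -> Term {
        Term::Array(
            chunks
                .into_iter()
                .map(|chunk| match chunk {
                    Chunk::Literal(l) => Term::Str(l),
                    Chunk::Expr(e) => e,
                })
                .collect(),
        )
    }

    fn main() {
        // s%"num: %{100}"% desugars to the array ["num: ", 100].
        let chunks = vec![
            Chunk::Literal("num: ".to_string()),
            Chunk::Expr(Term::Num(100.0)),
        ];
        assert_eq!(
            desugar_symbolic(chunks),
            Term::Array(vec![Term::Str("num: ".to_string()), Term::Num(100.0)])
        );
    }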

* `StringEndDelimiter::Multiline` -> `StringEndDelimiter::Special`
matthew-healy authored Dec 16, 2022
1 parent 0e90872 commit c30ad1f
Showing 5 changed files with 202 additions and 80 deletions.
41 changes: 28 additions & 13 deletions src/parser/grammar.lalrpop
@@ -474,10 +474,15 @@ Bool: bool = {
"false" => false,
};

-// Strings that support interpolation.
+// String-like syntax which supports interpolation.
+// Depending on the opening delimiter, these either parse as strings, or as "symbolic strings",
+// which get desugared here to an array of terms.
StrChunks: RichTerm = {
<start: StringStart> <fst: ChunkLiteral?> <chunks: (ChunkExpr+ChunkLiteral)*> <lasts:ChunkExpr*> <end: StringEnd> => {
-debug_assert_eq!(start, end);
+debug_assert!(
+start.is_closed_by(&end),
+"Fatal parser error: a string starting with {start:?} should never be closed by {end:?}"
+);

let chunks: Vec<StrChunk<RichTerm>> = fst.into_iter()
.map(StrChunk::Literal)
@@ -490,26 +495,35 @@
.chain(lasts.into_iter())
.collect();

-let mut chunks = if start == StringKind::Multiline {
+let chunks = if start.needs_strip_indent() {
strip_indent(chunks)
-}
-else {
+} else {
chunks
};
-chunks.reverse();

-RichTerm::from(Term::StrChunks(chunks))
+if start == StringStartDelimiter::Symbolic {
+let terms = chunks.into_iter().map(|chunk| match chunk {
+StrChunk::Literal(l) => Term::Str(l).into(),
+StrChunk::Expr(e, _) => e,
+}).collect();
+RichTerm::from(Term::Array(terms, Default::default()))
+} else {
+let mut chunks = chunks;
+chunks.reverse();
+RichTerm::from(Term::StrChunks(chunks))
+}
},
};

-StringStart : StringKind = {
-"\"" => StringKind::Standard,
-"m%\"" => StringKind::Multiline,
+StringStart : StringStartDelimiter = {
+"\"" => StringStartDelimiter::Standard,
+"m%\"" => StringStartDelimiter::Multiline,
+"s%\"" => StringStartDelimiter::Symbolic,
};

-StringEnd : StringKind = {
-"\"" => StringKind::Standard,
-"\"%" => StringKind::Multiline,
+StringEnd : StringEndDelimiter = {
+"\"" => StringEndDelimiter::Standard,
+"\"%" => StringEndDelimiter::Special,
};

ChunkLiteral : String =
@@ -884,6 +898,7 @@ extern {
"\"" => Token::Normal(NormalToken::DoubleQuote),
"\"%" => Token::MultiStr(MultiStringToken::End),
"m%\"" => Token::Normal(NormalToken::MultiStringStart(<usize>)),
"s%\"" => Token::Normal(NormalToken::SymbolicStringStart(<usize>)),

"Num" => Token::Normal(NormalToken::Num),
"Dyn" => Token::Normal(NormalToken::Dyn),
7 changes: 6 additions & 1 deletion src/parser/lexer.rs
@@ -160,6 +160,8 @@ pub enum NormalToken<'input> {
Underscore,
#[regex("m(%+)\"", |lex| lex.slice().len())]
MultiStringStart(usize),
#[regex("s(%+)\"", |lex| lex.slice().len())]
SymbolicStringStart(usize),

#[token("%tag%")]
Tag,
@@ -605,7 +607,10 @@ impl<'input> Iterator for Lexer<'input> {
Some(Normal(NormalToken::DoubleQuote | NormalToken::StrEnumTagBegin)) => {
self.enter_str()
}
-Some(Normal(NormalToken::MultiStringStart(delim_size))) => {
+Some(Normal(
+NormalToken::MultiStringStart(delim_size)
+| NormalToken::SymbolicStringStart(delim_size),
+)) => {
// for interpolation & closing delimiters we only care about
// the number of `%`s (plus the opening `"` or `{`) so we
// drop the "kind marker" size here (i.e. the `m` or `s` character).
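
To make the size bookkeeping concrete, a hypothetical helper (not code from this commit): an opener such as `m%%"` or `s%%"` consists of one kind marker, some number of `%`s, and a `"`, and only the `%` count matters when matching the corresponding interpolation and closing sequences.

    // Hypothetical: drop the kind marker (`m` or `s`) and the `"` to get the
    // number of `%`s the interpolation and closing sequences must carry.
    fn percent_count(opener_len: usize) -> usize {
        opener_len - 2
    }

    fn main() {
        // `s%%"` lexes as SymbolicStringStart(4): two `%`s, so the string is
        // closed by `"%%` and interpolated with `%%{`.
        assert_eq!(percent_count(r#"s%%""#.len()), 2);
    }
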
157 changes: 94 additions & 63 deletions src/parser/tests.rs
@@ -287,69 +287,100 @@ fn invalid_record_types() {

#[test]
fn string_lexing() {
-assert_eq!(
-lex_without_pos("\"Good\" \"strings\""),
-Ok(vec![
-Token::Normal(NormalToken::DoubleQuote),
-Token::Str(StringToken::Literal("Good")),
-Token::Normal(NormalToken::DoubleQuote),
-Token::Normal(NormalToken::DoubleQuote),
-Token::Str(StringToken::Literal("strings")),
-Token::Normal(NormalToken::DoubleQuote),
-])
-);
-
-assert_eq!(
-lex_without_pos("\"Good\\nEscape\\t\\\"\""),
-Ok(vec![
-Token::Normal(NormalToken::DoubleQuote),
-Token::Str(StringToken::Literal("Good")),
-Token::Str(StringToken::EscapedChar('\n')),
-Token::Str(StringToken::Literal("Escape")),
-Token::Str(StringToken::EscapedChar('\t')),
-Token::Str(StringToken::EscapedChar('\"')),
-Token::Normal(NormalToken::DoubleQuote),
-])
-);
-
-assert_eq!(
-lex_without_pos("\"1 + %{ 1 } + 2\""),
-Ok(vec![
-Token::Normal(NormalToken::DoubleQuote),
-Token::Str(StringToken::Literal("1 + ")),
-Token::Str(StringToken::Interpolation),
-Token::Normal(NormalToken::NumLiteral(1.0)),
-Token::Normal(NormalToken::RBrace),
-Token::Str(StringToken::Literal(" + 2")),
-Token::Normal(NormalToken::DoubleQuote),
-])
-);
-
-assert_eq!(
-lex_without_pos("\"1 + %{ \"%{ 1 }\" } + 2\""),
-Ok(vec![
-Token::Normal(NormalToken::DoubleQuote),
-Token::Str(StringToken::Literal("1 + ")),
-Token::Str(StringToken::Interpolation),
-Token::Normal(NormalToken::DoubleQuote),
-Token::Str(StringToken::Interpolation),
-Token::Normal(NormalToken::NumLiteral(1.0)),
-Token::Normal(NormalToken::RBrace),
-Token::Normal(NormalToken::DoubleQuote),
-Token::Normal(NormalToken::RBrace),
-Token::Str(StringToken::Literal(" + 2")),
-Token::Normal(NormalToken::DoubleQuote),
-])
-);
-
-assert_eq!(
-lex_without_pos(r#"m%%""%"%%"#),
-Ok(vec![
-Token::Normal(NormalToken::MultiStringStart(4)),
-Token::MultiStr(MultiStringToken::Literal("\"%")),
-Token::MultiStr(MultiStringToken::End),
-])
-);
+for (name, input, expected) in [
+(
+"simple strings",
+r#""Good" "strings""#,
+vec![
+Token::Normal(NormalToken::DoubleQuote),
+Token::Str(StringToken::Literal("Good")),
+Token::Normal(NormalToken::DoubleQuote),
+Token::Normal(NormalToken::DoubleQuote),
+Token::Str(StringToken::Literal("strings")),
+Token::Normal(NormalToken::DoubleQuote),
+],
+),
+(
+"valid escape sequence",
+r#""Good\nEscape\t\"""#,
+vec![
+Token::Normal(NormalToken::DoubleQuote),
+Token::Str(StringToken::Literal("Good")),
+Token::Str(StringToken::EscapedChar('\n')),
+Token::Str(StringToken::Literal("Escape")),
+Token::Str(StringToken::EscapedChar('\t')),
+Token::Str(StringToken::EscapedChar('\"')),
+Token::Normal(NormalToken::DoubleQuote),
+],
+),
+(
+"simple interpolation",
+r#""1 + %{ 1 } + 2""#,
+vec![
+Token::Normal(NormalToken::DoubleQuote),
+Token::Str(StringToken::Literal("1 + ")),
+Token::Str(StringToken::Interpolation),
+Token::Normal(NormalToken::NumLiteral(1.0)),
+Token::Normal(NormalToken::RBrace),
+Token::Str(StringToken::Literal(" + 2")),
+Token::Normal(NormalToken::DoubleQuote),
+],
+),
+(
+"nested interpolated strings",
+r#""1 + %{ "%{ 1 }" } + 2""#,
+vec![
+Token::Normal(NormalToken::DoubleQuote),
+Token::Str(StringToken::Literal("1 + ")),
+Token::Str(StringToken::Interpolation),
+Token::Normal(NormalToken::DoubleQuote),
+Token::Str(StringToken::Interpolation),
+Token::Normal(NormalToken::NumLiteral(1.0)),
+Token::Normal(NormalToken::RBrace),
+Token::Normal(NormalToken::DoubleQuote),
+Token::Normal(NormalToken::RBrace),
+Token::Str(StringToken::Literal(" + 2")),
+Token::Normal(NormalToken::DoubleQuote),
+],
+),
+(
+"multiline strings only close on delimiter with correct number of %s",
+r#"m%%""%"%%"#,
+vec![
+Token::Normal(NormalToken::MultiStringStart(4)),
+Token::MultiStr(MultiStringToken::Literal("\"%")),
+Token::MultiStr(MultiStringToken::End),
+],
+),
+(
+"empty symbolic string lexes like multi-line str",
+r#"s%""%"#,
+vec![
+Token::Normal(NormalToken::SymbolicStringStart(3)),
+Token::MultiStr(MultiStringToken::End),
+],
+),
+(
+"symbolic string with interpolation",
+r#"s%"text %{ 1 } etc."%"#,
+vec![
+Token::Normal(NormalToken::SymbolicStringStart(3)),
+Token::MultiStr(MultiStringToken::Literal("text ")),
+Token::MultiStr(MultiStringToken::Interpolation),
+Token::Normal(NormalToken::NumLiteral(1.0)),
+Token::Normal(NormalToken::RBrace),
+Token::MultiStr(MultiStringToken::Literal(" etc.")),
+Token::MultiStr(MultiStringToken::End),
+],
+),
+] {
+assert_eq!(
+lex_without_pos(input),
+Ok(expected),
+"Case failed: {}",
+name
+)
+}
}

#[test]
33 changes: 30 additions & 3 deletions src/parser/utils.rs
@@ -23,12 +23,39 @@ use crate::{
types::{TypeF, Types},
};

-/// Distinguish between the standard string separators `"`/`"` and the multi-line string separators
-/// `m%"`/`"%` in the parser.
+/// Distinguish between the standard string opening delimiter `"`, the multi-line string
+/// opening delimiter `m%"`, and the symbolic string opening delimiter `s%"`.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
-pub enum StringKind {
+pub enum StringStartDelimiter {
Standard,
Multiline,
+Symbolic,
}

+impl StringStartDelimiter {
+pub fn is_closed_by(&self, close: &StringEndDelimiter) -> bool {
+matches!(
+(self, close),
+(StringStartDelimiter::Standard, StringEndDelimiter::Standard)
+| (StringStartDelimiter::Multiline, StringEndDelimiter::Special)
+| (StringStartDelimiter::Symbolic, StringEndDelimiter::Special)
+)
+}
+
+pub fn needs_strip_indent(&self) -> bool {
+match self {
+StringStartDelimiter::Standard => false,
+StringStartDelimiter::Multiline | StringStartDelimiter::Symbolic => true,
+}
+}
+}
+
+/// Distinguish between the standard string closing delimiter `"` and the "special" string
+/// closing delimiter `"%`.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub enum StringEndDelimiter {
+Standard,
+Special,
+}

/// Distinguish between a normal case `id => exp` and a default case `_ => exp`.
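
A usage sketch of the new API (hypothetical caller code, assuming the two enums above are in scope; it mirrors how the grammar action consumes them):

    fn main() {
        let start = StringStartDelimiter::Symbolic;
        // `s%"` is closed by the shared `"%` token, not by a bare `"`...
        assert!(start.is_closed_by(&StringEndDelimiter::Special));
        assert!(!start.is_closed_by(&StringEndDelimiter::Standard));
        // ...and, like `m%"` strings, it is indentation-stripped.
        assert!(start.needs_strip_indent());
    }
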
44 changes: 44 additions & 0 deletions tests/integration/pass/symbolic-strings.ncl
@@ -0,0 +1,44 @@
let {check, ..} = import "lib/assert.ncl" in

[
# Static symbolic string
s%"hello, world"% == ["hello, world"],
# Interpolating a string
let s = "test" in
s%"This is a %{s}"% == ["This is a ", "test"],
# Interpolating an interpolated string
let f = "f" in
s%"abc %{"de%{f}"}"% == ["abc ", "def"],
# Interpolating a number
s%"num: %{100}"% == ["num: ", 100],
# Interpolating a bool
s%"bool: %{true}"% == ["bool: ", true],
# Interpolating an array
s%"array: %{[true, 1, "yes"]}"% == ["array: ", [true, 1, "yes"]],
# Interpolating a record
let r = { a = 1, b = false } in
s%"record: %{r}"% == ["record: ", r],
# Interpolating multiple values
let str = "some string" in
let num = 999.999 in
let bool = false in
let array = ["an", "array", 100] in
let record = { a = 1, simple = "yes", record = true } in
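# Like m%" strings, symbolic strings are indentation-stripped: the newline
# right after s%" is removed, which is why `expected` below starts with
# "1. " rather than "\n1. ".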
let actual = s%"
1. %{str}
2. %{num}
3. %{bool}
4. %{array}
5. %{record}"%
in
let expected = [
"1. ", str,
"\n2. ", num,
"\n3. ", bool,
"\n4. ", array,
"\n5. ", record
]
in
actual == expected,
]
|> check
