From 3c665c037d820a1b9b747458233d312e065238e3 Mon Sep 17 00:00:00 2001 From: WebFreak001 Date: Sun, 29 Oct 2023 11:52:11 +0100 Subject: [PATCH 1/2] interpolated strings, v2 better implementation, doing all the tokenizing at the start, an interpolated string being multiple tokens --- src/dparse/ast.d | 115 ++++++++++++- src/dparse/astprinter.d | 27 +++ src/dparse/formatter.d | 37 +++++ src/dparse/lexer.d | 210 ++++++++++++++++++++++-- src/dparse/parser.d | 87 ++++++++++ test/ast_checks/interpolated_string.d | 4 + test/ast_checks/interpolated_string.txt | 7 + test/run_tests.sh | 6 +- 8 files changed, 472 insertions(+), 21 deletions(-) create mode 100644 test/ast_checks/interpolated_string.d create mode 100644 test/ast_checks/interpolated_string.txt diff --git a/src/dparse/ast.d b/src/dparse/ast.d index eba8d1a3..d1252b5f 100644 --- a/src/dparse/ast.d +++ b/src/dparse/ast.d @@ -73,6 +73,9 @@ shared static this() typeMap[typeid(TypeofExpression)] = 46; typeMap[typeid(UnaryExpression)] = 47; typeMap[typeid(XorExpression)] = 48; + typeMap[typeid(InterpolatedStringExpression)] = 49; + typeMap[typeid(InterpolatedStringText)] = 50; + typeMap[typeid(InterpolatedStringVariable)] = 51; } /// Describes which syntax was used in a list of declarations in the containing AST node @@ -167,6 +170,19 @@ abstract class ASTVisitor case 46: visit(cast(TypeofExpression) n); break; case 47: visit(cast(UnaryExpression) n); break; case 48: visit(cast(XorExpression) n); break; + // skip 49, 50, 51 (used for InterpolatedStringPart) + default: assert(false, __MODULE__ ~ " has a bug"); + } + } + + /// ditto + void dynamicDispatch(const InterpolatedStringPart n) + { + switch (typeMap.get(typeid(n), 0)) + { + case 49: visit(cast(InterpolatedStringExpression) n); break; + case 50: visit(cast(InterpolatedStringText) n); break; + case 51: visit(cast(InterpolatedStringVariable) n); break; default: assert(false, __MODULE__ ~ " has a bug"); } } @@ -289,6 +305,10 @@ abstract class ASTVisitor /** */ void 
visit(const Initialize initialize) { initialize.accept(this); } /** */ void visit(const Initializer initializer) { initializer.accept(this); } /** */ void visit(const InterfaceDeclaration interfaceDeclaration) { interfaceDeclaration.accept(this); } + /** */ void visit(const InterpolatedString interpolatedString) { interpolatedString.accept(this); } + /** */ void visit(const InterpolatedStringExpression interpolatedStringExpression) { interpolatedStringExpression.accept(this); } + /** */ void visit(const InterpolatedStringText interpolatedStringText) { interpolatedStringText.accept(this); } + /** */ void visit(const InterpolatedStringVariable interpolatedStringVariable) { interpolatedStringVariable.accept(this); } /** */ void visit(const Invariant invariant_) { invariant_.accept(this); } /** */ void visit(const IsExpression isExpression) { isExpression.accept(this); } /** */ void visit(const KeyValuePair keyValuePair) { keyValuePair.accept(this); } @@ -426,7 +446,7 @@ template visitIfNotNull(fields ...) } } -mixin template OpEquals(bool print = false) +private mixin template OpEquals(extraFields...) { override bool opEquals(Object other) const { @@ -443,6 +463,9 @@ mixin template OpEquals(bool print = false) if (field != obj.tupleof[i]) return false; } + static foreach (field; extraFields) + if (mixin("this." ~ field ~ " != obj." ~ field)) + return false; return true; } return false; @@ -2318,6 +2341,93 @@ final class InterfaceDeclaration : BaseNode mixin OpEquals; } +/// +final class InterpolatedString : BaseNode +{ + override void accept(ASTVisitor visitor) const + { + mixin (visitIfNotNull!(parts)); + } + + /** */ InterpolatedStringPart[] parts; + + inout(Token) startQuote() inout pure nothrow @nogc @safe scope + { + return tokens.length ? tokens[0] : Token.init; + } + + inout(Token) endQuote() inout pure nothrow @nogc @safe scope + { + return tokens.length && tokens[$ - 1].type == tok!"istringLiteralEnd" + ? 
tokens[$ - 1] + : Token.init; + } + + mixin OpEquals!("startQuote.text"); +} + +/// +abstract class InterpolatedStringPart : BaseNode +{ +} + +/// +final class InterpolatedStringText : InterpolatedStringPart +{ + override void accept(ASTVisitor visitor) const + { + } + + /// The token containing the plain text part in its `.text` property. + inout(Token) text() inout pure nothrow @nogc @safe scope + { + return tokens.length ? tokens[0] : Token.init; + } + + mixin OpEquals!("text.text"); +} + +/// +final class InterpolatedStringVariable : InterpolatedStringPart +{ + override void accept(ASTVisitor visitor) const + { + } + + /// The dollar token. + inout(Token) dollar() inout pure nothrow @nogc @safe scope + { + return tokens.length == 2 ? tokens[0] : Token.init; + } + + /// The variable name token. + inout(Token) name() inout pure nothrow @nogc @safe scope + { + return tokens.length == 2 ? tokens[1] : Token.init; + } + + mixin OpEquals!("name.text"); +} + +/// +final class InterpolatedStringExpression : InterpolatedStringPart +{ + override void accept(ASTVisitor visitor) const + { + mixin (visitIfNotNull!(expression)); + } + + /** */ Expression expression; + + /// The dollar token. + inout(Token) dollar() inout pure nothrow @nogc @safe scope + { + return tokens.length ? 
tokens[0] : Token.init; + } + + mixin OpEquals; +} + /// final class Invariant : BaseNode { @@ -2798,7 +2908,7 @@ final class PrimaryExpression : ExpressionNode typeofExpression, typeidExpression, arrayLiteral, assocArrayLiteral, expression, dot, identifierOrTemplateInstance, isExpression, functionLiteralExpression,traitsExpression, mixinExpression, - importExpression, vector, arguments)); + importExpression, vector, arguments, interpolatedString)); } /** */ Token dot; /** */ Token primary; @@ -2818,6 +2928,7 @@ final class PrimaryExpression : ExpressionNode /** */ Type type; /** */ Token typeConstructor; /** */ Arguments arguments; + /** */ InterpolatedString interpolatedString; mixin OpEquals; } diff --git a/src/dparse/astprinter.d b/src/dparse/astprinter.d index ba17f6d6..edecca32 100644 --- a/src/dparse/astprinter.d +++ b/src/dparse/astprinter.d @@ -582,6 +582,33 @@ class XMLPrinter : ASTVisitor output.writeln(""); } + override void visit(const InterpolatedString interpolatedString) + { + output.writeln(""); + foreach (part; interpolatedString.parts) + dynamicDispatch(part); + output.writeln(""); + } + + override void visit(const InterpolatedStringText interpolatedStringText) + { + output.writeln("", xmlEscape(interpolatedStringText.text.text), ""); + } + + override void visit(const InterpolatedStringVariable interpolatedStringVariable) + { + output.writeln("", xmlEscape(interpolatedStringVariable.name.text), ""); + } + + override void visit(const InterpolatedStringExpression interpolatedStringExpression) + { + visit(interpolatedStringExpression.expression); + } + override void visit(const Invariant invariant_) { output.writeln(""); diff --git a/src/dparse/formatter.d b/src/dparse/formatter.d index 1474f682..b852957b 100644 --- a/src/dparse/formatter.d +++ b/src/dparse/formatter.d @@ -2012,6 +2012,36 @@ class Formatter(Sink) } } + void format(const InterpolatedString interpolatedString) + { + put(interpolatedString.startQuote.text); + foreach (part; 
interpolatedString.parts) + { + if (cast(InterpolatedStringText) part) format(cast(InterpolatedStringText) part); + else if (cast(InterpolatedStringVariable) part) format(cast(InterpolatedStringVariable) part); + else if (cast(InterpolatedStringExpression) part) format(cast(InterpolatedStringExpression) part); + } + put(interpolatedString.endQuote.text); + } + + void format(const InterpolatedStringText interpolatedStringText) + { + put(interpolatedStringText.text.text); + } + + void format(const InterpolatedStringVariable interpolatedStringVariable) + { + put("$"); + put(interpolatedStringVariable.name.text); + } + + void format(const InterpolatedStringExpression interpolatedStringExpression) + { + put("$("); + format(interpolatedStringExpression.expression); + put(")"); + } + void format(const Invariant invariant_, const Attribute[] attrs = null) { debug(verbose) writeln("Invariant"); @@ -2571,6 +2601,7 @@ class Formatter(Sink) Type type; Token typeConstructor; Arguments arguments; + InterpolatedString interpolatedString; **/ with(primaryExpression) @@ -2605,6 +2636,7 @@ class Formatter(Sink) else if (vector) format(vector); else if (type) format(type); else if (arguments) format(arguments); + else if (interpolatedString) format(interpolatedString); } } @@ -4348,4 +4380,9 @@ do { } }}, `a == b && c == d`); + testFormatNode!(VariableDeclaration)(`T x = i"hello";`); + testFormatNode!(VariableDeclaration)(`T x = i" hello ";`); + testFormatNode!(VariableDeclaration)(`T x = i" hello $name ";`); + testFormatNode!(VariableDeclaration)(`T x = i" hello $(name) ";`); + testFormatNode!(VariableDeclaration)(`T x = i" hello $( name ) ";`, `T x = i" hello $(name) ";`); } diff --git a/src/dparse/lexer.d b/src/dparse/lexer.d index 168943af..774d1753 100644 --- a/src/dparse/lexer.d +++ b/src/dparse/lexer.d @@ -47,7 +47,8 @@ private immutable dynamicTokens = [ "whitespace", "doubleLiteral", "floatLiteral", "idoubleLiteral", "ifloatLiteral", "intLiteral", "longLiteral", 
"realLiteral", "irealLiteral", "uintLiteral", "ulongLiteral", "characterLiteral", - "dstringLiteral", "stringLiteral", "wstringLiteral" + "dstringLiteral", "stringLiteral", "wstringLiteral", "istringLiteralStart", + "istringLiteralText", "istringLiteralEnd" ]; private immutable pseudoTokenHandlers = [ @@ -68,6 +69,7 @@ private immutable pseudoTokenHandlers = [ "7", "lexDecimal", "8", "lexDecimal", "9", "lexDecimal", + "i\"", "lexInterpolatedString", "q\"", "lexDelimitedString", "q{", "lexTokenString", "r\"", "lexWysiwygString", @@ -642,10 +644,31 @@ public struct DLexer /// public void popFront()() pure nothrow @safe { - do - _popFront(); - while (config.whitespaceBehavior == WhitespaceBehavior.skip - && _front.type == tok!"whitespace"); + if (range.index >= range.bytes.length) + { + _front.type = _tok!"\0"; + return; + } + + if (istringStack.length && istringStack[$ - 1].parens == 0) + { + lexIstringContent(_front); + } + else + { + do + _popFront(); + while (config.whitespaceBehavior == WhitespaceBehavior.skip + && _front.type == tok!"whitespace"); + + if (istringStack.length) + { + if (_front.type == tok!"(") + istringStack[$ - 1].parens++; + else if (_front.type == tok!")") + istringStack[$ - 1].parens--; + } + } } /** @@ -1411,6 +1434,98 @@ private pure nothrow @safe: } } + void lexInterpolatedString(ref Token token) + { + mixin (tokenStart); + range.index += 2; + range.column += 2; + token = Token(tok!"istringLiteralStart", cache.intern(range.slice(mark)), line, column, index); + istringStack ~= IstringState.init; + } + + void lexIstringContent(ref Token token) + { + mixin (tokenStart); + + assert(istringStack.length > 0); + assert(istringStack[$ - 1].parens == 0); + + if (istringStack[$ - 1].dollar) + { + istringStack[$ - 1].dollar = false; + if (range.front == '(') + { + istringStack[$ - 1].parens++; + range.popFront(); + token = Token(tok!"(", null, line, column, index); + } + else + lexIdentifier(token); + return; + } + + switch (range.front) + { + case 
'$': + if (isAtIstringExpression) + { + istringStack[$ - 1].dollar = true; + range.popFront(); + token = Token(tok!"$", null, line, column, index); + break; + } + else + goto default; + case '"': + istringStack.length--; + range.popFront(); + token = Token(tok!"istringLiteralEnd", cache.intern(range.slice(mark)), line, + column, index); + break; + default: + lexIstringPlain(token); + break; + } + } + + void lexIstringPlain(ref Token token) + { + mixin (tokenStart); + Loop: while (!range.empty) + { + switch (range.bytes[range.index]) + { + case '\\': + lexEscapeSequence(); + break; + case '$': + if (isAtIstringExpression) + break Loop; + else + goto default; + case '"': + break Loop; + default: + popFrontWhitespaceAware(); + break; + } + } + token = Token(tok!"istringLiteralText", cache.intern(range.slice(mark)), + line, column, index); + } + + bool isAtIstringExpression() + { + if (range.index + 1 >= range.bytes.length) + return false; + auto c = range.bytes[range.index + 1]; + if (c == '(') + return true; + if (c >= '0' && c <= '9') + return false; + return !isSeparating(1); + } + void lexDelimitedString(ref Token token) { mixin (tokenStart); @@ -1548,7 +1663,7 @@ private pure nothrow @safe: config.stringBehavior = oldString; } - advance(_front); + popFront(); if (range.index >= range.bytes.length) { @@ -1652,6 +1767,7 @@ private pure nothrow @safe: case '\'': case '"': case '?': + case '$': case '\\': case 'a': case 'b': @@ -1958,6 +2074,13 @@ private pure nothrow @safe: StringCache* cache; LexerConfig config; bool haveSSE42; + IstringState[] istringStack; + + static struct IstringState + { + int parens; + bool dollar; + } } /** @@ -2260,11 +2383,62 @@ private extern(C) void free(void*) nothrow pure @nogc @trusted; unittest { - auto source = cast(ubyte[]) q{ import std.stdio;}c; - auto tokens = getTokensForParser(source, LexerConfig(), - new StringCache(StringCache.defaultBucketCount)); - assert (tokens.map!"a.type"().equal([tok!"import", tok!"identifier", 
tok!".", - tok!"identifier", tok!";"])); + import std.conv; + auto tokens(string source) + { + auto tokens = getTokensForParser(cast(ubyte[]) source, LexerConfig(), + new StringCache(StringCache.defaultBucketCount)); + return tokens; + } + assert (tokens(q{ import std.stdio;}c).map!"a.type"().equal( + [tok!"import", tok!"identifier", tok!".", tok!"identifier", tok!";"])); + + assert (tokens(`i"hello".foo`).map!"a.type"().equal( + [ + tok!"istringLiteralStart", + tok!"istringLiteralText", + tok!"istringLiteralEnd", + tok!".", + tok!"identifier" + ]), tokens(`i"hello".foo`).to!string); + + assert (tokens(`i"hello $name".foo`).map!"a.type"().equal( + [ + tok!"istringLiteralStart", + tok!"istringLiteralText", + tok!"$", + tok!"identifier", + tok!"istringLiteralEnd", + tok!".", + tok!"identifier" + ])); + + assert (tokens(`i"hello $name".foo`).map!"a.type"().equal( + [ + tok!"istringLiteralStart", + tok!"istringLiteralText", + tok!"$", + tok!"identifier", + tok!"istringLiteralEnd", + tok!".", + tok!"identifier" + ])); + + assert (tokens(`i"hello $(x + "hello $(world)") bar".foo`).map!"a.type"().equal( + [ + tok!"istringLiteralStart", + tok!"istringLiteralText", + tok!"$", + tok!"(", + tok!"identifier", + tok!"+", + tok!"stringLiteral", + tok!")", + tok!"istringLiteralText", + tok!"istringLiteralEnd", + tok!".", + tok!"identifier" + ])); } /// Test \x char sequence @@ -2584,12 +2758,12 @@ void main() { assert(tokens[i++].type == tok!";"); assert(tokens[i++].type == tok!"}"); - void checkInvalidTrailingString(const Token[] tokens) + void checkInvalidTrailingString(const Token[] tokens, int expected = 3) { - assert(tokens.length == 3); - assert(tokens[2].index != 0); - assert(tokens[2].column >= 4); - assert(tokens[2].type == tok!""); + assert(tokens.length == expected); + assert(tokens[$ - 1].index != 0); + assert(tokens[$ - 1].column >= 4); + assert(tokens[$ - 1].type == tok!""); } checkInvalidTrailingString(getTokensForParser(`x = "foo`, cf, &ca)); @@ -2599,4 +2773,8 @@ 
void main() { checkInvalidTrailingString(getTokensForParser("x = q{foo", cf, &ca)); checkInvalidTrailingString(getTokensForParser(`x = q"foo`, cf, &ca)); checkInvalidTrailingString(getTokensForParser("x = '", cf, &ca)); + checkInvalidTrailingString(getTokensForParser(`i"$("`, cf, &ca), 4); + checkInvalidTrailingString(getTokensForParser(`i"$("foo`, cf, &ca), 4); + checkInvalidTrailingString(getTokensForParser(`i"$(q{`, cf, &ca), 4); + checkInvalidTrailingString(getTokensForParser(`i"$(q{foo`, cf, &ca), 4); } diff --git a/src/dparse/parser.d b/src/dparse/parser.d index 572dd6d8..2de5a960 100644 --- a/src/dparse/parser.d +++ b/src/dparse/parser.d @@ -4617,6 +4617,89 @@ class Parser return parseInterfaceOrClass(node, startIndex); } + /** + * Parses an InterpolatedString + * + * $(GRAMMAR $(RULEDEF interpolatedString): + * $(LITERAL 'i"') $(RULE InterpolatedStringPart)* $(LITERAL '"') + * ;) + */ + InterpolatedString parseInterpolatedString() + { + mixin(traceEnterAndExit!(__FUNCTION__)); + auto startIndex = index; + auto node = allocator.make!InterpolatedString; + mixin(tokenCheck!"istringLiteralStart"); + StackBuffer parts; + while (moreTokens && !currentIs(tok!"istringLiteralEnd")) + { + if (auto c = parseInterpolatedStringPart()) + parts.put(c); + else + advance(); + } + ownArray(node.parts, parts); + expect(tok!"istringLiteralEnd"); + + node.tokens = tokens[startIndex .. 
index]; + return node; + } + + /** + * Parses an InterpolatedStringPart + * + * $(GRAMMAR $(RULEDEF interpolatedStringPart): + * $(LITERAL '$') $(RULE identifier) + * | $(LITERAL '$') $(LITERAL '$(LPAREN)') $(RULE expression) $(LITERAL '$(RPAREN)') + * | $(RULE stringEscapeSequence) + * | $(RULE NOT:$(LPAREN)$(LITERAL '$') | $(LITERAL '"')$(RPAREN))+ + * ;) + */ + InterpolatedStringPart parseInterpolatedStringPart() + { + mixin(traceEnterAndExit!(__FUNCTION__)); + auto startIndex = index; + + InterpolatedStringPart node; + + if (currentIs(tok!"istringLiteralText")) + { + node = allocator.make!InterpolatedStringText; + advance(); + } + else if (currentIs(tok!"$")) + { + if (peekIs(tok!"identifier")) + { + node = allocator.make!InterpolatedStringVariable; + advance(); + advance(); + } + else if (peekIs(tok!"(")) + { + advance(); + advance(); + auto expNode = allocator.make!InterpolatedStringExpression; + expNode.expression = parseExpression(); + node = expNode; + expect(tok!")"); + } + else + { + error("Unexpected token after dollar inside interpolated string literal"); + return null; + } + } + else + { + error("Unexpected token inside interpolated string literal"); + return null; + } + + node.tokens = tokens[startIndex .. 
index]; + return node; + } + /** * Parses an Invariant * @@ -5801,6 +5884,7 @@ class Parser * | $(LITERAL FloatLiteral) * | $(LITERAL StringLiteral)+ * | $(LITERAL CharacterLiteral) + * | $(LITERAL IstringLiteral) * ;) */ PrimaryExpression parsePrimaryExpression() @@ -5919,6 +6003,9 @@ class Parser case tok!"import": mixin(parseNodeQ!(`node.importExpression`, `ImportExpression`)); break; + case tok!"istringLiteralStart": + mixin(parseNodeQ!(`node.interpolatedString`, `InterpolatedString`)); + break; case tok!"this": case tok!"super": foreach (L; Literals) { case L: } diff --git a/test/ast_checks/interpolated_string.d b/test/ast_checks/interpolated_string.d new file mode 100644 index 00000000..0c46fe73 --- /dev/null +++ b/test/ast_checks/interpolated_string.d @@ -0,0 +1,4 @@ +void foo() +{ + writeln(i"Hello $name, you have $$(wealth) in your account right now"); +} diff --git a/test/ast_checks/interpolated_string.txt b/test/ast_checks/interpolated_string.txt new file mode 100644 index 00000000..ce741e4d --- /dev/null +++ b/test/ast_checks/interpolated_string.txt @@ -0,0 +1,7 @@ +//interpolatedString[@startQuote='i"'] +//interpolatedString[@endQuote='"'] +//interpolatedString/text[text()='Hello '] +//interpolatedString/text[text()=', you have $'] +//interpolatedString/text[text()=' in your account right now'] +//interpolatedString/variable[text()='name'] +//interpolatedString/expression/unaryExpression diff --git a/test/run_tests.sh b/test/run_tests.sh index 1e267d29..c2d66093 100755 --- a/test/run_tests.sh +++ b/test/run_tests.sh @@ -88,12 +88,12 @@ if [[ ${BUILDKITE:-} != "true" ]]; then expectParseFailure=1 elif [[ "$line" =~ ^# ]]; then true # comment line - elif echo "$AST" | xmllint --xpath "${line}" - 2>/dev/null > /dev/null; then - ((currentPasses=currentPasses+1)) - else + elif echo "$AST" | xmllint --xpath "${line}" - 2>&1 | grep 'XPath set is empty' >/dev/null; then echo echo -e " ${RED}Check on line $lineCount of $queryFile failed.${NORMAL}" 
((currentFailures=currentFailures+1)) + else + ((currentPasses=currentPasses+1)) fi ((lineCount=lineCount+1)) done < "$queryFile" From bdd87a62c6b0b4342384a35231b5f42b19c863f6 Mon Sep 17 00:00:00 2001 From: WebFreak001 Date: Sun, 29 Oct 2023 13:59:04 +0100 Subject: [PATCH 2/2] support i", i`, iq{ and istring postfixes --- src/dparse/ast.d | 18 +++++- src/dparse/lexer.d | 142 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 152 insertions(+), 8 deletions(-) diff --git a/src/dparse/ast.d b/src/dparse/ast.d index d1252b5f..0d9f9af4 100644 --- a/src/dparse/ast.d +++ b/src/dparse/ast.d @@ -2363,7 +2363,23 @@ final class InterpolatedString : BaseNode : Token.init; } - mixin OpEquals!("startQuote.text"); + /// '\0'/'c'/'w'/'d' for `i""`, `i""c`, `i""w` and `i""d` respectively. + char postfixType() inout pure nothrow @nogc @safe scope + { + auto end = endQuote.text; + auto endChar = end.length ? end[$ - 1] : ' '; + switch (endChar) + { + case 'c': + case 'w': + case 'd': + return endChar; + default: + return '\0'; + } + } + + mixin OpEquals!("startQuote.text", "postfixType"); } /// diff --git a/src/dparse/lexer.d b/src/dparse/lexer.d index 774d1753..60106c17 100644 --- a/src/dparse/lexer.d +++ b/src/dparse/lexer.d @@ -70,6 +70,8 @@ private immutable pseudoTokenHandlers = [ "8", "lexDecimal", "9", "lexDecimal", "i\"", "lexInterpolatedString", + "i`", "lexInterpolatedString", + "iq{", "lexInterpolatedString", "q\"", "lexDelimitedString", "q{", "lexTokenString", "r\"", "lexWysiwygString", @@ -1415,6 +1417,12 @@ private pure nothrow @safe: index); } + private ubyte lexStringSuffix() pure nothrow @safe + { + IdType t; + return lexStringSuffix(t); + } + private ubyte lexStringSuffix(ref IdType type) pure nothrow @safe { if (range.index >= range.bytes.length) @@ -1437,10 +1445,22 @@ private pure nothrow @safe: void lexInterpolatedString(ref Token token) { mixin (tokenStart); - range.index += 2; - range.column += 2; + IstringState.Type type; + range.popFront(); + 
switch (range.bytes[range.index]) + { + case '"': type = IstringState.Type.quote; break; + case '`': type = IstringState.Type.backtick; break; + case 'q': + type = IstringState.Type.tokenString; + range.popFront(); + break; + default: + assert(false); + } + range.popFront(); token = Token(tok!"istringLiteralStart", cache.intern(range.slice(mark)), line, column, index); - istringStack ~= IstringState.init; + istringStack ~= IstringState(0, 0, type); } void lexIstringContent(ref Token token) @@ -1476,9 +1496,15 @@ private pure nothrow @safe: } else goto default; + case '}': case '"': + case '`': + if (range.front != istringStack[$ - 1].type || istringStack[$ - 1].braces) + goto default; + istringStack.length--; range.popFront(); + lexStringSuffix(); token = Token(tok!"istringLiteralEnd", cache.intern(range.slice(mark)), line, column, index); break; @@ -1493,18 +1519,36 @@ private pure nothrow @safe: mixin (tokenStart); Loop: while (!range.empty) { - switch (range.bytes[range.index]) + char c = range.bytes[range.index]; + switch (c) { case '\\': - lexEscapeSequence(); + if (istringStack[$ - 1].type == IstringState.Type.quote) + lexEscapeSequence(); + else + goto default; break; case '$': if (isAtIstringExpression) break Loop; else goto default; + case '{': + if (istringStack[$ - 1].type == IstringState.Type.tokenString) + istringStack[$ - 1].braces++; + goto default; + case '}': + if (istringStack[$ - 1].type == IstringState.Type.tokenString) + { + if (istringStack[$ - 1].braces == 0) break Loop; // unbalanced '}' closes the iq{ } literal: stop the text run so lexIstringContent can emit istringLiteralEnd + istringStack[$ - 1].braces--; + } + goto default; case '"': - break Loop; + case '`': + if (c == istringStack[$ - 1].type) + break Loop; + goto default; default: popFrontWhitespaceAware(); break; @@ -2078,7 +2122,16 @@ private pure nothrow @safe: static struct IstringState { - int parens; + enum Type : ubyte + { + quote = '"', + backtick = '`', + tokenString = '}', + } + + ushort parens; /// open parentheses of the current $(...) expression; 0 means literal text is being lexed + ushort braces; /// unclosed '{' seen so far in the text of an iq{ } literal + Type type; /// closing delimiter character of this literal bool dollar; } } @@ -2778,3 +2831,86 @@ void main() { 
checkInvalidTrailingString(getTokensForParser(`i"$(q{`, cf, &ca), 4); checkInvalidTrailingString(getTokensForParser(`i"$(q{foo`, cf, &ca), 4); } + +unittest +{ + import std.conv; + + auto test(string content, bool debugPrint = false) + { + LexerConfig cf; + StringCache ca = StringCache(16); + + const tokens = getTokensForParser(content, cf, &ca); + if (debugPrint) + return tokens.to!(char[][]).join("\n"); + + char[] ret = new char[content.length]; + ret[] = ' '; + foreach_reverse (t; tokens) + { + ret[t.index .. t.index + max(1, t.text.length)] = + t.type == tok!"$" ? '$' : + t.type == tok!"(" ? '(' : + t.type == tok!")" ? ')' : + t.type == tok!"identifier" ? 'i' : + t.type == tok!"istringLiteralStart" ? 'S' : + t.type == tok!"istringLiteralText" ? '.' : + t.type == tok!"istringLiteralEnd" ? 'E' : + t.type == tok!"" ? '%' : + '?'; + } + return ret; + } + + // dfmt off + + assert(test(`i"$name"`) + == `SS$iiiiE`); + + assert(test(`i"\$plain\0"`) + == `SS.........E`); + + assert(test(`i"$(expression)"w`) + == `SS$(iiiiiiiiii)EE`); + + assert(test(`i"$(expression"c`) + == `SS$(iiiiiiiiii `); + + assert(test(`i"$name "`) + == `SS$iiii.E`); + + assert(test(`i"$ {}plain"`) + == `SS.........E`); + + assert(test("i\"$ ``plain\"") + == `SS.........E`); + + assert(test(`i"$0 plain"`) + == `SS........E`); + + assert(test(`i"$$0 plain"`) + == `SS.........E`); + + assert(test(`i"$.1 plain"`) + == `SS.........E`); + + assert(test(`i"I have $$money"`) + == `SS........$iiiiiE`); + + assert(test("i`I \"have\" $$money`") + == "SS..........$iiiiiE", test("i`I \"have\" $$money`", true)); + + assert(test("iq{I `\"have\"` $$money}") + == "SSS............$iiiiiE"); + + // plain text directly followed by the closing brace + assert(test(`iq{plain}`) + == `SSS.....E`); + + // balanced inner braces stay part of the text + assert(test(`iq{a{b}c}`) + == `SSS.....E`); + + // dfmt on +}