From 35cc4391595553cb9695af8b03e7d622b20c5130 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Wed, 31 Jan 2024 17:20:25 -0500 Subject: [PATCH] fix: format scanner and don't bother parsing during error recovery --- common/define-grammar.js | 1 + common/scanner.h | 391 +++++++++++++++++++++------------------ 2 files changed, 215 insertions(+), 177 deletions(-) diff --git a/common/define-grammar.js b/common/define-grammar.js index ade7dd88..f1bd7220 100644 --- a/common/define-grammar.js +++ b/common/define-grammar.js @@ -6,6 +6,7 @@ module.exports = function defineGrammar(dialect) { externals: ($, previous) => previous.concat([ $._function_signature_automatic_semicolon, + $.__error_recovery, ]), supertypes: ($, previous) => previous.concat([ diff --git a/common/scanner.h b/common/scanner.h index 37ebc976..0662dd64 100644 --- a/common/scanner.h +++ b/common/scanner.h @@ -3,200 +3,234 @@ #include enum TokenType { - AUTOMATIC_SEMICOLON, - TEMPLATE_CHARS, - TERNARY_QMARK, - HTML_COMMENT, - LOGICAL_OR, - ESCAPE_SEQUENCE, - FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON, + AUTOMATIC_SEMICOLON, + TEMPLATE_CHARS, + TERNARY_QMARK, + HTML_COMMENT, + LOGICAL_OR, + ESCAPE_SEQUENCE, + FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON, + ERROR_RECOVERY, }; static void advance(TSLexer *lexer) { lexer->advance(lexer, false); } + static void skip(TSLexer *lexer) { lexer->advance(lexer, true); } static bool scan_template_chars(TSLexer *lexer) { - lexer->result_symbol = TEMPLATE_CHARS; - for (bool has_content = false;; has_content = true) { - lexer->mark_end(lexer); - switch (lexer->lookahead) { - case '`': - return has_content; - case '\0': - return false; - case '$': - advance(lexer); - if (lexer->lookahead == '{') return has_content; - break; - case '\\': - return has_content; - default: - advance(lexer); + lexer->result_symbol = TEMPLATE_CHARS; + for (bool has_content = false;; has_content = true) { + lexer->mark_end(lexer); + switch (lexer->lookahead) { + case '`': + return has_content; + case '\0': + return false; + case '$': + advance(lexer); + if (lexer->lookahead == '{') { + return has_content; + } + break; + case '\\': + return has_content; + default: + advance(lexer); + } } - } } static bool scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment) { - for (;;) { - while (iswspace(lexer->lookahead)) { - skip(lexer); - } - - if (lexer->lookahead == '/') { - skip(lexer); - - if (lexer->lookahead == '/') { - skip(lexer); - while (lexer->lookahead != 0 && lexer->lookahead != '\n') { - skip(lexer); + for (;;) { + while (iswspace(lexer->lookahead)) { + skip(lexer); } - *scanned_comment = true; - } else if (lexer->lookahead == '*') { - skip(lexer); - while (lexer->lookahead != 0) { - if (lexer->lookahead == '*') { + + if (lexer->lookahead == '/') { skip(lexer); + if (lexer->lookahead == '/') { - skip(lexer); - break; + skip(lexer); + while (lexer->lookahead != 0 && lexer->lookahead != '\n') { + skip(lexer); + } + *scanned_comment = true; + } else if (lexer->lookahead == '*') { + skip(lexer); + while (lexer->lookahead != 0) { + if (lexer->lookahead == '*') { + skip(lexer); + if (lexer->lookahead == '/') { + skip(lexer); + break; + } + } else { + skip(lexer); + } + } + } else { + return false; } - } else { - skip(lexer); - } + } else { + return true; } - } else { - return false; - } - } else { - return true; } - } } static bool scan_automatic_semicolon(TSLexer *lexer, const bool *valid_symbols, bool *scanned_comment) { - lexer->result_symbol = AUTOMATIC_SEMICOLON; - lexer->mark_end(lexer); - - for (;;) { - if (lexer->lookahead == 0) return true; - if (lexer->lookahead == '}') { - // Automatic semicolon insertion breaks detection of object patterns - // in a typed context: - // type F = ({a}: {a: number}) => number; - // Therefore, disable automatic semicolons when followed by typing - do { + lexer->result_symbol = AUTOMATIC_SEMICOLON; + lexer->mark_end(lexer); + + for (;;) { + if (lexer->lookahead == 0) { + return true; + } + if (lexer->lookahead == '}') { + // Automatic semicolon insertion breaks detection of object patterns + // in a typed context: + // type F = ({a}: {a: number}) => number; + // Therefore, disable automatic semicolons when followed by typing + do { + skip(lexer); + } while (iswspace(lexer->lookahead)); + if (lexer->lookahead == ':') { + return false; + } + return true; + } + if (!iswspace(lexer->lookahead)) { + return false; + } + if (lexer->lookahead == '\n') { + break; + } skip(lexer); - } while (iswspace(lexer->lookahead)); - if (lexer->lookahead == ':') return false; - return true; } - if (!iswspace(lexer->lookahead)) return false; - if (lexer->lookahead == '\n') break; + skip(lexer); - } - - skip(lexer); - - if (!scan_whitespace_and_comments(lexer, scanned_comment)) return false; - - switch (lexer->lookahead) { - case ',': - case '.': - case ';': - case '*': - case '%': - case '>': - case '<': - case '=': - case '?': - case '^': - case '|': - case '&': - case '/': - case ':': - return false; - case '{': - if (valid_symbols[FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON]) return false; - break; + if (!scan_whitespace_and_comments(lexer, scanned_comment)) { + return false; + } - // Don't insert a semicolon before a '[' or '(', unless we're parsing - // a type. Detect whether we're parsing a type or an expression using - // the validity of a binary operator token. - case '(': - case '[': - if (valid_symbols[LOGICAL_OR]) return false; - break; + switch (lexer->lookahead) { + case ',': + case '.': + case ';': + case '*': + case '%': + case '>': + case '<': + case '=': + case '?': + case '^': + case '|': + case '&': + case '/': + case ':': + return false; + + case '{': + if (valid_symbols[FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON]) { + return false; + } + break; + + // Don't insert a semicolon before a '[' or '(', unless we're parsing + // a type. Detect whether we're parsing a type or an expression using + // the validity of a binary operator token. + case '(': + case '[': + if (valid_symbols[LOGICAL_OR]) { + return false; + } + break; - // Insert a semicolon before `--` and `++`, but not before binary `+` or `-`. - case '+': - skip(lexer); - return lexer->lookahead == '+'; - case '-': - skip(lexer); - return lexer->lookahead == '-'; + // Insert a semicolon before `--` and `++`, but not before binary `+` or `-`. + case '+': + skip(lexer); + return lexer->lookahead == '+'; + case '-': + skip(lexer); + return lexer->lookahead == '-'; - // Don't insert a semicolon before `!=`, but do insert one before a unary `!`. - case '!': - skip(lexer); - return lexer->lookahead != '='; + // Don't insert a semicolon before `!=`, but do insert one before a unary `!`. + case '!': + skip(lexer); + return lexer->lookahead != '='; - // Don't insert a semicolon before `in` or `instanceof`, but do insert one - // before an identifier. - case 'i': - skip(lexer); + // Don't insert a semicolon before `in` or `instanceof`, but do insert one + // before an identifier. + case 'i': + skip(lexer); - if (lexer->lookahead != 'n') return true; - skip(lexer); + if (lexer->lookahead != 'n') { + return true; + } + skip(lexer); - if (!iswalpha(lexer->lookahead)) return false; + if (!iswalpha(lexer->lookahead)) { + return false; + } - for (unsigned i = 0; i < 8; i++) { - if (lexer->lookahead != "stanceof"[i]) return true; - skip(lexer); - } + for (unsigned i = 0; i < 8; i++) { + if (lexer->lookahead != "stanceof"[i]) { + return true; + } + skip(lexer); + } - if (!iswalpha(lexer->lookahead)) return false; - break; - } + if (!iswalpha(lexer->lookahead)) { + return false; + } + break; + } - return true; + return true; } static bool scan_ternary_qmark(TSLexer *lexer) { - for(;;) { - if (!iswspace(lexer->lookahead)) break; - skip(lexer); - } + for (;;) { + if (!iswspace(lexer->lookahead)) { + break; + } + skip(lexer); + } - if (lexer->lookahead == '?') { - advance(lexer); + if (lexer->lookahead == '?') { + advance(lexer); - if (lexer->lookahead == '?') return false; - /* Optional chaining. */ - if (lexer->lookahead == '.') return false; + /* Optional chaining. */ + if (lexer->lookahead == '?' || lexer->lookahead == '.') { + return false; + } - lexer->mark_end(lexer); - lexer->result_symbol = TERNARY_QMARK; + lexer->mark_end(lexer); + lexer->result_symbol = TERNARY_QMARK; - /* TypeScript optional arguments contain the ?: sequence, possibly - with whitespace. */ - for(;;) { - if (!iswspace(lexer->lookahead)) break; - advance(lexer); - } - if (lexer->lookahead == ':') return false; - if (lexer->lookahead == ')') return false; - if (lexer->lookahead == ',') return false; - - if (lexer->lookahead == '.') { - advance(lexer); - if (iswdigit(lexer->lookahead)) return true; - return false; + /* TypeScript optional arguments contain the ?: sequence, possibly + with whitespace. */ + for (;;) { + if (!iswspace(lexer->lookahead)) { + break; + } + advance(lexer); + } + + if (lexer->lookahead == ':' || lexer->lookahead == ')' || lexer->lookahead == ',') { + return false; + } + + if (lexer->lookahead == '.') { + advance(lexer); + if (iswdigit(lexer->lookahead)) { + return true; + } + return false; + } + return true; } - return true; - } - return false; + return false; } static bool scan_closing_comment(TSLexer *lexer) { @@ -236,29 +270,32 @@ static bool scan_closing_comment(TSLexer *lexer) { return true; } - static inline bool external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { - if (valid_symbols[TEMPLATE_CHARS]) { - if (valid_symbols[AUTOMATIC_SEMICOLON]) return false; - return scan_template_chars(lexer); - } else if ( - valid_symbols[AUTOMATIC_SEMICOLON] || - valid_symbols[FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON] - ) { - bool scanned_comment = false; - bool ret = scan_automatic_semicolon(lexer, valid_symbols, &scanned_comment); - if (!ret && !scanned_comment && valid_symbols[TERNARY_QMARK] && lexer->lookahead == '?') - return scan_ternary_qmark(lexer); - return ret; - } - if (valid_symbols[TERNARY_QMARK]) { - return scan_ternary_qmark(lexer); - } - - if (valid_symbols[HTML_COMMENT] && !valid_symbols[LOGICAL_OR] && !valid_symbols[ESCAPE_SEQUENCE]) { - return scan_closing_comment(lexer); - } - - return false; + if (valid_symbols[ERROR_RECOVERY]) { + return false; + } + + if (valid_symbols[TEMPLATE_CHARS]) { + if (valid_symbols[AUTOMATIC_SEMICOLON]) { + return false; + } + return scan_template_chars(lexer); + } + if (valid_symbols[AUTOMATIC_SEMICOLON] || valid_symbols[FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON]) { + bool scanned_comment = false; + bool ret = scan_automatic_semicolon(lexer, valid_symbols, &scanned_comment); + if (!ret && !scanned_comment && valid_symbols[TERNARY_QMARK] && lexer->lookahead == '?') { + return scan_ternary_qmark(lexer); + } + return ret; + } + if (valid_symbols[TERNARY_QMARK]) { + return scan_ternary_qmark(lexer); + } + if (valid_symbols[HTML_COMMENT] && !valid_symbols[LOGICAL_OR] && !valid_symbols[ESCAPE_SEQUENCE]) { + return scan_closing_comment(lexer); + } + + return false; }