From 09657f1e0981f6af440e0830fbf03d949b8d5ed6 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 7 Jan 2024 06:44:07 -0600 Subject: [PATCH] Extracting again --- grammar.js | 5 +- src/grammar.json | 20 ++++++- src/parser.c | 117 ++++++++++++++++++------------------- test/corpus/injections.txt | 2 + test/corpus/recipes.txt | 1 + test/corpus/statements.txt | 2 + 6 files changed, 82 insertions(+), 65 deletions(-) diff --git a/grammar.js b/grammar.js index 6a4f47e..5b033e6 100644 --- a/grammar.js +++ b/grammar.js @@ -298,7 +298,10 @@ module.exports = grammar({ recipe_line_prefix: (_) => choice("@-", "-@", "@", "-"), shebang: ($) => - prec.left(10, seq(token.immediate(/#!/, /.*/), optional($._newline))), + prec.left(seq( + token.immediate(seq(/#!/, choice(/.*/))), + optional($._newline), + )), // prec( // 4, // seq( diff --git a/src/grammar.json b/src/grammar.json index 6e81642..46ef91d 100644 --- a/src/grammar.json +++ b/src/grammar.json @@ -1253,15 +1253,29 @@ }, "shebang": { "type": "PREC_LEFT", - "value": 10, + "value": 0, "content": { "type": "SEQ", "members": [ { "type": "IMMEDIATE_TOKEN", "content": { - "type": "PATTERN", - "value": "#!" + "type": "SEQ", + "members": [ + { + "type": "PATTERN", + "value": "#!" + }, + { + "type": "CHOICE", + "members": [ + { + "type": "PATTERN", + "value": ".*" + } + ] + } + ] } }, { diff --git a/src/parser.c b/src/parser.c index 698af4a..cf7da76 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1402,7 +1402,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { case 0: if (eof) ADVANCE(14); if (lookahead == '!') ADVANCE(5); - if (lookahead == '#') ADVANCE(53); + if (lookahead == '#') ADVANCE(52); if (lookahead == '$') ADVANCE(35); if (lookahead == '&') ADVANCE(4); if (lookahead == '(') ADVANCE(28); @@ -1427,36 +1427,36 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { lookahead == ' ') SKIP(11) if (('A' <= lookahead && lookahead <= 'Z') || lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(58); + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(57); END_STATE(); case 1: if (lookahead == '\n') SKIP(2) - if (lookahead == '#') ADVANCE(48); + if (lookahead == '#') ADVANCE(47); if (lookahead == '-') ADVANCE(45); if (lookahead == '@') ADVANCE(32); - if (lookahead == '{') ADVANCE(51); + if (lookahead == '{') ADVANCE(50); if (lookahead == '\t' || lookahead == '\r' || - lookahead == ' ') ADVANCE(49); - if (lookahead != 0) ADVANCE(52); + lookahead == ' ') ADVANCE(48); + if (lookahead != 0) ADVANCE(51); END_STATE(); case 2: if (lookahead == '\n') SKIP(2) if (lookahead == '-') ADVANCE(45); if (lookahead == '@') ADVANCE(32); - if (lookahead == '{') ADVANCE(51); + if (lookahead == '{') ADVANCE(50); if (lookahead == '\t' || lookahead == '\r' || - lookahead == ' ') ADVANCE(49); - if (lookahead != 0) ADVANCE(52); + lookahead == ' ') ADVANCE(48); + if (lookahead != 0) ADVANCE(51); END_STATE(); case 3: if (lookahead == '\n') SKIP(3) - if (lookahead == '{') ADVANCE(51); + if (lookahead == '{') ADVANCE(50); if (lookahead == '\t' || lookahead == '\r' || - lookahead == ' ') ADVANCE(50); - if (lookahead != 0) ADVANCE(52); + lookahead == ' ') ADVANCE(49); + if (lookahead != 0) ADVANCE(51); END_STATE(); case 4: if (lookahead == '&') ADVANCE(39); @@ -1469,22 +1469,22 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { if (lookahead == '~') ADVANCE(27); END_STATE(); case 7: - if (lookahead == '{') ADVANCE(55); + if (lookahead == '{') ADVANCE(54); END_STATE(); case 8: - if (lookahead == '}') ADVANCE(57); + if (lookahead == '}') ADVANCE(56); END_STATE(); case 9: if (lookahead == '"' || lookahead == '\\' || lookahead == 'n' || lookahead == 'r' || - lookahead == 't') ADVANCE(59); + lookahead == 't') ADVANCE(58); END_STATE(); case 10: if (eof) ADVANCE(14); if (lookahead == '!') ADVANCE(5); - if (lookahead == '#') ADVANCE(53); + if (lookahead == '#') ADVANCE(52); if (lookahead == '$') ADVANCE(35); if (lookahead == '(') ADVANCE(28); if (lookahead == ')') ADVANCE(29); @@ -1505,12 +1505,12 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { lookahead == ' ') SKIP(12) if (('A' <= lookahead && lookahead <= 'Z') || lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(58); + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(57); END_STATE(); case 11: if (eof) ADVANCE(14); if (lookahead == '!') ADVANCE(5); - if (lookahead == '#') ADVANCE(54); + if (lookahead == '#') ADVANCE(53); if (lookahead == '$') ADVANCE(35); if (lookahead == '&') ADVANCE(4); if (lookahead == '(') ADVANCE(28); @@ -1535,12 +1535,12 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { lookahead == ' ') SKIP(11) if (('A' <= lookahead && lookahead <= 'Z') || lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(58); + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(57); END_STATE(); case 12: if (eof) ADVANCE(14); if (lookahead == '!') ADVANCE(5); - if (lookahead == '#') ADVANCE(54); + if (lookahead == '#') ADVANCE(53); if (lookahead == '$') ADVANCE(35); if (lookahead == '(') ADVANCE(28); if (lookahead == ')') ADVANCE(29); @@ -1561,11 +1561,11 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { lookahead == ' ') SKIP(12) if (('A' <= lookahead && lookahead <= 'Z') || lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(58); + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(57); END_STATE(); case 13: if (eof) ADVANCE(14); - if (lookahead == '#') ADVANCE(54); + if (lookahead == '#') ADVANCE(53); if (lookahead == '$') ADVANCE(35); if (lookahead == '(') ADVANCE(28); if (lookahead == '*') ADVANCE(38); @@ -1582,7 +1582,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { lookahead == ' ') SKIP(13) if (('A' <= lookahead && lookahead <= 'Z') || lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(58); + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(57); END_STATE(); case 14: ACCEPT_TOKEN(ts_builtin_sym_end); @@ -1613,7 +1613,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { END_STATE(); case 23: ACCEPT_TOKEN(anon_sym_LBRACE); - if (lookahead == '{') ADVANCE(55); + if (lookahead == '{') ADVANCE(54); END_STATE(); case 24: ACCEPT_TOKEN(anon_sym_RBRACE); @@ -1644,7 +1644,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { ACCEPT_TOKEN(anon_sym_AT); if (lookahead == '-') ADVANCE(41); if (lookahead != 0 && - lookahead != '\n') ADVANCE(52); + lookahead != '\n') ADVANCE(51); END_STATE(); case 33: ACCEPT_TOKEN(anon_sym_COLON); @@ -1676,7 +1676,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { case 41: ACCEPT_TOKEN(anon_sym_AT_DASH); if (lookahead != 0 && - lookahead != '\n') ADVANCE(52); + lookahead != '\n') ADVANCE(51); END_STATE(); case 42: ACCEPT_TOKEN(anon_sym_DASH_AT); @@ -1684,7 +1684,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { case 43: ACCEPT_TOKEN(anon_sym_DASH_AT); if (lookahead != 0 && - lookahead != '\n') ADVANCE(52); + lookahead != '\n') ADVANCE(51); END_STATE(); case 44: ACCEPT_TOKEN(anon_sym_DASH); @@ -1694,86 +1694,81 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { ACCEPT_TOKEN(anon_sym_DASH); if (lookahead == '@') ADVANCE(43); if (lookahead != 0 && - lookahead != '\n') ADVANCE(52); + lookahead != '\n') ADVANCE(51); END_STATE(); case 46: ACCEPT_TOKEN(aux_sym_shebang_token1); if (lookahead != 0 && - lookahead != '\n') ADVANCE(54); + lookahead != '\n') ADVANCE(46); END_STATE(); case 47: - ACCEPT_TOKEN(aux_sym_shebang_token1); + ACCEPT_TOKEN(aux_sym_shebang_language_token1); + if (lookahead == '!') ADVANCE(46); if (lookahead != 0 && - lookahead != '\n') ADVANCE(52); + lookahead != '\n') ADVANCE(51); END_STATE(); case 48: ACCEPT_TOKEN(aux_sym_shebang_language_token1); - if (lookahead == '!') ADVANCE(47); + if (lookahead == '-') ADVANCE(45); + if (lookahead == '@') ADVANCE(32); + if (lookahead == '{') ADVANCE(50); + if (lookahead == '\t' || + lookahead == '\r' || + lookahead == ' ') ADVANCE(48); if (lookahead != 0 && - lookahead != '\n') ADVANCE(52); + lookahead != '\n') ADVANCE(51); END_STATE(); case 49: ACCEPT_TOKEN(aux_sym_shebang_language_token1); - if (lookahead == '-') ADVANCE(45); - if (lookahead == '@') ADVANCE(32); - if (lookahead == '{') ADVANCE(51); + if (lookahead == '{') ADVANCE(50); if (lookahead == '\t' || lookahead == '\r' || lookahead == ' ') ADVANCE(49); if (lookahead != 0 && - lookahead != '\n') ADVANCE(52); + lookahead != '\n') ADVANCE(51); END_STATE(); case 50: ACCEPT_TOKEN(aux_sym_shebang_language_token1); - if (lookahead == '{') ADVANCE(51); - if (lookahead == '\t' || - lookahead == '\r' || - lookahead == ' ') ADVANCE(50); + if (lookahead == '{') ADVANCE(55); if (lookahead != 0 && - lookahead != '\n') ADVANCE(52); + lookahead != '\n') ADVANCE(51); END_STATE(); case 51: ACCEPT_TOKEN(aux_sym_shebang_language_token1); - if (lookahead == '{') ADVANCE(56); if (lookahead != 0 && - lookahead != '\n') ADVANCE(52); + lookahead != '\n') ADVANCE(51); END_STATE(); case 52: - ACCEPT_TOKEN(aux_sym_shebang_language_token1); - if (lookahead != 0 && - lookahead != '\n') ADVANCE(52); - END_STATE(); - case 53: ACCEPT_TOKEN(aux_sym_comment_token1); if (lookahead == '!') ADVANCE(46); if (lookahead != 0 && - lookahead != '\n') ADVANCE(54); + lookahead != '\n') ADVANCE(53); END_STATE(); - case 54: + case 53: ACCEPT_TOKEN(aux_sym_comment_token1); if (lookahead != 0 && - lookahead != '\n') ADVANCE(54); + lookahead != '\n') ADVANCE(53); END_STATE(); - case 55: + case 54: ACCEPT_TOKEN(anon_sym_LBRACE_LBRACE); END_STATE(); - case 56: + case 55: ACCEPT_TOKEN(anon_sym_LBRACE_LBRACE); if (lookahead != 0 && - lookahead != '\n') ADVANCE(52); + lookahead != '\n') ADVANCE(51); END_STATE(); - case 57: + case 56: ACCEPT_TOKEN(anon_sym_RBRACE_RBRACE); END_STATE(); - case 58: + case 57: ACCEPT_TOKEN(sym_identifier); if (lookahead == '-' || ('0' <= lookahead && lookahead <= '9') || ('A' <= lookahead && lookahead <= 'Z') || lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(58); + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(57); END_STATE(); - case 59: + case 58: ACCEPT_TOKEN(sym_escape_sequence); END_STATE(); default: @@ -7027,7 +7022,7 @@ static const TSParseActionEntry ts_parse_actions[] = { [15] = {.entry = {.count = 1, .reusable = false}}, SHIFT(298), [17] = {.entry = {.count = 1, .reusable = true}}, SHIFT(356), [19] = {.entry = {.count = 1, .reusable = true}}, SHIFT(355), - [21] = {.entry = {.count = 1, .reusable = false}}, SHIFT(74), + [21] = {.entry = {.count = 1, .reusable = true}}, SHIFT(74), [23] = {.entry = {.count = 1, .reusable = false}}, SHIFT(352), [25] = {.entry = {.count = 1, .reusable = true}}, SHIFT(76), [27] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 2), @@ -7079,7 +7074,7 @@ static const TSParseActionEntry ts_parse_actions[] = { [136] = {.entry = {.count = 1, .reusable = true}}, SHIFT(302), [138] = {.entry = {.count = 1, .reusable = true}}, SHIFT(169), [140] = {.entry = {.count = 1, .reusable = false}}, SHIFT(266), - [142] = {.entry = {.count = 1, .reusable = false}}, SHIFT(150), + [142] = {.entry = {.count = 1, .reusable = true}}, SHIFT(150), [144] = {.entry = {.count = 1, .reusable = false}}, SHIFT(265), [146] = {.entry = {.count = 1, .reusable = false}}, SHIFT(30), [148] = {.entry = {.count = 1, .reusable = true}}, SHIFT(101), diff --git a/test/corpus/injections.txt b/test/corpus/injections.txt index 95db855..7b7f8dc 100644 --- a/test/corpus/injections.txt +++ b/test/corpus/injections.txt @@ -7,6 +7,8 @@ global shebang (source_file (shebang) + (item + (eol)) (item (eol))) diff --git a/test/corpus/recipes.txt b/test/corpus/recipes.txt index f914c40..93b96f9 100644 --- a/test/corpus/recipes.txt +++ b/test/corpus/recipes.txt @@ -403,6 +403,7 @@ foo: (identifier)) (recipe_body (shebang) + (recipe_line) (recipe_line (text)) (recipe_line diff --git a/test/corpus/statements.txt b/test/corpus/statements.txt index 138495a..8ad5933 100644 --- a/test/corpus/statements.txt +++ b/test/corpus/statements.txt @@ -622,6 +622,8 @@ foo: (source_file (shebang) + (item + (eol)) (item (recipe (recipe_header