From 09657f1e0981f6af440e0830fbf03d949b8d5ed6 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 7 Jan 2024 06:44:07 -0600
Subject: [PATCH] Extracting again: make shebang token consume the rest of the line

---
 grammar.js                 |   5 +-
 src/grammar.json           |  20 ++++++-
 src/parser.c               | 117 ++++++++++++++++++-------------------
 test/corpus/injections.txt |   2 +
 test/corpus/recipes.txt    |   1 +
 test/corpus/statements.txt |   2 +
 6 files changed, 82 insertions(+), 65 deletions(-)

diff --git a/grammar.js b/grammar.js
index 6a4f47e..5b033e6 100644
--- a/grammar.js
+++ b/grammar.js
@@ -298,7 +298,10 @@ module.exports = grammar({
     recipe_line_prefix: (_) => choice("@-", "-@", "@", "-"),
 
     shebang: ($) =>
-      prec.left(10, seq(token.immediate(/#!/, /.*/), optional($._newline))),
+      prec.left(seq(
+        token.immediate(seq(/#!/, choice(/.*/))),
+        optional($._newline),
+      )),
     // prec(
     //   4,
     //   seq(
diff --git a/src/grammar.json b/src/grammar.json
index 6e81642..46ef91d 100644
--- a/src/grammar.json
+++ b/src/grammar.json
@@ -1253,15 +1253,29 @@
     },
     "shebang": {
       "type": "PREC_LEFT",
-      "value": 10,
+      "value": 0,
       "content": {
         "type": "SEQ",
         "members": [
           {
             "type": "IMMEDIATE_TOKEN",
             "content": {
-              "type": "PATTERN",
-              "value": "#!"
+              "type": "SEQ",
+              "members": [
+                {
+                  "type": "PATTERN",
+                  "value": "#!"
+                },
+                {
+                  "type": "CHOICE",
+                  "members": [
+                    {
+                      "type": "PATTERN",
+                      "value": ".*"
+                    }
+                  ]
+                }
+              ]
             }
           },
           {
diff --git a/src/parser.c b/src/parser.c
index 698af4a..cf7da76 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1402,7 +1402,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
     case 0:
       if (eof) ADVANCE(14);
       if (lookahead == '!') ADVANCE(5);
-      if (lookahead == '#') ADVANCE(53);
+      if (lookahead == '#') ADVANCE(52);
       if (lookahead == '$') ADVANCE(35);
       if (lookahead == '&') ADVANCE(4);
       if (lookahead == '(') ADVANCE(28);
@@ -1427,36 +1427,36 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
           lookahead == ' ') SKIP(11)
       if (('A' <= lookahead && lookahead <= 'Z') ||
           lookahead == '_' ||
-          ('a' <= lookahead && lookahead <= 'z')) ADVANCE(58);
+          ('a' <= lookahead && lookahead <= 'z')) ADVANCE(57);
       END_STATE();
     case 1:
       if (lookahead == '\n') SKIP(2)
-      if (lookahead == '#') ADVANCE(48);
+      if (lookahead == '#') ADVANCE(47);
       if (lookahead == '-') ADVANCE(45);
       if (lookahead == '@') ADVANCE(32);
-      if (lookahead == '{') ADVANCE(51);
+      if (lookahead == '{') ADVANCE(50);
       if (lookahead == '\t' ||
           lookahead == '\r' ||
-          lookahead == ' ') ADVANCE(49);
-      if (lookahead != 0) ADVANCE(52);
+          lookahead == ' ') ADVANCE(48);
+      if (lookahead != 0) ADVANCE(51);
       END_STATE();
     case 2:
       if (lookahead == '\n') SKIP(2)
       if (lookahead == '-') ADVANCE(45);
       if (lookahead == '@') ADVANCE(32);
-      if (lookahead == '{') ADVANCE(51);
+      if (lookahead == '{') ADVANCE(50);
       if (lookahead == '\t' ||
           lookahead == '\r' ||
-          lookahead == ' ') ADVANCE(49);
-      if (lookahead != 0) ADVANCE(52);
+          lookahead == ' ') ADVANCE(48);
+      if (lookahead != 0) ADVANCE(51);
       END_STATE();
     case 3:
       if (lookahead == '\n') SKIP(3)
-      if (lookahead == '{') ADVANCE(51);
+      if (lookahead == '{') ADVANCE(50);
       if (lookahead == '\t' ||
           lookahead == '\r' ||
-          lookahead == ' ') ADVANCE(50);
-      if (lookahead != 0) ADVANCE(52);
+          lookahead == ' ') ADVANCE(49);
+      if (lookahead != 0) ADVANCE(51);
       END_STATE();
     case 4:
       if (lookahead == '&') ADVANCE(39);
@@ -1469,22 +1469,22 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
       if (lookahead == '~') ADVANCE(27);
       END_STATE();
     case 7:
-      if (lookahead == '{') ADVANCE(55);
+      if (lookahead == '{') ADVANCE(54);
       END_STATE();
     case 8:
-      if (lookahead == '}') ADVANCE(57);
+      if (lookahead == '}') ADVANCE(56);
       END_STATE();
     case 9:
       if (lookahead == '"' ||
           lookahead == '\\' ||
           lookahead == 'n' ||
           lookahead == 'r' ||
-          lookahead == 't') ADVANCE(59);
+          lookahead == 't') ADVANCE(58);
       END_STATE();
     case 10:
       if (eof) ADVANCE(14);
       if (lookahead == '!') ADVANCE(5);
-      if (lookahead == '#') ADVANCE(53);
+      if (lookahead == '#') ADVANCE(52);
       if (lookahead == '$') ADVANCE(35);
       if (lookahead == '(') ADVANCE(28);
       if (lookahead == ')') ADVANCE(29);
@@ -1505,12 +1505,12 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
           lookahead == ' ') SKIP(12)
       if (('A' <= lookahead && lookahead <= 'Z') ||
           lookahead == '_' ||
-          ('a' <= lookahead && lookahead <= 'z')) ADVANCE(58);
+          ('a' <= lookahead && lookahead <= 'z')) ADVANCE(57);
       END_STATE();
     case 11:
       if (eof) ADVANCE(14);
       if (lookahead == '!') ADVANCE(5);
-      if (lookahead == '#') ADVANCE(54);
+      if (lookahead == '#') ADVANCE(53);
       if (lookahead == '$') ADVANCE(35);
       if (lookahead == '&') ADVANCE(4);
       if (lookahead == '(') ADVANCE(28);
@@ -1535,12 +1535,12 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
           lookahead == ' ') SKIP(11)
       if (('A' <= lookahead && lookahead <= 'Z') ||
           lookahead == '_' ||
-          ('a' <= lookahead && lookahead <= 'z')) ADVANCE(58);
+          ('a' <= lookahead && lookahead <= 'z')) ADVANCE(57);
       END_STATE();
     case 12:
       if (eof) ADVANCE(14);
       if (lookahead == '!') ADVANCE(5);
-      if (lookahead == '#') ADVANCE(54);
+      if (lookahead == '#') ADVANCE(53);
       if (lookahead == '$') ADVANCE(35);
       if (lookahead == '(') ADVANCE(28);
       if (lookahead == ')') ADVANCE(29);
@@ -1561,11 +1561,11 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
           lookahead == ' ') SKIP(12)
       if (('A' <= lookahead && lookahead <= 'Z') ||
           lookahead == '_' ||
-          ('a' <= lookahead && lookahead <= 'z')) ADVANCE(58);
+          ('a' <= lookahead && lookahead <= 'z')) ADVANCE(57);
       END_STATE();
     case 13:
       if (eof) ADVANCE(14);
-      if (lookahead == '#') ADVANCE(54);
+      if (lookahead == '#') ADVANCE(53);
       if (lookahead == '$') ADVANCE(35);
       if (lookahead == '(') ADVANCE(28);
       if (lookahead == '*') ADVANCE(38);
@@ -1582,7 +1582,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
           lookahead == ' ') SKIP(13)
       if (('A' <= lookahead && lookahead <= 'Z') ||
           lookahead == '_' ||
-          ('a' <= lookahead && lookahead <= 'z')) ADVANCE(58);
+          ('a' <= lookahead && lookahead <= 'z')) ADVANCE(57);
       END_STATE();
     case 14:
       ACCEPT_TOKEN(ts_builtin_sym_end);
@@ -1613,7 +1613,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
       END_STATE();
     case 23:
       ACCEPT_TOKEN(anon_sym_LBRACE);
-      if (lookahead == '{') ADVANCE(55);
+      if (lookahead == '{') ADVANCE(54);
       END_STATE();
     case 24:
       ACCEPT_TOKEN(anon_sym_RBRACE);
@@ -1644,7 +1644,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
       ACCEPT_TOKEN(anon_sym_AT);
       if (lookahead == '-') ADVANCE(41);
       if (lookahead != 0 &&
-          lookahead != '\n') ADVANCE(52);
+          lookahead != '\n') ADVANCE(51);
       END_STATE();
     case 33:
       ACCEPT_TOKEN(anon_sym_COLON);
@@ -1676,7 +1676,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
     case 41:
       ACCEPT_TOKEN(anon_sym_AT_DASH);
       if (lookahead != 0 &&
-          lookahead != '\n') ADVANCE(52);
+          lookahead != '\n') ADVANCE(51);
       END_STATE();
     case 42:
       ACCEPT_TOKEN(anon_sym_DASH_AT);
@@ -1684,7 +1684,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
     case 43:
       ACCEPT_TOKEN(anon_sym_DASH_AT);
       if (lookahead != 0 &&
-          lookahead != '\n') ADVANCE(52);
+          lookahead != '\n') ADVANCE(51);
       END_STATE();
     case 44:
       ACCEPT_TOKEN(anon_sym_DASH);
@@ -1694,86 +1694,81 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
       ACCEPT_TOKEN(anon_sym_DASH);
       if (lookahead == '@') ADVANCE(43);
       if (lookahead != 0 &&
-          lookahead != '\n') ADVANCE(52);
+          lookahead != '\n') ADVANCE(51);
       END_STATE();
     case 46:
       ACCEPT_TOKEN(aux_sym_shebang_token1);
       if (lookahead != 0 &&
-          lookahead != '\n') ADVANCE(54);
+          lookahead != '\n') ADVANCE(46);
       END_STATE();
     case 47:
-      ACCEPT_TOKEN(aux_sym_shebang_token1);
+      ACCEPT_TOKEN(aux_sym_shebang_language_token1);
+      if (lookahead == '!') ADVANCE(46);
       if (lookahead != 0 &&
-          lookahead != '\n') ADVANCE(52);
+          lookahead != '\n') ADVANCE(51);
       END_STATE();
     case 48:
       ACCEPT_TOKEN(aux_sym_shebang_language_token1);
-      if (lookahead == '!') ADVANCE(47);
+      if (lookahead == '-') ADVANCE(45);
+      if (lookahead == '@') ADVANCE(32);
+      if (lookahead == '{') ADVANCE(50);
+      if (lookahead == '\t' ||
+          lookahead == '\r' ||
+          lookahead == ' ') ADVANCE(48);
       if (lookahead != 0 &&
-          lookahead != '\n') ADVANCE(52);
+          lookahead != '\n') ADVANCE(51);
       END_STATE();
     case 49:
       ACCEPT_TOKEN(aux_sym_shebang_language_token1);
-      if (lookahead == '-') ADVANCE(45);
-      if (lookahead == '@') ADVANCE(32);
-      if (lookahead == '{') ADVANCE(51);
+      if (lookahead == '{') ADVANCE(50);
       if (lookahead == '\t' ||
           lookahead == '\r' ||
           lookahead == ' ') ADVANCE(49);
       if (lookahead != 0 &&
-          lookahead != '\n') ADVANCE(52);
+          lookahead != '\n') ADVANCE(51);
       END_STATE();
     case 50:
       ACCEPT_TOKEN(aux_sym_shebang_language_token1);
-      if (lookahead == '{') ADVANCE(51);
-      if (lookahead == '\t' ||
-          lookahead == '\r' ||
-          lookahead == ' ') ADVANCE(50);
+      if (lookahead == '{') ADVANCE(55);
       if (lookahead != 0 &&
-          lookahead != '\n') ADVANCE(52);
+          lookahead != '\n') ADVANCE(51);
       END_STATE();
     case 51:
       ACCEPT_TOKEN(aux_sym_shebang_language_token1);
-      if (lookahead == '{') ADVANCE(56);
       if (lookahead != 0 &&
-          lookahead != '\n') ADVANCE(52);
+          lookahead != '\n') ADVANCE(51);
       END_STATE();
     case 52:
-      ACCEPT_TOKEN(aux_sym_shebang_language_token1);
-      if (lookahead != 0 &&
-          lookahead != '\n') ADVANCE(52);
-      END_STATE();
-    case 53:
       ACCEPT_TOKEN(aux_sym_comment_token1);
       if (lookahead == '!') ADVANCE(46);
       if (lookahead != 0 &&
-          lookahead != '\n') ADVANCE(54);
+          lookahead != '\n') ADVANCE(53);
       END_STATE();
-    case 54:
+    case 53:
       ACCEPT_TOKEN(aux_sym_comment_token1);
       if (lookahead != 0 &&
-          lookahead != '\n') ADVANCE(54);
+          lookahead != '\n') ADVANCE(53);
       END_STATE();
-    case 55:
+    case 54:
       ACCEPT_TOKEN(anon_sym_LBRACE_LBRACE);
       END_STATE();
-    case 56:
+    case 55:
       ACCEPT_TOKEN(anon_sym_LBRACE_LBRACE);
       if (lookahead != 0 &&
-          lookahead != '\n') ADVANCE(52);
+          lookahead != '\n') ADVANCE(51);
       END_STATE();
-    case 57:
+    case 56:
       ACCEPT_TOKEN(anon_sym_RBRACE_RBRACE);
       END_STATE();
-    case 58:
+    case 57:
       ACCEPT_TOKEN(sym_identifier);
       if (lookahead == '-' ||
           ('0' <= lookahead && lookahead <= '9') ||
           ('A' <= lookahead && lookahead <= 'Z') ||
           lookahead == '_' ||
-          ('a' <= lookahead && lookahead <= 'z')) ADVANCE(58);
+          ('a' <= lookahead && lookahead <= 'z')) ADVANCE(57);
       END_STATE();
-    case 59:
+    case 58:
       ACCEPT_TOKEN(sym_escape_sequence);
       END_STATE();
     default:
@@ -7027,7 +7022,7 @@ static const TSParseActionEntry ts_parse_actions[] = {
   [15] = {.entry = {.count = 1, .reusable = false}}, SHIFT(298),
   [17] = {.entry = {.count = 1, .reusable = true}}, SHIFT(356),
   [19] = {.entry = {.count = 1, .reusable = true}}, SHIFT(355),
-  [21] = {.entry = {.count = 1, .reusable = false}}, SHIFT(74),
+  [21] = {.entry = {.count = 1, .reusable = true}}, SHIFT(74),
   [23] = {.entry = {.count = 1, .reusable = false}}, SHIFT(352),
   [25] = {.entry = {.count = 1, .reusable = true}}, SHIFT(76),
   [27] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 2),
@@ -7079,7 +7074,7 @@ static const TSParseActionEntry ts_parse_actions[] = {
   [136] = {.entry = {.count = 1, .reusable = true}}, SHIFT(302),
   [138] = {.entry = {.count = 1, .reusable = true}}, SHIFT(169),
   [140] = {.entry = {.count = 1, .reusable = false}}, SHIFT(266),
-  [142] = {.entry = {.count = 1, .reusable = false}}, SHIFT(150),
+  [142] = {.entry = {.count = 1, .reusable = true}}, SHIFT(150),
   [144] = {.entry = {.count = 1, .reusable = false}}, SHIFT(265),
   [146] = {.entry = {.count = 1, .reusable = false}}, SHIFT(30),
   [148] = {.entry = {.count = 1, .reusable = true}}, SHIFT(101),
diff --git a/test/corpus/injections.txt b/test/corpus/injections.txt
index 95db855..7b7f8dc 100644
--- a/test/corpus/injections.txt
+++ b/test/corpus/injections.txt
@@ -7,6 +7,8 @@ global shebang
 
 (source_file
   (shebang)
+  (item
+    (eol))
   (item
     (eol)))
 
diff --git a/test/corpus/recipes.txt b/test/corpus/recipes.txt
index f914c40..93b96f9 100644
--- a/test/corpus/recipes.txt
+++ b/test/corpus/recipes.txt
@@ -403,6 +403,7 @@ foo:
         (identifier))
       (recipe_body
         (shebang)
+        (recipe_line)
         (recipe_line
           (text))
         (recipe_line
diff --git a/test/corpus/statements.txt b/test/corpus/statements.txt
index 138495a..8ad5933 100644
--- a/test/corpus/statements.txt
+++ b/test/corpus/statements.txt
@@ -622,6 +622,8 @@ foo:
 
 (source_file
   (shebang)
+  (item
+    (eol))
   (item
     (recipe
       (recipe_header