From 2bf2bbd172f567d5494abff193c74de9a4cd0cf2 Mon Sep 17 00:00:00 2001 From: Alan Pearce Date: Wed, 1 May 2024 23:33:58 +0200 Subject: [PATCH] allow spaces after shebang Dennis Ritchie introduced shebangs to Version 8 Unix in 1980. An excerpt of his message: > To take advantage of this wonderful opportunity, > put > > #! /bin/sh > > at the left margin of the first line of your shell scripts. > Blanks after ! are OK. Use a complete pathname (no search is done). > At the moment the whole line is restricted to 16 characters but > this limit will be raised. https://www.in-ulm.de/~mascheck/various/shebang/sys1.c.html --- grammar.js | 3 ++- src/grammar.json | 4 ++-- src/node-types.json | 4 ---- src/parser.c | 20 +++++++++++--------- test/corpus/recipes.txt | 31 +++++++++++++++++++++++++++++++ test/corpus/statements.txt | 21 +++++++++++++++++++++ 6 files changed, 67 insertions(+), 16 deletions(-) diff --git a/grammar.js b/grammar.js index 450eb92..205a8bd 100644 --- a/grammar.js +++ b/grammar.js @@ -294,7 +294,8 @@ module.exports = grammar({ recipe_line_prefix: (_) => choice("@-", "-@", "@", "-"), // Any shebang. Needs a named field to apply injection queries correctly. - shebang: ($) => seq("#!", choice($._shebang_with_lang, $._opaque_shebang)), + shebang: ($) => + seq(/#![ \t]*/, choice($._shebang_with_lang, $._opaque_shebang)), // Shebang with a nested `language` token that we can extract _shebang_with_lang: ($) => diff --git a/src/grammar.json b/src/grammar.json index c2a9ffb..ec018d1 100644 --- a/src/grammar.json +++ b/src/grammar.json @@ -1283,8 +1283,8 @@ "type": "SEQ", "members": [ { - "type": "STRING", - "value": "#!" + "type": "PATTERN", + "value": "#![ \\t]*" }, { "type": "CHOICE", diff --git a/src/node-types.json b/src/node-types.json index 91037a6..d1d0d8c 100644 --- a/src/node-types.json +++ b/src/node-types.json @@ -815,10 +815,6 @@ "type": "\"\"\"", "named": false }, - { - "type": "#!", - "named": false - }, { "type": "$", "named": false diff --git a/src/parser.c b/src/parser.c index 37be837..8e43a21 100644 --- a/src/parser.c +++ b/src/parser.c @@ -51,7 +51,7 @@ enum ts_symbol_identifiers { anon_sym_AT_DASH = 32, anon_sym_DASH_AT = 33, anon_sym_DASH = 34, - anon_sym_POUND_BANG = 35, + aux_sym_shebang_token1 = 35, aux_sym__shebang_with_lang_token1 = 36, anon_sym_env = 37, aux_sym__shebang_with_lang_token2 = 38, @@ -170,7 +170,7 @@ static const char *const ts_symbol_names[] = { [anon_sym_AT_DASH] = "@-", [anon_sym_DASH_AT] = "-@", [anon_sym_DASH] = "-", - [anon_sym_POUND_BANG] = "#!", + [aux_sym_shebang_token1] = "shebang_token1", [aux_sym__shebang_with_lang_token1] = "_shebang_with_lang_token1", [anon_sym_env] = "env", [aux_sym__shebang_with_lang_token2] = "_shebang_with_lang_token2", @@ -289,7 +289,7 @@ static const TSSymbol ts_symbol_map[] = { [anon_sym_AT_DASH] = anon_sym_AT_DASH, [anon_sym_DASH_AT] = anon_sym_DASH_AT, [anon_sym_DASH] = anon_sym_DASH, - [anon_sym_POUND_BANG] = anon_sym_POUND_BANG, + [aux_sym_shebang_token1] = aux_sym_shebang_token1, [aux_sym__shebang_with_lang_token1] = aux_sym__shebang_with_lang_token1, [anon_sym_env] = anon_sym_env, [aux_sym__shebang_with_lang_token2] = aux_sym__shebang_with_lang_token2, @@ -550,9 +550,9 @@ static const TSSymbolMetadata ts_symbol_metadata[] = { .visible = true, .named = false, }, - [anon_sym_POUND_BANG] = + [aux_sym_shebang_token1] = { - .visible = true, + .visible = false, .named = false, }, [aux_sym__shebang_with_lang_token1] = @@ -1753,7 +1753,9 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { ADVANCE(62); END_STATE(); case 64: - ACCEPT_TOKEN(anon_sym_POUND_BANG); + ACCEPT_TOKEN(aux_sym_shebang_token1); + if (lookahead == '\t' || lookahead == ' ') + ADVANCE(64); END_STATE(); case 65: ACCEPT_TOKEN(aux_sym__shebang_with_lang_token1); @@ -2661,7 +2663,7 @@ static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { [anon_sym_AT_DASH] = ACTIONS(1), [anon_sym_DASH_AT] = ACTIONS(1), [anon_sym_DASH] = ACTIONS(1), - [anon_sym_POUND_BANG] = ACTIONS(1), + [aux_sym_shebang_token1] = ACTIONS(1), [anon_sym_env] = ACTIONS(1), [aux_sym_string_token1] = ACTIONS(1), [anon_sym_SQUOTE_SQUOTE_SQUOTE] = ACTIONS(1), @@ -2703,7 +2705,7 @@ static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { [anon_sym_set] = ACTIONS(17), [anon_sym_LBRACK] = ACTIONS(19), [anon_sym_AT] = ACTIONS(21), - [anon_sym_POUND_BANG] = ACTIONS(23), + [aux_sym_shebang_token1] = ACTIONS(23), [sym_comment] = ACTIONS(3), }, }; @@ -5908,7 +5910,7 @@ static const uint16_t ts_small_parse_table[] = { sym_comment, ACTIONS(23), 1, - anon_sym_POUND_BANG, + aux_sym_shebang_token1, ACTIONS(277), 1, anon_sym_LBRACE_LBRACE, diff --git a/test/corpus/recipes.txt b/test/corpus/recipes.txt index f54ae25..3ebb501 100644 --- a/test/corpus/recipes.txt +++ b/test/corpus/recipes.txt @@ -358,6 +358,37 @@ foo: (recipe_line (text))))) +================================================================================ +shebang with spaces +================================================================================ + +foo: + #! /bin/sh + if [ -f "foo" ]; then + echo "foo {{var}}" + fi + +-------------------------------------------------------------------------------- + +(source_file + (recipe + (recipe_header + (identifier)) + (recipe_body + (shebang + (language)) + (recipe_line + (text)) + (recipe_line + (text) + (interpolation + (expression + (value + (identifier)))) + (text)) + (recipe_line + (text))))) + ================================================================================ shebang with longer body ================================================================================ diff --git a/test/corpus/statements.txt b/test/corpus/statements.txt index 4963b04..557be27 100644 --- a/test/corpus/statements.txt +++ b/test/corpus/statements.txt @@ -492,6 +492,27 @@ foo: -------------------------------------------------------------------------------- +(source_file + (shebang + (language)) + (recipe + (recipe_header + (identifier)) + (recipe_body + (recipe_line + (text))))) + +================================================================================ +shebang with space +================================================================================ + +#! /usr/bin/env just + +foo: + body + +-------------------------------------------------------------------------------- + (source_file (shebang (language))