From 4ad7d07bc433c8f5551799065bc651da51c35811 Mon Sep 17 00:00:00 2001 From: Dee Vazquez Date: Wed, 10 May 2023 13:25:09 -0700 Subject: [PATCH 1/2] Use external raw_text token to resolve inline JS and CSS --- grammar.js | 23 ++- src/grammar.json | 42 ++--- src/node-types.json | 8 + src/parser.c | 451 +++++++++++++++++++++++++------------------- src/scanner.cc | 40 +++- src/tag.h | 285 ++++++++++++++++++++++++++++ 6 files changed, 628 insertions(+), 221 deletions(-) create mode 100644 src/tag.h diff --git a/grammar.js b/grammar.js index 64b9c0d..203612d 100644 --- a/grammar.js +++ b/grammar.js @@ -1,7 +1,15 @@ module.exports = grammar({ name: "pug", - externals: ($) => [$._newline, $._indent, $._dedent], + externals: ($) => [ + $._newline, + $._indent, + $._dedent, + $._tag_name, + $._script_tag_name, + $._style_tag_name, + $.raw_text, + ], rules: { source_file: ($) => @@ -25,12 +33,21 @@ module.exports = grammar({ script_tag: ($) => seq("script", optional($._attributes), optional(seq(".", $._newline))), + // No ERROR nodes, without external token raw_text + // style_tag: ($) => seq("style", optional($._attributes)), + + // Produces many ERROR nodes style_tag: ($) => - seq("style", optional($._attributes), optional(seq(".", $._newline))), + seq( + "style", + optional($._attributes), + optional(seq(".", $._newline, $.raw_text)) + ), path: ($) => seq(repeat1(choice(/\w/, "/")), ".", repeat1(/\w/)), - raw_text: ($) => choice(seq($._indent, /\w/, $._dedent)), + // Alternative to external token raw_text + // raw_text: ($) => choice(seq($._indent, /\w/, $._dedent)), _attributes: ($) => seq("(", repeat(seq($.attribute, optional(choice(",", " ")))), ")"), diff --git a/src/grammar.json b/src/grammar.json index 57f36d3..a74bbce 100644 --- a/src/grammar.json +++ b/src/grammar.json @@ -162,6 +162,10 @@ { "type": "SYMBOL", "name": "_newline" + }, + { + "type": "SYMBOL", + "name": "raw_text" } ] }, @@ -204,28 +208,6 @@ } ] }, - "raw_text": { - "type": "CHOICE", - "members": [ - { 
- "type": "SEQ", - "members": [ - { - "type": "SYMBOL", - "name": "_indent" - }, - { - "type": "PATTERN", - "value": "\\w" - }, - { - "type": "SYMBOL", - "name": "_dedent" - } - ] - } - ] - }, "_attributes": { "type": "SEQ", "members": [ @@ -416,6 +398,22 @@ { "type": "SYMBOL", "name": "_dedent" + }, + { + "type": "SYMBOL", + "name": "_tag_name" + }, + { + "type": "SYMBOL", + "name": "_script_tag_name" + }, + { + "type": "SYMBOL", + "name": "_style_tag_name" + }, + { + "type": "SYMBOL", + "name": "raw_text" } ], "inline": [], diff --git a/src/node-types.json b/src/node-types.json index 71bc598..c36f71e 100644 --- a/src/node-types.json +++ b/src/node-types.json @@ -148,6 +148,10 @@ { "type": "attribute", "named": true + }, + { + "type": "raw_text", + "named": true } ] } @@ -213,6 +217,10 @@ "type": "link", "named": false }, + { + "type": "raw_text", + "named": true + }, { "type": "script", "named": false diff --git a/src/parser.c b/src/parser.c index 1721ed8..30dd5c2 100644 --- a/src/parser.c +++ b/src/parser.c @@ -6,14 +6,14 @@ #endif #define LANGUAGE_VERSION 14 -#define STATE_COUNT 52 +#define STATE_COUNT 54 #define LARGE_STATE_COUNT 2 -#define SYMBOL_COUNT 40 +#define SYMBOL_COUNT 44 #define ALIAS_COUNT 0 -#define TOKEN_COUNT 24 -#define EXTERNAL_TOKEN_COUNT 3 +#define TOKEN_COUNT 28 +#define EXTERNAL_TOKEN_COUNT 7 #define FIELD_COUNT 0 -#define MAX_ALIAS_SEQUENCE_LENGTH 4 +#define MAX_ALIAS_SEQUENCE_LENGTH 5 #define PRODUCTION_ID_COUNT 1 enum { @@ -40,22 +40,26 @@ enum { sym__newline = 21, sym__indent = 22, sym__dedent = 23, - sym_source_file = 24, - sym_extends_statement = 25, - sym_include_statement = 26, - sym_link_tag = 27, - sym_script_tag = 28, - sym_style_tag = 29, - sym_path = 30, - sym__attributes = 31, - sym_attribute = 32, - sym_quoted_attribute_value = 33, - sym_tbd = 34, - aux_sym_source_file_repeat1 = 35, - aux_sym_path_repeat1 = 36, - aux_sym_path_repeat2 = 37, - aux_sym__attributes_repeat1 = 38, - aux_sym_tbd_repeat1 = 39, + sym__tag_name = 24, + 
sym__script_tag_name = 25, + sym__style_tag_name = 26, + sym_raw_text = 27, + sym_source_file = 28, + sym_extends_statement = 29, + sym_include_statement = 30, + sym_link_tag = 31, + sym_script_tag = 32, + sym_style_tag = 33, + sym_path = 34, + sym__attributes = 35, + sym_attribute = 36, + sym_quoted_attribute_value = 37, + sym_tbd = 38, + aux_sym_source_file_repeat1 = 39, + aux_sym_path_repeat1 = 40, + aux_sym_path_repeat2 = 41, + aux_sym__attributes_repeat1 = 42, + aux_sym_tbd_repeat1 = 43, }; static const char * const ts_symbol_names[] = { @@ -83,6 +87,10 @@ static const char * const ts_symbol_names[] = { [sym__newline] = "_newline", [sym__indent] = "_indent", [sym__dedent] = "_dedent", + [sym__tag_name] = "_tag_name", + [sym__script_tag_name] = "_script_tag_name", + [sym__style_tag_name] = "_style_tag_name", + [sym_raw_text] = "raw_text", [sym_source_file] = "source_file", [sym_extends_statement] = "extends_statement", [sym_include_statement] = "include_statement", @@ -126,6 +134,10 @@ static const TSSymbol ts_symbol_map[] = { [sym__newline] = sym__newline, [sym__indent] = sym__indent, [sym__dedent] = sym__dedent, + [sym__tag_name] = sym__tag_name, + [sym__script_tag_name] = sym__script_tag_name, + [sym__style_tag_name] = sym__style_tag_name, + [sym_raw_text] = sym_raw_text, [sym_source_file] = sym_source_file, [sym_extends_statement] = sym_extends_statement, [sym_include_statement] = sym_include_statement, @@ -241,6 +253,22 @@ static const TSSymbolMetadata ts_symbol_metadata[] = { .visible = false, .named = true, }, + [sym__tag_name] = { + .visible = false, + .named = true, + }, + [sym__script_tag_name] = { + .visible = false, + .named = true, + }, + [sym__style_tag_name] = { + .visible = false, + .named = true, + }, + [sym_raw_text] = { + .visible = true, + .named = true, + }, [sym_source_file] = { .visible = true, .named = true, @@ -333,27 +361,27 @@ static const TSStateId ts_primary_state_ids[STATE_COUNT] = { [14] = 14, [15] = 15, [16] = 16, - [17] = 14, + 
[17] = 17, [18] = 18, [19] = 19, [20] = 20, - [21] = 21, + [21] = 14, [22] = 22, [23] = 11, [24] = 24, [25] = 25, [26] = 26, - [27] = 27, - [28] = 25, + [27] = 25, + [28] = 28, [29] = 29, [30] = 30, - [31] = 27, + [31] = 31, [32] = 32, [33] = 33, [34] = 34, [35] = 35, [36] = 36, - [37] = 37, + [37] = 26, [38] = 38, [39] = 39, [40] = 40, @@ -368,6 +396,8 @@ static const TSStateId ts_primary_state_ids[STATE_COUNT] = { [49] = 49, [50] = 50, [51] = 51, + [52] = 52, + [53] = 53, }; static bool ts_lex(TSLexer *lexer, TSStateId state) { @@ -847,55 +877,72 @@ static const TSLexMode ts_lex_modes[STATE_COUNT] = { [23] = {.lex_state = 26}, [24] = {.lex_state = 1}, [25] = {.lex_state = 5}, - [26] = {.lex_state = 1}, + [26] = {.lex_state = 5}, [27] = {.lex_state = 5}, - [28] = {.lex_state = 5}, + [28] = {.lex_state = 1}, [29] = {.lex_state = 1}, - [30] = {.lex_state = 1}, - [31] = {.lex_state = 5}, - [32] = {.lex_state = 6}, - [33] = {.lex_state = 2}, + [30] = {.lex_state = 6}, + [31] = {.lex_state = 2}, + [32] = {.lex_state = 1}, + [33] = {.lex_state = 5}, [34] = {.lex_state = 6}, [35] = {.lex_state = 6}, - [36] = {.lex_state = 5}, - [37] = {.lex_state = 1}, - [38] = {.lex_state = 6}, - [39] = {.lex_state = 4}, - [40] = {.lex_state = 3}, + [36] = {.lex_state = 6}, + [37] = {.lex_state = 5}, + [38] = {.lex_state = 1}, + [39] = {.lex_state = 3}, + [40] = {.lex_state = 6}, [41] = {.lex_state = 5}, - [42] = {.lex_state = 6}, + [42] = {.lex_state = 4}, [43] = {.lex_state = 0, .external_lex_state = 2}, - [44] = {.lex_state = 0, .external_lex_state = 2}, - [45] = {.lex_state = 0}, - [46] = {.lex_state = 0}, - [47] = {.lex_state = 0, .external_lex_state = 2}, + [44] = {.lex_state = 1}, + [45] = {.lex_state = 0, .external_lex_state = 3}, + [46] = {.lex_state = 0, .external_lex_state = 2}, + [47] = {.lex_state = 0}, [48] = {.lex_state = 0}, - [49] = {.lex_state = 1}, - [50] = {.lex_state = 0, .external_lex_state = 2}, + [49] = {.lex_state = 0, .external_lex_state = 2}, + [50] = 
{.lex_state = 0}, [51] = {.lex_state = 1}, + [52] = {.lex_state = 0, .external_lex_state = 3}, + [53] = {.lex_state = 0, .external_lex_state = 2}, }; enum { ts_external_token__newline = 0, ts_external_token__indent = 1, ts_external_token__dedent = 2, + ts_external_token__tag_name = 3, + ts_external_token__script_tag_name = 4, + ts_external_token__style_tag_name = 5, + ts_external_token_raw_text = 6, }; static const TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = { [ts_external_token__newline] = sym__newline, [ts_external_token__indent] = sym__indent, [ts_external_token__dedent] = sym__dedent, + [ts_external_token__tag_name] = sym__tag_name, + [ts_external_token__script_tag_name] = sym__script_tag_name, + [ts_external_token__style_tag_name] = sym__style_tag_name, + [ts_external_token_raw_text] = sym_raw_text, }; -static const bool ts_external_scanner_states[3][EXTERNAL_TOKEN_COUNT] = { +static const bool ts_external_scanner_states[4][EXTERNAL_TOKEN_COUNT] = { [1] = { [ts_external_token__newline] = true, [ts_external_token__indent] = true, [ts_external_token__dedent] = true, + [ts_external_token__tag_name] = true, + [ts_external_token__script_tag_name] = true, + [ts_external_token__style_tag_name] = true, + [ts_external_token_raw_text] = true, }, [2] = { [ts_external_token__newline] = true, }, + [3] = { + [ts_external_token_raw_text] = true, + }, }; static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { @@ -918,9 +965,13 @@ static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { [sym__newline] = ACTIONS(1), [sym__indent] = ACTIONS(1), [sym__dedent] = ACTIONS(1), + [sym__tag_name] = ACTIONS(1), + [sym__script_tag_name] = ACTIONS(1), + [sym__style_tag_name] = ACTIONS(1), + [sym_raw_text] = ACTIONS(1), }, [1] = { - [sym_source_file] = STATE(48), + [sym_source_file] = STATE(50), [sym_extends_statement] = STATE(3), [sym_include_statement] = STATE(3), [sym_link_tag] = STATE(3), @@ -1157,9 +1208,9 @@ static const uint16_t 
ts_small_parse_table[] = { anon_sym_style, aux_sym_tbd_token1, [278] = 2, - ACTIONS(101), 1, + ACTIONS(113), 1, ts_builtin_sym_end, - ACTIONS(103), 6, + ACTIONS(115), 6, anon_sym_extends, anon_sym_include, anon_sym_link, @@ -1167,9 +1218,9 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_style, aux_sym_tbd_token1, [290] = 2, - ACTIONS(113), 1, + ACTIONS(117), 1, ts_builtin_sym_end, - ACTIONS(115), 6, + ACTIONS(119), 6, anon_sym_extends, anon_sym_include, anon_sym_link, @@ -1177,9 +1228,9 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_style, aux_sym_tbd_token1, [302] = 2, - ACTIONS(117), 1, + ACTIONS(121), 1, ts_builtin_sym_end, - ACTIONS(119), 6, + ACTIONS(123), 6, anon_sym_extends, anon_sym_include, anon_sym_link, @@ -1187,9 +1238,9 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_style, aux_sym_tbd_token1, [314] = 2, - ACTIONS(121), 1, + ACTIONS(125), 1, ts_builtin_sym_end, - ACTIONS(123), 6, + ACTIONS(127), 6, anon_sym_extends, anon_sym_include, anon_sym_link, @@ -1197,9 +1248,9 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_style, aux_sym_tbd_token1, [326] = 2, - ACTIONS(125), 1, + ACTIONS(101), 1, ts_builtin_sym_end, - ACTIONS(127), 6, + ACTIONS(103), 6, anon_sym_extends, anon_sym_include, anon_sym_link, @@ -1240,162 +1291,168 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_RPAREN, ACTIONS(141), 1, sym_attribute_name, - STATE(29), 1, + STATE(32), 1, sym_attribute, - STATE(31), 1, + STATE(37), 1, aux_sym__attributes_repeat1, - [387] = 2, - ACTIONS(143), 1, - anon_sym_, - ACTIONS(145), 3, - anon_sym_COMMA, - anon_sym_RPAREN, - sym_attribute_name, - [396] = 4, + [387] = 4, ACTIONS(141), 1, sym_attribute_name, - ACTIONS(147), 1, + ACTIONS(143), 1, anon_sym_RPAREN, - STATE(29), 1, + STATE(32), 1, sym_attribute, - STATE(36), 1, + STATE(33), 1, aux_sym__attributes_repeat1, - [409] = 4, + [400] = 4, ACTIONS(141), 1, sym_attribute_name, - ACTIONS(149), 1, + ACTIONS(145), 1, anon_sym_RPAREN, - STATE(27), 
1, + STATE(26), 1, aux_sym__attributes_repeat1, - STATE(29), 1, + STATE(32), 1, sym_attribute, - [422] = 3, - ACTIONS(151), 1, + [413] = 2, + ACTIONS(147), 1, anon_sym_, - ACTIONS(153), 1, + ACTIONS(149), 3, anon_sym_COMMA, - ACTIONS(155), 2, anon_sym_RPAREN, sym_attribute_name, - [433] = 2, - ACTIONS(157), 1, + [422] = 2, + ACTIONS(151), 1, anon_sym_, - ACTIONS(159), 3, + ACTIONS(153), 3, anon_sym_COMMA, anon_sym_RPAREN, sym_attribute_name, - [442] = 4, - ACTIONS(141), 1, - sym_attribute_name, - ACTIONS(161), 1, - anon_sym_RPAREN, - STATE(29), 1, - sym_attribute, - STATE(36), 1, - aux_sym__attributes_repeat1, - [455] = 3, - ACTIONS(163), 1, + [431] = 3, + ACTIONS(155), 1, anon_sym_DOT, - STATE(32), 1, + STATE(30), 1, aux_sym_path_repeat1, - ACTIONS(165), 2, + ACTIONS(157), 2, aux_sym_path_token1, anon_sym_SLASH, - [466] = 4, - ACTIONS(168), 1, + [442] = 4, + ACTIONS(160), 1, sym_attribute_value, - ACTIONS(170), 1, + ACTIONS(162), 1, anon_sym_SQUOTE, - ACTIONS(172), 1, + ACTIONS(164), 1, anon_sym_DQUOTE, - STATE(37), 1, + STATE(38), 1, sym_quoted_attribute_value, + [455] = 3, + ACTIONS(166), 1, + anon_sym_, + ACTIONS(168), 1, + anon_sym_COMMA, + ACTIONS(170), 2, + anon_sym_RPAREN, + sym_attribute_name, + [466] = 4, + ACTIONS(172), 1, + anon_sym_RPAREN, + ACTIONS(174), 1, + sym_attribute_name, + STATE(32), 1, + sym_attribute, + STATE(33), 1, + aux_sym__attributes_repeat1, [479] = 3, + ACTIONS(177), 1, + anon_sym_DOT, + STATE(30), 1, + aux_sym_path_repeat1, + ACTIONS(179), 2, + aux_sym_path_token1, + anon_sym_SLASH, + [490] = 3, STATE(20), 1, sym_path, - STATE(35), 1, + STATE(34), 1, aux_sym_path_repeat1, - ACTIONS(174), 2, + ACTIONS(181), 2, aux_sym_path_token1, anon_sym_SLASH, - [490] = 3, - ACTIONS(176), 1, - anon_sym_DOT, - STATE(32), 1, + [501] = 3, + STATE(22), 1, + sym_path, + STATE(34), 1, aux_sym_path_repeat1, - ACTIONS(178), 2, + ACTIONS(181), 2, aux_sym_path_token1, anon_sym_SLASH, - [501] = 4, - ACTIONS(180), 1, - anon_sym_RPAREN, - ACTIONS(182), 1, + 
[512] = 4, + ACTIONS(141), 1, sym_attribute_name, - STATE(29), 1, + ACTIONS(183), 1, + anon_sym_RPAREN, + STATE(32), 1, sym_attribute, - STATE(36), 1, + STATE(33), 1, aux_sym__attributes_repeat1, - [514] = 2, + [525] = 2, ACTIONS(185), 1, anon_sym_, ACTIONS(187), 3, anon_sym_COMMA, anon_sym_RPAREN, sym_attribute_name, - [523] = 3, - STATE(22), 1, - sym_path, - STATE(35), 1, - aux_sym_path_repeat1, - ACTIONS(174), 2, - aux_sym_path_token1, - anon_sym_SLASH, [534] = 2, ACTIONS(189), 1, - anon_sym_SQUOTE, + anon_sym_DQUOTE, ACTIONS(191), 1, - aux_sym_quoted_attribute_value_token1, + aux_sym_quoted_attribute_value_token2, [541] = 2, - ACTIONS(189), 1, - anon_sym_DQUOTE, ACTIONS(193), 1, - aux_sym_quoted_attribute_value_token2, + aux_sym_path_token1, + STATE(6), 1, + aux_sym_path_repeat2, [548] = 1, - ACTIONS(180), 2, + ACTIONS(172), 2, anon_sym_RPAREN, sym_attribute_name, [553] = 2, + ACTIONS(189), 1, + anon_sym_SQUOTE, ACTIONS(195), 1, - aux_sym_path_token1, - STATE(6), 1, - aux_sym_path_repeat2, + aux_sym_quoted_attribute_value_token1, [560] = 1, ACTIONS(197), 1, sym__newline, [564] = 1, ACTIONS(199), 1, - sym__newline, + anon_sym_, [568] = 1, ACTIONS(201), 1, - anon_sym_SQUOTE, + sym_raw_text, [572] = 1, - ACTIONS(201), 1, - anon_sym_DQUOTE, - [576] = 1, ACTIONS(203), 1, sym__newline, + [576] = 1, + ACTIONS(205), 1, + anon_sym_SQUOTE, [580] = 1, ACTIONS(205), 1, - ts_builtin_sym_end, + anon_sym_DQUOTE, [584] = 1, ACTIONS(207), 1, - anon_sym_, + sym__newline, [588] = 1, ACTIONS(209), 1, - sym__newline, + ts_builtin_sym_end, [592] = 1, ACTIONS(211), 1, anon_sym_, + [596] = 1, + ACTIONS(213), 1, + sym_raw_text, + [600] = 1, + ACTIONS(215), 1, + sym__newline, }; static const uint32_t ts_small_parse_table_map[] = { @@ -1424,18 +1481,18 @@ static const uint32_t ts_small_parse_table_map[] = { [SMALL_STATE(24)] = 362, [SMALL_STATE(25)] = 374, [SMALL_STATE(26)] = 387, - [SMALL_STATE(27)] = 396, - [SMALL_STATE(28)] = 409, + [SMALL_STATE(27)] = 400, + [SMALL_STATE(28)] = 413, 
[SMALL_STATE(29)] = 422, - [SMALL_STATE(30)] = 433, + [SMALL_STATE(30)] = 431, [SMALL_STATE(31)] = 442, [SMALL_STATE(32)] = 455, [SMALL_STATE(33)] = 466, [SMALL_STATE(34)] = 479, [SMALL_STATE(35)] = 490, [SMALL_STATE(36)] = 501, - [SMALL_STATE(37)] = 514, - [SMALL_STATE(38)] = 523, + [SMALL_STATE(37)] = 512, + [SMALL_STATE(38)] = 525, [SMALL_STATE(39)] = 534, [SMALL_STATE(40)] = 541, [SMALL_STATE(41)] = 548, @@ -1449,21 +1506,23 @@ static const uint32_t ts_small_parse_table_map[] = { [SMALL_STATE(49)] = 584, [SMALL_STATE(50)] = 588, [SMALL_STATE(51)] = 592, + [SMALL_STATE(52)] = 596, + [SMALL_STATE(53)] = 600, }; static const TSParseActionEntry ts_parse_actions[] = { [0] = {.entry = {.count = 0, .reusable = false}}, [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(), [3] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 0), - [5] = {.entry = {.count = 1, .reusable = false}}, SHIFT(51), - [7] = {.entry = {.count = 1, .reusable = false}}, SHIFT(49), + [5] = {.entry = {.count = 1, .reusable = false}}, SHIFT(44), + [7] = {.entry = {.count = 1, .reusable = false}}, SHIFT(51), [9] = {.entry = {.count = 1, .reusable = false}}, SHIFT(7), [11] = {.entry = {.count = 1, .reusable = false}}, SHIFT(4), [13] = {.entry = {.count = 1, .reusable = false}}, SHIFT(5), [15] = {.entry = {.count = 1, .reusable = false}}, SHIFT(10), [17] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), - [19] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(51), - [22] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(49), + [19] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(44), + [22] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(51), [25] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_source_file_repeat1, 2), 
SHIFT_REPEAT(7), [28] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(4), [31] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(5), @@ -1472,10 +1531,10 @@ static const TSParseActionEntry ts_parse_actions[] = { [39] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_script_tag, 1), [41] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_script_tag, 1), [43] = {.entry = {.count = 1, .reusable = false}}, SHIFT(43), - [45] = {.entry = {.count = 1, .reusable = false}}, SHIFT(28), + [45] = {.entry = {.count = 1, .reusable = false}}, SHIFT(27), [47] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_style_tag, 1), [49] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_style_tag, 1), - [51] = {.entry = {.count = 1, .reusable = false}}, SHIFT(44), + [51] = {.entry = {.count = 1, .reusable = false}}, SHIFT(46), [53] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_path, 3), [55] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_path, 3), [57] = {.entry = {.count = 1, .reusable = false}}, SHIFT(8), @@ -1495,65 +1554,67 @@ static const TSParseActionEntry ts_parse_actions[] = { [87] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym__attributes, 2), [89] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_script_tag, 2), [91] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_script_tag, 2), - [93] = {.entry = {.count = 1, .reusable = false}}, SHIFT(50), + [93] = {.entry = {.count = 1, .reusable = false}}, SHIFT(53), [95] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_style_tag, 2), [97] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_style_tag, 2), - [99] = {.entry = {.count = 1, .reusable = false}}, SHIFT(47), + [99] = {.entry = {.count = 1, .reusable = false}}, SHIFT(49), [101] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym__attributes, 3), [103] = {.entry = {.count = 1, .reusable = false}}, 
REDUCE(sym__attributes, 3), - [105] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_style_tag, 4), - [107] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_style_tag, 4), - [109] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_script_tag, 4), - [111] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_script_tag, 4), - [113] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_script_tag, 3), - [115] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_script_tag, 3), - [117] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_link_tag, 2), - [119] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_link_tag, 2), - [121] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_extends_statement, 3), - [123] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_extends_statement, 3), - [125] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_style_tag, 3), - [127] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_style_tag, 3), + [105] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_style_tag, 5), + [107] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_style_tag, 5), + [109] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_style_tag, 4), + [111] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_style_tag, 4), + [113] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_script_tag, 4), + [115] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_script_tag, 4), + [117] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_script_tag, 3), + [119] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_script_tag, 3), + [121] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_link_tag, 2), + [123] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_link_tag, 2), + [125] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_extends_statement, 3), + [127] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_extends_statement, 3), [129] = {.entry = {.count = 
1, .reusable = true}}, REDUCE(sym_include_statement, 3), [131] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_include_statement, 3), [133] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_attribute, 1), [135] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_attribute, 1), - [137] = {.entry = {.count = 1, .reusable = false}}, SHIFT(33), + [137] = {.entry = {.count = 1, .reusable = false}}, SHIFT(31), [139] = {.entry = {.count = 1, .reusable = false}}, SHIFT(23), [141] = {.entry = {.count = 1, .reusable = false}}, SHIFT(24), - [143] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_quoted_attribute_value, 2), - [145] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_quoted_attribute_value, 2), - [147] = {.entry = {.count = 1, .reusable = false}}, SHIFT(14), - [149] = {.entry = {.count = 1, .reusable = false}}, SHIFT(11), - [151] = {.entry = {.count = 1, .reusable = true}}, SHIFT(41), - [153] = {.entry = {.count = 1, .reusable = false}}, SHIFT(41), - [155] = {.entry = {.count = 1, .reusable = false}}, REDUCE(aux_sym__attributes_repeat1, 1), - [157] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_quoted_attribute_value, 3), - [159] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_quoted_attribute_value, 3), - [161] = {.entry = {.count = 1, .reusable = false}}, SHIFT(17), - [163] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_path_repeat1, 2), - [165] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_path_repeat1, 2), SHIFT_REPEAT(32), - [168] = {.entry = {.count = 1, .reusable = true}}, SHIFT(37), - [170] = {.entry = {.count = 1, .reusable = true}}, SHIFT(39), - [172] = {.entry = {.count = 1, .reusable = true}}, SHIFT(40), - [174] = {.entry = {.count = 1, .reusable = true}}, SHIFT(35), - [176] = {.entry = {.count = 1, .reusable = true}}, SHIFT(42), - [178] = {.entry = {.count = 1, .reusable = true}}, SHIFT(32), - [180] = {.entry = {.count = 1, .reusable = false}}, 
REDUCE(aux_sym__attributes_repeat1, 2), - [182] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym__attributes_repeat1, 2), SHIFT_REPEAT(24), + [143] = {.entry = {.count = 1, .reusable = false}}, SHIFT(14), + [145] = {.entry = {.count = 1, .reusable = false}}, SHIFT(11), + [147] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_quoted_attribute_value, 3), + [149] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_quoted_attribute_value, 3), + [151] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_quoted_attribute_value, 2), + [153] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_quoted_attribute_value, 2), + [155] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_path_repeat1, 2), + [157] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_path_repeat1, 2), SHIFT_REPEAT(30), + [160] = {.entry = {.count = 1, .reusable = true}}, SHIFT(38), + [162] = {.entry = {.count = 1, .reusable = true}}, SHIFT(42), + [164] = {.entry = {.count = 1, .reusable = true}}, SHIFT(39), + [166] = {.entry = {.count = 1, .reusable = true}}, SHIFT(41), + [168] = {.entry = {.count = 1, .reusable = false}}, SHIFT(41), + [170] = {.entry = {.count = 1, .reusable = false}}, REDUCE(aux_sym__attributes_repeat1, 1), + [172] = {.entry = {.count = 1, .reusable = false}}, REDUCE(aux_sym__attributes_repeat1, 2), + [174] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym__attributes_repeat1, 2), SHIFT_REPEAT(24), + [177] = {.entry = {.count = 1, .reusable = true}}, SHIFT(40), + [179] = {.entry = {.count = 1, .reusable = true}}, SHIFT(30), + [181] = {.entry = {.count = 1, .reusable = true}}, SHIFT(34), + [183] = {.entry = {.count = 1, .reusable = false}}, SHIFT(21), [185] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_attribute, 3), [187] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_attribute, 3), - [189] = {.entry = {.count = 1, .reusable = false}}, SHIFT(26), - [191] = {.entry = {.count = 1, .reusable = true}}, 
SHIFT(45), - [193] = {.entry = {.count = 1, .reusable = true}}, SHIFT(46), - [195] = {.entry = {.count = 1, .reusable = true}}, SHIFT(6), + [189] = {.entry = {.count = 1, .reusable = false}}, SHIFT(29), + [191] = {.entry = {.count = 1, .reusable = true}}, SHIFT(48), + [193] = {.entry = {.count = 1, .reusable = true}}, SHIFT(6), + [195] = {.entry = {.count = 1, .reusable = true}}, SHIFT(47), [197] = {.entry = {.count = 1, .reusable = true}}, SHIFT(18), - [199] = {.entry = {.count = 1, .reusable = true}}, SHIFT(21), - [201] = {.entry = {.count = 1, .reusable = true}}, SHIFT(30), - [203] = {.entry = {.count = 1, .reusable = true}}, SHIFT(15), - [205] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), - [207] = {.entry = {.count = 1, .reusable = true}}, SHIFT(38), - [209] = {.entry = {.count = 1, .reusable = true}}, SHIFT(16), - [211] = {.entry = {.count = 1, .reusable = true}}, SHIFT(34), + [199] = {.entry = {.count = 1, .reusable = true}}, SHIFT(35), + [201] = {.entry = {.count = 1, .reusable = true}}, SHIFT(15), + [203] = {.entry = {.count = 1, .reusable = true}}, SHIFT(52), + [205] = {.entry = {.count = 1, .reusable = true}}, SHIFT(28), + [207] = {.entry = {.count = 1, .reusable = true}}, SHIFT(45), + [209] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), + [211] = {.entry = {.count = 1, .reusable = true}}, SHIFT(36), + [213] = {.entry = {.count = 1, .reusable = true}}, SHIFT(16), + [215] = {.entry = {.count = 1, .reusable = true}}, SHIFT(17), }; #ifdef __cplusplus diff --git a/src/scanner.cc b/src/scanner.cc index b2bd7f7..1ff5b36 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -1,14 +1,21 @@ #include #include +#include +#include "tag.h" namespace { using std::vector; +using std::string; enum TokenType { NEWLINE, INDENT, - DEDENT + DEDENT, + TAG_NAME, + SCRIPT_TAG_NAME, + STYLE_TAG_NAME, + RAW_TEXT }; struct Scanner { @@ -82,10 +89,41 @@ struct Scanner { } } + if (valid_symbols[TAG_NAME] && !valid_symbols[RAW_TEXT]) { + // return 
scan_tag_name(lexer); + string tag_name; + while (iswalnum(lexer->lookahead)) { + tag_name += towupper(lexer->lookahead); + advance(lexer); + } + if (tag_name.empty()) return false; + Tag tag = Tag::for_name(tag_name); + tags.push_back(tag); /* NOTE(review): tags only grows in this hunk; confirm it is popped and covered by serialize()/deserialize() */ + switch (tag.type) { + case SCRIPT: + lexer->result_symbol = SCRIPT_TAG_NAME; + break; + case STYLE: + lexer->result_symbol = STYLE_TAG_NAME; + break; + default: + lexer->result_symbol = TAG_NAME; + break; + } + return true; + } + + if (valid_symbols[RAW_TEXT] && !valid_symbols[TAG_NAME]) { + if (!tags.size()) return false; + lexer->result_symbol = RAW_TEXT; + return true; + } + return false; } vector indent_length_stack; + vector<Tag> tags; }; } diff --git a/src/tag.h b/src/tag.h new file mode 100644 index 0000000..7f8a71e --- /dev/null +++ b/src/tag.h @@ -0,0 +1,285 @@ +#include <map> +#include <string> + +using std::string; +using std::map; + +enum TagType { + AREA, + BASE, + BASEFONT, + BGSOUND, + BR, + COL, + COMMAND, + EMBED, + FRAME, + HR, + IMAGE, + IMG, + INPUT, + ISINDEX, + KEYGEN, + LINK, + MENUITEM, + META, + NEXTID, + PARAM, + SOURCE, + TRACK, + WBR, + END_OF_VOID_TAGS, + + A, + ABBR, + ADDRESS, + ARTICLE, + ASIDE, + AUDIO, + B, + BDI, + BDO, + BLOCKQUOTE, + BODY, + BUTTON, + CANVAS, + CAPTION, + CITE, + CODE, + COLGROUP, + DATA, + DATALIST, + DD, + DEL, + DETAILS, + DFN, + DIALOG, + DIV, + DL, + DT, + EM, + FIELDSET, + FIGCAPTION, + FIGURE, + FOOTER, + FORM, + H1, + H2, + H3, + H4, + H5, + H6, + HEAD, + HEADER, + HGROUP, + HTML, + I, + IFRAME, + INS, + KBD, + LABEL, + LEGEND, + LI, + MAIN, + MAP, + MARK, + MATH, + MENU, + METER, + NAV, + NOSCRIPT, + OBJECT, + OL, + OPTGROUP, + OPTION, + OUTPUT, + P, + PICTURE, + PRE, + PROGRESS, + Q, + RB, + RP, + RT, + RTC, + RUBY, + S, + SAMP, + SCRIPT, + SECTION, + SELECT, + SLOT, + SMALL, + SPAN, + STRONG, + STYLE, + SUB, + SUMMARY, + SUP, + SVG, + TABLE, + TBODY, + TD, + TEMPLATE, + TEXTAREA, + TFOOT, + TH, + THEAD, + TIME, + TITLE, + TR, + U, + UL, + VAR, + VIDEO, +}; + +static const map<string, TagType>
get_tag_map() { + map<string, TagType> result; +#define TAG(name) result[#name] = name + TAG(AREA); + TAG(BASE); + TAG(BASEFONT); + TAG(BGSOUND); + TAG(BR); + TAG(COL); + TAG(COMMAND); + TAG(EMBED); + TAG(FRAME); + TAG(HR); + TAG(IMAGE); + TAG(IMG); + TAG(INPUT); + TAG(ISINDEX); + TAG(KEYGEN); + TAG(LINK); + TAG(MENUITEM); + TAG(META); + TAG(NEXTID); + TAG(PARAM); + TAG(SOURCE); + TAG(TRACK); + TAG(WBR); + TAG(A); + TAG(ABBR); + TAG(ADDRESS); + TAG(ARTICLE); + TAG(ASIDE); + TAG(AUDIO); + TAG(B); + TAG(BDI); + TAG(BDO); + TAG(BLOCKQUOTE); + TAG(BODY); + TAG(BUTTON); + TAG(CANVAS); + TAG(CAPTION); + TAG(CITE); + TAG(CODE); + TAG(COLGROUP); + TAG(DATA); + TAG(DATALIST); + TAG(DD); + TAG(DEL); + TAG(DETAILS); + TAG(DFN); + TAG(DIALOG); + TAG(DIV); + TAG(DL); + TAG(DT); + TAG(EM); + TAG(FIELDSET); + TAG(FIGCAPTION); + TAG(FIGURE); + TAG(FOOTER); + TAG(FORM); + TAG(H1); + TAG(H2); + TAG(H3); + TAG(H4); + TAG(H5); + TAG(H6); + TAG(HEAD); + TAG(HEADER); + TAG(HGROUP); + TAG(HTML); + TAG(I); + TAG(IFRAME); + TAG(INS); + TAG(KBD); + TAG(LABEL); + TAG(LEGEND); + TAG(LI); + TAG(MAIN); + TAG(MAP); + TAG(MARK); + TAG(MATH); + TAG(MENU); + TAG(METER); + TAG(NAV); + TAG(NOSCRIPT); + TAG(OBJECT); + TAG(OL); + TAG(OPTGROUP); + TAG(OPTION); + TAG(OUTPUT); + TAG(P); + TAG(PICTURE); + TAG(PRE); + TAG(PROGRESS); + TAG(Q); + TAG(RB); + TAG(RP); + TAG(RT); + TAG(RTC); + TAG(RUBY); + TAG(S); + TAG(SAMP); + TAG(SCRIPT); + TAG(SECTION); + TAG(SELECT); + TAG(SLOT); + TAG(SMALL); + TAG(SPAN); + TAG(STRONG); + TAG(STYLE); + TAG(SUB); + TAG(SUMMARY); + TAG(SUP); + TAG(SVG); + TAG(TABLE); + TAG(TBODY); + TAG(TD); + TAG(TEMPLATE); + TAG(TEXTAREA); + TAG(TFOOT); + TAG(TH); + TAG(THEAD); + TAG(TIME); + TAG(TITLE); + TAG(TR); + TAG(U); + TAG(UL); + TAG(VAR); + TAG(VIDEO); +#undef TAG + return result; +} + +static const map<string, TagType> TAG_TYPES_BY_TAG_NAME = get_tag_map(); + +struct Tag { + TagType type; + + Tag(TagType type) : type(type) {} + + bool operator==(const Tag &other) const { + if (type != other.type) return false; +
return true; + } + + static inline Tag for_name(const string &name) { + map<string, TagType>::const_iterator type = TAG_TYPES_BY_TAG_NAME.find(name); + return Tag(type == TAG_TYPES_BY_TAG_NAME.end() ? END_OF_VOID_TAGS : type->second); /* unknown name: never dereference end(); END_OF_VOID_TAGS is a sentinel that no TAG() entry maps to */ + } +}; From 4f9e60cfb2abc3605c67a4fe61cd5fdfefd29fc3 Mon Sep 17 00:00:00 2001 From: Dee Vazquez Date: Thu, 11 May 2023 09:59:33 -0700 Subject: [PATCH 2/2] add yarn example --- package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/package.json b/package.json index 1c91717..5a4b9fb 100644 --- a/package.json +++ b/package.json @@ -11,6 +11,7 @@ "tree-sitter-cli": "^0.20.8" }, "scripts": { + "example": "tree-sitter parse example-file.pug", "generate": "tree-sitter generate", "gt": "tree-sitter generate && tree-sitter test", "parse": "tree-sitter parse",