Skip to content

Commit

Permalink
Add back the improved string scanner
Browse files Browse the repository at this point in the history
- Add back the functionality of our string scanner with hopefully more
  things fixed
- Fix an infinite loop that was causing CI failure on Windows
- Update the fuzzer to just be part of our justfile
- Add back additional queries from before the revert
- Add some additional tests
  • Loading branch information
tgross35 committed Jan 24, 2024
1 parent 4f5d53b commit 01b17d0
Show file tree
Hide file tree
Showing 26 changed files with 6,206 additions and 5,689 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# These test files are from the fuzzer
test/highlight/timeout-* binary
15 changes: 9 additions & 6 deletions .github/workflows/fuzz.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,14 @@ jobs:
test:
name: Parser fuzzing
runs-on: ubuntu-latest
timeout-minutes: 25
steps:
- uses: actions/checkout@v3
- uses: vigoux/tree-sitter-fuzz-action@v1
with:
language: bash
external-scanner: src/scanner.c
time: 60
timeout: 5
- uses: taiki-e/install-action@just
- run: just fuzz
- name: print tests
if: always()
run: |
nm -g fuzzer-out/tree-sitter/libtree-sitter.a
find . -name 'timeout-*' |
xargs -IFNAME sh -c 'echo "\nPrinting FNAME" && base64 -i FNAME'
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ node_modules
build
log.html
*.wasm
fuzzer-out/
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,13 @@ foo := if true {
}
```

## Test Information

The `test` directory contains "corpus" tests that check the parsed syntax, as
well as "highlight" tests that check the highlighting result. The "highlight"
test directory also includes some test files generated by the fuzzer, which are
not always human readable.

## TODO

- [x] Implement a basic parser that is able to understand all features of Justfiles
Expand Down
102 changes: 55 additions & 47 deletions grammar.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
// Main grammar for justfiles

const ESCAPE_SEQUENCE = token(/\\[nrt"\\]/);

// Build a rule that matches one or more `item`s separated by commas:
// the first `item`, followed by any number of `"," item` pairs.
function comma_sep1(item) {
  const trailing_items = repeat(seq(",", item));
  return seq(item, trailing_items);
}

// Create an array with the given item as contents
function array(item) {
function make_array(item) {
const array_item = field("array_item", item);
return field(
"array",
Expand All @@ -23,7 +25,22 @@ function array(item) {

module.exports = grammar({
name: "just",
externals: ($) => [$._indent, $._dedent, $._newline],
externals: (
$,
) => [
$._indent,
$._dedent,
$._newline,
$._string_start,
$._string_end,
$._content_component,
$._raw_string_start,
$._raw_string_end,
$._command_start,
$._command_end,
$._interp_start,
$._interp_end,
],
inline: (
$,
) => [
Expand Down Expand Up @@ -85,15 +102,15 @@ module.exports = grammar({
export: ($) => seq("export", $.assignment),

// import : 'import' '?'? string?
import: ($) => seq("import", optional("?"), $.string_literal),
import: ($) => seq("import", optional("?"), $._string),

// module : 'mod' '?'? string?
module: ($) =>
seq(
"mod",
optional("?"),
field("mod_name", $.identifier),
optional($.string_literal),
optional($._string),
),

// setting : 'set' 'dotenv-load' boolean?
Expand All @@ -105,29 +122,17 @@ module.exports = grammar({
seq(
"set",
field("left", $.identifier),
field(
"right",
optional(
seq(
":=",
choice($.boolean, $.string_literal, array($.string_literal)),
),
),
),
$.eol,
),
seq(
"set",
"shell",
":=",
field(
"right",
array($.string_literal),
optional(
seq(":=", field("right", choice($.boolean, $._string, $.array))),
),
$.eol,
),
),

// Our only use of arrays (setting) only accepts strings. We may want to figure
// out how to better reuse `array` while specifying a type.
array: ($) => make_array($._string),

// boolean : ':=' ('true' | 'false')
boolean: (_) => choice("true", "false"),

Expand Down Expand Up @@ -167,11 +172,13 @@ module.exports = grammar({
choice(
seq($.expression, "==", $.expression),
seq($.expression, "!=", $.expression),
seq($.expression, "=~", $.expression),
// verify whether this is valid
seq($.expression, "=~", choice($.regex_literal, $.expression)),
$.expression,
),

// Capture this specially so that it can be targeted by injections
regex_literal: ($) => prec(4, $._string),

// value : NAME '(' sequence? ')'
// | BACKTICK
// | INDENTED_BACKTICK
Expand All @@ -184,7 +191,7 @@ module.exports = grammar({
$.function_call,
$.external_command,
$.identifier,
$.string_literal,
$._string,
seq("(", $.expression, ")"),
),
),
Expand All @@ -197,9 +204,6 @@ module.exports = grammar({
")",
),

external_command: ($) =>
choice(seq($._backticked), seq($._indented_backticked)),

// sequence : expression ',' sequence
// | expression ','?
sequence: ($) => comma_sep1($.expression),
Expand All @@ -218,7 +222,7 @@ module.exports = grammar({
seq(
repeat($.attribute),
$.recipe_header,
$._newline,
$.eol,
optional($.recipe_body),
),

Expand Down Expand Up @@ -286,14 +290,11 @@ module.exports = grammar({

recipe_line_prefix: (_) => choice("@-", "-@", "@", "-"),

shebang: ($) => seq(/\s*#!.*/, $._newline),
shebang: ($) => prec.left(seq(/#!.*/, optional($._newline))),

// `# ...` comment
comment: ($) => seq(/#.*/, $._newline),

// notinterpolation: ($) => /[^{][^{]\S*/,
notinterpolation: (_) => /[^\s{][^\s{]\S*/,

// interpolation : '{{' expression '}}'
interpolation: ($) => seq("{{", $.expression, "}}"),

Expand All @@ -303,25 +304,32 @@ module.exports = grammar({
// | INDENTED_STRING
// | RAW_STRING
// | INDENTED_RAW_STRING
_string: ($) => choice($.raw_string_literal, $.string_literal),

escape_sequence: (_) => ESCAPE_SEQUENCE,
string_literal: ($) =>
choice(
$._string_indented,
$._raw_string_indented,
$._string,
// _raw_string, can't be written as a separate inline for some reason
/'[^']*'/,
seq(
field("open", alias($._string_start, '("|""")')),
field("body", repeat(choice($._content_component, $.escape_sequence))),
field("close", alias($._string_end, '("|""")')),
),

_raw_string_indented: (_) => seq("'''", repeat(/./), "'''"),
_string: ($) => seq('"', repeat(choice($.string_escape, /[^\\"]+/)), '"'),
_string_indented: ($) =>
seq('"""', repeat(choice($.string_escape, /[^\\"]+/)), '"""'),
string_escape: (_) => /\\[nrt"\\]/,
// Raw string literal, delimited by ' or '''.
// The opening and closing delimiters are the `_raw_string_start` and
// `_raw_string_end` tokens listed in `externals`, each aliased to the
// name "('|''')". The body is at most one `_content_component`; unlike
// `string_literal`, no `escape_sequence` is accepted inside it.
raw_string_literal: ($) =>
seq(
field("open", alias($._raw_string_start, "('|''')")),
field("body", optional($._content_component)),
field("close", alias($._raw_string_end, "('|''')")),
),

// Backtick command, delimited by ` or ```.
// The delimiters are the `_command_start` / `_command_end` tokens listed in
// `externals`, each aliased to the name "(`|```)". The body is any number of
// `interpolation` and `command_body` nodes, in any order.
// NOTE(review): `prec(1, ...)` presumably makes `interpolation` win over
// `command_body` when both could match - confirm against the scanner.
external_command: ($) =>
seq(
field("open", alias($._command_start, "(`|```)")),
field("body", repeat(choice(prec(1, $.interpolation), $.command_body))),
field("close", alias($._command_end, "(`|```)")),
),

_backticked: (_) => seq("`", repeat(/./), "`"),
_indented_backticked: (_) => seq("```", repeat(/./), "```"),
command_body: ($) => $._content_component,

text: (_) => /.+/, //recipe TEXT, only matches in a recipe body
// text: (_) => /\S+/, //recipe TEXT, only matches in a recipe body
},
});
67 changes: 62 additions & 5 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,11 @@ format:
deno fmt

# Generate the parser
gen:
npm run gen
gen *extra-args:
#!/bin/sh
set -eaux
npx tree-sitter generate {{ extra-args }}
python3 build-flavored-queries.py

alias t := test-ts

Expand All @@ -27,19 +30,22 @@ test-parse-highlight:
#!/bin/sh
set -eaux

# skip readme.just because it is broken but works for testing
# skip readme.just because it is broken but works for testing, and skip files
# from the fuzzer
# FIXME: also skip test.just because it is currently broken
find {{justfile_directory()}} -type f -iregex '.*[\./]just[^\./]*' |
grep -v readme.just |
grep -v test.just |
grep -vE 'timeout-.*' |
grep -vE 'crash-.*' |
while read -r fname
do
printf '\n\n\n'
echo "::group::Parse and highlight testing for $fname"
echo "::notice:: checking parsing of $fname"
npx tree-sitter parse "$fname" > "$fname.parse.out"
npx tree-sitter parse "$fname" /dev/null
echo "::notice:: checking highlight of $fname"
npx tree-sitter highlight "$fname" > "$fname.highlight.out"
npx tree-sitter highlight "$fname" /dev/null
echo "::endgroup::"
done

Expand All @@ -55,6 +61,55 @@ check-c:
-Wno-format-pedantic \
-o/dev/null'

fuzz *extra-args: (gen "--debug-build")
#!/bin/sh
set -eaux

out="fuzzer-out"
ts_source="$out/tree-sitter"

flags="-fsanitize=fuzzer,address,undefined"
flags="$flags -g -O1"
flags="$flags -Isrc/ -I$ts_source/lib/include"
flags="$flags -o $out/fuzzer"

mkdir -p "$out"

[ ! -d "$ts_source" ] &&
git clone https://github.com/tree-sitter/tree-sitter "$ts_source" \
--depth=1

make -C "$ts_source"

cat << EOF | clang $flags "$ts_source/libtree-sitter.a" "src/scanner.c" "src/parser.c" -x c -
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "tree_sitter/api.h"

// Entry point of the generated parser (src/parser.c). The prototype must match
// the generated definition, which returns a pointer to const and takes no
// arguments.
const TSLanguage *tree_sitter_just(void);

// libFuzzer entry point: parse the fuzzer-provided bytes with the just grammar.
//
// data: input buffer owned by libFuzzer; it is not NUL-terminated.
// len:  number of valid bytes in data.
//
// Always returns 0, as libFuzzer expects. Crashes, sanitizer reports and
// timeouts are detected by the fuzzer runtime, not through the return value.
int LLVMFuzzerTestOneInput(const uint8_t *data, const size_t len) {
    TSParser *parser = ts_parser_new();

    // If the language is rejected (for example an ABI version mismatch between
    // the generated parser and the tree-sitter library we linked against), every
    // parse would return NULL and the fuzzer would silently test nothing. Fail
    // loudly instead.
    if (!ts_parser_set_language(parser, tree_sitter_just())) {
        fprintf(stderr, "fuzzer: ts_parser_set_language rejected the just language\n");
        abort();
    }

    // Build a syntax tree from the raw input. The API takes a 32-bit length, so
    // make the narrowing from size_t explicit.
    TSTree *tree = ts_parser_parse_string(
        parser,
        NULL,
        (const char *)data,
        (uint32_t)len
    );

    // Free all of the heap-allocated memory so that the address sanitizer only
    // reports leaks that originate in the parser or scanner.
    ts_tree_delete(tree);
    ts_parser_delete(parser);
    return 0;
}
EOF

fuzzer_flags="-artifact_prefix=$out/ -timeout=20 -max_total_time=1200"
./fuzzer-out/fuzzer $fuzzer_flags {{ extra-args }}


# Verify that the `just` tool parses all files we are using
verify-just-parsing:
#!/bin/sh
Expand All @@ -63,6 +118,8 @@ verify-just-parsing:
# skip readme.just because it is broken but works for testing
find . -type f -iregex '.*[\./]just[^\./]*' |
grep -v readme.just |
grep -vE 'timeout-.*' |
grep -vE 'crash-.*' |
while read -r fname
do
echo "::notice file=$fname:: checking Just parsing"
Expand Down
Loading

0 comments on commit 01b17d0

Please sign in to comment.