Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove npm dependency from CI #78

Closed
wants to merge 18 commits into from
35 changes: 19 additions & 16 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,26 @@ jobs:
steps:
- uses: actions/checkout@v4
- uses: taiki-e/install-action@just
- uses: actions/setup-node@v4
- uses: taiki-e/install-action@v2
with:
node-version: 18
- name: Get npm cache directory
id: npm-cache-dir
shell: bash
run: echo "dir=$(npm config get cache)" >> ${GITHUB_OUTPUT}
- uses: actions/cache@v3
id: npm-cache
with:
path: ${{ steps.npm-cache-dir.outputs.dir }}
key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
restore-keys: ${{ runner.os }}-node-
- run: |
npm --version
npx --version
- run: npm install --locked
tool: [email protected]
- uses: Swatinem/rust-cache@v2
- name: print glibc version
run: |
GCC_FEATURES=$(gcc -dM -E - <<< "#include <features.h>")

if grep -q __UCLIBC__ <<< "${GCC_FEATURES}"; then
echo "uClibc"
grep "#define __UCLIBC_MAJOR__" <<< "${GCC_FEATURES}"
grep "#define __UCLIBC_MINOR__" <<< "${GCC_FEATURES}"
grep "#define __UCLIBC_SUBLEVEL__" <<< "${GCC_FEATURES}"
elif grep -q __GLIBC__ <<< "${GCC_FEATURES}"; then
echo "glibc"
grep "#define __GLIBC__" <<< "${GCC_FEATURES}"
grep "#define __GLIBC_MINOR__" <<< "${GCC_FEATURES}"
else
echo "something else"
fi
- name: Configure
run: just --verbose configure-tree-sitter
- name: Check generated files
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/fuzz.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,14 @@ jobs:
steps:
- uses: actions/checkout@v3
- uses: taiki-e/install-action@just
- uses: taiki-e/install-action@v2
with:
tool: tree-sitter-cli
- run: just fuzz
- name: print tests
if: always()
run: |
find . -name '*.a'
nm -g fuzzer-out/tree-sitter/libtree-sitter.a
find . -name 'timeout-*' |
xargs -IFNAME sh -c 'echo "\nPrinting FNAME" && base64 -i FNAME'
14 changes: 13 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
# Rust build
target/**

# Node cache
node_modules

# Tree-sitter output
build
log.html
*.wasm
fuzzer-out/

# Fuzzer items
tree-sitter-src/
fuzzer/corpus
fuzzer/artifacts
**.log
**.out
**.dSYM
50 changes: 50 additions & 0 deletions bindings/debug.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Demo that parses the first argument. Useful for attaching a debugger.

#include "tree_sitter/api.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Entry point of the `just` grammar. Only declared here; the definition is
// presumably supplied by the generated parser linked into this binary.
TSLanguage *tree_sitter_just();

// 10 KiB. NOTE(review): nothing visible in this file reads MAX_LEN; confirm
// whether it is still needed.
size_t MAX_LEN = 10 * 1024; // 10k

/*
 * Parse every file named on the command line with the `just` grammar.
 *
 * One parser and one read buffer are shared by all files; the buffer grows to
 * fit the largest file seen so far. Returns 0 when every file was read and
 * parsed, 1 on a usage, I/O, allocation or parse failure.
 */
int main(int argc, char **argv) {
    if (argc < 2) {
        fprintf(stderr, "must provide one or more file names\n");
        return 1;
    }

    TSParser *parser = ts_parser_new();
    assert(parser);
    // Fails when the generated grammar and the library ABI do not match.
    if (!ts_parser_set_language(parser, tree_sitter_just())) {
        fprintf(stderr, "grammar is incompatible with this tree-sitter library\n");
        ts_parser_delete(parser);
        return 1;
    }

    size_t alloc_size = 1024;
    char *data = malloc(alloc_size);
    assert(data);

    int status = 0;
    for (int i = 1; i < argc; ++i) {
        FILE *fp = fopen(argv[i], "rb");
        if (fp == NULL) {
            perror(argv[i]);
            status = 1;
            break;
        }

        // Measure the file by seeking to its end, then rewind for reading.
        long end = -1;
        if (fseek(fp, 0L, SEEK_END) == 0) {
            end = ftell(fp);
        }
        if (end < 0) {
            perror(argv[i]);
            fclose(fp);
            status = 1;
            break;
        }
        rewind(fp);
        size_t file_size = (size_t)end;

        if (file_size > alloc_size) {
            // Keep the old pointer until realloc succeeds so it is not leaked.
            char *grown = realloc(data, file_size);
            if (grown == NULL) {
                fprintf(stderr, "%s: out of memory\n", argv[i]);
                fclose(fp);
                status = 1;
                break;
            }
            data = grown;
            alloc_size = file_size;
        }

        // Actually load the contents; parse only the bytes that were read.
        size_t n_read = fread(data, 1, file_size, fp);
        int read_failed = ferror(fp);
        fclose(fp);
        if (read_failed) {
            fprintf(stderr, "%s: read error\n", argv[i]);
            status = 1;
            break;
        }

        // Build a syntax tree based on source code stored in a string.
        TSTree *tree = ts_parser_parse_string(parser, NULL, data, n_read);
        if (tree == NULL) {
            fprintf(stderr, "%s: parsing failed\n", argv[i]);
            status = 1;
            break;
        }
        TSNode root_node = ts_tree_root_node(tree);
        // Deliberately an assert: traps in the debugger on an empty tree.
        assert(ts_node_child_count(root_node) > 0);

        // The tree belongs to this file only; the parser is reused.
        ts_tree_delete(tree);
    }

    // Free all of the heap-allocated memory.
    free(data);
    ts_parser_delete(parser);
    return status;
}
40 changes: 40 additions & 0 deletions fuzzer/build-corpus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env python3
"""Turn our tests into a corpus for the fuzzer (one test per file)
"""

from pathlib import Path
import re

# One test case in a corpus file: a name framed by two rules of three or more
# "=", then the source text, lazily matched up to the next rule of three or
# more "-". Applied with re.DOTALL so `source` can span multiple lines.
RE = r"===+\n(?P<name>.*?)\n===+\n(?P<source>.*?)\n---+"

def main() -> None:
    """Split every tree-sitter corpus file into one ``.just`` file per test.

    Reads ``test/corpus/*.txt`` relative to the repository root (the parent of
    this script's directory), extracts each test's name and source with ``RE``
    and writes the source to ``fuzzer/corpus/<file stem>_<test name>.just``.
    Existing ``.just`` files in the output directory are removed first; any
    other files there are left alone.
    """
    repo = Path(__file__).parent.parent
    ts_corpus_files = (repo / "test" / "corpus").glob("*.txt")

    corpus = {}

    for fname in ts_corpus_files:
        text = fname.read_text(encoding="utf-8")
        # `.stem` drops only the extension. `rstrip(".txt")` strips a set of
        # characters, so it would also eat a trailing "t", "x" or "." from the
        # base name (e.g. "comment.txt" -> "commen").
        prefix = fname.stem

        for match in re.finditer(RE, text, re.MULTILINE | re.DOTALL):
            # Test names are free text; keep them usable as a single file name.
            name = match.group("name").replace(" ", "_").replace("/", "_")
            corpus[f"{prefix}_{name}.just"] = match.group("source")

    out_dir = repo / "fuzzer" / "corpus"
    out_dir.mkdir(parents=True, exist_ok=True)

    # Clear the corpus of all files we created
    for existing in out_dir.iterdir():
        if existing.name.endswith(".just"):
            existing.unlink()

    for name, source in corpus.items():
        (out_dir / name).write_text(source, encoding="utf-8")


if __name__ == "__main__":
main()
22 changes: 22 additions & 0 deletions fuzzer/entry.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#include <stdio.h>
#include <stdlib.h>
#include "tree_sitter/api.h"

TSLanguage *tree_sitter_just();

// Fuzzer entry point: parse `len` bytes starting at `data` with the `just`
// grammar, then release everything that was allocated. Always returns 0.
int LLVMFuzzerTestOneInput(const uint8_t *data, const size_t len) {
    // The fuzzer hands over raw bytes; the parser API takes a char buffer.
    const char *source = (const char *)data;

    TSParser *ts = ts_parser_new();
    ts_parser_set_language(ts, tree_sitter_just());

    // Parse from scratch (no previous tree to reuse).
    TSTree *parsed = ts_parser_parse_string(ts, NULL, source, len);

    // Tear down in the same order as before: tree first, then parser.
    ts_tree_delete(parsed);
    ts_parser_delete(ts);
    return 0;
}
91 changes: 45 additions & 46 deletions justfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# List all recipes

Check notice on line 1 in justfile

View workflow job for this annotation

GitHub Actions / static-validation

checking Just parsing
default:
just --list

Expand All @@ -16,14 +16,14 @@
gen *extra-args:
#!/bin/sh
set -eaux
npx tree-sitter generate {{ extra-args }}
tree-sitter generate {{ extra-args }}
python3 build-flavored-queries.py

alias t := test-ts

# Run tests that are built into tree-sitter
test-ts *ts-test-args: gen
npm test -- {{ ts-test-args }}
tree-sitter test {{ ts-test-args }}

# Verify that tree-sitter can parse and highlight all files in the repo. Requires a tree-sitter configuration.
test-parse-highlight:
Expand All @@ -41,9 +41,9 @@
printf '\n\n\n'
echo "::group::Parse and highlight testing for $fname"
echo "::notice:: checking parsing of $fname"
npx tree-sitter parse "$fname" > /dev/null
tree-sitter parse "$fname" > /dev/null
echo "::notice:: checking highlight of $fname"
npx tree-sitter highlight "$fname" > /dev/null
tree-sitter highlight "$fname" > /dev/null
echo "::endgroup::"
done

Expand All @@ -59,53 +59,54 @@
-Wno-format-pedantic \
-o/dev/null'

fuzz *extra-args: (gen "--debug-build")
src := justfile_directory() / "src"
bindings := justfile_directory() / "bindings"
ts_src := justfile_directory() / "tree-sitter-src"
ts_staticlib := ts_src / "libtree-sitter.a"
fuzzer := justfile_directory() / "fuzzer"
nproc := if os() == "macos" { `sysctl -n hw.logicalcpu` } else { `nproc` }

# Download and build upstream tree-sitter
tree-sitter *cflags:
#!/bin/sh
set -eaux
[ ! -d "{{ ts_src }}" ] &&
git clone https://github.com/tree-sitter/tree-sitter "{{ ts_src }}" \
--depth=1
CFLAGS="-O1 -g {{ cflags }} $CFLAGS" make -C "{{ ts_src }}"

out="fuzzer-out"
ts_source="$out/tree-sitter"

flags="-fsanitize=fuzzer,address,undefined"
flags="$flags -g -O1"
flags="$flags -Isrc/ -I$ts_source/lib/include"
flags="$flags -o $out/fuzzer"
# Build debug.out from the grammar sources and bindings/debug.c, linked against upstream tree-sitter
debug-build: tree-sitter
	# Libraries come after the sources that use them: the linker resolves
	# symbols left to right, so `-ltree-sitter` placed first can be ignored.
	clang -O3 -g ${CFLAGS:-} -Isrc "-I{{ ts_src }}/lib/include" \
		"{{src}}/scanner.c" "{{src}}/parser.c" "{{bindings}}/debug.c" \
		"-L{{ ts_src }}" "-ltree-sitter" \
		-o debug.out

mkdir -p "$out"
debug *file-names: debug-build
LD_LIBRARY_PATH="{{ts_src}}" ./debug.out {{file-names}}

[ ! -d "$ts_source" ] &&
git clone https://github.com/tree-sitter/tree-sitter "$ts_source" \
--depth=1
# Run the fuzzer
fuzz *extra-args: (gen "--debug-build") \
(tree-sitter "-fsanitize=fuzzer,address,undefined" "-fvisibility=notidden")
#!/bin/sh
set -eaux

make -C "$ts_source"
"{{fuzzer / "build-corpus.py"}}"

cat << EOF | clang $flags "$ts_source/libtree-sitter.a" "src/scanner.c" "src/parser.c" -x c -
#include <stdio.h>
#include <stdlib.h>
#include "tree_sitter/api.h"
artifacts="{{fuzzer}}/artifacts/"
exe="{{fuzzer}}/fuzz.out"
corpus="{{fuzzer}}/corpus"
mkdir -p "$artifacts"

TSLanguage *tree_sitter_just();
flags="-fsanitize=fuzzer,address,undefined"
flags="$flags -g -O1 -std=gnu99"
flags="$flags -I{{ src }} -I{{ ts_src }}/lib/include"

int LLVMFuzzerTestOneInput(const uint8_t *data, const size_t len) {
TSParser *parser = ts_parser_new();
ts_parser_set_language(parser, tree_sitter_just());
sources="{{src}}/scanner.c {{src}}/parser.c {{fuzzer}}/entry.c"
link="{{ if os() == "macos" { ts_src / 'libtree-sitter.a' } else { '-L' + ts_src + ' -ltree-sitter' } }}"

// Build a syntax tree based on source code stored in a string.
TSTree *tree = ts_parser_parse_string(
parser,
NULL,
(const char *)data,
len
);
// Free all of the heap-allocated memory.
ts_tree_delete(tree);
ts_parser_delete(parser);
return 0;
}
EOF
clang $flags -o "$exe" $sources $link

fuzzer_flags="-artifact_prefix=$out/ -timeout=20 -max_total_time=1200"
./fuzzer-out/fuzzer $fuzzer_flags {{ extra-args }}
fuzzer_flags="-artifact_prefix=$artifacts -timeout=20 -max_total_time=1200 -jobs={{nproc}}"
LD_LIBRARY_PATH="{{ts_src}}" "$exe" "$corpus" $fuzzer_flags {{ extra-args }}


# Verify that the `just` tool parses all files we are using
Expand Down Expand Up @@ -147,7 +148,7 @@

cfg_fname = r"""{{ config_directory() / "tree-sitter" / "config.json" }}"""
if not os.path.isfile(cfg_fname):
sp.run(["npx", "tree-sitter", "init-config"], check=True, shell=shell)
sp.run(["tree-sitter", "init-config"], check=True, shell=shell)

with open(cfg_fname, "r+") as f:
j = json.load(f)
Expand All @@ -166,20 +167,18 @@
deno fmt --check

# Make sure that files have not changed
ci-validate-generated-files:
ci-validate-generated-files: gen
#!/bin/sh
set -eaux

git tag ci-tmp-pre-updates

npm run gen

failed=false
git diff ci-tmp-pre-updates --exit-code || failed=true

if ! [ "$failed" = "false" ]; then
echo '::warning::Generated files are out of date!'
echo '::warning::run `npm run gen` and commit the changes'
echo '::warning::run `just gen` or `npm run gen` and commit the changes'
fi

git tag -d ci-tmp-pre-updates
Loading
Loading