Skip to content

Commit

Permalink
Improve white space matching in external scanner
Browse files Browse the repository at this point in the history
Summary: Factor out the `is_whitespace` test in the external scanner, and make it match the whitespace regexp used in `grammar.js`

Reviewed By: jcpetruzza

Differential Revision: D59276662

fbshipit-source-id: 49b4f10d71e61b9b80f051fc7d27e46b0bd678d2
  • Loading branch information
alanz authored and facebook-github-bot committed Jul 3, 2024
1 parent da275db commit 9a04980
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 8 deletions.
3 changes: 2 additions & 1 deletion grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,8 @@ module.exports = grammar({

extras: $ => [
// $.whitespace causes issues with error recovery,
// emulate it manually when traversing the tree
// emulate it manually when traversing the tree.
// The regexp is based on the ?WHITE_SPACE/1 macro in elp_scan.erl
/[\x01-\x20\x80-\xA0]/,
$.comment,
],
Expand Down
18 changes: 11 additions & 7 deletions src/scanner.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,14 @@ static inline void skip(TSLexer* lexer) {
/* fprintf(stderr, "Scanner lookahead: '%c'.\n", lexer->lookahead); */
/* } */

/*
 * Report whether the lexer's current lookahead value counts as whitespace.
 *
 * The accepted ranges are 0x01..0x20 and 0x80..0xA0 (inclusive). They are
 * based on the ?WHITE_SPACE/1 macro in elp_scan.erl and mirror the
 * /[\x01-\x20\x80-\xA0]/ regexp used in grammar.js.
 *
 * Note that '\n' (0x0A) lies inside the first range, so callers that must
 * stop at end of line need to exclude it themselves.
 */
static inline bool is_whitespace(TSLexer* lexer) {
  /* Anything below 0x01 is outside both ranges. */
  if (lexer->lookahead < 0x01) {
    return false;
  }
  /* Low range: 0x01..0x20. */
  if (lexer->lookahead <= 0x20) {
    return true;
  }
  /* High range: 0x80..0xA0. */
  return lexer->lookahead >= 0x80 && lexer->lookahead <= 0xA0;
}

bool tree_sitter_erlang_external_scanner_scan(
void* unused_payload,
TSLexer* lexer,
Expand All @@ -70,9 +78,7 @@ bool tree_sitter_erlang_external_scanner_scan(

if (valid_symbols[TQ_STRING] || valid_symbols[TQ_SIGIL_STRING]) {
/* Skip any leading whitespace */
while (lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
lexer->lookahead == '\f' || lexer->lookahead == '\r' ||
lexer->lookahead == '\n') {
while (is_whitespace(lexer)) {
skip(lexer);
}
bool is_sigil_string = false;
Expand Down Expand Up @@ -112,8 +118,7 @@ bool tree_sitter_erlang_external_scanner_scan(
advance(lexer);
}
/* skip whitespace to end of line */
while (lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
lexer->lookahead == '\f' || lexer->lookahead == '\r') {
while (lexer->lookahead != '\n' && is_whitespace(lexer)) {
advance(lexer);
}

Expand All @@ -129,8 +134,7 @@ bool tree_sitter_erlang_external_scanner_scan(
if (lexer->lookahead == '\n') {
advance(lexer);
/* skip whitespace to first '"' */
while (lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
lexer->lookahead == '\f' || lexer->lookahead == '\r') {
while (lexer->lookahead != '\n' && is_whitespace(lexer)) {
advance(lexer);
}

Expand Down

0 comments on commit 9a04980

Please sign in to comment.