Skip to content

Commit

Permalink
feat: complete Language bindings, implement LookaheadIterator and…
Browse files Browse the repository at this point in the history
… `LookaheadNamesIterator`
  • Loading branch information
amaanq committed Sep 6, 2023
1 parent f184c31 commit 2c981d0
Show file tree
Hide file tree
Showing 6 changed files with 827 additions and 96 deletions.
4 changes: 3 additions & 1 deletion script/fetch-fixtures
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
set -eux

language_names=(
tree-sitter-python
tree-sitter-embedded-template
tree-sitter-javascript
tree-sitter-json
tree-sitter-python
tree-sitter-rust
)

mkdir -p tests/fixtures
Expand Down
8 changes: 5 additions & 3 deletions script/fetch-fixtures.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

rem Create the fixtures directory only when it does not exist yet.
rem The directory created must be the same one that is tested for.
if not exist tests\fixtures mkdir tests\fixtures

call:fetch_grammar javascript master
call:fetch_grammar python master
call:fetch_grammar json master
call:fetch_grammar embedded-template master
call:fetch_grammar javascript master
call:fetch_grammar json master
call:fetch_grammar python master
call:fetch_grammar rust master

exit /B 0

Expand Down
122 changes: 113 additions & 9 deletions tests/test_tree_sitter.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import re
from os import path
from typing import Optional, Tuple
from typing import List, Optional, Tuple
from unittest import TestCase

from tree_sitter import Language, Parser
from tree_sitter import Language, Parser, Tree
from tree_sitter.binding import LookaheadIterator, Node

LIB_PATH = path.join("build", "languages.so")

Expand All @@ -16,15 +17,19 @@
Language.build_library(
LIB_PATH,
[
path.join(project_root, "tests", "fixtures", "tree-sitter-python"),
path.join(project_root, "tests", "fixtures", "tree-sitter-embedded-template"),
path.join(project_root, "tests", "fixtures", "tree-sitter-javascript"),
path.join(project_root, "tests", "fixtures", "tree-sitter-json"),
path.join(project_root, "tests", "fixtures", "tree-sitter-python"),
path.join(project_root, "tests", "fixtures", "tree-sitter-rust"),
],
)

PYTHON = Language(LIB_PATH, "python")
EMBEDDED_TEMPLATE = Language(LIB_PATH, "embedded_template")
JAVASCRIPT = Language(LIB_PATH, "javascript")
JSON = Language(LIB_PATH, "json")
PYTHON = Language(LIB_PATH, "python")
RUST = Language(LIB_PATH, "rust")

JSON_EXAMPLE: bytes = b"""
Expand Down Expand Up @@ -194,6 +199,53 @@ def test_children_by_field_name(self):
[a.type for a in attributes], ["jsx_attribute", "jsx_attribute"]
)

def test_node_child_by_field_name_with_extra_hidden_children(self):
    """The `body` field of a Python `while` statement must be its child 3."""
    python_parser = Parser()
    python_parser.set_language(PYTHON)

    parsed = python_parser.parse(b"while a:\n pass")
    while_node = parsed.root_node.child(0)
    if while_node is None:
        self.fail("while_node is None")
    self.assertEqual(while_node.type, "while_statement")

    # Look the body up both ways and require the two results to agree.
    body_by_field = while_node.child_by_field_name('body')
    body_by_index = while_node.child(3)
    self.assertEqual(body_by_field, body_by_index)

def test_node_descendant_count(self):
    """
    `descendant_count` of the root must equal the number of nodes collected
    by `get_all_nodes`, and `goto_descendant(i)` must land on the i-th of
    those nodes whether the indices are visited ascending or descending.
    """
    json_parser = Parser()
    json_parser.set_language(JSON)
    tree = json_parser.parse(JSON_EXAMPLE)
    root = tree.root_node
    expected_nodes = get_all_nodes(tree)
    total = len(expected_nodes)

    self.assertEqual(root.descendant_count, total)

    cursor = root.walk()

    # Forward pass over every descendant index.
    for index in range(total):
        cursor.goto_descendant(index)
        self.assertEqual(cursor.node, expected_nodes[index], f"index {index}")

    # Backward pass: jumping to earlier descendants must work too.
    for index in range(total - 1, -1, -1):
        cursor.goto_descendant(index)
        self.assertEqual(cursor.node, expected_nodes[index], f"rev index {index}")

def test_descendant_count_single_node_tree(self):
    """
    Parsing b"hello" with the embedded-template grammar is expected to give
    exactly two nodes; descendant index 0 and 1 must sit at depth 0 and 1.
    """
    template_parser = Parser()
    template_parser.set_language(EMBEDDED_TEMPLATE)
    tree = template_parser.parse(b"hello")

    collected = get_all_nodes(tree)
    self.assertEqual(len(collected), 2)
    self.assertEqual(tree.root_node.descendant_count, 2)

    cursor = tree.walk()

    # For this tree the descendant index and the expected depth coincide.
    for index in (0, 1):
        cursor.goto_descendant(index)
        self.assertEqual(cursor.depth, index)
        self.assertEqual(cursor.node, collected[index])

def test_field_name_for_child(self):
parser = Parser()
parser.set_language(JAVASCRIPT)
Expand Down Expand Up @@ -624,21 +676,21 @@ def test_walk(self):
self.assertEqual(cursor.node.end_byte, 18)
self.assertEqual(cursor.node.start_point, (0, 0))
self.assertEqual(cursor.node.end_point, (1, 7))
self.assertEqual(cursor.current_field_name(), None)
self.assertEqual(cursor.field_name, None)

self.assertTrue(cursor.goto_first_child())
self.assertEqual(cursor.node.type, "function_definition")
self.assertEqual(cursor.node.start_byte, 0)
self.assertEqual(cursor.node.end_byte, 18)
self.assertEqual(cursor.node.start_point, (0, 0))
self.assertEqual(cursor.node.end_point, (1, 7))
self.assertEqual(cursor.current_field_name(), None)
self.assertEqual(cursor.field_name, None)

self.assertTrue(cursor.goto_first_child())
self.assertEqual(cursor.node.type, "def")
self.assertEqual(cursor.node.is_named, False)
self.assertEqual(cursor.node.sexp(), '("def")')
self.assertEqual(cursor.current_field_name(), None)
self.assertEqual(cursor.field_name, None)
def_node = cursor.node

# Node remains cached after a failure to move
Expand All @@ -648,13 +700,13 @@ def test_walk(self):
self.assertTrue(cursor.goto_next_sibling())
self.assertEqual(cursor.node.type, "identifier")
self.assertEqual(cursor.node.is_named, True)
self.assertEqual(cursor.current_field_name(), "name")
self.assertEqual(cursor.field_name, "name")
self.assertFalse(cursor.goto_first_child())

self.assertTrue(cursor.goto_next_sibling())
self.assertEqual(cursor.node.type, "parameters")
self.assertEqual(cursor.node.is_named, True)
self.assertEqual(cursor.current_field_name(), "parameters")
self.assertEqual(cursor.field_name, "parameters")

def test_edit(self):
parser = Parser()
Expand Down Expand Up @@ -1103,5 +1155,57 @@ def test_point_range_captures(self):
self.assertEqual(captures[1][1], "func-call")


class TestLookaheadIterator(TestCase):
    """Checks parse-state accessors and `LookaheadIterator` on a Rust snippet."""

    def test_lookahead_iterator(self):
        rust_parser = Parser()
        rust_parser.set_language(RUST)
        tree = rust_parser.parse(b"struct Stuff{}")

        cursor = tree.walk()

        self.assertEqual(cursor.goto_first_child(), True)  # struct
        self.assertEqual(cursor.goto_first_child(), True)  # struct keyword

        keyword_node = cursor.node
        next_state = keyword_node.next_parse_state

        # The state reported by the node must match what the language computes
        # from the node's own parse state and grammar id.
        self.assertNotEqual(next_state, 0)
        computed_state = RUST.next_state(
            keyword_node.parse_state, keyword_node.grammar_id
        )
        self.assertEqual(next_state, computed_state)
        self.assertLess(next_state, RUST.parse_state_count)

        self.assertEqual(cursor.goto_next_sibling(), True)  # type_identifier
        sibling_node = cursor.node
        self.assertEqual(next_state, sibling_node.parse_state)
        self.assertEqual(sibling_node.grammar_name, "identifier")
        self.assertNotEqual(sibling_node.grammar_id, sibling_node.kind_id)

        expected_symbols = ["identifier", "block_comment", "line_comment"]
        lookahead: LookaheadIterator = RUST.lookahead_iterator(next_state)
        self.assertEqual(lookahead.language, RUST.language_id)
        self.assertEqual(list(lookahead.iter_names()), expected_symbols)

        # Resetting only the state must replay the same symbol names.
        lookahead.reset_state(next_state)
        self.assertEqual(list(lookahead.iter_names()), expected_symbols)

        # Resetting language and state, then iterating symbol ids directly.
        lookahead.reset(RUST.language_id, next_state)
        symbol_names = [RUST.node_kind_for_id(symbol) for symbol in lookahead]
        self.assertEqual(symbol_names, expected_symbols)


def trim(string):
    """Collapse every whitespace run to one space and strip both ends."""
    collapsed = re.sub(r"\s+", " ", string)
    return collapsed.strip()


def get_all_nodes(tree: Tree) -> List[Node]:
    """
    Collect every node reachable from a fresh cursor on `tree`.

    A node is recorded the first time the cursor arrives at it, before its
    children are visited. The walk ends once the cursor can move neither to
    a next sibling nor to a parent.
    """
    nodes = []
    cursor = tree.walk()
    # True while the cursor sits on a node that has not been recorded yet.
    arriving = True
    while True:
        if arriving:
            nodes.append(cursor.node)
            # Stay in "arriving" mode only if there was a child to enter.
            arriving = cursor.goto_first_child()
            continue
        if cursor.goto_next_sibling():
            arriving = True
        elif not cursor.goto_parent():
            return nodes
114 changes: 105 additions & 9 deletions tree_sitter/__init__.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,43 @@
"""Python bindings for tree-sitter."""

import enum
from ctypes import c_void_p, cdll
from distutils.ccompiler import new_compiler
from distutils.unixccompiler import UnixCCompiler
from os import path
from platform import system
from tempfile import TemporaryDirectory
from typing import Optional
from typing import Callable, List, Optional

from tree_sitter.binding import (Node, Parser, Tree, TreeCursor, # noqa: F401
_language_field_id_for_name, _language_query)
from tree_sitter.binding import (LookaheadIterator, Node, Parser, # noqa: F401
Tree, TreeCursor, _language_field_count,
_language_field_id_for_name,
_language_field_name_for_id, _language_query,
_language_state_count, _language_symbol_count,
_language_symbol_for_name,
_language_symbol_name, _language_symbol_type,
_language_version, _lookahead_iterator,
_next_state)


class SymbolType(enum.IntEnum):
    """Integer codes for the different types of grammar symbols."""

    #: A regular symbol.
    REGULAR = 0

    #: An anonymous symbol.
    ANONYMOUS = 1

    #: An auxiliary symbol.
    AUXILIARY = 2


class Language:
"""A tree-sitter language"""

@staticmethod
def build_library(output_path, repo_paths):
def build_library(output_path: str, repo_paths: List[str]):
"""
Build a dynamic library at the given path, based on the parser
repositories at the given paths.
Expand Down Expand Up @@ -75,21 +96,96 @@ def build_library(output_path, repo_paths):
)
return True

def __init__(self, library_path, name):
def __init__(self, library_path: str, name: str):
"""
Load the language with the given name from the dynamic library
at the given path.
"""
self.name = name
self.lib = cdll.LoadLibrary(library_path)
language_function = getattr(self.lib, "tree_sitter_%s" % name)
language_function: Callable[[], c_void_p] = getattr(self.lib, "tree_sitter_%s" % name)
language_function.restype = c_void_p
self.language_id = language_function()
self.language_id: c_void_p = language_function()

@property
def version(self) -> int:
    """
    ABI version number of this [`Language`], indicating which version of
    the Tree-sitter CLI was used to generate it.
    """
    abi_version = _language_version(self.language_id)
    return abi_version

@property
def node_kind_count(self) -> int:
    """Number of distinct node types defined by this language."""
    symbol_count = _language_symbol_count(self.language_id)
    return symbol_count

@property
def parse_state_count(self) -> int:
    """Number of valid parse states in this language."""
    state_count = _language_state_count(self.language_id)
    return state_count

def node_kind_for_id(self, id: int) -> Optional[str]:
    """Return the node kind name for numerical id `id` (may be `None`)."""
    kind_name = _language_symbol_name(self.language_id, id)
    return kind_name

def id_for_node_kind(self, kind: str, named: bool) -> Optional[int]:
    """
    Return the numerical id for node kind `kind` (may be `None`).

    `named` is forwarded unchanged to the underlying symbol lookup.
    """
    kind_id = _language_symbol_for_name(self.language_id, kind, named)
    return kind_id

def node_kind_is_named(self, id: int) -> bool:
    """
    Tell whether the node type with numerical id `id` is named, i.e. its
    symbol type is `SymbolType.REGULAR` rather than an anonymous one.
    """
    symbol_type = _language_symbol_type(self.language_id, id)
    return symbol_type == SymbolType.REGULAR

def field_id_for_name(self, name) -> Optional[int]:
def node_kind_is_visible(self, id: int) -> bool:
    """
    Tell whether the node type with numerical id `id` is visible, i.e. its
    symbol type is regular or anonymous rather than auxiliary.
    """
    symbol_type = _language_symbol_type(self.language_id, id)
    # REGULAR (0) and ANONYMOUS (1) count as visible; AUXILIARY (2) does not.
    return symbol_type <= SymbolType.ANONYMOUS

@property
def field_count(self) -> int:
    """Number of fields defined by this language."""
    total_fields = _language_field_count(self.language_id)
    return total_fields

def field_name_for_id(self, field_id: int) -> Optional[str]:
    """Return the field name for numerical id `field_id` (may be `None`)."""
    field_name = _language_field_name_for_id(self.language_id, field_id)
    return field_name

def field_id_for_name(self, name: str) -> Optional[int]:
    """Return the numerical field id for field name `name` (may be `None`)."""
    field_id = _language_field_id_for_name(self.language_id, name)
    return field_id

def query(self, source):
def next_state(self, state: int, id: int) -> int:
    """
    Return the parse state that follows `state` for the symbol id `id`.

    Pair the result with
    [`lookahead_iterator`](Language.lookahead_iterator) to generate
    completion suggestions or to list valid symbols in error nodes.
    """
    following_state = _next_state(self.language_id, state, id)
    return following_state

def lookahead_iterator(self, state: int) -> Optional[LookaheadIterator]:
    """
    Build a lookahead iterator for this language at parse state `state`.

    Returns `None` if `state` is invalid for this language. Iterating the
    returned `LookaheadIterator` yields the symbols that are valid in that
    parse state. A newly created iterator reports the `ERROR` symbol from
    `LookaheadIterator.current_symbol`.

    Lookahead iterators are useful for generating suggestions and for
    improving syntax error diagnostics. To get the symbols valid in an
    ERROR node, use the iterator on the state of its first leaf node. For
    `MISSING` nodes, an iterator created on the previous non-extra leaf
    node may be appropriate.
    """
    iterator = _lookahead_iterator(self.language_id, state)
    return iterator

def query(self, source: str):
    """Build and return a Query for this language from `source`."""
    compiled_query = _language_query(self.language_id, source)
    return compiled_query
Loading

0 comments on commit 2c981d0

Please sign in to comment.