From f3e30f3bfa08d6a861f8b81912dbb0c1283b2560 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alja=C5=BE=20Mur=20Er=C5=BEen?= <aljaz@edgedb.com>
Date: Wed, 20 Mar 2024 21:46:29 +0100
Subject: [PATCH] language server

---
 edb/edgeql-parser/src/parser.rs |   4 +-
 edb/errors/base.py              |   8 ++
 edb/language_server/__init__.py |  17 ++++
 edb/language_server/main.py     |  87 +++++++++++++++++
 edb/language_server/parsing.py  | 149 ++++++++++++++++++++++++++++
 edb/language_server/server.py   | 166 ++++++++++++++++++++++++++++++++
 6 files changed, 429 insertions(+), 2 deletions(-)
 create mode 100644 edb/language_server/__init__.py
 create mode 100644 edb/language_server/main.py
 create mode 100644 edb/language_server/parsing.py
 create mode 100644 edb/language_server/server.py

diff --git a/edb/edgeql-parser/src/parser.rs b/edb/edgeql-parser/src/parser.rs
index 3ca96ca4ed5..3842206e41c 100644
--- a/edb/edgeql-parser/src/parser.rs
+++ b/edb/edgeql-parser/src/parser.rs
@@ -558,9 +558,9 @@ fn injection_cost(kind: &Kind) -> u16 {
 
         // Manual keyword tweaks to encourage some error messages and discourage others.
         Keyword(keywords::Keyword(
-            "delete" | "update" | "migration" | "role" | "global" | "administer",
+            "delete" | "update" | "migration" | "role" | "global" | "administer" | "future" | "database",
         )) => 100,
-        Keyword(keywords::Keyword("insert")) => 20,
+        Keyword(keywords::Keyword("insert" | "module" | "extension" | "branch")) => 20,
         Keyword(keywords::Keyword("select" | "property" | "type")) => 10,
         Keyword(_) => 15,
 
diff --git a/edb/errors/base.py b/edb/errors/base.py
index caeb46d7002..ddaa6a7e66c 100644
--- a/edb/errors/base.py
+++ b/edb/errors/base.py
@@ -181,6 +181,14 @@ def line(self):
     def col(self):
         return int(self._attrs.get(FIELD_COLUMN_START, -1))
 
+    @property
+    def line_end(self):
+        return int(self._attrs.get(FIELD_LINE_END, -1))
+
+    @property
+    def col_end(self):
+        return int(self._attrs.get(FIELD_COLUMN_END, -1))
+
     @property
     def position(self):
         return int(self._attrs.get(FIELD_POSITION_START, -1))
diff --git a/edb/language_server/__init__.py b/edb/language_server/__init__.py
new file mode 100644
index 00000000000..9eab314cb6a
--- /dev/null
+++ b/edb/language_server/__init__.py
@@ -0,0 +1,17 @@
+#
+# This source file is part of the EdgeDB open source project.
+#
+# Copyright 2008-present MagicStack Inc. and the EdgeDB authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/edb/language_server/main.py b/edb/language_server/main.py
new file mode 100644
index 00000000000..e971ab1ab23
--- /dev/null
+++ b/edb/language_server/main.py
@@ -0,0 +1,100 @@
+#
+# This source file is part of the EdgeDB open source project.
+#
+# Copyright 2008-present MagicStack Inc. and the EdgeDB authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from lsprotocol import types as lsp_types
+
+
+from edb.edgeql import parser as qlparser
+
+from . import parsing as ls_parsing
+from . import server as ls_server
+
+
+def main():
+    """Run the EdgeDB language server over stdio.
+
+    Registers the initialize, didOpen, didChange and completion handlers
+    on a new server instance and then hands control to ``start_io()``.
+    """
+    ls = ls_server.EdgeDBLanguageServer()
+
+    @ls.feature(
+        lsp_types.INITIALIZE,
+    )
+    def init(_params: lsp_types.InitializeParams):
+        # preload the parser spec while handling `initialize`
+        ls.show_message_log('Starting')
+        qlparser.preload_spec()
+        ls.show_message_log('Started')
+
+    @ls.feature(lsp_types.TEXT_DOCUMENT_DID_OPEN)
+    def text_document_did_open(params: lsp_types.DidOpenTextDocumentParams):
+        document_updated(ls, params.text_document.uri)
+
+    @ls.feature(lsp_types.TEXT_DOCUMENT_DID_CHANGE)
+    def text_document_did_change(params: lsp_types.DidChangeTextDocumentParams):
+        document_updated(ls, params.text_document.uri)
+
+    @ls.feature(
+        lsp_types.TEXT_DOCUMENT_COMPLETION,
+        lsp_types.CompletionOptions(trigger_characters=[',']),
+    )
+    def completions(params: lsp_types.CompletionParams):
+        items = []
+
+        document = ls.workspace.get_text_document(params.text_document.uri)
+
+        # parse_and_suggest() returns at most one item for a position
+        if item := ls_parsing.parse_and_suggest(document, params.position):
+            items.append(item)
+
+        return lsp_types.CompletionList(is_incomplete=False, items=items)
+
+    ls.start_io()
+
+
+def document_updated(ls: ls_server.EdgeDBLanguageServer, doc_uri: str):
+    """Re-check a document and publish the resulting diagnostics.
+
+    Each call results in exactly one ``publish_diagnostics`` for the
+    document: its syntax errors, its compile errors, or an empty list.
+    """
+    document = ls.workspace.get_text_document(doc_uri)
+    diagnostics: list[lsp_types.Diagnostic] = []
+
+    try:
+        ql_ast = ls_parsing.parse(document, ls)
+        if ql_ast.error:
+            diagnostics = ql_ast.error
+        elif isinstance(ql_ast.ok, list):
+            # `ok` can be an empty list (presumably for an empty document),
+            # so it is checked by type rather than by truthiness.  A result
+            # that is not a statement list is only syntax-checked.
+            diagnostics = ls_server.compile(ls, ql_ast.ok)
+    except Exception as e:
+        # Covers parsing too, so that a failure there still ends in a
+        # publish.  Deliberately not BaseException: KeyboardInterrupt and
+        # SystemExit must be able to stop the server.
+        ls.show_message_log(f'Internal error: {e}')
+        diagnostics = []
+
+    ls.publish_diagnostics(document.uri, diagnostics, document.version)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/edb/language_server/parsing.py b/edb/language_server/parsing.py
new file mode 100644
index 00000000000..d4e2981feda
--- /dev/null
+++ b/edb/language_server/parsing.py
@@ -0,0 +1,182 @@
+#
+# This source file is part of the EdgeDB open source project.
+#
+# Copyright 2008-present MagicStack Inc. and the EdgeDB authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Any, List, Tuple, Optional, TypeVar, Generic
+from dataclasses import dataclass
+
+from pygls.server import LanguageServer
+from pygls.workspace import TextDocument
+from lsprotocol import types as lsp_types
+
+
+from edb.edgeql import ast as qlast
+from edb.edgeql import tokenizer
+from edb.edgeql import parser as qlparser
+from edb.edgeql.parser.grammar import tokens as qltokens
+import edb._edgeql_parser as rust_parser
+
+
+T = TypeVar('T', covariant=True)
+E = TypeVar('E', covariant=True)
+
+# prefix of the parser error messages that name a missing keyword
+_MISSING_KEYWORD_PREFIX = 'Missing keyword \''
+
+
+@dataclass(kw_only=True, slots=True)
+class Result(Generic[T, E]):
+    """Outcome of an operation: either ``ok`` or ``error`` is set."""
+
+    ok: Optional[T] = None
+    error: Optional[E] = None
+
+
+def parse(
+    doc: TextDocument, ls: LanguageServer
+) -> Result[List[qlast.Base] | qlast.Schema, List[lsp_types.Diagnostic]]:
+    """Parse a document into an AST or into syntax diagnostics.
+
+    A document whose filename ends in ``.esdl`` is parsed as SDL and
+    yields a ``qlast.Schema``; any other document is parsed as a block of
+    statements and yields a list.  When the parser reports errors, only
+    ``error`` is set.  ``ls`` is currently unused.
+    """
+    sdl = doc.filename.endswith('.esdl') if doc.filename else False
+
+    source, result, productions = _parse_inner(doc.source, sdl)
+
+    if result.errors:
+        diagnostics = []
+        for error in result.errors:
+            message, span, hint, details = error
+
+            if details:
+                message += f"\n{details}"
+            if hint:
+                message += f"\nHint: {hint}"
+            (start, end) = tokenizer.inflate_span(source.text(), span)
+            if end is None:
+                # no end point: report an empty range at the start
+                end = start
+
+            diagnostics.append(
+                lsp_types.Diagnostic(
+                    # source points are 1-based, LSP positions are 0-based
+                    range=lsp_types.Range(
+                        start=lsp_types.Position(
+                            line=start.line - 1,
+                            character=start.column - 1,
+                        ),
+                        end=lsp_types.Position(
+                            line=end.line - 1,
+                            character=end.column - 1,
+                        ),
+                    ),
+                    severity=lsp_types.DiagnosticSeverity.Error,
+                    message=message,
+                )
+            )
+
+        return Result(error=diagnostics)
+
+    # parsing successful
+    assert isinstance(result.out, rust_parser.CSTNode)
+
+    ast = qlparser._cst_to_ast(
+        result.out, productions, source, doc.filename
+    ).val
+    if sdl:
+        assert isinstance(ast, qlast.Schema), ast
+    else:
+        assert isinstance(ast, list), ast
+    return Result(ok=ast)
+
+
+def parse_and_suggest(
+    doc: TextDocument, position: lsp_types.Position
+) -> Optional[lsp_types.CompletionItem]:
+    """Suggest the keyword that the parser reports missing at ``position``.
+
+    Returns a keyword completion for the first "Missing keyword" parser
+    error whose span contains ``position``, or ``None`` if there is none.
+    """
+    sdl = doc.filename.endswith('.esdl') if doc.filename else False
+
+    source, result, _productions = _parse_inner(doc.source, sdl)
+    for error in result.errors:
+        message: str
+        message, span, _hint, _details = error
+        if not message.startswith(_MISSING_KEYWORD_PREFIX):
+            continue
+        (start, end) = tokenizer.inflate_span(source.text(), span)
+
+        if not _position_in_span(position, (start, end)):
+            continue
+
+        # strip the prefix and the closing quote
+        keyword = message.removeprefix(_MISSING_KEYWORD_PREFIX)[:-1]
+
+        return lsp_types.CompletionItem(
+            label=keyword,
+            kind=lsp_types.CompletionItemKind.Keyword,
+        )
+    return None
+
+
+def _position_in_span(
+    pos: lsp_types.Position, span: Tuple[Any, Any]
+) -> bool:
+    """Tell whether 0-based ``pos`` lies within ``span``, ends included.
+
+    ``span`` is a ``(start, end)`` pair of 1-based source points; an
+    ``end`` of ``None`` is treated as equal to ``start``.
+    """
+    start, end = span
+    if end is None:
+        end = start
+
+    cursor = (pos.line, pos.character)
+    lower = (start.line - 1, start.column - 1)
+    upper = (end.line - 1, end.column - 1)
+    return lower <= cursor <= upper
+
+
+def _parse_inner(
+    source_str: str, sdl: bool
+) -> Tuple[tokenizer.Source, rust_parser.ParserResult, Any]:
+    """Tokenize and parse ``source_str`` as SDL or as a statement block.
+
+    Returns the tokenized source, the parser result and the productions
+    needed to turn the result into an AST.  Raises ``AssertionError`` if
+    the source cannot be tokenized.
+    """
+    try:
+        source = tokenizer.Source.from_string(source_str)
+    except Exception as e:
+        # TODO: report tokenizer errors as diagnostics.
+        # Nothing is printed here: the server talks JSON-RPC over stdio,
+        # so writing to stdout would corrupt the protocol stream.
+        raise AssertionError(e) from e
+
+    start_t = qltokens.T_STARTSDLDOCUMENT if sdl else qltokens.T_STARTBLOCK
+    # the parser takes the token name without its `T_` prefix
+    start_t_name = start_t.__name__[2:]
+    tokens = source.tokens()
+
+    result, productions = rust_parser.parse(start_t_name, tokens)
+    return source, result, productions
diff --git a/edb/language_server/server.py b/edb/language_server/server.py
new file mode 100644
index 00000000000..ee6f6fb3ee2
--- /dev/null
+++ b/edb/language_server/server.py
@@ -0,0 +1,215 @@
+#
+# This source file is part of the EdgeDB open source project.
+#
+# Copyright 2008-present MagicStack Inc. and the EdgeDB authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Optional, List
+import dataclasses
+import pathlib
+import os
+
+from pygls.server import LanguageServer
+from pygls import uris as pygls_uris
+from lsprotocol import types as lsp_types
+
+
+from edb import errors
+
+from edb.edgeql import ast as qlast
+from edb.edgeql import compiler as qlcompiler
+
+from edb.schema import schema as s_schema
+from edb.schema import std as s_std
+from edb.schema import ddl as s_ddl
+
+from . import parsing as ls_parsing
+
+
+@dataclasses.dataclass(kw_only=True, slots=True)
+class State:
+    """Schemas cached by the language server between requests."""
+
+    # the std schema with the SDL from dbschema/*.esdl applied
+    schema: Optional[s_schema.Schema] = None
+
+    # the standard library schema on its own
+    std_schema: Optional[s_schema.Schema] = None
+
+
+class EdgeDBLanguageServer(LanguageServer):
+    """Language server that also holds the cached schema ``State``."""
+
+    state: State
+
+    def __init__(self):
+        super().__init__('EdgeDB Language Server', 'v0.1')
+        self.state = State()
+
+
+def compile(
+    ls: EdgeDBLanguageServer, stmts: List[qlast.Base]
+) -> List[lsp_types.Diagnostic]:
+    """Compile statements against the workspace schema.
+
+    Returns one diagnostic for each statement that fails with an
+    ``EdgeDBError``.  Nothing is compiled, and an empty list is returned,
+    when ``stmts`` is empty or no workspace schema is available.
+    """
+    diagnostics: List[lsp_types.Diagnostic] = []
+
+    if not stmts:
+        return diagnostics
+
+    schema = _get_schema(ls)
+    if not schema:
+        return diagnostics
+
+    for ql_stmt in stmts:
+
+        try:
+            if isinstance(ql_stmt, qlast.DDLCommand):
+                # DDL only updates the local `schema`, so that the
+                # statements after it are compiled against the result
+                schema, _delta = s_ddl.delta_and_schema_from_ddl(
+                    ql_stmt, schema=schema, modaliases={None: 'default'}
+                )
+
+            elif isinstance(ql_stmt, (qlast.Command, qlast.Query)):
+                ir_stmt = qlcompiler.compile_ast_to_ir(ql_stmt, schema)
+                ls.show_message_log(f'IR: {ir_stmt}')
+
+            else:
+                ls.show_message_log(f'skip compile of {ql_stmt}')
+        except errors.EdgeDBError as error:
+            diagnostics.append(_convert_error(error))
+    return diagnostics
+
+
+def _convert_error(error: errors.EdgeDBError) -> lsp_types.Diagnostic:
+    """Convert a compiler error into an LSP diagnostic.
+
+    Error positions are 1-based and -1 when not set; LSP positions are
+    0-based and must not be negative.  A missing start is therefore
+    clamped to 0:0 and a missing end collapses onto the start.
+    """
+    start = lsp_types.Position(
+        line=max(error.line - 1, 0),
+        character=max(error.col - 1, 0),
+    )
+    if error.line_end > 0 and error.col_end > 0:
+        end = lsp_types.Position(
+            line=error.line_end - 1,
+            character=error.col_end - 1,
+        )
+    else:
+        end = start
+
+    return lsp_types.Diagnostic(
+        range=lsp_types.Range(start=start, end=end),
+        severity=lsp_types.DiagnosticSeverity.Error,
+        # errors raised without arguments are reported by their type name
+        message=error.args[0] if error.args else type(error).__name__,
+    )
+
+
+def _get_schema(ls: EdgeDBLanguageServer) -> Optional[s_schema.Schema]:
+    """Return the workspace schema, building and caching it on first use.
+
+    The schema is the std schema with the SDL of every ``.esdl`` file in
+    the workspace's ``dbschema/`` directory applied.  Files with syntax
+    errors get their diagnostics published and are left out.  Returns
+    ``None`` unless there is exactly one workspace folder and it contains
+    a ``dbschema/`` directory.
+
+    NOTE(review): the result is cached in ``ls.state`` and nothing in this
+    module resets it, so later edits to ``.esdl`` files are not picked up.
+    """
+    if ls.state.schema:
+        return ls.state.schema
+
+    # discover dbschema/ folders
+    if len(ls.workspace.folders) != 1:
+
+        if len(ls.workspace.folders) > 1:
+            ls.show_message_log(
+                "WARNING: workspaces with multiple root folders "
+                "are not supported"
+            )
+        return None
+
+    workspace: lsp_types.WorkspaceFolder = next(
+        iter(ls.workspace.folders.values())
+    )
+    workspace_fs_path = pygls_uris.to_fs_path(workspace.uri)
+    if not workspace_fs_path:
+        return None
+
+    dbschema = pathlib.Path(workspace_fs_path) / 'dbschema'
+    if not dbschema.is_dir():
+        # a workspace without a schema directory is not an error
+        return None
+
+    # read and parse .esdl files
+    sdl = qlast.Schema(declarations=[])
+    # sorted, because os.listdir() returns entries in arbitrary order
+    for entry in sorted(os.listdir(dbschema)):
+        if not entry.endswith('.esdl'):
+            continue
+        # documents are looked up by URI, not by a relative path
+        doc_uri = pygls_uris.from_fs_path(str(dbschema / entry))
+        if not doc_uri:
+            continue
+        doc = ls.workspace.get_text_document(doc_uri)
+
+        res = ls_parsing.parse(doc, ls)
+        if diagnostics := res.error:
+            ls.publish_diagnostics(doc.uri, diagnostics, doc.version)
+        else:
+            if isinstance(res.ok, qlast.Schema):
+                sdl.declarations.extend(res.ok.declarations)
+            else:
+                # TODO: complain that .esdl contains non-SDL syntax
+                pass
+
+    # apply SDL to std schema
+    std_schema = _load_std_schema(ls.state)
+    schema = s_ddl.apply_sdl(
+        sdl,
+        base_schema=std_schema,
+        current_schema=std_schema,
+    )
+
+    ls.state.schema = schema
+    return ls.state.schema
+
+
+def _load_std_schema(state: State) -> s_schema.Schema:
+    """Return the std schema, loading and caching it on first use.
+
+    The schema is built from the ``STD_SOURCES`` and ``TESTMODE_SOURCES``
+    modules and stored in ``state.std_schema``.
+    """
+    if state.std_schema is not None:
+        return state.std_schema
+
+    schema: s_schema.Schema = s_schema.EMPTY_SCHEMA
+    for modname in [*s_schema.STD_SOURCES, *s_schema.TESTMODE_SOURCES]:
+        schema = s_std.load_std_module(schema, modname)
+    schema, _ = s_std.make_schema_version(schema)
+    schema, _ = s_std.make_global_schema_version(schema)
+
+    state.std_schema = schema
+    return state.std_schema