v0.6.0 (#42)
* Bump version and add note to README

Prepare for tomorrow's release and set the stage for the v0.7.0 and v0.8.0 releases

* Steal pygments regexes (#34)

* test.py

* Move to server functions and set up proper tests

* Reformat

* Reformat

* Change to beartype typing

* Even more formatting

* Remove regex stealer test

Runs differently locally than on the GH runner, and it's not worth the time or effort.

* Get docstring areas

* Make function work

* Add type annotation

* format

* Add lots of comments but don't remove private type

@leycec I tried using the idea you gave for private types, but I got Union type errors for some reason I couldn't really understand. Thank you so much for your time and thoughts, and congrats on the bike ride; that's a long stretch!

* Fix a small bug

* Improve highlighting functions significantly

Now it ignores whitespace at the front of the line!

* Stop using private variable

* Format for black and ruff

* Move docstring tokens up

* Update tests

* Fix line number for docstring tokens

Was one line behind.

* Reformat
Moosems authored Jul 7, 2024
1 parent 22f1a16 commit a17f289
Showing 5 changed files with 217 additions and 54 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -1,4 +1,4 @@
-<h1 align="center">Salve v0.5.2</h1>
+<h1 align="center">Salve v0.6.0</h1>

# Installation

@@ -9,7 +9,7 @@ In the Command Line, paste the following: `pip install salve_ipc`
Salve is an IPC library that can be used by code editors to easily get autocompletions, replacements, editorconfig suggestions, definitions, and syntax highlighting.

> **Note**
-> The first time that the system is loaded or a new server needs to be started it will take a fair bit longer
+> The first time that the system is loaded or a new server needs to be started, it will take a fair bit longer. Additionally, any usage of IPC needs to eventually be called from an `if __name__ == "__main__":` block to prevent a multiprocessing error.
## Documentation

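To make the README's new note concrete, here is a minimal sketch of the guard it describes. The `IPC` class name and constructor are illustrative assumptions (this commit's tests only show `get_response`, `remove_file`, and `kill_IPC`); the `if __name__ == "__main__":` guard itself is the point:

```python
# Hypothetical sketch: the IPC class name and setup are assumptions,
# not confirmed by this diff.
from salve_ipc import IPC


def main() -> None:
    context = IPC()  # starts the server process under the hood
    # ... send requests and read responses here ...
    context.kill_IPC()


if __name__ == "__main__":
    # salve_ipc uses multiprocessing internally; with the "spawn" start
    # method the module is re-imported in the child process, so unguarded
    # setup code would run again and error out.
    main()
```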
197 changes: 190 additions & 7 deletions salve_ipc/server_functions/highlight.py
@@ -1,9 +1,11 @@
-from re import Match, Pattern, compile
+from re import MULTILINE, Match, Pattern, compile

+from beartype.typing import Callable
from pygments import lex
-from pygments.lexer import Lexer
+from pygments.lexer import Lexer, RegexLexer, default
from pygments.lexers import get_lexer_by_name
-from pygments.token import _TokenType
+from pygments.token import Comment as CommentToken
+from pygments.token import String as StringToken

from .misc import Token, generic_tokens

@@ -140,42 +142,223 @@ def find_hidden_chars(lines: list[str], start_line: int = 1) -> list[Token]:
    return tok_list


# Instantiate some useful variables/types for the following functions
useful_toks = {
    StringToken.Doc,
    StringToken.Heredoc,
    CommentToken,
    CommentToken.Multiline,
}

# Beartype speed optimizations
_TokenType = type(StringToken)  # Resolves to pygments.token._TokenType
_TokenTupleInternalType = tuple[_TokenType | Callable, ...]
_TokenTupleReturnType = list[tuple[str, _TokenType]]
_ListOfStrs = list[str]
_LexReturnTokens = list[tuple[_TokenType, str]]
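A quick aside on the `type(StringToken)` trick above: every pygments token type is an instance of the private `pygments.token._TokenType` class, so calling `type()` on any public token recovers that class without importing the underscore-prefixed name. A minimal check (illustrative sketch):

```python
from pygments.token import String, _TokenType

# type(String) is the private _TokenType class itself, so the alias above
# is equivalent to the import this commit removed.
assert type(String) is _TokenType
```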


def get_pygments_comment_regexes(lexer: RegexLexer) -> _TokenTupleReturnType:
    """
    Steals the regexes that pygments uses to give docstring, heredoc, comment,
    and multiline comment highlights (CSS comments, though multiline, aren't
    called multiline comments)
    """

    regexes: _TokenTupleReturnType = []

    for path in lexer.tokens:
        # This should have a better type definition, but I didn't have the
        # mental capacity to write out each possibility, so I'm waiting for
        # beartype to implement the functionality for me like the bum I am
        path_tokens: list = lexer.tokens[path]

        if isinstance(path_tokens[0], str):
            # This means that the path is redirecting to another path in its
            # place, but we check them all anyway, so just exit this path
            continue

        for token_tuple in path_tokens:
            # Ensure that this is actually a tuple and not a random type
            if isinstance(token_tuple, default):
                continue

            if token_tuple[1] in useful_toks:
                regexes.append((token_tuple[0], token_tuple[1]))
                continue

            # The token tuple's second element SHOULD be a callable at this point
            if not callable(token_tuple[1]):
                continue

            pygments_func: Callable = token_tuple[1]

            if pygments_func.__closure__ is None:
                # This will always be False, but it's here to appease the
                # static type checkers
                continue

            tokens: _TokenTupleInternalType = [
                cell.cell_contents for cell in token_tuple[1].__closure__
            ][0]  # Sometimes pygments hides these types in functional programming

            for token in tokens:
                if token in useful_toks:
                    # If it's in the useful tokens set, we know it's a token
                    # type, but the static type checker doesn't
                    regexes.append((token_tuple[0], token))  # type: ignore
                    continue

    return list(set(regexes))  # type: ignore
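For a sense of what this helper returns, a hedged usage sketch (the exact patterns vary by pygments version, and ordering is unstable because of the `set()` round-trip):

```python
from pygments.lexers import get_lexer_by_name

python_lexer = get_lexer_by_name("python")  # PythonLexer is a RegexLexer
for regex, token_type in get_pygments_comment_regexes(python_lexer):
    # Prints (pattern, token type) pairs, e.g. a triple-quote pattern
    # paired with Token.Literal.String.Doc.
    print(repr(regex), token_type)
```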


def proper_docstring_tokens(lexer: RegexLexer, full_text: str) -> list[Token]:
    proper_highlight_regexes: _TokenTupleReturnType = (
        get_pygments_comment_regexes(lexer)
    )

    new_docstring_tokens: list[Token] = []
    split_text: _ListOfStrs = full_text.splitlines()

    for regex, token_type in proper_highlight_regexes:
        current_text = full_text
        match: Match[str] | None = compile(regex, flags=MULTILINE).search(
            full_text
        )

        if match is None:
            # Onwards to the next regex!
            continue

        start_pos: tuple[int, int] = (1, 0)
        simple_token_type: str = get_new_token_type(str(token_type))

        while match:
            span: tuple[int, int] = match.span()
            matched_str: str = current_text[span[0] : span[1]]

            # Remove any whitespace before the match and update the span accordingly
            matched_len_initial: int = len(matched_str)
            matched_str = matched_str.lstrip()
            matched_len_lstripped: int = len(matched_str)
            span = (
                (span[0] + matched_len_initial - matched_len_lstripped),
                span[1],
            )

            # Other useful variables
            newline_count: int = matched_str.count("\n")
            previous_text: str = current_text[: span[0]]

            start_line: int = previous_text.count("\n") + start_pos[0]

            # Deal with the easy case first
            if not newline_count:
                # Prepare token variables
                start_col: int = split_text[start_line].find(matched_str)
                current_text: str = full_text[span[0] + span[1] - span[0] :]

                # Create and add the token
                token: Token = (
                    (start_line, start_col),
                    matched_len_lstripped,
                    simple_token_type,
                )
                new_docstring_tokens.append(token)

                start_pos = (start_line, start_col + matched_len_lstripped)
                current_text = current_text[: span[1]]

                # Continue onward!
                match = compile(regex, flags=MULTILINE).search(current_text)
                continue

            # Now for multi-line matches
            split_match: list[str] = matched_str.splitlines()
            for i in range(newline_count + 1):
                match_str: str = split_match[i]
                initial_len: int = len(match_str)
                start_col: int = initial_len - len(match_str.lstrip())

                if i == 0:
                    line: str = split_text[start_line - 1]

                    true_len: int = len(line)
                    lstripped_len: int = len(line.lstrip())
                    initial_len = lstripped_len
                    if lstripped_len != true_len:
                        # In case the regex doesn't skip whitespace/junk
                        initial_len = true_len

                    start_col = line.find(match_str)

                # Create and add the token
                token: Token = (
                    (start_line + i, start_col),
                    initial_len - start_col,
                    simple_token_type,
                )
                new_docstring_tokens.append(token)

                start_pos = (start_line + i, start_col + len(match_str))

            # Continue onward!
            current_text = current_text[span[1] :]
            match = compile(regex, flags=MULTILINE).search(current_text)

    return new_docstring_tokens
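A small illustration of the override tokens this function produces, assuming the generic token mapping turns `String.Doc` into `"String"` as the updated tests suggest (a sketch; exact columns depend on the lexer's regexes):

```python
from pygments.lexers import get_lexer_by_name

source = 'class Foo:\n    """\n    test\n    """\n'
# Expected shape: one Token per docstring line, roughly
# ((2, 4), 3, "String"), ((3, 4), 4, "String"), ((4, 4), 3, "String")
for token in proper_docstring_tokens(get_lexer_by_name("python"), source):
    print(token)
```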


def get_highlights(
    full_text: str,
    language: str = "text",
    text_range: tuple[int, int] = (1, -1),
) -> list[Token]:
    """Gets pygments tokens from the text provided, in the language provided, and converts them to Tokens"""

    # Create some variables used all throughout the function
    lexer: Lexer = get_lexer_by_name(language)
-    split_text: list[str] = full_text.splitlines()
+    split_text: _ListOfStrs = full_text.splitlines()
    new_tokens: list[Token] = []

    if text_range[1] == -1:
        # This indicates that the text range should span the length of the entire code
        text_range = (text_range[0], len(split_text))

    start_index: tuple[int, int] = (text_range[0], 0)
-    split_text = split_text[text_range[0] - 1 : text_range[1]]
+    # We want only the lines in the text range because this list is iterated
+    split_text: _ListOfStrs = split_text[text_range[0] - 1 : text_range[1]]

    for line in split_text:
-        og_tokens: list[tuple[_TokenType, str]] = list(lex(line, lexer))
+        og_tokens: _LexReturnTokens = list(lex(line, lexer))
        for token in og_tokens:
            new_type: str = get_new_token_type(str(token[0]))
            token_str: str = token[1]
            token_len: int = len(token_str)

-            if token_str == "\n":  # Lexer adds the newline back
+            if token_str == "\n":
+                # Lexer adds the newline back as its own token
                continue

            if not token_str.strip() and new_type == "Text":
                # If the token is empty or is plain Text, we simply skip it because that's ultimately useless info
                start_index = (start_index[0], start_index[1] + token_len)
                continue

            # Create and append the Token that will be returned
            new_token = (start_index, token_len, new_type)
            new_tokens.append(new_token)

            start_index = (start_index[0], start_index[1] + token_len)
        start_index = (start_index[0] + 1, 0)

+    # Add extra token types
+    # NOTE: we add these at the end so that when the editor applies them
+    # one by one, they override older tokens that may not be as accurate
+
+    if isinstance(lexer, RegexLexer):
+        new_tokens += proper_docstring_tokens(lexer, full_text)
+
    new_tokens += get_urls(split_text, text_range[0])
    if [char for char in hidden_chars if char in full_text]:
        # if there are no hidden chars, we don't want to needlessly compute this
        new_tokens += find_hidden_chars(split_text, text_range[0])

    return new_tokens
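Finally, a usage sketch for the whole pipeline, calling the function defined above (this module path is internal to the diff; the public re-export may differ):

```python
code = 'def foo():\n    """Docs"""\n    return 1\n'
# Each Token is ((line, column), length, token_type), with 1-indexed lines.
for (line, col), length, token_type in get_highlights(code, language="python"):
    print(f"{line}:{col} {token_type} x{length}")
```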
4 changes: 2 additions & 2 deletions setup.py
@@ -1,4 +1,4 @@
-# pip install -r requirements.txt --break-system-packages; pip uninstall salve_ipc -y --break-system-packages; pip install . --break-system-packages --no-build-isolation; pytest .
+# pip install -r requirements.txt --break-system-packages; pip uninstall salve_ipc -y --break-system-packages; pip install . --break-system-packages --no-build-isolation; python3 -m pytest .
from setuptools import setup

with open("README.md", "r") as file:
@@ -7,7 +7,7 @@

setup(
    name="salve_ipc",
-    version="0.5.2",
+    version="0.6.0",
    description="Salve is an IPC library that can be used by code editors to easily get autocompletions, replacements, editorconfig suggestions, definitions, and syntax highlighting.",
    author="Moosems",
    author_email="[email protected]",
62 changes: 19 additions & 43 deletions tests/test_ipc.py
@@ -55,7 +55,7 @@ def test_IPC():
"type": "response",
"cancelled": False,
"command": AUTOCOMPLETE,
"result": ["this"],
"result": ["test", "this"],
}

replacements_output: Response | None = context.get_response(REPLACEMENTS)
@@ -102,52 +102,28 @@ def test_IPC():
((8, 10), 3, "Name"),
((8, 13), 1, "Punctuation"),
((8, 14), 1, "Punctuation"),
((9, 4), 3, "Keyword"),
((9, 8), 8, "Name"),
((9, 16), 1, "Punctuation"),
((9, 17), 4, "Name"),
((9, 21), 1, "Punctuation"),
((9, 22), 1, "Punctuation"),
((10, 8), 4, "Keyword"),
((13, 0), 3, "Name"),
((13, 3), 1, "Punctuation"),
((13, 4), 1, "Punctuation"),
((14, 0), 24, "Comment"),
((14, 2), 22, "Link"),
((9, 4), 3, "String"),
((10, 4), 4, "Name"),
((11, 4), 3, "String"),
((13, 4), 3, "Keyword"),
((13, 8), 8, "Name"),
((13, 16), 1, "Punctuation"),
((13, 17), 4, "Name"),
((13, 21), 1, "Punctuation"),
((13, 22), 1, "Punctuation"),
((14, 8), 4, "Keyword"),
((17, 0), 3, "Name"),
((17, 3), 1, "Punctuation"),
((17, 4), 1, "Punctuation"),
((18, 0), 24, "Comment"),
((9, 4), 3, "String"),
((10, 4), 4, "String"),
((11, 4), 3, "String"),
((18, 2), 22, "Link"),
((5, 7), 1, "Hidden_Char"),
],
}

-    editorconfig_response: Response | None = context.get_response(EDITORCONFIG)
-    if editorconfig_response is None:
-        raise AssertionError("Editorconfig output is None")
-    editorconfig_response["id"] = 0
-    assert editorconfig_response == {
-        "id": 0,
-        "type": "response",
-        "cancelled": False,
-        "command": EDITORCONFIG,
-        "result": {
-            "end_of_line": "lf",
-            "insert_final_newline": "true",
-            "charset": "utf-8",
-            "indent_style": "space",
-            "indent_size": "4",
-        },
-    }
-
-    definition_response: Response | None = context.get_response(DEFINITION)
-    if definition_response is None:
-        raise AssertionError("Definition output is None")
-    definition_response["id"] = 0
-    assert definition_response == {
-        "id": 0,
-        "type": "response",
-        "cancelled": False,
-        "command": DEFINITION,
-        "result": ((3, 0), 3, "Definition"),
-    }

    context.remove_file("test")
    context.kill_IPC()

4 changes: 4 additions & 0 deletions tests/testing_file1.py
@@ -6,6 +6,10 @@


class Foo(Bar):
+    """
+    test
+    """
+
    def __init__(self):
        pass

