Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

namespell: Ignore text inside code blocks #14

Merged
merged 7 commits into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 134 additions & 9 deletions hooks/namespell.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
import os
import re
import sys
import time
from importlib import metadata
from typing import NamedTuple, List
from collections import deque

NAME_RULES = {
"Zarhus": "Zarhus",
Expand All @@ -27,16 +30,121 @@

IGNORE_STRING = "namespell:disable"

class IgnoreBlock(NamedTuple):
    """Pair of delimiter strings bounding a region that may span several lines."""

    # Token that opens the ignored region (pushed on the block stack).
    start: str
    # Token that closes the ignored region.
    end: str

def __get_comment_string(file) -> str:
_, extension = os.path.splitext(f"./{file.name}")
class IgnoreInline(NamedTuple):
    """Pair of delimiter strings bounding an ignored span inside one line."""

    # Token that opens the ignored span.
    start: str
    # Token that closes the ignored span.
    end: str

# Block delimiters to ignore, keyed by file extension. The "default" entry is
# the fallback for extensions that have no entry of their own.
IGNORE_BLOCKS = {
    ".md": [IgnoreBlock("```", "```"), IgnoreBlock("<!--", "-->")],
    "default": []
}

# Inline delimiters to ignore, keyed by file extension. The "default" entry is
# the fallback for extensions that have no entry of their own.
IGNORE_INLINE = {
    ".md": [IgnoreInline("`", "`"), IgnoreInline("<!--", "-->")],
    "default": []
}

# Stack that keeps track of "active" blocks to be ignored by storing their
# start strings. This is necessary for nested blocks (e.g. a code block
# inside a comment).
blocks = deque()
# Start string of the innermost active block, or "" when no block is active.
current_start_token = ""

def __get_comment_string(extension) -> str:
    """Return the comment string registered for *extension*.

    Falls back to the ``"default"`` entry of ``COMMENT_STRINGS`` when the
    extension has no entry of its own.
    """
    if extension in COMMENT_STRINGS:
        return COMMENT_STRINGS[extension]
    return COMMENT_STRINGS["default"]


def __get_ignore_blocks(extension) -> List[IgnoreBlock]:
    """Return the block delimiters to ignore for *extension*.

    Falls back to the ``"default"`` entry of ``IGNORE_BLOCKS`` when the
    extension has no entry of its own.
    """
    if extension in IGNORE_BLOCKS:
        return IGNORE_BLOCKS[extension]
    return IGNORE_BLOCKS["default"]


def __get_inline_ignore(extension) -> List[IgnoreInline]:
    """Return the inline delimiters to ignore for *extension*.

    Falls back to the ``"default"`` entry of ``IGNORE_INLINE`` when the
    extension has no entry of its own.
    """
    if extension in IGNORE_INLINE:
        return IGNORE_INLINE[extension]
    return IGNORE_INLINE["default"]


def __log_verbose(message, verbose):
    """Print *message* to stdout, but only when *verbose* is truthy."""
    if not verbose:
        return
    print(message)


# Check if the line starts/ends a block to be ignored and modify the blocks
# stack accordingly
def __check_block(ignore_blocks, line, verbose=False):
    """Update the module-level block stack for one line of input.

    For every delimiter pair in *ignore_blocks* the line is searched for the
    start and the end token. A start token pushes its string onto ``blocks``;
    an end token that belongs to the innermost active block pops it again.
    ``current_start_token`` always mirrors the top of the stack ("" if empty).

    Args:
        ignore_blocks: iterable of objects exposing ``start`` and ``end``.
        line: the line of text to inspect.
        verbose: forwarded to ``__log_verbose`` for diagnostic output.
    """
    global current_start_token
    for delimiters in ignore_blocks:
        opener = delimiters.start
        closer = delimiters.end
        open_at = line.find(opener)
        close_at = line.find(closer)

        if open_at != -1:
            # For identical start and end tokens: if the innermost active
            # block was opened by this very token, this occurrence closes it.
            acts_as_closer = opener == closer and current_start_token == opener
            if acts_as_closer:
                open_at = -1
            else:
                __log_verbose(f"BLOCK START: {opener}, INDEX: {open_at}", verbose)
                blocks.append(opener)
                current_start_token = opener

        # Nothing to close: no end token, or it is the same occurrence that
        # was just handled as a start token.
        if close_at == -1 or close_at == open_at:
            continue
        __log_verbose(f"BLOCK END: {closer}, INDEX: {close_at}", verbose)

        # Only an end token matching the innermost active block pops it.
        if current_start_token != opener:
            continue
        current_start_token = ""
        try:
            blocks.pop()
        except IndexError:
            return
        if blocks:
            # Peek at the new top of the stack without popping it.
            current_start_token = blocks[-1]

# return list of indices of words to be ignored
def __check_inline_ignore(ignore_inline, line, verbose=False):
    """Collect the positions of names that sit inside inline-ignored spans.

    For every delimiter pair in *ignore_inline*, each span of *line* enclosed
    by the start and end token is searched for the names in ``NAME_RULES``.
    The position (index into *line*) of every hit is returned so the caller
    can skip matches found at those positions.

    A span whose end token is missing extends to the end of the line. The
    previous code tested ``find(...) + 1`` against ``-1``, which can never be
    true, and then sliced with ``[:-1]``, silently dropping the last character
    of the line from the ignored span.

    Args:
        ignore_inline: iterable of objects exposing ``start`` and ``end``.
        line: the line of text to inspect.
        verbose: forwarded to ``__log_verbose`` for diagnostic output.

    Returns:
        List of integer indices into *line*, in discovery order.
    """
    # Compile once per call instead of once per span and name.
    patterns = [
        re.compile(
            rf"(?<![-_\./=\"#]){re.escape(name)}(?![-_\./=\"#])", re.IGNORECASE
        )
        for name in NAME_RULES
    ]
    indices = []
    for element in ignore_inline:
        cursor = 0
        while True:
            start_index = line.find(element.start, cursor)
            if start_index == -1:
                break
            __log_verbose(f"INLINE BLOCK START: {start_index}", verbose)
            # Only the first character of the start token is skipped, which
            # matches the historical index arithmetic.
            span_start = start_index + 1
            end_index = line.find(element.end, span_start)
            terminated = end_index != -1
            if terminated:
                __log_verbose(f"INLINE BLOCK END: {end_index}", verbose)
                span_end = end_index
            else:
                # Openers of multi-line blocks legitimately have no end tag on
                # the same line, so this is reported in verbose mode only.
                __log_verbose(
                    "Error: No matching inline comment/code ending tag", verbose
                )
                span_end = len(line)
            # Match against the isolated span (not the full line) so the
            # look-around assertions see the same context as before.
            span = line[span_start:span_end]
            for pattern in patterns:
                for m in pattern.finditer(span):
                    indices.append(span_start + m.start())
            if not terminated:
                break
            cursor = end_index + 1
    return indices


def __get_active_rules(
filename, line, line_number, line_ignore_pattern, file_ignore_pattern
):
Expand Down Expand Up @@ -73,11 +181,15 @@ def __get_active_rules(
return active_rules, are_file_rules


def check_and_fix_file(filename, autofix=False):
def check_and_fix_file(filename, autofix=False, verbose=False):
with open(filename, "r", encoding="utf8", errors="ignore") as file:
lines = file.readlines()
comment_string = __get_comment_string(file)

_, extension = os.path.splitext(f"./{file.name}")
comment_string = __get_comment_string(extension)
ignore_blocks = __get_ignore_blocks(extension)
ignore_inline = __get_inline_ignore(extension)
__log_verbose(f"FILE: {filename}", verbose)

# Don't check empty files
if len(lines) == 0:
return True
Expand All @@ -95,7 +207,13 @@ def check_and_fix_file(filename, autofix=False):
if file_rules == {}:
return True
for line_number, line in enumerate(lines, start=1):
__log_verbose(f"LINE {line_number}", verbose)
fixed_line = line
__check_block(ignore_blocks, line, verbose)
to_ignore = __check_inline_ignore(ignore_inline, line, verbose)
if blocks:
fixed_lines.append(fixed_line)
continue
active_rules = file_rules
if line_number != 1:
line_rules, _ = __get_active_rules(
Expand All @@ -112,12 +230,13 @@ def check_and_fix_file(filename, autofix=False):
pattern = re.compile(
rf"(?<![-_\./=\"#]){re.escape(name)}(?![-_\./=\"#])", re.IGNORECASE
)
matches = pattern.findall(line)
matches = re.finditer(pattern, line)
for match in matches:
if match != correct_format:
__log_verbose(f"FOUND: {match.group()} AT {match.start()}", verbose)
if match.group() != correct_format and match.start() not in to_ignore:
found_issues = True
print(
f"{filename}:{line_number}: '{match}' should be '{correct_format}'"
f"{filename}:{line_number}: '{match.group()}' should be '{correct_format}'"
)
if autofix:
fixed_line = re.sub(pattern, correct_format, fixed_line)
Expand Down Expand Up @@ -147,14 +266,20 @@ def parse_args() -> argparse.Namespace:
help="Automatically fix issues",
)
parser.add_argument("files", nargs="+", help="File(s) to parse")
parser.add_argument(
"--verbose",
action="store_true",
default=False,
help="Run tool in verbose mode"
)
return parser.parse_args()


def main():
args = parse_args()
all_passed = True
for filename in args.files:
if not check_and_fix_file(filename, args.fix):
if not check_and_fix_file(filename, args.fix, args.verbose):
all_passed = False
if not all_passed:
sys.exit(1)
Expand Down
18 changes: 18 additions & 0 deletions hooks/test/errors.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Section

This zarhus should be detected as a mistake. <!-- but this zarhus shouldn't -->

Zarhus `zarhus` Dasharo dasharo

```text
zarhus
dasharo
```

## This zarhus is a mistake

<!--
dasharo
-->
dasharo

25 changes: 25 additions & 0 deletions hooks/test/no_errors.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# This document shall have no errors: `zarhus`

This is inline `zarhus and yocto` and this is inline comment<!-- zarhus --> to be ignored
<!-- This
is block comment and everything zarhus and yocto should be ignored even inline `zarhus`
zarhus
-->

```text
This zarhus
this zarhus and yocto should be zarhus ignored
and `zarhus` this zarhus
```

1. Indented code block

```text
This zarhus
should be zarhus also ignored
and `zarhus` this zarhus
```

It's also possible to disable zarhus and yocto rule inline <!-- namespell:disable Zarhus, Yocto -->

All rules disabled: zarhus, yocto, uefi, nvidia <!-- namespell:disable -->