Skip to content

Commit c0235c8

Browse files
committed
ISSUE-21: Enhance parsing in no-todos
1 parent 5d7a417 commit c0235c8

File tree

2 files changed

+37
-4
lines changed

2 files changed

+37
-4
lines changed

hooks/no_todos.py

+11 -2
Original file line number | Diff line number | Diff line change
@@ -27,8 +27,17 @@ def main(argv: Sequence[str] | None = None) -> int:
2727
result = 0
2828

2929
for filename in args.filenames:
30-
with open(filename, encoding='utf-8') as f:
31-
lines = f.readlines()
30+
try:
31+
with open(filename, encoding='utf-8-sig') as f:
32+
lines = f.readlines()
33+
except (UnicodeDecodeError, UnicodeError):
34+
print(f'{filename}: cannot be read as UTF-8, trying UTF-16')
35+
try:
36+
with open(filename, encoding='utf-16') as f:
37+
lines = f.readlines()
38+
except (UnicodeDecodeError, UnicodeError):
39+
print(f'{filename}: cannot be read as UTF-16, skipping it')
40+
continue
3241

3342
for tag in disallowed:
3443
for line in lines:

tests/test_no_todos.py

+26 -2
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ def test_has_todo(tmpdir):
1818
# noinspection SpellCheckingInspection
1919
f.write_text('TODO: ¥eßűs, ∂éñ∂ þħïs!', encoding='utf-8')
2020
assert no_todos.main((str(f),)) == 1
21-
assert mocked_print.call_args_list[0].args[0] \
21+
assert mocked_print.call_args_list[-1].args[0] \
2222
.endswith('todo.txt: contains TODO')
2323

2424

@@ -42,7 +42,7 @@ def test_has_todo_but_excepted(tmpdir):
4242
# noinspection SpellCheckingInspection
4343
f.write_text('TODO: ฿űþ ñ∅™ þħïs!', encoding='utf-8')
4444
assert no_todos.main(('-e', 'todo.txt', str(f))) == 0
45-
assert mocked_print.call_args_list[0].args[0] \
45+
assert mocked_print.call_args_list[-1].args[0] \
4646
.endswith('todo.txt: contains TODO, but is on the exception list')
4747

4848

@@ -64,3 +64,27 @@ def test_complex_case(tmpdir):
6464
# noinspection SpellCheckingInspection
6565
f3.write_text('TODO: ¥eßűs, ∂éñ∂ þħïs!-3', encoding='utf-8')
6666
assert no_todos.main(('-e', 'todo_2.txt', str(f1), str(f2), str(f3))) == 2
67+
68+
69+
def test_non_utf_8(tmpdir):
70+
with patch('builtins.print') as mocked_print:
71+
f1 = tmpdir.join('todo_utf8_bom.txt')
72+
# noinspection SpellCheckingInspection
73+
f1.write_text('TODO: ¥eßűs, ∂éñ∂ þħïs!-1', encoding='utf-8-sig')
74+
assert no_todos.main((str(f1),)) == 1
75+
for a in mocked_print.call_args_list:
76+
assert not a.args[0].endswith(
77+
'todo_utf8_bom.txt: cannot be read as UTF-8, trying UTF-16'
78+
)
79+
f2 = tmpdir.join('todo_utf16.txt')
80+
# noinspection SpellCheckingInspection
81+
f2.write_text('TODO: ¥eßűs, ∂éñ∂ þħïs!-1', encoding='utf-16')
82+
assert no_todos.main((str(f1),str(f2))) == 2
83+
assert mocked_print.call_args_list[-2].args[0] \
84+
.endswith('todo_utf16.txt: cannot be read as UTF-8, trying UTF-16')
85+
f3 = tmpdir.join('todo_greek.txt')
86+
# noinspection SpellCheckingInspection
87+
f3.write_text('TODO: Υeσΰς, δέηδ ΤΞΪΣ!-1', encoding='ISO-8859-7')
88+
assert no_todos.main((str(f1),str(f2),str(f3))) == 2
89+
assert mocked_print.call_args_list[-1].args[0] \
90+
.endswith('todo_greek.txt: cannot be read as UTF-16, skipping it')

0 commit comments

Comments (0)