From 063eb21be3790fa42c5bfbac118367adc00a625d Mon Sep 17 00:00:00 2001 From: jsh9 <25124332+jsh9@users.noreply.github.com> Date: Fri, 20 Dec 2024 02:33:44 -0500 Subject: [PATCH] Fix a bug with reading non-UTF-8 encoded files --- .gitignore | 3 +++ CHANGELOG.md | 6 +++++- pydoclint/main.py | 5 ++++- setup.cfg | 2 +- tests/data/edge_cases/19_file_encoding/nonascii.py | 4 ++++ tests/data/edge_cases/19_file_encoding/nonascii2.py | 4 ++++ tests/test_main.py | 2 ++ 7 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 tests/data/edge_cases/19_file_encoding/nonascii.py create mode 100644 tests/data/edge_cases/19_file_encoding/nonascii2.py diff --git a/.gitignore b/.gitignore index 2dc53ca..8fc17bc 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,6 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. .idea/ + +# macOS stuff +.DS_Store diff --git a/CHANGELOG.md b/CHANGELOG.md index 199411d..c0d18fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,14 @@ # Change Log -## [unpublished] +## [0.5.13] - 2024-12-20 - Fixed - Fixed a bug where assigning a dict value (such as `abc['something'] = 123`) would result in EdgeCaseError + - Fixed a bug where non-UTF-8 encoded files would crash _pydoclint_ + +- Full diff + - https://github.com/jsh9/pydoclint/compare/0.5.12...0.5.13 ## [0.5.12] - 2024-12-15 diff --git a/pydoclint/main.py b/pydoclint/main.py index b9a2200..b49234d 100644 --- a/pydoclint/main.py +++ b/pydoclint/main.py @@ -638,7 +638,10 @@ def _checkFile( if not filename.is_file(): # sometimes folder names can end with `.py` return [] - with open(filename, encoding='utf8') as fp: + with open(filename, encoding='utf-8', errors='replace') as fp: + # Note: errors='replace' would replace unrecognized characters with + # question marks. 
This may not be a perfect solution, but for + # now this may be good enough. src: str = ''.join(fp.readlines()) tree: ast.Module = ast.parse(src) diff --git a/setup.cfg b/setup.cfg index f090d8d..5a9dac9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = pydoclint -version = 0.5.12 +version = 0.5.13 description = A Python docstring linter that checks arguments, returns, yields, and raises sections long_description = file: README.md long_description_content_type = text/markdown diff --git a/tests/data/edge_cases/19_file_encoding/nonascii.py b/tests/data/edge_cases/19_file_encoding/nonascii.py new file mode 100644 index 0000000..12738e3 --- /dev/null +++ b/tests/data/edge_cases/19_file_encoding/nonascii.py @@ -0,0 +1,4 @@ +# coding: iso-8859-5 +# (Unlikely to be the default encoding for most testers.) +# ±¶ÿàáâãäåæçèéêëìíîï <- Cyrillic characters +u = "®âðÄ" diff --git a/tests/data/edge_cases/19_file_encoding/nonascii2.py b/tests/data/edge_cases/19_file_encoding/nonascii2.py new file mode 100644 index 0000000..a1afce4 --- /dev/null +++ b/tests/data/edge_cases/19_file_encoding/nonascii2.py @@ -0,0 +1,4 @@ +# coding: iso-8859-5 +# (Unlikely to be the default encoding for most testers.) +# БЖџрÑтуфхцчшщъыьÑÑŽÑ <- Cyrillic characters +'Ўт№Ф' diff --git a/tests/test_main.py b/tests/test_main.py index 510aef8..ebcf388 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1531,6 +1531,8 @@ def testNonAscii() -> None: ], ), ('18_assign_to_subscript/case.py', {}, []), + ('19_file_encoding/nonascii.py', {}, []), # from: https://github.com/ipython/ipython/blob/0334d9f71e7a97394a73c15c663ca50d65df62e1/IPython/core/tests/nonascii.py + ('19_file_encoding/nonascii2.py', {}, []), # from: https://github.com/ipython/ipython/blob/0334d9f71e7a97394a73c15c663ca50d65df62e1/IPython/core/tests/nonascii2.py ], ) def testEdgeCases(