From 5ef77a62a360f157093fd1dac04e19ee04fa7733 Mon Sep 17 00:00:00 2001 From: Wazzabeee Date: Sat, 20 Apr 2024 17:55:58 +0200 Subject: [PATCH] feat: add pre commit to repo --- .github/workflows/ci.yml | 2 +- .pre-commit-config.yaml | 30 ++++++++++++++++++++++++++++++ __init__.py | 0 scripts/__init__.py | 0 scripts/html_utils.py | 1 + scripts/html_writing.py | 1 + scripts/processing_files.py | 1 + scripts/similarity.py | 1 + scripts/utils.py | 1 + tests/scripts/test_utils.py | 31 +++++++++++++++++++++++++++++++ 10 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 .pre-commit-config.yaml create mode 100644 __init__.py create mode 100644 scripts/__init__.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7430fc9..bfbc30c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,4 +36,4 @@ jobs: flake8 pylint scripts/ --rcfile=setup.cfg --fail-under=9 mypy -p scripts --ignore-missing-imports --disallow-incomplete-defs - continue-on-error: true # This is equivalent to allow_failure in GitLab CI + continue-on-error: true diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..a657830 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,30 @@ +repos: + - repo: https://github.com/psf/black + rev: 24.4.0 + hooks: + - id: black + args: ['--line-length=120', '--verbose'] + + - repo: https://github.com/pycqa/flake8 + rev: '7.0.0' + hooks: + - id: flake8 + + - repo: https://github.com/pre-commit/mirrors-pylint + rev: v3.0.0a5 + hooks: + - id: pylint + name: pylint + entry: pylint + language: system + args: ['.', '--rcfile=setup.cfg', '--fail-under=8'] + types: [python] + + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.9.0 + hooks: + - id: mypy + name: mypy + entry: python3 -m mypy + args: ['--explicit-package-bases', '--ignore-missing-imports', '--disallow-incomplete-defs'] + language: system \ No newline at end of file diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/html_utils.py b/scripts/html_utils.py index 68d12c1..5b28b47 100644 --- a/scripts/html_utils.py +++ b/scripts/html_utils.py @@ -5,6 +5,7 @@ It returns colors depending on the similarity score. """ + import difflib from operator import itemgetter from typing import List, Tuple diff --git a/scripts/html_writing.py b/scripts/html_writing.py index 61eb1bd..ae662a1 100644 --- a/scripts/html_writing.py +++ b/scripts/html_writing.py @@ -6,6 +6,7 @@ It inserts comparison results in corresponding html files """ + from os import fsync, rename, path from random import randint from shutil import copy diff --git a/scripts/processing_files.py b/scripts/processing_files.py index d4bc64e..02afd5d 100644 --- a/scripts/processing_files.py +++ b/scripts/processing_files.py @@ -1,4 +1,5 @@ """ This module is used to process text in docx, odt, txt and pdf files """ + import re import zipfile from os import path diff --git a/scripts/similarity.py b/scripts/similarity.py index eca3fb1..8e623ce 100644 --- a/scripts/similarity.py +++ b/scripts/similarity.py @@ -7,6 +7,7 @@ - overlapping words """ + import difflib from utils import remove_numbers, remove_stop_words, lemmatize diff --git a/scripts/utils.py b/scripts/utils.py index 90703ce..d5fcf22 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -6,6 +6,7 @@ It can lemmatize, remove stop words, remove numbers for text processing """ + import argparse from os import path, listdir from time import sleep diff --git a/tests/scripts/test_utils.py b/tests/scripts/test_utils.py index bc718dd..a67a588 100644 --- a/tests/scripts/test_utils.py +++ b/tests/scripts/test_utils.py @@ -12,12 +12,22 @@ class TestUtils(unittest.TestCase): + """ + Tests utils.py + """ + @classmethod def setUpClass(cls): + """ + Sets up the test class. + """ # Download NLTK stopwords nltk.download("stopwords") def test_parse_options(self): + """ + Tests parse_options() + """ # Mock the arguments and test the parse_options function test_args = ["program", "input_dir", "-o", "output_dir", "-s", "5"] with patch("sys.argv", test_args): @@ -27,23 +37,35 @@ def test_parse_options(self): self.assertEqual(args.block_size, 5) def test_is_float(self): + """ + Tests is_float() + """ # Test cases for is_float function self.assertTrue(is_float(3.14)) self.assertFalse(is_float(-1)) self.assertFalse(is_float("not a float")) def test_wait_for_file(self): + """ + Tests wait_for_file() + """ # Test the wait_for_file function with a mock path.isfile with patch("os.path.isfile", return_value=True): self.assertTrue(wait_for_file("dummy_file")) def test_remove_numbers(self): + """ + Tests remove_numbers() + """ # Test the remove_numbers function words_list = ["hello", "world", 123, 4.56] expected = ["hello", "world"] self.assertEqual(remove_numbers(words_list), expected) def test_remove_stop_words(self): + """ + Tests remove_stop_words() + """ # Test the remove_stop_words function with patch("nltk.corpus.stopwords.words", return_value=["a", "the"]): words_list = ["a", "quick", "brown", "fox"] @@ -51,6 +73,9 @@ def test_remove_stop_words(self): self.assertEqual(remove_stop_words(words_list), expected) def test_remove_stop_words_only_stopwords(self): + """ + Tests remove_stop_words() + """ # Test with only stopwords with patch("nltk.corpus.stopwords.words", return_value=["a", "the"]): words_list = ["a", "the"] @@ -58,6 +83,9 @@ def test_remove_stop_words_only_stopwords(self): self.assertEqual(remove_stop_words(words_list), expected) def test_remove_stop_words_no_stopwords(self): + """ + Tests remove_stop_words() + """ # Test with no stopwords with patch("nltk.corpus.stopwords.words", return_value=["a", "the"]): words_list = ["quick", "brown", "fox"] @@ -65,6 +93,9 @@ def test_remove_stop_words_no_stopwords(self): self.assertEqual(remove_stop_words(words_list), expected) def test_lemmatize(self): + """ + Tests lemmatize() + """ # Test the lemmatize function with patch("nltk.stem.WordNetLemmatizer.lemmatize", side_effect=lambda x: x + "_lemmatized"): words_list = ["running", "jumps"]