Skip to content

Commit

Permalink
feat: add pre commit to repo
Browse files Browse the repository at this point in the history
  • Loading branch information
Wazzabeee committed Apr 20, 2024
1 parent 171903c commit 5ef77a6
Show file tree
Hide file tree
Showing 10 changed files with 67 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,4 @@ jobs:
flake8
pylint scripts/ --rcfile=setup.cfg --fail-under=9
mypy -p scripts --ignore-missing-imports --disallow-incomplete-defs
continue-on-error: true # This is equivalent to allow_failure in GitLab CI
continue-on-error: true
30 changes: 30 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
repos:
- repo: https://github.com/psf/black
rev: 24.4.0
hooks:
- id: black
args: ['--line-length=120', '--verbose']

- repo: https://github.com/pycqa/flake8
rev: '7.0.0'
hooks:
- id: flake8

- repo: https://github.com/pre-commit/mirrors-pylint
rev: v3.0.0a5
hooks:
- id: pylint
name: pylint
entry: pylint
language: system
args: ['.', '--rcfile=setup.cfg', '--fail-under=8']
types: [python]

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.9.0
hooks:
- id: mypy
name: mypy
entry: python3 -m mypy
args: ['--explicit-package-bases', '--ignore-missing-imports', '--disallow-incomplete-defs']
language: system
Empty file added __init__.py
Empty file.
Empty file added scripts/__init__.py
Empty file.
1 change: 1 addition & 0 deletions scripts/html_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
It returns colors depending on the similarity score.
"""

import difflib
from operator import itemgetter
from typing import List, Tuple
Expand Down
1 change: 1 addition & 0 deletions scripts/html_writing.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
It inserts comparison results in corresponding html files
"""

from os import fsync, rename, path
from random import randint
from shutil import copy
Expand Down
1 change: 1 addition & 0 deletions scripts/processing_files.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
""" This module is used to process text in docx, odt, txt and pdf files """

import re
import zipfile
from os import path
Expand Down
1 change: 1 addition & 0 deletions scripts/similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- overlapping words
"""

import difflib

from utils import remove_numbers, remove_stop_words, lemmatize
Expand Down
1 change: 1 addition & 0 deletions scripts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
It can lemmatize, remove stop words, remove numbers for text processing
"""

import argparse
from os import path, listdir
from time import sleep
Expand Down
31 changes: 31 additions & 0 deletions tests/scripts/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,22 @@


class TestUtils(unittest.TestCase):
"""
Tests utils.py
"""

@classmethod
def setUpClass(cls):
    """Download the NLTK "stopwords" resource as class-level test setup."""
    # Class-level fixture: unittest invokes this before the tests of the class.
    nltk.download("stopwords")

def test_parse_options(self):
"""
Tests parse_options()
"""
# Mock the arguments and test the parse_options function
test_args = ["program", "input_dir", "-o", "output_dir", "-s", "5"]
with patch("sys.argv", test_args):
Expand All @@ -27,44 +37,65 @@ def test_parse_options(self):
self.assertEqual(args.block_size, 5)

def test_is_float(self):
    """Check is_float() on a float literal, a negative integer and a non-numeric string."""
    # The float literal is the only input expected to be accepted.
    accepted = is_float(3.14)
    self.assertTrue(accepted)

    # Both remaining inputs are expected to be rejected, checked in the original order.
    for rejected_value in (-1, "not a float"):
        self.assertFalse(is_float(rejected_value))

def test_wait_for_file(self):
    """Check that wait_for_file() is truthy while os.path.isfile is patched to return True."""
    # Replace the real filesystem lookup so no actual file is needed.
    isfile_patcher = patch("os.path.isfile", return_value=True)
    with isfile_patcher:
        outcome = wait_for_file("dummy_file")
        self.assertTrue(outcome)

def test_remove_numbers(self):
    """Check that remove_numbers() keeps the strings and drops the int and float entries."""
    mixed_items = ["hello", "world", 123, 4.56]
    filtered = remove_numbers(mixed_items)
    self.assertEqual(filtered, ["hello", "world"])

def test_remove_stop_words(self):
    """Check remove_stop_words() on a list mixing one stop word with ordinary words."""
    # The NLTK stop-word list is patched to a small, fixed vocabulary.
    stop_words_patcher = patch("nltk.corpus.stopwords.words", return_value=["a", "the"])
    with stop_words_patcher:
        remaining = remove_stop_words(["a", "quick", "brown", "fox"])
        self.assertEqual(remaining, ["quick", "brown", "fox"])

def test_remove_stop_words_only_stopwords(self):
    """Check that remove_stop_words() returns an empty list when every word is a stop word."""
    # The NLTK stop-word list is patched to a small, fixed vocabulary.
    stop_words_patcher = patch("nltk.corpus.stopwords.words", return_value=["a", "the"])
    with stop_words_patcher:
        remaining = remove_stop_words(["a", "the"])
        self.assertEqual(remaining, [])

def test_remove_stop_words_no_stopwords(self):
    """Check that remove_stop_words() leaves a list containing no stop words unchanged."""
    # The NLTK stop-word list is patched to a small, fixed vocabulary.
    stop_words_patcher = patch("nltk.corpus.stopwords.words", return_value=["a", "the"])
    with stop_words_patcher:
        remaining = remove_stop_words(["quick", "brown", "fox"])
        self.assertEqual(remaining, ["quick", "brown", "fox"])

def test_lemmatize(self):
"""
Tests lemmatize()
"""
# Test the lemmatize function
with patch("nltk.stem.WordNetLemmatizer.lemmatize", side_effect=lambda x: x + "_lemmatized"):
words_list = ["running", "jumps"]
Expand Down

0 comments on commit 5ef77a6

Please sign in to comment.