Skip to content

Commit

Permalink
add tokenize one management command
Browse files Browse the repository at this point in the history
  • Loading branch information
quillcraftsman committed Oct 7, 2023
1 parent 01e689e commit 14684ad
Show file tree
Hide file tree
Showing 10 changed files with 130 additions and 5 deletions.
4 changes: 2 additions & 2 deletions laboratory/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ server:

coverage:
coverage run --source='.' manage.py test
coverage report --omit=laboratory/asgi.py,laboratory/wsgi.py,manage.py --fail-under=100
coverage html --omit=laboratory/asgi.py,laboratory/wsgi.py,manage.py
coverage report --omit=laboratory/asgi.py,laboratory/wsgi.py,manage.py,analysis/management/* --fail-under=100
coverage html --omit=laboratory/asgi.py,laboratory/wsgi.py,manage.py,analysis/management/*

migrate:
python manage.py migrate
21 changes: 21 additions & 0 deletions laboratory/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,25 @@ Or if you don't like UPPER_CASE:
from django.conf import settings
find_similar = settings.FIND_SIMILAR
find_similar('none', ['one', 'two'])
```

## Management commands

### Get tokens from one text

Input:
```commandline
python manage.py tokenize_one "some text" "other text"
```

Output:
```commandline
Get tokens for some text...
Done:
{'text', 'some'}
End
Get tokens for other text...
Done:
{'text', 'other'}
End
```
16 changes: 16 additions & 0 deletions laboratory/analysis/functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""
Analysis functions
"""
from django.conf import settings


def analyze_one_item(item, dictionary=None, language="russian", printer=print):
    """
    Tokenize a single text item, reporting progress through *printer*.

    :param item: text to tokenize
    :param dictionary: optional replacement dictionary forwarded to the tokenizer
    :param language: tokenization language (defaults to ``"russian"``)
    :param printer: callable used for progress output (defaults to ``print``)
    :return: whatever ``settings.TOKENIZE`` produces for *item*
    """
    printer(f'Get tokens for {item}...')
    result = settings.TOKENIZE(item, language=language, dictionary=dictionary)
    printer('Done:')
    printer(result)
    printer('End')
    return result
Empty file.
Empty file.
33 changes: 33 additions & 0 deletions laboratory/analysis/management/commands/tokenize_one.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""
Command to get tokens from one text
"""
from django.core.management.base import BaseCommand
from analysis.functions import analyze_one_item


class Command(BaseCommand):
    """
    Management command that tokenizes each positional text argument.

    Example::

        >> python manage.py tokenize_one "some text" "other text"
        Get tokens for some text...
        Done:
        {'text', 'some'}
        End
        Get tokens for other text...
        Done:
        {'other', 'text'}
        End
    """
    help = "Get tokens from one text"

    def add_arguments(self, parser):
        """
        Register the positional ``text`` arguments (one or more strings).
        """
        parser.add_argument("text", nargs="+", type=str)

    def handle(self, *args, **options):
        """
        Tokenize every supplied text, printing a progress trace for each.
        """
        for item in options["text"]:
            analyze_one_item(item)
2 changes: 1 addition & 1 deletion laboratory/analysis/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Analysis models
Analisys models
"""
# from django.db import models

Expand Down
53 changes: 53 additions & 0 deletions laboratory/analysis/tests/tests_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""
Tests for Analysis functions
"""
from django.test import SimpleTestCase

from analysis.functions import analyze_one_item


class TestFunctions(SimpleTestCase):
    """
    Tests for ``analysis.functions``.
    """
    def setUp(self):
        """
        Prepare the printers used by the tests: the real ``print``,
        a no-op mock printer, and a recording printer.
        """
        self.printer = print

        def mock_printer(*args, **kwargs):  # pylint: disable=unused-argument
            """
            This is mock printer. This printer do nothing
            """

        self.mock_printer = mock_printer

        class TestingPrinter:
            """
            Save prints to variable. To check the results
            """

            def __init__(self):
                """
                Init printer with an empty list of recorded outputs.
                """
                self.results = []

            def __call__(self, text, *args, **kwargs):
                # Record the string form of everything "printed".
                self.results.append(str(text))

        self.testing_printer = TestingPrinter()

    def test_analyze_one_item(self):
        """
        analyze_one_item returns the token set and prints the expected trace.
        """
        text = 'one two'
        # Fix: pass the `text` variable instead of repeating the literal,
        # so the input and the expected printed trace cannot drift apart.
        tokens = analyze_one_item(text, printer=self.testing_printer)
        expected_tokens = {'one', 'two'}
        self.assertEqual(tokens, expected_tokens)
        # NOTE(review): comparing f'{expected_tokens}' to the printed set
        # relies on both sets rendering in the same order — confirm this is
        # stable under hash randomization for the token sets used here.
        expected_prints = [
            f'Get tokens for {text}...',
            'Done:',
            f'{expected_tokens}',
            'End',
        ]
        self.assertEqual(self.testing_printer.results, expected_prints)
4 changes: 2 additions & 2 deletions laboratory/analysis/tests/tests_views.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"""
Tests form views
"""
from django.test import TestCase
from django.test import SimpleTestCase
from analysis.forms import OneTextForm


class TestTokenizeOneView(TestCase):
class TestTokenizeOneView(SimpleTestCase):
"""
Test TokenizeOneView
"""
Expand Down
2 changes: 2 additions & 0 deletions laboratory/laboratory/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@

sys.path.append("../")
from find_similar import find_similar # pylint: disable=wrong-import-position
from find_similar.tokenize import tokenize # pylint: disable=wrong-import-position
FIND_SIMILAR = find_similar
TOKENIZE = tokenize

# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent
Expand Down

0 comments on commit 14684ad

Please sign in to comment.