From 7b1ed8a9606a091d7eb26380040733b69563bba9 Mon Sep 17 00:00:00 2001 From: "David L. Day" <1132144+davidlday@users.noreply.github.com> Date: Tue, 17 Aug 2021 22:02:39 -0400 Subject: [PATCH] Command Line Interface (#16) * added cli to setup * explicitly extend object * simple draft of cli * paragrah -> paragraph, +sha256 * added minimal output * removed __str__ * added filename to output and cleaned up options * added cli section * fixed sample json indent * added cli heading * added help text * version 1.2.0 release --- README.rst | 45 ++++++++++++++++++ prosegrinder/VERSION | 2 +- prosegrinder/__main__.py | 73 ++++++++++++++++++++++++++++++ prosegrinder/fragment.py | 5 +- prosegrinder/fragment_container.py | 5 +- prosegrinder/prose.py | 13 ++++-- prosegrinder/readability_scores.py | 2 +- prosegrinder/word.py | 5 +- setup.py | 14 ++++-- tests/test_prose.py | 2 +- 10 files changed, 141 insertions(+), 25 deletions(-) create mode 100644 prosegrinder/__main__.py diff --git a/README.rst b/README.rst index 7fee348..230ebb2 100644 --- a/README.rst +++ b/README.rst @@ -36,6 +36,51 @@ of readabilit scores. All objects and attributes should be treated as immutable. I know this isn't great documentation, but it should be enough to get you going. +Command Line Interface +~~~~~~~~~~~~~~~~~~~~~~~ + +Prosegrinder now includes a simple CLI for analyzing text in a file:: + + Usage: prosegrinder [OPTIONS] FILE + + Options: + -s, --save FILENAME + -i, --indent INTEGER + --help Show this message and exit. + +Will provide basic statistics on text from a file, the filename, and the sha256 of text analyzed. 
Output is JSON to help facilitate use in automation:: + + { + "filename": "./tests/resources/shortstory.txt", + "sha256": "5b756dea7c7f0088ff3692e402466af7f4fc493fa357c1ae959fa4493943fc03", + "word_character_count": 7008, + "phone_count": 5747, + "syllable_count": 2287, + "word_count": 1528, + "sentence_count": 90, + "paragraph_count": 77, + "complex_word_count": 202, + "long_word_count": 275, + "pov_word_count": 113, + "first_person_word_count": 8, + "second_person_word_count": 74, + "third_person_word_count": 31, + "pov": "first", + "readability_scores": { + "automated_readability_index": 0.281, + "coleman_liau_index": 9.425, + "flesch_kincaid_grade_level": 8.693, + "flesch_reading_ease": 62.979, + "gunning_fog_index": 12.079, + "linsear_write": 10.733, + "lix": 34.975, + "rix": 3.056, + "smog": 11.688 + } + } + + + Readbility scores ~~~~~~~~~~~~~~~~~ diff --git a/prosegrinder/VERSION b/prosegrinder/VERSION index 9084fa2..26aaba0 100644 --- a/prosegrinder/VERSION +++ b/prosegrinder/VERSION @@ -1 +1 @@ -1.1.0 +1.2.0 diff --git a/prosegrinder/__main__.py b/prosegrinder/__main__.py new file mode 100644 index 0000000..b27daf1 --- /dev/null +++ b/prosegrinder/__main__.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- + +import click +import json + +from prosegrinder.prose import Prose + +@click.command() +@click.argument('file', required=True, type=click.File('r')) +@click.option('-i', '--indent', required=False, + type=int, default=2, help="Python pretty-print json indent level.") +@click.option('-s', '--save', required=False, + type=click.File('w'), help="File to save output to.") +def cli(file, save, indent): + filename = click.format_filename(file.name) + text = file.read() + p = Prose(text) + d = { + "filename": + filename, + "sha256": + p.sha256, + "word_character_count": + p.word_character_count, + "phone_count": + p.phone_count, + "syllable_count": + p.syllable_count, + "word_count": + p.word_count, + "sentence_count": + p.sentence_count, + "paragraph_count": + 
p.paragraph_count, + "complex_word_count": + p.complex_word_count, + "long_word_count": + p.long_word_count, + "pov_word_count": + p.pov_word_count, + "first_person_word_count": + p.first_person_word_count, + "second_person_word_count": + p.second_person_word_count, + "third_person_word_count": + p.third_person_word_count, + "pov": + p.pov, + "readability_scores": { + "automated_readability_index": + p.readability_scores.automated_readability_index, + "coleman_liau_index": + p.readability_scores.coleman_liau_index, + "flesch_kincaid_grade_level": + p.readability_scores.flesch_kincaid_grade_level, + "flesch_reading_ease": + p.readability_scores.flesch_reading_ease, + "gunning_fog_index": + p.readability_scores.gunning_fog_index, + "linsear_write": + p.readability_scores.linsear_write, + "lix": + p.readability_scores.lix, + "rix": + p.readability_scores.rix, + "smog": + p.readability_scores.smog + } + } + if (save): + save.write(json.dumps(d, indent=indent)) + else: + click.echo(json.dumps(d, indent=indent)) diff --git a/prosegrinder/fragment.py b/prosegrinder/fragment.py index 07d2a41..3cb443e 100644 --- a/prosegrinder/fragment.py +++ b/prosegrinder/fragment.py @@ -9,7 +9,7 @@ from prosegrinder.word import Word -class Fragment(): +class Fragment(object): def __init__(self, text, dictionary=Dictionary()): self._text = text @@ -49,9 +49,6 @@ def __init__(self, text, dictionary=Dictionary()): elif self._third_person_word_count > 0: self._pov = pointofview.THIRD - def __str__(self): - return str(self.__dict__) - def __eq__(self, other): return self._text == other._text diff --git a/prosegrinder/fragment_container.py b/prosegrinder/fragment_container.py index 5969981..f485023 100644 --- a/prosegrinder/fragment_container.py +++ b/prosegrinder/fragment_container.py @@ -7,7 +7,7 @@ from prosegrinder.dictionary import Dictionary -class FragmentContainer(): +class FragmentContainer(object): def __init__(self, fragments, dictionary=Dictionary()): self._dictionary = dictionary 
@@ -48,9 +48,6 @@ def __init__(self, fragments, dictionary=Dictionary()): elif (self._third_person_word_count > 0): self._pov = pointofview.THIRD - def __str__(self): - return str(self.__dict__) - def __eq__(self, other): return self._fragments == other._fragments diff --git a/prosegrinder/prose.py b/prosegrinder/prose.py index b0d8655..978252f 100644 --- a/prosegrinder/prose.py +++ b/prosegrinder/prose.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from collections import Counter +import hashlib import narrative @@ -11,10 +12,11 @@ from prosegrinder.readability_scores import ReadabilityScores -class Prose(): +class Prose(object): def __init__(self, text, dictionary=Dictionary()): self._text = text + self._sha256 = hashlib.sha256(self._text.encode()).hexdigest() self._dictionary = dictionary self._paragraphs = Paragraph.parse_paragraphs( self._text, self._dictionary) @@ -59,9 +61,6 @@ def __init__(self, text, dictionary=Dictionary()): [Fragment(fragment_text) for fragment_text in n['narrative']]) self._pov = self._narrative.pov - def __str__(self): - return str(self.__dict__) - def __eq__(self, other): return self._text == other._text @@ -125,7 +124,7 @@ def sentence_count(self): return self._sentence_count @property - def paragrah_count(self): + def paragraph_count(self): return self._paragraph_count @property @@ -147,3 +146,7 @@ def pov(self): @property def text(self): return self._text + + @property + def sha256(self): + return self._sha256 diff --git a/prosegrinder/readability_scores.py b/prosegrinder/readability_scores.py index 3800f51..03b9de3 100644 --- a/prosegrinder/readability_scores.py +++ b/prosegrinder/readability_scores.py @@ -3,7 +3,7 @@ from math import sqrt -class ReadabilityScores(): +class ReadabilityScores(object): NDIGITS = 3 # Default for round(number,[ndigits]) diff --git a/prosegrinder/word.py b/prosegrinder/word.py index f6ec711..5c8391d 100644 --- a/prosegrinder/word.py +++ b/prosegrinder/word.py @@ -6,7 +6,7 @@ import pointofview -class 
Word(): +class Word(object): """A Word, the base unit for measuring fiction prose.""" @@ -29,9 +29,6 @@ def __init__(self, text, phones, normalized_phones, syllable_count, is_dictionar self._character_count = len(self._text) self._pov = pointofview.get_word_pov(self._text) - def __str__(self): - return str(self.__dict__) - def __eq__(self, other): return self.__dict__ == other.__dict__ diff --git a/setup.py b/setup.py index ea9477d..44ead4f 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from os import path -from setuptools import setup +from setuptools import setup, find_packages # Version with open(path.join(path.dirname(__file__), 'prosegrinder', 'VERSION')) as version_file: @@ -19,9 +19,12 @@ author='David L. Day', author_email='dday376@gmail.com', url='https://github.com/prosegrinder/python-prosegrinder', - packages=[ - 'prosegrinder' - ], + packages=find_packages(include=['prosegrinder', 'prosegrinder.*']), + entry_points={ + 'console_scripts': [ + 'prosegrinder = prosegrinder.__main__:cli', + ], + }, package_dir={'prosegrinder': 'prosegrinder'}, package_data={ '': ['LICENSE', '*.rst', 'MANIFEST.in'], @@ -42,7 +45,8 @@ 'cmudict>=1.0.0', 'narrative>=1.0.0', 'pointofview>=1.0.0', - 'syllables>=1.0.0' + 'syllables>=1.0.0', + 'click>=8.0.1' ], python_requires='>=3.6', ) diff --git a/tests/test_prose.py b/tests/test_prose.py index 3a8c047..349005f 100644 --- a/tests/test_prose.py +++ b/tests/test_prose.py @@ -66,7 +66,7 @@ def test_sentences(): def test_paragraphs(): - assert(PARAGRAPH_COUNT == prose.paragrah_count) + assert(PARAGRAPH_COUNT == prose.paragraph_count) def test_dialogue_narrative():