-
Notifications
You must be signed in to change notification settings - Fork 24
Sentence complete classifier #389
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Languages can be very dynamic and complicated. This brick does not actually try to accurately classify all sentences, which would be quite complex. Instead, this brick checks whether some characteristics apply that many complete sentences share: does the sentence start with an uppercase character, does it end with punctuation, and does it contain at least two nouns and a verb. The name `starts_with_uppercase_ends_with_punctuation_and_contains_two_nouns_and_a_verb` would be a bit long for a brick, though.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
from pydantic import BaseModel | ||
from extractors.util.spacy import SpacySingleton | ||
|
||
# Example request payload: a sentence fragment that should classify as incomplete.
INPUT_EXAMPLE = {
    "text": "it would be sad if",
    "spacy_model": "en_core_web_sm",
}
|
||
class SentenceCompleteClassifierModel(BaseModel):
    """Request model for the sentence-complete classifier."""

    # Input text; may contain one or more sentences.
    text: str
    # Name of the spaCy language model to load, e.g. "en_core_web_sm".
    spacy_model: str

    class Config:
        # Example payload shown in the generated API docs.
        schema_extra = {"example": INPUT_EXAMPLE}
|
||
def sentence_complete_classifier(req: SentenceCompleteClassifierModel):
    """Classify whether the sentences of a text look complete.

    Heuristic, not a full grammatical analysis: a sentence counts as
    "complete" when it starts with a title-cased token, ends with
    punctuation, and contains at least two nouns (NOUN/PROPN/PRON)
    and one verb.

    Returns a dict with key "text_completeness" whose value is
    "complete", "incomplete", or "partly complete".
    """
    nlp = SpacySingleton.get_nlp(req.spacy_model)
    doc = nlp(req.text)

    classifications = []
    for sent in doc.sents:
        if sent[0].is_title and sent[-1].is_punct:
            # Counters start at the required amounts and are decremented
            # per matching token; <1 means the requirement is satisfied.
            has_noun = 2
            has_verb = 1
            for token in sent:
                if token.pos_ in ["NOUN", "PROPN", "PRON"]:
                    has_noun -= 1
                elif token.pos_ == "VERB":
                    has_verb -= 1
            if has_noun < 1 and has_verb < 1:
                classifications.append("complete")
            else:
                classifications.append("incomplete")
        else:
            classifications.append("incomplete")

    # Bug fix: an empty text yields no sentences, and all() over an empty
    # list is True, so the original returned "complete" for empty input.
    if not classifications:
        return {"text_completeness": "incomplete"}

    # Aggregation logic
    if all(classification == "complete" for classification in classifications):
        return {"text_completeness": "complete"}
    if all(classification == "incomplete" for classification in classifications):
        return {"text_completeness": "incomplete"}
    # Mixed results: some sentences complete, some not.
    return {"text_completeness": "partly complete"}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
```python | ||
import spacy | ||
|
||
# Cache of already-loaded spaCy models, keyed by model name.
loaded_models = {}


def load_spacy(spacy_model):
    """Load a spaCy model once and reuse it on subsequent calls."""
    model = loaded_models.get(spacy_model)
    if model is None:
        model = spacy.load(spacy_model)
        loaded_models[spacy_model] = model
    return model
|
||
def sentence_complete_classifier(text: str, spacy_model: str = "en_core_web_sm") -> str:
    """
    @param text: The text to classify
    @param spacy_model: A spaCy language model
    @returns: Classification for the text based on all sentences:
        "complete", "incomplete", or "partly complete"
    """
    nlp = load_spacy(spacy_model)
    doc = nlp(text)

    classifications = []
    for sent in doc.sents:
        if sent[0].is_title and sent[-1].is_punct:
            # Counters start at the required amounts and are decremented
            # per matching token; <1 means the requirement is satisfied.
            has_noun = 2
            has_verb = 1
            for token in sent:
                if token.pos_ in ["NOUN", "PROPN", "PRON"]:
                    has_noun -= 1
                elif token.pos_ == "VERB":
                    has_verb -= 1
            if has_noun < 1 and has_verb < 1:
                classifications.append("complete")
            else:
                classifications.append("incomplete")
        else:
            classifications.append("incomplete")

    # Bug fix: with no sentences (e.g. empty text), all() over the empty
    # list is True and the original returned "complete"; treat the case
    # as incomplete instead.
    if not classifications:
        return "incomplete"

    # Aggregation logic
    if all(classification == "complete" for classification in classifications):
        return "complete"
    if all(classification == "incomplete" for classification in classifications):
        return "incomplete"
    # Mixed results: some sentences complete, some not.
    return "partly complete"
|
||
|
||
# ↑ necessary bricks function | ||
# ----------------------------------------------------------------------------------------- | ||
# ↓ example implementation | ||
|
||
def example_integration():
    """Run the classifier over a few sample texts and print each result."""
    samples = [
        "This is a complete sentence written by me!",
        "The first sentence I have written is complete! However, the second one...",
        "and they rand over here and then"
    ]
    for sample in samples:
        classification = sentence_complete_classifier(sample)
        print(f"The text '{sample}' is -> {classification}")


example_integration()
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
```python | ||
# Name of the record attribute holding the (spaCy-processed) text.
ATTRIBUTE: str = "text"


def sentence_complete_classifier(record):
    """Classify whether the sentences of a record's text look complete.

    Expects record[ATTRIBUTE] to expose spaCy-style `.sents` whose tokens
    carry `is_title`, `is_punct` and `pos_`. Returns "complete",
    "incomplete", or "partly complete".
    """
    classifications = []
    for sent in record[ATTRIBUTE].sents:
        if sent[0].is_title and sent[-1].is_punct:
            # Counters start at the required amounts and are decremented
            # per matching token; <1 means the requirement is satisfied.
            has_noun = 2
            has_verb = 1
            for token in sent:
                if token.pos_ in ["NOUN", "PROPN", "PRON"]:
                    has_noun -= 1
                elif token.pos_ == "VERB":
                    has_verb -= 1
            if has_noun < 1 and has_verb < 1:
                classifications.append("complete")
            else:
                classifications.append("incomplete")
        else:
            classifications.append("incomplete")

    # Bug fix: with no sentences, all() over the empty list is True and the
    # original returned "complete"; treat the case as incomplete instead.
    if not classifications:
        return "incomplete"

    # Aggregation logic
    if all(classification == "complete" for classification in classifications):
        return "complete"
    if all(classification == "incomplete" for classification in classifications):
        return "incomplete"
    # Mixed results: some sentences complete, some not.
    return "partly complete"
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
from util.configs import build_classifier_function_config | ||
from util.enums import State, BricksVariableType, RefineryDataType, SelectionType | ||
from . import sentence_complete_classifier, INPUT_EXAMPLE | ||
|
||
|
||
def get_config():
    """Build the bricks classifier config for the sentence-complete classifier."""
    return build_classifier_function_config(
        # strapi information
        function=sentence_complete_classifier,
        input_example=INPUT_EXAMPLE,
        # NOTE(review): the PR title references #389 — confirm 349 is the
        # intended issue id.
        issue_id=349,
        tabler_icon="LanguageKatakana",
        min_refinery_version="1.7.0",
        state=State.PUBLIC.value,
        type="python_function",
        available_for=["refinery", "common"],
        part_of_group=[
            "reference_quality",
        ],  # first entry should be parent directory
        # bricks integrator information
        cognition_init_mapping={
            "incomplete": "Needs fix",
            # NOTE(review): the literal string "null" looks suspicious here —
            # confirm it should not be None or omitted entirely.
            "complete": "null",
        },
        integrator_inputs={
            "name": "sentence_complete_classifier",
            "refineryDataType": RefineryDataType.TEXT.value,
            "variables": {
                "ATTRIBUTE": {
                    "selectionType": SelectionType.CHOICE.value,
                    "addInfo": [
                        BricksVariableType.ATTRIBUTE.value,
                        BricksVariableType.GENERIC_STRING.value,
                    ],
                },
            },
        },
    )
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
From the name and description of the brick, I would not expect the aggregation logic.
I suggest mentioning it in the README