From 4d1d9f1c1921a498b68d8d5fbc63d65f3182ff8e Mon Sep 17 00:00:00 2001
From: Michael Fekadu
Date: Wed, 4 Mar 2020 18:51:56 -0800
Subject: [PATCH 1/3] install MonkeyType as a dev-dependency, ignore monkeytype.sqlite3

pipenv install MonkeyType --dev
pipenv shell
monkeytype --verbose run nimbus.py
---
 .gitignore   |  3 +++
 Pipfile      |  2 +-
 Pipfile.lock | 61 +++++++++++++++++++++++++++++++++++-----------------
 3 files changed, 45 insertions(+), 21 deletions(-)

diff --git a/.gitignore b/.gitignore
index 9532684..ea1a9df 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+# monkeytype
+monkeytype.sqlite3
+
 nvenv/*

diff --git a/Pipfile b/Pipfile
index 6069200..dc31126 100644
--- a/Pipfile
+++ b/Pipfile
@@ -22,7 +22,7 @@ pytest = "==5.3.4"
 pyre-check = "==0.0.41"
 ## like the Unix `make` but better
 invoke = "==1.4.1"
-
+monkeytype = "*"
 
 [packages]
 # REST API

diff --git a/Pipfile.lock b/Pipfile.lock
index 6575e89..3d5dbb3 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "fb30d39142d3cc83d8909d9f4f4648a60ac33d4ec3a5a94d8dac7b90ef727a24"
+            "sha256": "348fee5e0ee39fb3206a8e4f15f486f53691109f3e3035829eb5a61d64ccdd6c"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -636,20 +636,20 @@
         },
         "srsly": {
             "hashes": [
-                "sha256:1102b4984f9f56364540e47d83fac3e7543903dfbb92f0d0e5dd3bfd40528934",
-                "sha256:1c4354095f63f59fc52a4362960faaddebcfa7a240f07209eb50e8f9ec39e700",
-                "sha256:3ceae42dbbda49b57a4937e0ca28f56c2a121c89008cc7ec09e0a9d8d705c03e",
-                "sha256:4ce9d6ab6d1c617150455ef5ba8abd5107a8e65956f06c2efc86697f4cb4b431",
-                "sha256:51c47f98dc06d5c2d1d7806cd38dcc834ab9906dc12170bc21105e5a9590a6fd",
-                "sha256:a672ffaa77680f355933cf424739ae9ecff767908a374ad194692b53040fda01",
-                "sha256:abe3d98d9ea8f7dac898119cd9861466c49cfe0f16287c9f859e0d4cab43a7a4",
-                "sha256:c6bdf53a87770139c6a9d75b3e664505bd81c022312fafca35ed38714e4ecdf1",
-                "sha256:c82e6dc3727454edc6ccdb1d07d5bc0aab3f43539fb8d9f973cf769135d2c7e4",
-                "sha256:ca1ec20ea6e14ad56ccaa84aa6c79d6e51fccf32e0040372b4d06c6e5dbb7fee",
-                "sha256:d5c0c718b2f67fc425d9bb3cc26b6141cb2f53251cdc145f58b70095241a3308",
-                "sha256:de329ba0ff451308d59e40c39372f5231e7c364f4933d7457788203630bdede2"
+                "sha256:18bad26c34cf5a8853fbf018fd168a7bf2ea7ce661e66476c25dac711cb79c9b",
+                "sha256:2179cf1e88c250e89e40227bd5848341011c170079b3d424987d067de6a73f42",
+                "sha256:21cfb0e5dea2c4515b5c2daa78402d5782c6425b4f58af40d2e2cb45e4778d8c",
+                "sha256:29434753a77481ec6129991f4116f983085cc8005c1ad963261124842e8c05fc",
+                "sha256:3f3975e8cb67194d26dd03508469b1303f8b994f30e7782f7eae25fef6dc4aad",
+                "sha256:46213d8f094b348a9433c825ac1eba36a21aa25a8bae6f29c2f9f053e15be961",
+                "sha256:59258b81d567df207f8a0a33c4b5fa232afccf1d927c8ce3ba5395bfd64c0ed8",
+                "sha256:7c553a709fd56a37a07f969e849f55a0aeabaeb7677bebc588a640ab8ec134aa",
+                "sha256:95849d84e8929be248a180e672c8ce1ed98b1341263bc983efdf8427465584f1",
+                "sha256:b94d8a13c60e3298a9ba12b1b211026e8378c7d087efd7ce46a3f2d8d4678d94",
+                "sha256:c8beff52c104a7ffe4a15513a05dc0497998cf83aa1ca39454489994d18c1c07",
+                "sha256:d409beb7257208633c974c01f9dc3265562fb6802caee7de21880761ba87c3ed"
             ],
-            "version": "==1.0.1"
+            "version": "==1.0.2"
         },
         "thinc": {
             "hashes": [
@@ -706,10 +706,10 @@
         },
         "zipp": {
             "hashes": [
-                "sha256:12248a63bbdf7548f89cb4c7cda4681e537031eda29c02ea29674bc6854460c2",
-                "sha256:7c0f8e91abc0dc07a5068f315c52cb30c66bfbc581e5b50704c8a2f6ebae794a"
+                "sha256:aa36550ff0c0b7ef7fa639055d797116ee891440eac1a56f378e2d3179e0320b",
+                "sha256:c599e4d75c98f6798c509911d08a22e6c021d074469042177c8c86fb92eefd96"
             ],
-            "version": "==3.0.0"
+            "version": "==3.1.0"
         }
     },
     "develop": {
@@ -826,6 +826,14 @@
             ],
             "version": "==0.6.1"
         },
+        "monkeytype": {
+            "hashes": [
+                "sha256:71da688939f08d19904462eef2e568a4f18f6133cc7e3c901ff5034c8ab5a538",
+                "sha256:9f052b42851bc24603836ce3105166c8cc5edabeb25e8fcf256fa25777122618"
+            ],
+            "index": "pypi",
+            "version": "==19.11.2"
+        },
         "more-itertools": {
             "hashes": [
                 "sha256:5dd8bcf33e5f9513ffa06d5ad33d78f31e1931ac9a18f33d37e77a180d393a7c",
@@ -985,6 +993,13 @@
             ],
             "version": "==2.23.0"
         },
+        "retype": {
+            "hashes": [
+                "sha256:7d033b115f66e5327dea0a3fd7c9a3dbfa53841575daf27ce2ce409956d901d4",
+                "sha256:846fd135d3ee33c1bad387602a405d808cb99a9a7a47299bfd0e1d25dfb2fedd"
+            ],
+            "version": "==19.9.0"
+        },
         "six": {
             "hashes": [
                 "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9",
@@ -1000,6 +1015,12 @@
             ],
             "version": "==2.1.0"
         },
+        "stringcase": {
+            "hashes": [
+                "sha256:48a06980661908efe8d9d34eab2b6c13aefa2163b3ced26972902e3bdfd87008"
+            ],
+            "version": "==1.2.0"
+        },
         "toml": {
             "hashes": [
                 "sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c",
@@ -1065,10 +1086,10 @@
         },
         "zipp": {
             "hashes": [
-                "sha256:12248a63bbdf7548f89cb4c7cda4681e537031eda29c02ea29674bc6854460c2",
-                "sha256:7c0f8e91abc0dc07a5068f315c52cb30c66bfbc581e5b50704c8a2f6ebae794a"
+                "sha256:aa36550ff0c0b7ef7fa639055d797116ee891440eac1a56f378e2d3179e0320b",
+                "sha256:c599e4d75c98f6798c509911d08a22e6c021d074469042177c8c86fb92eefd96"
             ],
-            "version": "==3.0.0"
+            "version": "==3.1.0"
         }
     }
 }
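The `monkeytype --verbose run nimbus.py` step in the commit message above executes the script under MonkeyType's tracer and records the observed argument and return types in monkeytype.sqlite3, which is why the .gitignore entry is added. A minimal sketch of the same tracing via MonkeyType's library API, assuming MonkeyType is installed; `greet` is a hypothetical stand-in for the code paths nimbus.py actually exercises, not code from this repo:

```
# Sketch only: `greet` is a hypothetical stand-in, not code from this repo.
import monkeytype


def greet(name):
    return "Hello, " + name


# Calls made inside the context manager are traced; the observed types are
# written to ./monkeytype.sqlite3 by default -- the file ignored above.
with monkeytype.trace():
    greet("Nimbus")
```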
"version": "==3.1.0" } }, "develop": { @@ -826,6 +826,14 @@ ], "version": "==0.6.1" }, + "monkeytype": { + "hashes": [ + "sha256:71da688939f08d19904462eef2e568a4f18f6133cc7e3c901ff5034c8ab5a538", + "sha256:9f052b42851bc24603836ce3105166c8cc5edabeb25e8fcf256fa25777122618" + ], + "index": "pypi", + "version": "==19.11.2" + }, "more-itertools": { "hashes": [ "sha256:5dd8bcf33e5f9513ffa06d5ad33d78f31e1931ac9a18f33d37e77a180d393a7c", @@ -985,6 +993,13 @@ ], "version": "==2.23.0" }, + "retype": { + "hashes": [ + "sha256:7d033b115f66e5327dea0a3fd7c9a3dbfa53841575daf27ce2ce409956d901d4", + "sha256:846fd135d3ee33c1bad387602a405d808cb99a9a7a47299bfd0e1d25dfb2fedd" + ], + "version": "==19.9.0" + }, "six": { "hashes": [ "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9", @@ -1000,6 +1015,12 @@ ], "version": "==2.1.0" }, + "stringcase": { + "hashes": [ + "sha256:48a06980661908efe8d9d34eab2b6c13aefa2163b3ced26972902e3bdfd87008" + ], + "version": "==1.2.0" + }, "toml": { "hashes": [ "sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c", @@ -1065,10 +1086,10 @@ }, "zipp": { "hashes": [ - "sha256:12248a63bbdf7548f89cb4c7cda4681e537031eda29c02ea29674bc6854460c2", - "sha256:7c0f8e91abc0dc07a5068f315c52cb30c66bfbc581e5b50704c8a2f6ebae794a" + "sha256:aa36550ff0c0b7ef7fa639055d797116ee891440eac1a56f378e2d3179e0320b", + "sha256:c599e4d75c98f6798c509911d08a22e6c021d074469042177c8c86fb92eefd96" ], - "version": "==3.0.0" + "version": "==3.1.0" } } } From 41945a06af197f68719799a6676578922d192ad3 Mon Sep 17 00:00:00 2001 From: Michael Fekadu Date: Wed, 4 Mar 2020 18:54:42 -0800 Subject: [PATCH 2/3] generate type annotations by apply monkeytype stubs on nimbus.py monkeytype docs: https://github.com/Instagram/MonkeyType *************************************************** pipenv install MonkeyType --dev pipenv shell monkeytype --verbose run nimbus.py ``` $ monkeytype list-modules QA database_wrapper nimbus_nlp.NIMBUS_NLP nimbus_nlp.question_classifier nimbus_nlp.save_and_load_model ``` monkeytype --verbose stub QA monkeytype --verbose stub database_wrapper monkeytype --verbose stub nimbus_nlp.NIMBUS_NLP monkeytype --verbose stub nimbus_nlp.question_classifier monkeytype --verbose stub nimbus_nlp.save_and_load_model monkeytype --verbose apply QA monkeytype --verbose apply database_wrapper monkeytype --verbose apply nimbus_nlp.NIMBUS_NLP monkeytype --verbose apply nimbus_nlp.question_classifier monkeytype --verbose apply nimbus_nlp.save_and_load_model --- QA.py | 22 ++++++++++++---------- database_wrapper.py | 6 +++--- nimbus_nlp/NIMBUS_NLP.py | 17 ++++++++++------- nimbus_nlp/question_classifier.py | 20 +++++++++++--------- nimbus_nlp/save_and_load_model.py | 3 ++- 5 files changed, 38 insertions(+), 30 deletions(-) diff --git a/QA.py b/QA.py index 250b065..d2c2699 100644 --- a/QA.py +++ b/QA.py @@ -9,6 +9,8 @@ from database_wrapper import NimbusMySQLAlchemy from pandas import read_csv +from functools import partial +from typing import Dict, List Extracted_Vars = Dict[str, Any] DB_Data = Dict[str, Any] DB_Query = Callable[[Extracted_Vars], DB_Data] @@ -33,7 +35,7 @@ class QA: A class for wrapping functions used to answer a question. 
""" - def __init__(self, q_format, db_query, format_answer): + def __init__(self, q_format: str, db_query: partial, format_answer: partial) -> None: """ Args: q_format (str): Question format string @@ -49,13 +51,13 @@ def __init__(self, q_format, db_query, format_answer): self.db_query = db_query self.format_answer = format_answer - def _get_data_from_db(self, extracted_vars): + def _get_data_from_db(self, extracted_vars: Dict[str, str]) -> str: return self.db_query(extracted_vars) - def _format_answer(self, extracted_vars, db_data): + def _format_answer(self, extracted_vars: Dict[str, str], db_data: str) -> str: return self.format_answer(extracted_vars, db_data) - def answer(self, extracted_vars): + def answer(self, extracted_vars: Dict[str, str]) -> str: db_data = self._get_data_from_db(extracted_vars) return self._format_answer(extracted_vars, db_data) @@ -66,7 +68,7 @@ def __hash__(self): return hash(self.q_format) -def create_qa_mapping(qa_list): +def create_qa_mapping(qa_list: List[QA]) -> Dict[str, QA]: """ Creates a dictionary whose values are QA objects and keys are the question formats of those QA objects. @@ -146,18 +148,18 @@ def create_qa_mapping(qa_list): # return functools.partial(_single_var_string_sub, a_format) -def _string_sub(a_format, extracted_info, db_data): +def _string_sub(a_format: str, extracted_info: Dict[str, str], db_data: str) -> str: if db_data is None: return None else: return a_format.format(ex=extracted_info['normalized entity'], db=db_data) -def string_sub(a_format): +def string_sub(a_format: str) -> partial: return functools.partial(_string_sub, a_format) -def _get_property(prop, extracted_info): +def _get_property(prop: str, extracted_info: Dict[str, str]) -> str: ent_string = extracted_info["normalized entity"] ent = tag_lookup[extracted_info['tag']] try: @@ -168,7 +170,7 @@ def _get_property(prop, extracted_info): return value -def get_property(prop): +def get_property(prop: str) -> partial: return functools.partial(_get_property, prop) @@ -186,7 +188,7 @@ def yes_no(a_format, pred=None): return functools.partial(_yes_no, a_format, pred) -def generate_fact_QA(csv): +def generate_fact_QA(csv: str) -> List[QA]: df = read_csv(csv) text_in_brackets = r'\[[^\[\]]*\]' qa_objs = [] diff --git a/database_wrapper.py b/database_wrapper.py index c5b0849..7f4b9b9 100755 --- a/database_wrapper.py +++ b/database_wrapper.py @@ -409,7 +409,7 @@ def __safe_create(SQLAlchemy_object): __safe_create(self.Locations) __safe_create(self.QuestionAnswerPair) - def _create_database_session(self): + def _create_database_session(self) -> None: Session = sessionmaker(bind=self.engine) self.session = Session() print("initialized database session") @@ -435,13 +435,13 @@ def return_qa_pair_csv(self): def partial_fuzzy_match(self, tag_value, identifier): return fuzz.partial_ratio(tag_value, identifier) - def full_fuzzy_match(self, tag_value, identifier): + def full_fuzzy_match(self, tag_value: str, identifier: str) -> int: return fuzz.ratio(tag_value, identifier) def get_property_from_entity( self, prop: str, entity: UNION_ENTITIES, identifier: str, tag_column_map: dict = default_tag_column_dict - ): + ) -> str: """ This function implements the abstractmethod to get a column of values from a NimbusDatabase entity. 
diff --git a/nimbus_nlp/NIMBUS_NLP.py b/nimbus_nlp/NIMBUS_NLP.py
index d525d8f..981e89d 100644
--- a/nimbus_nlp/NIMBUS_NLP.py
+++ b/nimbus_nlp/NIMBUS_NLP.py
@@ -19,10 +19,13 @@
 # Temporary import for the classifier
 from nimbus_nlp.question_classifier import QuestionClassifier
 
+from google.cloud.automl_v1.types import PredictResponse
+from monkeytype.encoding import DUMMY_NAME
+from typing import Dict
 
 class NIMBUS_NLP:
 
     @staticmethod
-    def predict_question(input_question):
+    def predict_question(input_question: str) -> Dict[str, str]:
         '''
         Runs through variable extraction and the question classifier
         to predict the intended question.
@@ -55,7 +58,7 @@ def predict_question(input_question):
 
 class Variable_Extraction:
 
-    def __init__(self, config_file: str = "config.json"):
+    def __init__(self, config_file: str = "config.json") -> None:
         with open(config_file) as json_data_file:
             config = json.load(json_data_file)
 
@@ -70,7 +73,7 @@ def __init__(self, config_file: str = "config.json"):
         # TODO: consider does this even do anything useful?
         os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credential_path
 
-    def inline_text_payload(self, sent):
+    def inline_text_payload(self, sent: str) -> Dict[str, DUMMY_NAME]:
         '''
         Converts the input sentence into GCP's callable format
 
@@ -82,7 +85,7 @@ def inline_text_payload(self, sent):
         return {'text_snippet': {'content': sent, 'mime_type': 'text/plain'} }
 
-    def get_prediction(self, sent):
+    def get_prediction(self, sent: str) -> PredictResponse:
         '''
         Obtains the prediction from the input sentence and returns
         the normalized sentence
@@ -109,7 +112,7 @@ def get_prediction(self, sent):
         # Return the output of the API call
         return request
 
-    def extract_variables(self, sent):
+    def extract_variables(self, sent: str) -> Dict[str, str]:
         '''
         Takes the prediction and replaces the entity with its
         corresponding tag
@@ -146,7 +149,7 @@ def extract_variables(self, sent):
         }
 
     @staticmethod
-    def excess_word_removal(entity, tag):
+    def excess_word_removal(entity: str, tag: str) -> str:
         '''
         Checks the tag and determines which excess word
         removal function to use
@@ -163,7 +166,7 @@ def excess_word_removal(entity, tag):
         return entity
 
     @staticmethod
-    def strip_titles(entity):
+    def strip_titles(entity: str) -> str:
         '''
         Strips titles from input entities

diff --git a/nimbus_nlp/question_classifier.py b/nimbus_nlp/question_classifier.py
index 1890bcf..9fa56bb 100644
--- a/nimbus_nlp/question_classifier.py
+++ b/nimbus_nlp/question_classifier.py
@@ -13,9 +13,11 @@
 # TODO: move the functionality in this module into class(es), so that it can be more easily used as a dependency
 
+from spacy.tokens.token import Token
+from typing import Dict, List, Tuple
 
 class QuestionClassifier:
-    def __init__(self):
+    def __init__(self) -> None:
         nltk.download('stopwords')
         nltk.download('punkt')
         nltk.download('averaged_perceptron_tagger')
@@ -38,12 +40,12 @@ def train_model(self):
 
         save_model(self.classifier, "nlp-model")
 
-    def load_latest_classifier(self):
+    def load_latest_classifier(self) -> None:
         self.classifier = load_latest_model()
         with open(PROJECT_DIR+ '/models/features/overall_features.json', 'r') as fp:
             self.overall_features = json.load(fp)
 
-    def get_question_features(self, question):
+    def get_question_features(self, question: str) -> Dict[str, int]:
         # print("using new algorithm")
         """
         Method to extract features from each individual question.
@@ -121,7 +123,7 @@ def get_question_features_old_algorithm(self, question):
     # Note: this method of extracting the main verb is not perfect, but
     # for single sentence questions that should have no ambiguity about the main verb,
     # it should be sufficient.
-    def extract_main_verb(self, question):
+    def extract_main_verb(self, question: str) -> Token:
         doc = self.nlp(question)
         sents = list(doc.sents)
         if len(sents) == 0:
@@ -129,10 +131,10 @@ def extract_main_verb(self, question):
 
         return sents[0].root
 
-    def get_lemmas(self, words):
+    def get_lemmas(self, words: List[str]) -> List[str]:
         return [self.nlp(word)[0].lemma_ for word in words]
 
-    def is_wh_word(self, pos):
+    def is_wh_word(self, pos: str) -> bool:
         return pos in self.WH_WORDS
 
     def build_question_classifier(self):
@@ -174,7 +176,7 @@ def build_question_classifier(self):
 
         return new_classifier
 
-    def filterWHTags(self, question):
+    def filterWHTags(self, question: str) -> List[Tuple[str, str]]:
         # ADD ALL VARIABLES TO THE FEATURE DICT WITH A WEIGHT OF 90
         matches = re.findall(r'(\[(.*?)\])', question)
         for match in matches:
@@ -193,7 +195,7 @@ def filterWHTags(self, question):
             tag for tag in question_tags if self.is_wh_word(tag[1])]
         return question_tags
 
-    def validate_WH(self, test_question, predicted_question):
+    def validate_WH(self, test_question: str, predicted_question: str) -> bool:
         """
         Assumes that only 1 WH word exists
         Returns True if the WH word in the test question equals the
@@ -221,7 +223,7 @@ def validate_WH(self, test_question, predicted_question):
             i += 1
         return wh_match
 
-    def classify_question(self, test_question):
+    def classify_question(self, test_question: str) -> str:
         """
         Match a user query with a question in the database based on the
         classifier we trained and overall features we calculated.
         Return relevant question.

diff --git a/nimbus_nlp/save_and_load_model.py b/nimbus_nlp/save_and_load_model.py
index 375ae12..2f0f9da 100644
--- a/nimbus_nlp/save_and_load_model.py
+++ b/nimbus_nlp/save_and_load_model.py
@@ -12,6 +12,7 @@
 from os.path import isfile, join
 import re
+from sklearn.neighbors.classification import KNeighborsClassifier
 
 PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))
 now = datetime.now()
 date_time = now.strftime("_%m_%d_%Y_%H_%M_%S")
@@ -29,7 +30,7 @@ def load_model(model_name):
     train_path = PROJECT_DIR + '/models/classification/' + model_name + '.joblib'
     return joblib.load(train_path)
 
-def load_latest_model():
+def load_latest_model() -> KNeighborsClassifier:
     # https://stackoverflow.com/a/39327156
     train_path = PROJECT_DIR + '/models/classification/*'
     list_of_files = glob.glob(train_path)
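One caveat about the annotations this patch applies: MonkeyType only records the types its traces actually observed, so some generated signatures are narrower than the code allows. For example, `_string_sub` in QA.py is annotated `-> str` even though it returns None when `db_data` is missing, and `inline_text_payload` picked up MonkeyType's `DUMMY_NAME` placeholder. A hand-tightened sketch of that one signature, assuming Optional semantics are what is intended; this is illustrative only and not part of the applied patch:

```
# Sketch, not part of the patch: widen MonkeyType's observed `-> str`
# to Optional[str] so the early `return None` branch is covered.
import functools
from typing import Dict, Optional


def _string_sub(a_format: str, extracted_info: Dict[str, str],
                db_data: Optional[str]) -> Optional[str]:
    if db_data is None:
        return None
    return a_format.format(ex=extracted_info['normalized entity'], db=db_data)


def string_sub(a_format: str) -> functools.partial:
    return functools.partial(_string_sub, a_format)
```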
From 1b83bc614a4b278eba50cf82e7801cefa6894369 Mon Sep 17 00:00:00 2001
From: Michael Fekadu
Date: Wed, 4 Mar 2020 19:14:32 -0800
Subject: [PATCH 3/3] allow any version of pyre-check because it's a dev-dependency, not a production dependency

---
 Pipfile      | 2 +-
 Pipfile.lock | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Pipfile b/Pipfile
index dc31126..a78b4c7 100644
--- a/Pipfile
+++ b/Pipfile
@@ -19,7 +19,7 @@ flake8 = "==3.7.9"
 hypothesis = "==5.3.1"
 pytest = "==5.3.4"
 ## type-checking
-pyre-check = "==0.0.41"
+pyre-check = "*"
 ## like the Unix `make` but better
 invoke = "==1.4.1"
 monkeytype = "*"

diff --git a/Pipfile.lock b/Pipfile.lock
index 3d5dbb3..884f711 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "348fee5e0ee39fb3206a8e4f15f486f53691109f3e3035829eb5a61d64ccdd6c"
+            "sha256": "4040c61542e19c50f3a11a971ef93b181f9f548859f3acd8bc350ccbab0c425f"
         },
         "pipfile-spec": 6,
         "requires": {