From edaea7ff6e4a3418856db504790b6f9b9d0b6afc Mon Sep 17 00:00:00 2001
From: Cameron Toy <cameron-toy@users.noreply.github.com>
Date: Mon, 2 Mar 2020 13:41:50 -0800
Subject: [PATCH 1/4] performance improvements

---
 database_wrapper.py               |   3 +-
 flask_api.py                      |   2 +
 nimbus.py                         |  17 +++--
 nimbus_nlp/NIMBUS_NLP.py          | 104 ++++++++++++++----------------
 nimbus_nlp/question_classifier.py |  15 +++--
 5 files changed, 75 insertions(+), 66 deletions(-)

diff --git a/database_wrapper.py b/database_wrapper.py
index b43d5ff..0e51b16 100755
--- a/database_wrapper.py
+++ b/database_wrapper.py
@@ -92,6 +92,7 @@
     ]
 }
 
+
 class BadDictionaryKeyError(Exception):
     """Raised when the given JSON/dict is missing some required fields.
 
@@ -154,7 +155,7 @@ def __init__(self, message: str):
 
 def get_current_time():
     """
-    Useful for answering questions like "Is prof availible now/tomorrow?"
+    Useful for answering questions like "Is prof available now/tomorrow?"
     """
     pass
 
diff --git a/flask_api.py b/flask_api.py
index d6478c5..4df2438 100755
--- a/flask_api.py
+++ b/flask_api.py
@@ -4,6 +4,7 @@
 Contains all the handlers for the API. Also the main code to run Flask.
 """
 import json
+import requests
 
 from flask import Flask, jsonify, request
 from flask_cors import CORS
@@ -275,3 +276,4 @@ def convert_to_mfcc():
     app.run(host='0.0.0.0',
             debug=gunicorn_config.DEBUG_MODE,
             port=gunicorn_config.PORT)
+
diff --git a/nimbus.py b/nimbus.py
index 2ef8f87..f37fe1a 100644
--- a/nimbus.py
+++ b/nimbus.py
@@ -1,5 +1,5 @@
 from QA import create_qa_mapping, generate_fact_QA
-from nimbus_nlp.NIMBUS_NLP import NIMBUS_NLP
+from nimbus_nlp.NIMBUS_NLP import NimbusNLP
 
 
 class Nimbus:
@@ -8,24 +8,33 @@ def __init__(self):
         self.qa_dict = create_qa_mapping(
             generate_fact_QA("q_a_pairs.csv")
         )
+        self.nimbus_nlp = NimbusNLP()
 
     def answer_question(self, question):
-        ans_dict = NIMBUS_NLP.predict_question(question)
+        ans_dict = self.nimbus_nlp.predict_question(question)
         print(ans_dict)
         try:
             qa = self.qa_dict[ans_dict["question class"]]
         except KeyError:
+            # Returned when the predicted question class has no entry in qa_dict. This occurs
+            # because the training set is broader than the answerable question set.
             return "I'm sorry, I don't understand. Please try another question."
         else:
             answer = qa.answer(ans_dict)
             if answer is None:
+                # Returned when qa.answer() yields None (e.g. a database query returned a null value).
+                # Should be handled in the QA class in the future.
                 return("I'm sorry, I understand your question but was unable to find an answer. "
                        "Please try another question.")
             else:
                 return answer
 
+
 if __name__ == "__main__":
     nimbus = Nimbus()
+    # print(nimbus.answer_question("What is Irene's phone number?"))
+    # print(nimbus.answer_question("What is Dr. Khosmood's email?"))
+    # print(nimbus.answer_question("What are the prerequisites for CPE 357?"))
     while True:
-        question = input("Enter a question: ")
-        print(nimbus.answer_question(question))
\ No newline at end of file
+        q = input("Enter a question: ")
+        print(nimbus.answer_question(q))
\ No newline at end of file
diff --git a/nimbus_nlp/NIMBUS_NLP.py b/nimbus_nlp/NIMBUS_NLP.py
index d525d8f..b93cdd7 100644
--- a/nimbus_nlp/NIMBUS_NLP.py
+++ b/nimbus_nlp/NIMBUS_NLP.py
@@ -1,16 +1,3 @@
-
-import nltk
-import numpy as np
-import os
-import pandas as pd
-import re
-import sklearn.neighbors
-import spacy
-import sys
-
-from google.api_core.client_options import ClientOptions
-from google.cloud import automl_v1
-from google.cloud.automl_v1.proto import service_pb2
 import os
 import json
 from google.api_core.client_options import ClientOptions
@@ -19,41 +6,45 @@
 # Temporary import for the classifier
 from nimbus_nlp.question_classifier import QuestionClassifier
 
-class NIMBUS_NLP:
 
-    @staticmethod
-    def predict_question(input_question):
-        '''
+# Made this an instantiable class to prevent the overhead of instantiating
+# a variable extractor and question classifier for every question.
+# Consider: Does this even need to be a class? Its functionality could be
+# moved to the Nimbus class of nimbus.py
+class NimbusNLP:
+
+    def __init__(self):
+        # Instantiate variable extractor and question classifier
+        self.variable_extractor = VariableExtractor()
+        self.classifier = QuestionClassifier()
+        # Load classifier model
+        self.classifier.load_latest_classifier()
+
+    def predict_question(self, input_question):
+        """
         Runs through variable extraction and the question classifier to
         predict the intended question.
 
         Args: input_question (string) - user input question to answer
 
-        Return: nlp_props (dict) - contains the user's input question,
+        Return: nlp_props (dict) - contains the user's input question,
                                    the variable extracted input question,
                                    the entity extracted, and the predicted
                                    answer
 
-        '''
-
-        # Instantiate the variable extraction class
-        variable_extraction = Variable_Extraction()
+        """
 
-        # Obtain the properties from variable extraction
-        nlp_props = variable_extraction.extract_variables(input_question)
+        # Get dictionary of extracted variables + info from question
+        nlp_props = self.variable_extractor.extract_variables(input_question)
 
-        # Instantiate the question classifier class
-        classifier = QuestionClassifier()
-        classifier.load_latest_classifier()
-        
-        # Classify the question and add it to the nlp properties dictionary 
-        nlp_props["question class"] = classifier.\
-                classify_question(nlp_props["normalized question"])
+        # Add classified question to nlp_props dictionary
+        nlp_props["question class"] = self.classifier.\
+            classify_question(nlp_props["normalized question"])
         
         return nlp_props
     
 
-class Variable_Extraction:
+class VariableExtractor:
 
     def __init__(self, config_file: str = "config.json"):
 
@@ -68,34 +59,35 @@ def __init__(self, config_file: str = "config.json"):
 
         credential_path = os.getcwd() + "/auth.json"
         # TODO: consider does this even do anything useful?
-        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credential_path
+        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credential_path
 
-    def inline_text_payload(self, sent):
-        '''
-        Converts the input sentence into GCP's callable format
+    @staticmethod
+    def inline_text_payload(sent):
+        """
+        Converts the input sentence into GCP's callable format
 
         Args: sent (string) - input sentence
 
         Return: (dict) - GCP NER input format
 
-        '''
+        """
 
-        return {'text_snippet': {'content': sent, 'mime_type': 'text/plain'} }
+        return {"text_snippet": {"content": sent, "mime_type": "text/plain"} }
 
     def get_prediction(self, sent):
-        '''
+        """
         Obtains the prediction from the input sentence and returns the
         normalized sentence
 
         Args: sent (string) - input sentence
 
         Return: request (PredictObject) - predictiton output
-        ''' 
+        """
         
         params = {}
         
         # Setup API 
-        options = ClientOptions(api_endpoint='automl.googleapis.com')
+        options = ClientOptions(api_endpoint="automl.googleapis.com")
         
         # Create prediction object
         predictor = automl_v1.PredictionServiceClient(client_options=options)
@@ -110,7 +102,7 @@ def get_prediction(self, sent):
         return request
 
     def extract_variables(self, sent):
-        '''
+        """
         Takes the prediction and replaces the entity with its corresponding tag
 
         Args: sent (string) - input sentence
@@ -120,7 +112,7 @@ def extract_variables(self, sent):
                             "normalized entity" - stripped entity
                             "input question" - input question from the user
                             "normalized question" - variable-replaced question
-        '''
+        """
 
         # Make the prediction
         request = self.get_prediction(sent)
@@ -132,10 +124,10 @@ def extract_variables(self, sent):
         tag = request.payload[0].display_name
 
         # Removes excessive words from the entity
-        normalized_entity = Variable_Extraction.excess_word_removal(entity, tag)
+        normalized_entity = VariableExtractor.excess_word_removal(entity, tag)
 
         # Replaces the entity of input question with its corresponding tag
-        normalized_question = sent.replace(entity, '[' + tag + ']')
+        normalized_question = sent.replace(entity, "[" + tag + "]")
         
         return {
                     "entity"                : entity,
@@ -147,31 +139,31 @@ def extract_variables(self, sent):
 
     @staticmethod    
     def excess_word_removal(entity, tag):
-        '''
+        """
         Checks the tag and determines which excess word removal function to use
 
         Args: entity (string) - extracted entity from the input question
 
         Return: (string) - returns the normalized entity string
 
-        '''
+        """
 
-        if (tag == 'PROF'):
-            return Variable_Extraction.strip_titles(entity)
+        if tag == "PROF":
+            return VariableExtractor.strip_titles(entity)
 
         else:
             return entity
 
     @staticmethod
     def strip_titles(entity):
-        '''
+        """
         Strips titles from input entities
 
         Args: entity (string) - extracted entity from the input question
 
         Return: norm_entity (string) - the normalized, title-stripped entity
 
-        '''
+        """
 
         # list of titles for removal
         titles = {"professor", "dr.", "dr", "doctor", "prof", "instructor", "mrs.",\
@@ -189,12 +181,16 @@ def strip_titles(entity):
         # if there is no title in the word
         return entity
 
+
 #TODO: Add the Question_Classifier code directly into this file
+# Is this really necessary? Separation of dependencies might be good here.
 class Question_Classifier:
     pass
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
+    nimbus_nlp = NimbusNLP()
     while True:
         question = input("Enter a question: ")
-        answer = NIMBUS_NLP.predict_question(question)
-        print(answer)
+        answer = nimbus_nlp.predict_question(question)
+        print(answer)
\ No newline at end of file
diff --git a/nimbus_nlp/question_classifier.py b/nimbus_nlp/question_classifier.py
index e0c6882..b376f73 100644
--- a/nimbus_nlp/question_classifier.py
+++ b/nimbus_nlp/question_classifier.py
@@ -4,8 +4,6 @@
 import numpy as np
 import sklearn.neighbors
 import pandas as pd
-import sys
-import json
 from nimbus_nlp.save_and_load_model import save_model, load_latest_model, PROJECT_DIR
 import json
 
@@ -16,7 +14,13 @@
 class QuestionClassifier:
 
     def __init__(self):
-        nltk.download('stopwords')
+        # Download the stopwords corpus only if it is missing. The lazy import of
+        # nltk.corpus.stopwords does not raise for a missing corpus; data.find() does.
+        try:
+            nltk.data.find('corpora/stopwords')
+        except LookupError:
+            nltk.download('stopwords')
+
         self.classifier = None
         self.nlp = spacy.load('en_core_web_sm')
         self.WH_WORDS = {'WDT', 'WP', 'WP$', 'WRB'}
@@ -25,20 +29,17 @@ def __init__(self):
     def train_model(self):
         self.save_model = save_model
 
-
         # REPLACE WITH API EVENTUALLY
         self.file_path = "question_set_clean.csv"
 
         # The possible WH word tags returned through NLTK part of speech tagging
 
-
         self.classifier = self.build_question_classifier()
         save_model(self.classifier, "nlp-model")
 
-
     def load_latest_classifier(self):
         self.classifier = load_latest_model()
-        with open(PROJECT_DIR+ '/models/features/overall_features.json', 'r') as fp:
+        with open(PROJECT_DIR + '/models/features/overall_features.json', 'r') as fp:
             self.overall_features = json.load(fp)
 
     def get_question_features(self, question):

From c47f159c9522ae41ea52fbedbcc9c3ed0b76254b Mon Sep 17 00:00:00 2001
From: Cameron Toy <cameron-toy@users.noreply.github.com>
Date: Mon, 2 Mar 2020 14:01:03 -0800
Subject: [PATCH 2/4] removed imports used for testing

---
 flask_api.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/flask_api.py b/flask_api.py
index 4df2438..ae2eeb3 100755
--- a/flask_api.py
+++ b/flask_api.py
@@ -3,9 +3,6 @@
 
 Contains all the handlers for the API. Also the main code to run Flask.
 """
-import json
-import requests
-
 from flask import Flask, jsonify, request
 from flask_cors import CORS
 from pydrive.auth import GoogleAuth

From 4ec6c808e039675bf10a61de406d64c546a14d57 Mon Sep 17 00:00:00 2001
From: Cameron Toy <cameron-toy@users.noreply.github.com>
Date: Tue, 7 Apr 2020 18:45:43 -0700
Subject: [PATCH 3/4] Added Professor/Section view access

---
 Entity/ProfessorSectionView.py | 46 ++++++++++++++++++++++++++++++++++
 database_wrapper.py            | 37 +++++++++++++++++++++++++--
 test_view_access.py            |  8 ++++++
 3 files changed, 89 insertions(+), 2 deletions(-)
 create mode 100644 Entity/ProfessorSectionView.py
 create mode 100644 test_view_access.py

diff --git a/Entity/ProfessorSectionView.py b/Entity/ProfessorSectionView.py
new file mode 100644
index 0000000..70d05aa
--- /dev/null
+++ b/Entity/ProfessorSectionView.py
@@ -0,0 +1,46 @@
+from sqlalchemy import Column, Integer, String, Enum, Text
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.dialects.mysql import SET
+import enum
+
+Base = declarative_base()
+
+
+class SectionType(enum.Enum):
+    activity = Act = 0
+    seminar = Sem = 1
+    independent = Ind = 2
+    lab = 3
+    lecture = Lec = 4
+
+
+class ProfessorSectionView(Base):
+    __tablename__ = "Professor_Teaches_Section_2"
+    id = Column(Integer, primary_key=True)
+    id_sections = Column(Integer)
+    profAliasId = Column(Integer)
+    section_name = Column(String(255))
+    instructor = Column(String(255))
+    profEmailAlias = Column(String(255))
+    title = Column(String(255))
+    phone = Column(String(255))
+    office = Column(String(255))
+    type = Column(Enum(SectionType))
+    days = Column(SET('M', 'T', 'W', 'R', 'F'))
+    start = Column(String(255))
+    end = Column(String(255))
+    location = Column(String(255))
+    department = Column(String(255))
+    firstName = Column(String(50))
+    lastName = Column(String(50))
+    phoneNumber = Column(String(20))
+    researchInterests = Column(Text)
+    email = Column(String(255))
+
+    def __repr__(self):
+        D = self.__dict__
+        attributes = [
+            f"{k}={D.get(k)}" for k in self.__dir__() if not k.startswith("_")
+        ]
+        attributes_string = ", ".join(attributes)
+        return f"{self.__class__.__name__}({attributes_string})"
diff --git a/database_wrapper.py b/database_wrapper.py
index f541ff9..e8beca0 100755
--- a/database_wrapper.py
+++ b/database_wrapper.py
@@ -27,7 +27,8 @@
 from Entity.QuestionAnswerPair import QuestionAnswerPair, AnswerType
 from Entity.Professors import Professors, ProfessorsProperties
 from Entity.Clubs import Clubs
-from Entity.Sections import Sections, SectionType
+from Entity.Sections import Sections
+from Entity.ProfessorSectionView import ProfessorSectionView
 
 from fuzzywuzzy import fuzz
 
@@ -50,6 +51,7 @@
     Professors: {"firstName", "lastName"},
     Clubs: {"club_name"},
     Sections: {"section_name"},
+    ProfessorSectionView: {"firstName", "lastName"}
 }
 
 EXPECTED_KEYS_BY_ENTITY = {
@@ -343,6 +345,7 @@ def __init__(self, config_file: str = "config.json") -> None:
         self.AudioSampleMetaData = AudioSampleMetaData
         self.Locations = Locations
         self.QuestionAnswerPair = QuestionAnswerPair
+        self.ProfessorSectionViews = ProfessorSectionView
         self.inspector = inspect(self.engine)
         self._create_database_session()
         print("initialized NimbusMySQLAlchemy")
@@ -415,6 +418,7 @@ def __safe_create(SQLAlchemy_object):
         __safe_create(self.AudioSampleMetaData)
         __safe_create(self.Locations)
         __safe_create(self.QuestionAnswerPair)
+        __safe_create(self.ProfessorSectionViews)
 
     def _create_database_session(self):
         Session = sessionmaker(bind=self.engine)
@@ -466,6 +470,35 @@ def get_property_from_entity(
         )
         >>> ["foaad@calpoly.edu"]
 
+        Args:
+            prop: the relevant property value to retrieve from matching entities
+            entity: the type of entity we want to get the property from
+            identifier: a string that identifies the entity in some way (i.e., a professor's name)
+            tag_column_map: a dictionary mapping entity types to columns that identify the entities
+                ex:
+                {Professors: {"firstName", "lastName"}}
+
+        Returns:
+            The closest value of `prop`,
+            such that the `entity` matches `identifier`.
+        """
+        matches = self._get_property_from_entity(
+            prop, entity, identifier, tag_column_map
+        )
+        # The helper returns None when nothing matches; keep returning None
+        # (as before the split) instead of raising TypeError on None[-1].
+        return matches[-1][2] if matches else None
+
+    def _get_property_from_entity(
+        self,
+        prop: str,
+        entity: UNION_ENTITIES,
+        identifier: str,
+        tag_column_map: dict = default_tag_column_dict,
+    ):
+        """
+        Returns a full list of matching entities. Used by get_property_from_entity()
+
         Args:
             prop: the relevant property value to retrieve from matching entities
             entity: the type of entity we want to get the property from
@@ -506,7 +539,7 @@ def get_property_from_entity(
             return None
 
         sorted_results = sorted(results, key=lambda pair: pair[0])
-        return sorted_results[-1][2]
+        return sorted_results
 
     def get_course_properties(
         self, department: str, course_num: Union[str, int]
diff --git a/test_view_access.py b/test_view_access.py
new file mode 100644
index 0000000..5bb7458
--- /dev/null
+++ b/test_view_access.py
@@ -0,0 +1,8 @@
+from QA import db
+from Entity.ProfessorSectionView import ProfessorSectionView
+
+print(db._get_property_from_entity(
+    "section_name",
+    ProfessorSectionView,
+    "Irene Humer"
+))
\ No newline at end of file

From 3c93013d6dca30b7e134274a363defd081dee2a5 Mon Sep 17 00:00:00 2001
From: Cameron Toy <38936057+cameron-toy@users.noreply.github.com>
Date: Sat, 18 Apr 2020 22:39:56 -0700
Subject: [PATCH 4/4] Changed __tablename__

Changed from "Professor_Teaches_Section_2" to "Professor_Teaches_Section"
---
 Entity/ProfessorSectionView.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Entity/ProfessorSectionView.py b/Entity/ProfessorSectionView.py
index 70d05aa..701a05e 100644
--- a/Entity/ProfessorSectionView.py
+++ b/Entity/ProfessorSectionView.py
@@ -15,7 +15,7 @@ class SectionType(enum.Enum):
 
 
 class ProfessorSectionView(Base):
-    __tablename__ = "Professor_Teaches_Section_2"
+    __tablename__ = "Professor_Teaches_Section"
     id = Column(Integer, primary_key=True)
     id_sections = Column(Integer)
     profAliasId = Column(Integer)