Skip to content

Commit

Permalink
Requested Changes
Browse files Browse the repository at this point in the history
  • Loading branch information
Ayatallah committed Jun 14, 2019
1 parent 8158e4e commit 84e9269
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 59 deletions.
6 changes: 1 addition & 5 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,5 @@ crashlytics.properties
crashlytics-build.properties
fabric.properties

# .idea and models
# .idea
.idea/
classifiers/*.model
classifiers/*.model.*
classifiers/*.pkl
data/*.csv
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
- sudo rm -f /etc/boto.cfg
- pip install -r requirements.txt
script:
- python3 text_classifier.py Test dataset.csv d2v.model joblib_model.pkl
- python3 text_classifier.py
15 changes: 6 additions & 9 deletions models/classifier_model.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from .model import Model
from .doc2vec_model import doc2VecModel

import logging
import numpy as np
import os
import inspect

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from .model import Model
from .doc2vec_model import doc2VecModel


logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
base_file_path = inspect.getframeinfo(inspect.currentframe()).filename
Expand All @@ -30,12 +33,6 @@ def train_model(self, d2v, training_vectors, training_labels):
logging.info(
'Training F1 score: {}'.format(f1_score(training_labels, training_predictions, average='weighted')))

def save_model(self, filename):
    """Placeholder for persisting the trained classification model.

    Only emits a log message: nothing is written and ``filename`` is unused.
    """
    logging.info("Saving trained classification model")

def load_model(self, filename):
    """Placeholder for restoring a trained classification model.

    Only emits a log message: nothing is read and ``filename`` is unused.
    """
    logging.info("Loading trained classification model")

def test_model(self, d2v, testing_vectors, testing_labels):
logging.info("Classifier testing")
test_vectors = doc2VecModel.get_vectors(d2v, len(testing_vectors), 300, 'Test')
Expand Down
20 changes: 4 additions & 16 deletions models/doc2vec_model.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from .model import Model

import logging
import random
import os
import inspect

import numpy as np
from gensim.models import doc2vec
from gensim.models.doc2vec import Doc2Vec

from .model import Model


logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
base_file_path = inspect.getframeinfo(inspect.currentframe()).filename
Expand Down Expand Up @@ -47,20 +49,6 @@ def train_model(self):
# fix the learning rate, no decay
self.model.min_alpha = self.model.alpha

def save_model(self, filename):
    """Persist the trained Doc2Vec model.

    ``filename`` is resolved relative to ``classifiers_path`` and the
    wrapped model's own ``save`` method does the writing.
    """
    logging.info("Saving trained Doc2Vec model")
    self.model.save(os.path.join(classifiers_path, filename))

def load_model(self, filename):
    """Load a previously saved Doc2Vec model into ``self.model``.

    ``filename`` is resolved relative to ``classifiers_path``. When no file
    exists at that location, ``self.model`` is set to ``None`` instead of
    raising.
    """
    logging.info("Loading trained Doc2Vec model")
    model_path = os.path.join(classifiers_path, filename)
    # A missing file is not treated as an error; the model is simply absent.
    self.model = Doc2Vec.load(model_path) if os.path.isfile(model_path) else None

def get_vectors(self, corpus_size, vectors_size, vectors_type):
"""
Get vectors from trained doc2vec model
Expand Down
7 changes: 0 additions & 7 deletions models/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,5 @@ def initialize_model(self):
def train_model(self):
pass

@abstractmethod
def save_model(self):
    """Abstract hook for persisting a model; has no behaviour of its own."""
    # NOTE(review): the save_model implementations shown for
    # classifier_model.py and doc2vec_model.py take a `filename` argument
    # that this declaration lacks — confirm the intended signature.
    pass

@abstractmethod
def load_model(self):
    """Abstract hook for restoring a model; has no behaviour of its own."""
    # NOTE(review): the load_model implementations shown for
    # classifier_model.py and doc2vec_model.py take a `filename` argument
    # that this declaration lacks — confirm the intended signature.
    pass


37 changes: 16 additions & 21 deletions text_classifier.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
import pandas as pd
import logging
import sys, getopt
import os, inspect
import numpy as np
from sklearn.model_selection import train_test_split
from models.doc2vec_model import doc2VecModel
from models.classifier_model import classifierModel

import os
import logging
import inspect

import pandas as pd
from sklearn.model_selection import train_test_split


logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
base_file_path = inspect.getframeinfo(inspect.currentframe()).filename
project_dir_path = os.path.dirname(os.path.abspath(base_file_path))
data_path = os.path.join(project_dir_path, 'data')
default_classifier = os.path.join(project_dir_path, 'classifiers','joblib_model.pkl')
default_classifier = os.path.join(project_dir_path, 'classifiers','logreg_model.pkl')
default_doc2vec= os.path.join(project_dir_path, 'classifiers','d2v.model')
default_dataset= os.path.join(data_path, 'dataset.csv')

Expand Down Expand Up @@ -49,24 +51,17 @@ def train_classifier(self):
return self.d2v, self.classifier

def test_classifier(self):
x_train, x_test, y_train, y_test, all_data = self.prepare_all_data()
_, x_test, _, y_test, _ = self.prepare_all_data()
if (self.d2v.model is None or self.classifier.model is None):
logging.info("No Trained Models Found, Train First or Use Correct Model Names")
else:
self.classifier.test_model(self.d2v, x_test, y_test)

def main(argv):
    """Command-line entry point.

    Expects ``argv`` to hold exactly one item, the dataset file name. With
    any other argument count a usage message is printed and nothing else
    happens.
    """
    if len(argv) != 1:
        print('Please use the following Commands to use text_classifier for training/testing/predicting:')
        print('To Run: python text_classifier.py <dataset_file>')
        return

    dataset_file = argv[0]

    classifier_app = TextClassifier()
    classifier_app.read_data(dataset_file)
    # Testing is invoked before training, in the same order as the original.
    classifier_app.test_classifier()
    classifier_app.train_classifier()
def run(dataset_file):
    """Run the classifier pipeline on ``dataset_file``.

    Builds a ``TextClassifier``, reads the dataset, then calls
    ``test_classifier`` followed by ``train_classifier``.
    """
    classifier_app = TextClassifier()
    classifier_app.read_data(dataset_file)
    # Testing is invoked before training; the order is kept as written.
    classifier_app.test_classifier()
    classifier_app.train_classifier()

if __name__ == "__main__":
main(sys.argv[1:])
run("dataset.csv")

0 comments on commit 84e9269

Please sign in to comment.