# classifier.py
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from imblearn.over_sampling import SMOTE
import numpy as np
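
# Dependencies (assumed): scikit-learn, imbalanced-learn (for SMOTE), and numpy.
# The *.txt feature and label files loaded below are expected to be in the working directory.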

def Scale(data):  # standardize features to zero mean and unit variance
    scaler = preprocessing.StandardScaler().fit(data)  # fit a scaler on the given split
    return scaler.transform(data)  # return the scaled data

def LogReg(train_x, train_y, test_x, test_y, n_iter):  # LOGISTIC REGRESSION (unregularized)
    train_x = Scale(train_x)  # scale training data
    test_x = Scale(test_x)    # scale test data (note: fits its own scaler on the test split)
    # penalty="none" disables regularization; newer scikit-learn spells this penalty=None
    logreg = LogisticRegression(penalty="none", solver="saga", max_iter=n_iter).fit(train_x, train_y)
    return logreg.score(test_x, test_y)  # return accuracy on the test set

def ElasticNet(train_x, train_y, test_x, test_y, n_iter):  # LOGISTIC REGRESSION, ELASTIC NET PENALTY
    train_x = Scale(train_x)  # scale training data
    test_x = Scale(test_x)    # scale test data (note: fits its own scaler on the test split)
    logreg = LogisticRegression(penalty="elasticnet", l1_ratio=0.5, solver="saga", max_iter=n_iter).fit(train_x, train_y)
    return logreg.score(test_x, test_y)  # return accuracy on the test set

def AdaBoost(train_x, train_y, test_x, test_y, n_est):  # ADABOOST
    ada = AdaBoostClassifier(n_estimators=n_est).fit(train_x, train_y)  # boosted ensemble with n_est estimators
    return ada.score(test_x, test_y)  # return accuracy on the test set

def RandForest(train_x, train_y, test_x, test_y):  # RANDOM FOREST
    forest = RandomForestClassifier(max_depth=3).fit(train_x, train_y)  # build forest of shallow trees
    return forest.score(test_x, test_y)  # return accuracy on the test set
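
# Alternative scaling sketch (hypothetical helper, not used by the results below): LogReg and
# ElasticNet above fit a separate StandardScaler on each split via Scale(). Fitting the scaler
# on the training split only and reusing it for the test split would avoid that test-time refit.
def ScaleTrainTest(train_x, test_x):
    scaler = preprocessing.StandardScaler().fit(train_x)  # fit scaler on the training split only
    return scaler.transform(train_x), scaler.transform(test_x)  # apply the same scaler to both splits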

# Batch Sentence Embedding
print("Loading Data...\n")
train_x1 = np.loadtxt('train_x_batch.txt')
train_y1 = np.loadtxt('train_y.txt')
test_x1 = np.loadtxt('test_x_batch.txt')
test_y1 = np.loadtxt('test_y.txt')
train_x1, train_y1 = SMOTE().fit_resample(train_x1, train_y1)  # oversample the minority class with SMOTE
print("Classifiers for Batch Sentence Embedding:")
print("Logistic Regression Accuracy: ", LogReg(train_x1, train_y1, test_x1, test_y1, 500))  # accuracy: 0.7731
print("Logistic Elastic Net Accuracy: ", ElasticNet(train_x1, train_y1, test_x1, test_y1, 500))  # accuracy: 0.77275
print("Ada Boost Accuracy: ", AdaBoost(train_x1, train_y1, test_x1, test_y1, 500))  # accuracy: 0.88145
print("Random Forest Accuracy:", RandForest(train_x1, train_y1, test_x1, test_y1))  # accuracy: 0.827
print("\n\n")

# Paragraph Embedding
print("Loading Data...\n")
train_x2 = np.loadtxt('train_x_block.txt')
train_y2 = np.loadtxt('train_y.txt')
test_x2 = np.loadtxt('test_x_block.txt')
test_y2 = test_y1  # same test labels as the batch embedding split
train_x2, train_y2 = SMOTE().fit_resample(train_x2, train_y2)  # oversample the minority class with SMOTE
print("Classifiers for Paragraph Embedding:")
print("Logistic Regression Accuracy: ", LogReg(train_x2, train_y2, test_x2, test_y2, 500))  # accuracy: 0.77295
print("Logistic Elastic Net Accuracy: ", ElasticNet(train_x2, train_y2, test_x2, test_y2, 500))  # accuracy: 0.7726
print("Ada Boost Accuracy: ", AdaBoost(train_x2, train_y2, test_x2, test_y2, 500))  # accuracy: 0.914
print("Random Forest Accuracy:", RandForest(train_x2, train_y2, test_x2, test_y2))  # accuracy: 0.907
print("\n\n")

# Sentence Transformer
print("Loading Data...\n")
train_x3 = np.loadtxt('s_train_x_batch.txt')
train_y3 = np.loadtxt('s_train_y.txt')
test_x3 = np.loadtxt('s_test_x_batch.txt')
test_y3 = np.loadtxt('s_test_y.txt')
train_x3, train_y3 = SMOTE().fit_resample(train_x3, train_y3)  # oversample the minority class with SMOTE
print("Classifiers for Sentence Transformers:")
print("Logistic Regression Accuracy: ", LogReg(train_x3, train_y3, test_x3, test_y3, 1000))  # accuracy: 0.784
print("Logistic Elastic Net Accuracy: ", ElasticNet(train_x3, train_y3, test_x3, test_y3, 1000))  # accuracy: 0.799
print("Ada Boost Accuracy: ", AdaBoost(train_x3, train_y3, test_x3, test_y3, 500))  # accuracy: 0.916
print("Random Forest Accuracy:", RandForest(train_x3, train_y3, test_x3, test_y3))  # accuracy: 0.906
print("\n\n")