-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpredictSentiment.py
87 lines (69 loc) · 2.74 KB
/
predictSentiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
import pickle
from sentimentUtils import getFeatureVector,extract_word_features
# Directory containing the raw tweet feed files to classify.
inputDirName = 'Twitter Data'
# Directory where per-file "<tweet> :: <sentiment>" results are written.
outputDirName = 'Result'
# Sentiment prediction using the best classifier model saved using pickle.
# Model-selection results that motivated choosing the pickled classifier:
# ---------------------------------------
# SINGLE FOLD RESULT (Naive Bayes)
# ---------------------------------------
# accuracy: 0.83
# precision 0.853936545241
# recall 0.835202991453
# f-measure 0.828396460256
# ---------------------------------------
# SINGLE FOLD RESULT (SVM)
# ---------------------------------------
# accuracy: 0.813333333333
# precision 0.813333333333
# recall 0.813835470085
# f-measure 0.813258636788
# ---------------------------------------
# N-FOLD CROSS VALIDATION RESULT (Naive Bayes)
# ---------------------------------------
# accuracy: 0.948571428571
# precision 0.950264364014
# recall 0.946820784157
# f-measure 0.94716754863
# ---------------------------------------
# N-FOLD CROSS VALIDATION RESULT (SVM)
# ---------------------------------------
# accuracy: 0.951428571429
# precision 0.950980392157
# recall 0.952481076535
# f-measure 0.950937950938
# ---------------------------------------
# NOTE(review): pickle.load is only safe on trusted, locally produced
# files -- never point these paths at untrusted data.
# 'with' ensures the pickle files are closed (the original leaked the
# file handles returned by the bare open() calls).
BestClassifierPkl = os.path.join("pkl", "nb_cv.pkl")
with open(BestClassifierPkl, 'rb') as pkl_file:
    # Best-performing classifier selected from the results above.
    BestClassifier = pickle.load(pkl_file)

FeaturesListPkl = os.path.join('pkl', 'FeaturesList.pkl')
with open(FeaturesListPkl, 'rb') as pkl_file:
    # Vocabulary (feature word list) saved at training time.
    allFeaturesList = pickle.load(pkl_file)
#start extract_features
def extract_features(tweet, features_list=None):
    """Build an NLTK-style boolean feature dict for a tokenized tweet.

    Parameters:
        tweet: iterable of word tokens from a single tweet.
        features_list: vocabulary to test membership against. Defaults to
            the module-level ``allFeaturesList`` loaded from the pickle,
            preserving the original behavior for existing callers.

    Returns:
        dict mapping 'contains(<word>)' to True when <word> appears in
        the tweet, False otherwise, for every word in the vocabulary.
    """
    if features_list is None:
        features_list = allFeaturesList
    # Set membership gives O(1) lookups per vocabulary word.
    tweet_words = set(tweet)
    return {'contains(%s)' % word: (word in tweet_words)
            for word in features_list}
#end
# Collect every feed file name from the input directory.
feedsopenList = list(os.listdir(inputDirName))

# exist_ok avoids the check-then-create race of the original
# os.path.exists() + makedirs() pair.
os.makedirs(outputDirName, exist_ok=True)

for feedsopenName in feedsopenList:
    print("started processing open ", feedsopenName)
    fullInputopenName = os.path.join(inputDirName, feedsopenName)
    fullOutputopenName = os.path.join(outputDirName, feedsopenName)
    sentiment_result = []
    # 'with' guarantees both file handles are closed even if
    # classification raises (the original leaked them on error).
    with open(fullInputopenName, 'r', encoding="utf8", errors='ignore') as feeds_input_open, \
         open(fullOutputopenName, 'w+', encoding="utf8", errors='ignore') as feeds_output_open:
        # NOTE(review): tweets are assumed to be '\r'-separated in the
        # input files -- confirm this matches how the data was exported.
        for tweet in feeds_input_open.read().split('\r'):
            if tweet:  # skip empty segments between separators
                feature_vector = getFeatureVector(tweet)
                extracted_features = extract_word_features(feature_vector)
                sentiment = BestClassifier.classify(extracted_features)
                sentiment_result.append(tweet + " :: " + sentiment + "\r")
        # One batched write per file instead of many small writes.
        feeds_output_open.writelines(sentiment_result)
    print("finished predicting sentiment for open ", feedsopenName)