-
Notifications
You must be signed in to change notification settings - Fork 0
/
classifier.py
71 lines (57 loc) · 1.8 KB
/
classifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import processing
class Sentiment:
    """A sentiment label that a tweet can be classified as.

    Attributes:
        name: Label inserted into the prediction reply.
        probability: Value supplied by the caller (presumably the prior
            probability of this sentiment -- confirm against the caller).
        dictionary: Value supplied by the caller (presumably a mapping of
            word to conditional probability -- confirm against the caller).
        type: Always the string "Sentiment".
    """

    def __init__(self, name, probability, dictionary):
        """Store the label, its probability and its word dictionary."""
        self.name, self.probability, self.dictionary = (
            name,
            probability,
            dictionary,
        )
        self.type = "Sentiment"

    def getPredictionReply(self):
        """Return the reply sentence announcing this sentiment."""
        template = 'This tweet is {}'
        return template.format(self.name)
class Topic:
    """A topic label that a tweet can be classified as.

    Attributes:
        name: Label inserted into the prediction reply.
        probability: Value supplied by the caller (presumably the prior
            probability of this topic -- confirm against the caller).
        dictionary: Value supplied by the caller (presumably a mapping of
            word to conditional probability -- confirm against the caller).
        type: Always the string "Topic".
    """

    def __init__(self, name, probability, dictionary):
        """Store the label, its probability and its word dictionary."""
        self.name, self.probability, self.dictionary = (
            name,
            probability,
            dictionary,
        )
        self.type = "Topic"

    def getPredictionReply(self):
        """Return the reply sentence announcing this topic."""
        template = 'This tweet is about {}'
        return template.format(self.name)
# Naive Bayes theorem helper methods
def getTotalCount(listCat):
    """Return the combined length of every collection in ``listCat``.

    An empty ``listCat`` yields 0.
    """
    return sum(len(category) for category in listCat)
def getProbability(listOfData, totalCount):
    """Return ``len(listOfData)`` divided by ``totalCount`` as a float.

    Raises:
        ZeroDivisionError: If ``totalCount`` is zero.
    """
    # Convert the size first so the division is floating-point even when
    # both operands are integers.
    sample_size = float(len(listOfData))
    return sample_size / totalCount
def condProb(wordCount,totalWordCount, vocab_count):
    """Return the add-one smoothed ratio for a word count.

    Computes ``(wordCount + 1) / (totalWordCount + vocab_count)`` using
    floating-point division, so a word count of zero still yields a
    non-zero result.
    """
    smoothed_hits = wordCount + 1
    denominator = float(totalWordCount) + float(vocab_count)
    return smoothed_hits / denominator
def countWord(word, bag):
    """Return how many items of ``bag`` compare equal to ``word``."""
    return sum(1 for item in bag if word == item)
# Predicts the most likely outcome for a single line of text
def predict(words, outcomes):
    """Return the outcome that best matches a line of text.

    Each outcome's score starts at its ``probability`` and is multiplied by
    ``dictionary[word]`` for every cleaned word of the line that is present
    in that outcome's ``dictionary``. Words missing from the dictionary are
    skipped.

    Scores are kept in local variables and the outcome objects are never
    modified. Writing the running product back to ``outcome.probability``
    would make every later call start from the previous call's result
    instead of the original value, so the scores would shrink call after
    call until they reach zero.

    Args:
        words: The line of text to classify; it is split on whitespace and
            each token is passed through ``processing.cleanText``.
        outcomes: Indexable sequence of objects exposing ``probability``
            and ``dictionary`` (e.g. ``Sentiment`` or ``Topic`` instances).

    Returns:
        The element of ``outcomes`` with the highest score. The earliest
        outcome wins ties, and the first outcome is returned when no score
        is greater than zero.

    Raises:
        IndexError: If ``outcomes`` is an empty list.
    """
    clean_words = [
        processing.cleanText(word, processing.redundantChar)
        for word in words.split()
    ]

    highest = 0.0
    highest_index = 0
    for index, outcome in enumerate(outcomes):
        # Start from the outcome's stored probability on every call.
        score = outcome.probability
        for word in clean_words:
            if word in outcome.dictionary:
                score = score * outcome.dictionary[word]
        # Strict comparison keeps the earliest outcome on ties.
        if score > highest:
            highest = score
            highest_index = index
    return outcomes[highest_index]