-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnaiveBayes.py
82 lines (69 loc) · 3.19 KB
/
naiveBayes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import math
import numpy as np
class naiveBayes:
    """Gaussian naive Bayes classifier for numeric feature vectors.

    Training samples are grouped by label. Each feature of each class is
    summarised by its mean and sample standard deviation, and a test vector
    is assigned to the class whose per-feature Gaussian densities give the
    highest joint likelihood. No class prior is applied: classes are compared
    on likelihood alone.
    """

    def __init__(self, dataType, trainingData, trainingLabels):
        """Store the training set and group its samples by class label.

        dataType       -- caller-supplied tag; stored but not used here.
        trainingData   -- sequence of equal-length numeric feature vectors.
        trainingLabels -- sequence of hashable labels, aligned by position
                          with trainingData.
        """
        self.dataType = dataType
        self.trainingData = trainingData
        self.trainingLabels = trainingLabels
        self.seperatedTrainingData = self.seperateTrainingDataByClass()

    def seperateTrainingDataByClass(self):
        """Return a dict mapping each label to the list of its samples."""
        seperatedTrainingData = {}
        for i, sample in enumerate(self.trainingData):
            label = self.trainingLabels[i]
            seperatedTrainingData.setdefault(label, []).append(sample)
        return seperatedTrainingData

    def getClassMeanAndStd(self):
        """Return {label: [(mean, std), ...]} with one pair per feature."""
        meanAndStd = {}
        for label, data in self.seperatedTrainingData.items():
            meanAndStd[label] = self.calculateMeanAndStd(data)
        return meanAndStd

    def mean(self, attributes):
        """Return the arithmetic mean of a non-empty sequence of numbers."""
        return sum(attributes) / float(len(attributes))

    def stddev(self, attributes):
        """Return the sample (n - 1) standard deviation of attributes.

        Fewer than two values carry no spread information, so 0.0 is
        returned instead of dividing by zero. calculateGaussianProbability
        treats a zero deviation as an uninformative feature.
        """
        count = len(attributes)
        if count < 2:
            return 0.0
        avg = self.mean(attributes)
        variance = sum(pow(x - avg, 2) for x in attributes) / float(count - 1)
        return math.sqrt(variance)

    def calculateMeanAndStd(self, data):
        """Return [(mean, std), ...] for each feature column of data.

        data is a sequence of feature vectors; zip(*data) transposes it so
        each tuple holds every sample's value for one feature.
        """
        return [(self.mean(attribute), self.stddev(attribute))
                for attribute in zip(*data)]

    def calculateGaussianProbability(self, attribute, avg, std):
        """Return the Gaussian density of attribute under N(avg, std**2).

        A zero std yields the neutral factor 1 so that a constant feature
        does not affect the product of densities.
        """
        if std == 0:
            return 1
        exponent = math.exp(
            -(math.pow(attribute - avg, 2) / (2 * math.pow(std, 2))))
        return (1 / (math.sqrt(2 * math.pi) * std)) * exponent

    def _calculateLogGaussianProbability(self, attribute, avg, std):
        """Return the natural log of calculateGaussianProbability's value.

        Computed analytically rather than via math.log(density) so that an
        extremely small density does not become log(0).
        """
        if std == 0:
            # log(1): keep the same neutral treatment of constant features.
            return 0.0
        return (-0.5 * math.log(2 * math.pi) - math.log(std)
                - math.pow(attribute - avg, 2) / (2 * math.pow(std, 2)))

    def calculateClassProbabilities(self, classMeanAndStd, inputTestVector):
        """Return {label: product of per-feature Gaussian densities}.

        The raw product can underflow to 0.0 when there are many features;
        predict() therefore ranks classes in log space instead of using
        these values.
        """
        probabilities = {}
        for classLabel, classStdMean in classMeanAndStd.items():
            likelihood = 1
            for i, (avg, std) in enumerate(classStdMean):
                likelihood *= self.calculateGaussianProbability(
                    inputTestVector[i], avg, std)
            probabilities[classLabel] = likelihood
        return probabilities

    def predict(self, classMeanAndStd, inputTestVector):
        """Return the label with the highest likelihood for the vector.

        Classes are compared by the sum of log-densities, which has the same
        argmax as the product of densities but does not underflow. Returns
        None when classMeanAndStd is empty; ties keep the first label seen.
        """
        bestLabel, bestScore = None, float('-inf')
        for classLabel, classStdMean in classMeanAndStd.items():
            score = 0.0
            for i, (avg, std) in enumerate(classStdMean):
                score += self._calculateLogGaussianProbability(
                    inputTestVector[i], avg, std)
            if bestLabel is None or score > bestScore:
                bestLabel, bestScore = classLabel, score
        return bestLabel

    def predictTestSet(self, classMeanAndStd, testSet):
        """Return the list of predicted labels, one per vector in testSet."""
        return [self.predict(classMeanAndStd, vector) for vector in testSet]

    def getAccuracy(self, testSet, testLabels):
        """Print the confusion matrix and return the accuracy in percent.

        Rows of the printed matrix are true labels and columns are predicted
        labels. Labels are mapped to indices in sorted order, so integer
        labels 0..k-1 land on their own index and other hashable labels
        (strings, floats, non-contiguous ints) are also supported. Returns
        0.0 when testLabels is empty.
        """
        classMeanAndStd = self.getClassMeanAndStd()
        predictions = self.predictTestSet(classMeanAndStd, testSet)

        # Include labels seen only in the test set so every true label has
        # a row even if the classifier was never trained on it.
        labels = set(classMeanAndStd)
        labels.update(testLabels)
        try:
            orderedLabels = sorted(labels)
        except TypeError:
            # Labels of mixed, mutually unorderable types.
            orderedLabels = sorted(labels, key=repr)
        labelIndex = {label: i for i, label in enumerate(orderedLabels)}

        size = len(orderedLabels)
        confusionMatrix = np.zeros((size, size))
        correct = 0
        for i, actual in enumerate(testLabels):
            predicted = predictions[i]
            confusionMatrix[labelIndex[actual]][labelIndex[predicted]] += 1
            if actual == predicted:
                correct += 1
        # Parenthesised single argument prints identically on Python 2 and 3.
        print(confusionMatrix)
        if len(testLabels) == 0:
            return 0.0
        return (correct / float(len(testLabels))) * 100