-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnaive_bayes.py
78 lines (69 loc) · 2.33 KB
/
naive_bayes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import numpy as np
import pandas
def read_csv(fname):
    """Load a CSV file and return its contents as a NumPy array.

    Args:
        fname: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A new ``numpy.ndarray`` holding the parsed table, one row per
        record. The element dtype is whatever pandas infers for the table.
    """
    frame = pandas.read_csv(fname)
    values = frame.to_numpy()
    # np.array() copies by default, so the result does not share memory
    # with the intermediate DataFrame.
    return np.array(values)
class NaiveBayes:
    """Naive Bayes classifier for categorical (discrete-valued) features.

    Class priors and per-feature conditional probabilities are estimated by
    counting occurrences in the training data. ``lamb`` is an additive
    smoothing term applied to the conditional probabilities: ``lamb=0``
    gives plain relative frequencies, ``lamb=1`` gives Laplace smoothing.
    """

    def __init__(self, lamb=0):
        """Create an untrained model.

        Args:
            lamb: Additive smoothing constant used for P(X_j | Y).
        """
        # {label: P(Y = label)}; None until train() has run.
        self.P_Y = None
        # One dict per feature: {(value, label): P(X_j = value | Y = label)}.
        self.P_X_cond_Y = None
        # Number of feature columns seen by train(); also marks "trained".
        self.num_features = None
        self.lamb = lamb
        # One dict per feature: {label: likelihood used at prediction time
        # for a value of that feature that never occurred in training}.
        self._unseen = None

    def train(self, X, Y):
        """Estimate priors and conditional probabilities from labelled data.

        Args:
            X: 2-D array of shape (n_samples, n_features) of feature values.
            Y: 1-D array of n_samples class labels.

        Raises:
            AssertionError: If the model was already trained, or X / Y do
                not have the expected number of dimensions.
            ValueError: If X and Y disagree on the number of samples.
        """
        assert self.num_features is None, "model has already been trained"
        assert X.ndim == 2, "X must be a 2-D array"
        assert Y.ndim == 1, "Y must be a 1-D array"
        if X.shape[0] != Y.shape[0]:
            # Without this check a length-1 operand would broadcast silently
            # in the mask computation below and produce wrong counts.
            raise ValueError(
                f"X has {X.shape[0]} rows but Y has {Y.shape[0]} labels"
            )

        labels = np.unique(Y)
        total = len(Y)
        # Per-label masks and counts are loop invariants; compute them once.
        label_masks = [Y == y for y in labels]
        label_counts = [np.count_nonzero(mask) for mask in label_masks]

        self.P_Y = {y: cnt / total for y, cnt in zip(labels, label_counts)}
        self.num_features = X.shape[1]
        self.P_X_cond_Y = [dict() for _ in range(self.num_features)]
        self._unseen = [dict() for _ in range(self.num_features)]

        for j in range(self.num_features):
            column = X[:, j]
            values = np.unique(column)
            # Smoothing adds `lamb` pseudo-counts for each observed value.
            denoms = [cnt + len(values) * self.lamb for cnt in label_counts]

            # A value with zero training count gets lamb / denominator,
            # i.e. the same formula as below with a count of 0.
            for y, denom in zip(labels, denoms):
                self._unseen[j][y] = self.lamb / denom

            for value in values:
                value_mask = column == value
                for y, mask, denom in zip(labels, label_masks, denoms):
                    hits = np.count_nonzero(value_mask & mask)
                    self.P_X_cond_Y[j][(value, y)] = \
                        (hits + self.lamb) / denom
        print("Train over")

    def predict(self, X):
        """Predict a label for every row of X.

        Args:
            X: Iterable of feature vectors, each of length ``num_features``.

        Returns:
            A NumPy array with one predicted label per row.
        """
        return np.array([self.predict_one(x) for x in X])

    def predict_one(self, x):
        """Predict the label of a single feature vector.

        Scores are accumulated as sums of log-probabilities rather than
        products of probabilities, so the comparison stays meaningful when
        the number of features is large enough for the product to underflow
        to zero. Feature values that never occurred in training use the
        zero-count smoothed estimate instead of raising ``KeyError``.

        Args:
            x: Sequence of ``num_features`` feature values.

        Returns:
            The label with the highest posterior score. On ties the label
            that comes first in ``P_Y`` wins.

        Raises:
            AssertionError: If ``len(x)`` differs from ``num_features``.
        """
        num_features = len(x)
        assert num_features == self.num_features, \
            "feature vector length does not match the trained model"

        names = list(self.P_Y)
        scores = []
        for y in names:
            score = self._log_prob(self.P_Y[y])
            for j in range(num_features):
                score += self._log_prob(self._likelihood(j, x[j], y))
            scores.append(score)
        # argmax returns the first maximum, matching the original tie rule.
        return names[np.argmax(np.array(scores))]

    def _likelihood(self, j, value, y):
        """Return P(X_j = value | Y = y), falling back for unseen values."""
        try:
            return self.P_X_cond_Y[j][(value, y)]
        except KeyError:
            return self._unseen[j][y]

    @staticmethod
    def _log_prob(p):
        """Return log(p), mapping non-positive probabilities to -inf."""
        return np.log(p) if p > 0 else -np.inf
if __name__ == '__main__':
    # Demo: fit the classifier on the CSV table, score it on the same rows
    # it was trained on, and classify one extra sample - once with the
    # default constructor and once with lamb=1.
    fname = '../data/table4.1.csv'
    data = read_csv(fname)
    # Every column but the last is a feature; the last column is the label.
    X, Y = data[:, :-1], data[:, -1]

    runs = (
        ('lambda = 0', {}),
        ('lambda = 1', {'lamb': 1}),
    )
    for run_idx, (title, ctor_kwargs) in enumerate(runs):
        if run_idx:
            # Separator between consecutive experiments.
            print('=' * 16)
        print(title)
        bayes = NaiveBayes(**ctor_kwargs)
        bayes.train(X, Y)
        PY = bayes.predict(X)
        acc = np.count_nonzero(Y == PY) / len(Y)
        print(f"Accuracy: {acc}")
        predict = bayes.predict_one([2, 'S'])
        print(f"input: [2, 'S'], predict: {predict}")