-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathlogistic.py
76 lines (54 loc) · 1.69 KB
/
logistic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the iris measurements; the first row of the CSV holds the column names.
data = pd.read_csv('data/iris.csv', delimiter=',', header=0)

# Binary target column: 1 for rows whose species is 'virginica', 0 otherwise.
data['y'] = (data['species'] == 'virginica').astype('int64')

# Design matrix (one row per sample, one column per measurement) and the
# target as a column vector, both as np.matrix objects.
X = np.matrix(
    data[[
        'sepal_length',
        'sepal_width',
        'petal_length',
        'petal_width',
    ]]
)
y = np.matrix(data['y']).transpose()
def scale(data):
    """Standardize every column of ``data`` to zero mean and unit variance.

    Args:
        data: 2-D numeric array-like (rows are samples, columns are
            features) accepted by ``np.mean`` / ``np.std`` along axis 0.

    Returns:
        ``(data - column_mean) / column_std``. A column whose standard
        deviation is zero (all values equal) is returned as zeros rather
        than NaN.
    """
    means = np.mean(data, 0)
    stds = np.std(data, 0)
    # A constant column has std == 0; dividing by 1 instead keeps it at zero
    # and avoids the 0/0 -> NaN result (and the accompanying runtime warning).
    safe_stds = np.where(stds == 0, 1.0, stds)
    return (data - means) / safe_stds
def logistic(z):
    """Return the logistic (sigmoid) function 1 / (1 + e^(-z)), element-wise."""
    decay = np.exp(-z)
    return 1. / (1 + decay)
def plot_boundary(Zs, Hs, y):
    """Plot model outputs and labels against the theoretical logistic curve.

    Adds three series to the current matplotlib axes:
      * 'Theory'    - ``logistic(z)`` on 100 evenly spaced points spanning
                      the range of ``Zs``;
      * 'empirical' - ``Hs`` plotted against ``Zs``;
      * 'data'      - ``y`` plotted against ``Zs``.

    The function does not call ``plt.show()``; the caller decides when to
    display or save the figure.

    Args:
        Zs: array with ``min()`` / ``max()`` methods; used as the x values
            (presumably the linear scores X . weights - confirm with caller).
        Hs: values plotted as the 'empirical' series (presumably
            ``logistic(Zs)`` - confirm with caller).
        y: values plotted as the 'data' series.
    """
    # Dense grid over the observed range of Zs for the reference curve.
    z = np.linspace(Zs.min(), Zs.max(), 100)

    plt.plot(z, logistic(z), 'r-', label='Theory')
    plt.plot(Zs, Hs, 'X', label='empirical')
    plt.plot(Zs, y, '*', label='data')

    plt.xlabel('z')
    plt.ylabel('h(z)')
    plt.title('Logistic Regression')
    plt.legend()
def plot_points(data, features, weights, label='y'):
    """Scatter two feature columns of ``data``, one marker style per class.

    Rows where ``data[label] == 0`` are drawn with '*' and rows where it
    equals 1 with '+'. The axes are labelled with the two feature names.
    The function does not call ``plt.show()``.

    Args:
        data: table indexable by column name and by boolean mask.
        features: sequence whose first two entries name the x and y columns.
        weights: accepted but not used by this function.
        label: name of the column holding the 0/1 class (default 'y').
    """
    x_name = features[0]
    y_name = features[1]
    x_values = data[x_name]
    y_values = data[y_name]

    # One series per class, in the order 0 then 1.
    for class_value, marker, legend_text in ((0, '*', 'y=0'), (1, '+', 'y=1')):
        members = data[label] == class_value
        plt.plot(x_values[members], y_values[members], marker, label=legend_text)

    plt.xlabel(x_name)
    plt.ylabel(y_name)
    plt.legend()
# ---- Batch gradient descent for logistic regression ----

alpha = 0.5  # learning rate
M, N = X.shape  # number of rows and columns of X before the bias column is added

# Prepend a column of ones so that weights[0] acts as the intercept term.
X = np.concatenate((np.ones((M, 1)), X), axis=1)

# Random initial weights drawn uniformly from [-epsilon, epsilon).
epsilon = 0.12
weights = 2*np.random.rand(N+1, 1)*epsilon - epsilon

count = 0   # number of gradient steps taken
oldJ = 0    # cost from the previous iteration
err = 1     # absolute change in cost between consecutive iterations
Js = []     # cost history, one float per iteration

# Stop once the cost changes by at most 1e-3 between consecutive iterations.
while err > 1e-3:
    Zs = np.dot(X, weights)
    Hs = logistic(Zs)

    # Gradient step scaled by the learning rate and averaged over M rows.
    deltas = alpha/M*np.dot(X.T, (Hs-y))

    count += 1
    weights -= deltas

    # Mean cross-entropy of the predictions Hs (computed before this step's
    # weight update). Both terms must sit inside the -1/M factor; otherwise
    # the (1-y)*log(1-h) term is an unnormalized sum and dominates the cost.
    J = -1/M*(np.dot(y.T, np.log(Hs)) + np.dot(1-y.T, np.log(1-Hs)))
    # J is a 1x1 result; .item() extracts the Python float without relying
    # on implicit array-to-scalar conversion.
    Js.append(J.item())
    err = np.abs(oldJ-J)
    oldJ = J
    print(count, J, err, weights.flatten())