'''
Created_by : Anand Tiwari
Created_at : 09/03/2018
Description: This script implements an Artificial Neural Network with an input layer,
             two hidden layers and an output layer. Sigmoid is used as the activation
             function for the hidden layers and Softmax for the output layer. The
             weights are updated using Gradient Ascent (on the log-likelihood) instead
             of Gradient Descent.
'''
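# The network built in main() has D = 2 inputs, two hidden layers of M = 3 sigmoid units
# each, and K = 3 softmax outputs. Cross-entropy is tracked as the cost and plotted at the end.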
import numpy as np
import matplotlib.pyplot as plt
# Softmax activation function: used for the output layer.
def softmax(a):
    expA = np.exp(a)
    return expA / expA.sum(axis=1, keepdims=True)

# Sigmoid activation function: used for the hidden layers.
def sigmoid(a):
    return 1 / (1 + np.exp(-a))
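# Note: np.exp can overflow for very large activations. A commonly used, numerically stable
# variant of softmax subtracts the row-wise maximum first, which leaves the result unchanged:
#   expA = np.exp(a - a.max(axis=1, keepdims=True))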
# Forward pass: returns the output probabilities and both hidden-layer activations.
def forward(X, W1, b1, W2, b2, W3, b3):
    Z1 = sigmoid(X.dot(W1) + b1)
    Z2 = sigmoid(Z1.dot(W2) + b2)
    return softmax(Z2.dot(W3) + b3), Z1, Z2
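# Shapes in the forward pass (with the data built in main()): X is (N, D), Z1 and Z2 are
# (N, M), and the returned softmax output is (N, K), one row of class probabilities per sample.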
# Gradient with respect to W2 (weights from hidden layer 1 to hidden layer 2),
# used to update W2.
def derivative_W2(Z1, Z2, T, Y, W3):
    # N, D = Z1.shape
    # M, K = W2.shape
    # slow (naive nested-loop version, kept for reference):
    # ret1 = np.zeros((D, M))
    # for n in range(N):
    #     for k in range(K):
    #         for m in range(M):
    #             for d in range(D):
    #                 ret1[d, m] += (T[n, k] - Y[n, k]) * W2[m, k] * Z[n, m] * (1 - Z[n, m]) * X[n, d]
    # Vectorized operations are faster
    dz = (T - Y).dot(W3.T) * Z2 * (1 - Z2)
    return Z1.T.dot(dz)
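# In derivative_W2 the output-layer error (T - Y) is propagated back through W3 and the sigmoid
# derivative Z2 * (1 - Z2) to form the hidden-layer-2 delta dz; the W2 gradient is Z1.T.dot(dz).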
# Gradient with respect to b2 (bias of hidden layer 2), used to update b2.
def derivative_b2(T, Y, W3, Z2):
    return ((T - Y).dot(W3.T) * Z2 * (1 - Z2)).sum(axis=0)
# Gradient with respect to W3 (weights from hidden layer 2 to the output layer),
# used to update W3.
def derivative_W3(Z2, T, Y):
    # N, K = T.shape
    # M = Z.shape[1]
    # slow
    # ret1 = np.zeros((M, K))
    # for n in range(N):
    #     for m in range(M):
    #         for k in range(K):
    #             ret1[m, k] += (T[n, k] - Y[n, k]) * Z[n, m]
    # a little faster
    # ret2 = np.zeros((M, K))
    # for n in range(N):
    #     for k in range(K):
    #         ret2[:, k] += (T[n, k] - Y[n, k]) * Z[n, :]
    # faster still
    # ret3 = np.zeros((M, K))
    # for n in range(N):
    #     ret3 += np.outer(Z[n], T[n] - Y[n])
    # Vectorized operations are fastest
    # ret4 = Z.T.dot(T - Y)
    return Z2.T.dot(T - Y)
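# Why (T - Y): with a softmax output layer and a cross-entropy (log-likelihood) objective,
# the derivative with respect to the output-layer pre-activation simplifies to (T - Y),
# so the W3 gradient reduces to Z2.T.dot(T - Y).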
# Gradient with respect to b3 (output-layer bias), used to update b3.
def derivative_b3(T, Y):
    return (T - Y).sum(axis=0)
# Gradient with respect to W1 (weights from the input layer to hidden layer 1),
# used to update W1.
def derivative_W1(X, Z1, Z2, T, Y, W3, W2):
    dz = ((T - Y).dot(W3.T) * Z2 * (1 - Z2)).dot(W2.T) * Z1 * (1 - Z1)
    return X.T.dot(dz)
# Gradient with respect to b1 (bias of hidden layer 1), used to update b1.
def derivative_b1(T, Y, W3, W2, Z2, Z1):
    return (((T - Y).dot(W3.T) * Z2 * (1 - Z2)).dot(W2.T) * Z1 * (1 - Z1)).sum(axis=0)
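# derivative_W1 and derivative_b1 apply the same chain rule one layer further back: the
# hidden-layer-2 delta is propagated through W2.T and the sigmoid derivative Z1 * (1 - Z1)
# before being combined with the inputs X (or summed over samples, for the bias).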
# Cost or Error function
def cross_entropy(T, Y):
    return -np.mean(T * np.log(Y))
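# Note: np.mean averages over all N * K entries of T * np.log(Y), so this is the usual
# cross-entropy divided by the constant K; that only rescales the cost and does not change
# where its optimum lies.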
# Classification rate: fraction of predictions that match the true labels.
def classification_rate(y, y_hat):
    correct = 0
    total = 0
    for i in range(len(y)):
        total += 1
        if y[i] == y_hat[i]:
            correct += 1
    return float(correct) / total
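# With NumPy arrays, an equivalent vectorized form of the loop above would be:
#   return np.mean(y == y_hat)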
# Main function: builds the data, trains the network and plots the cost.
def main():
    N = 500   # samples per class
    D = 2     # input dimensionality
    M = 3     # hidden layer size
    K = 3     # number of classes

    # Three Gaussian clouds of data
    X1 = np.random.randn(N, D) + np.array([0, -2])
    X2 = np.random.randn(N, D) + np.array([2, 2])
    X3 = np.random.randn(N, D) + np.array([-2, 2])
    X = np.vstack([X1, X2, X3])
    Y = np.array([0] * N + [1] * N + [2] * N)
    N_Y = len(Y)

    # One-hot encode the targets
    T = np.zeros((N_Y, K))
    for i in range(N_Y):
        T[i, Y[i]] = 1

    plt.scatter(X[:, 0], X[:, 1], s=100, c=Y, alpha=0.5)
    plt.show()
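    # An equivalent vectorized way to build the one-hot target matrix T above would be:
    #   T = np.eye(K)[Y]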
    # Randomly initialize weights and biases
    W1 = np.random.randn(D, M)
    b1 = np.random.randn(M)
    W2 = np.random.randn(M, M)
    b2 = np.random.randn(M)
    W3 = np.random.randn(M, K)
    b3 = np.random.randn(K)

    learning_rate = 0.0001
    cost = []
    # Gradient Ascent
    for i in range(100000):
        output, hidden1, hidden2 = forward(X, W1, b1, W2, b2, W3, b3)
        if i % 100 == 0:
            c = cross_entropy(T, output)
            p = np.argmax(output, axis=1)
            accuracy = classification_rate(Y, p)
            print("Iteration :", i, "Cost :", c, "Accuracy :", accuracy)
            cost.append(c)
        W3 += learning_rate * derivative_W3(hidden2, T, output)                      # hidden layer 2 -> output
        b3 += learning_rate * derivative_b3(T, output)
        W2 += learning_rate * derivative_W2(hidden1, hidden2, T, output, W3)         # hidden layer 1 -> hidden layer 2
        b2 += learning_rate * derivative_b2(T, output, W3, hidden2)
        W1 += learning_rate * derivative_W1(X, hidden1, hidden2, T, output, W3, W2)  # input -> hidden layer 1
        b1 += learning_rate * derivative_b1(T, output, W3, W2, hidden2, hidden1)
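    # The '+=' updates perform gradient ascent because the derivative_* functions return
    # gradients of the log-likelihood (they use T - Y); negating them and using '-=' would be
    # the equivalent gradient-descent update on the cross-entropy cost.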
    plt.plot(cost)
    plt.show()
if __name__ == "__main__":
    main()