DigitNN.py
import numpy as np
import pandas as pd
#structure of the neural network: 784 input pixels -> firstLayer hidden
#neurons -> secondLayer output neurons (one per digit)
firstLayer = 50
secondLayer = 10
#learning parameters for gradient descent
epochs = 30
miniBatchSize = 100
iterations = int(60000/miniBatchSize)
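#with miniBatchSize = 100 this works out to 60000/100 = 600 mini-batch updates per epoch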
#how big of a step you want to take during gradient descent
learningRate = 3
def sigmoid(val):
    return 1/(1+np.exp(-val))
def sigmoidPrime(val):
    return sigmoid(val)*(1-sigmoid(val))
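#e.g. sigmoid(0) = 0.5, so sigmoidPrime(0) = 0.5*(1-0.5) = 0.25, which is the
#largest value the derivative ever takes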
#takes a label such as y=1 and outputs a vector v = [0,1,0,0,0,0,0,0,0,0]
#where the yth index is 1 and the rest are 0
def one_hot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y
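#example: one_hot(np.array([1, 9])) returns a (10,2) matrix whose first column
#one-hot encodes the label 1 and whose second column encodes the label 9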
#initialize the weights and biases of the neural network
def initialize():
    W1 = np.random.uniform(-0.5,0.5,(firstLayer,784))
    W2 = np.random.uniform(-0.5,0.5, (secondLayer, firstLayer))
    B1 = np.zeros((firstLayer,1))
    B2 = np.zeros((secondLayer,1))
    return W1, W2, B1, B2
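#the small random weights break the symmetry between neurons so they can learn
#different features; starting the biases at zero is a common default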
def forwardPropagation(X, W1, B1, W2, B2):
    z1 = np.dot(W1,X) + B1
    a1 = sigmoid(z1)
    z2 = np.dot(W2,a1) + B2
    a2 = sigmoid(z2)
    return z1,a1,z2,a2
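#for one image X of shape (784,1): z1 and a1 have shape (firstLayer,1) = (50,1),
#and z2 and a2 have shape (secondLayer,1) = (10,1), one activation per digit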
def updateParameters(DW1, DW2, DB1, DB2, W1, W2, B1, B2, learningRate, miniBatchSize):
    W1 = W1 - learningRate * DW1/miniBatchSize
    W2 = W2 - learningRate * DW2/miniBatchSize
    B1 = B1 - learningRate * DB1/miniBatchSize
    B2 = B2 - learningRate * DB2/miniBatchSize
    return W1, W2, B1, B2
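#each assignment is one gradient descent step: the parameter moves against the
#gradient averaged over the mini-batch, e.g. W1 := W1 - learningRate*(DW1/miniBatchSize)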
#after we update our parameters, we check how well our neural network
#can classify the digits in the test set
def testAccuracy(W1, W2, B1, B2):
    cnt = 0
    for m in range(dataTest[0].size):
        image = X_test[:,m].reshape(784,1)
        #forward propagation
        z1,a1,z2,a2 = forwardPropagation(image, W1, B1, W2, B2)
        #the prediction is the output neuron with the highest activation;
        #check whether it matches the desired label
        if(np.argmax(a2) == Y_test[m]):
            cnt = cnt + 1
    percent = cnt/dataTest[0].size*100
    print("Accuracy: " + str(percent) + "%")
#contains training data: 60,000 images, each with 784 pixels
#(note: pd.read_csv treats the first CSV row as a header; if your copy of
#mnist_train.csv has no header row, pass header=None so the first image is not lost)
dataTrain = np.transpose(np.array(pd.read_csv('data/mnist_train.csv')))
#dimension of dataTrain is (785,60000): row 0 holds the labels, rows 1-784
#hold the pixels, and each image is a column
#contains testing data, 10,000 images
dataTest = np.transpose(np.array(pd.read_csv('data/mnist_test.csv')))
#dimension is (785,10000), each image is a column
#Y_train and Y_test give the handwritten digit for each image
#one_hot_Y: if Y=1 then one_hot_Y = [0,1,0,0,0,0,0,0,0,0]
#or if Y=9 then one_hot_Y = [0,0,0,0,0,0,0,0,0,1]
#shape of Y_train is (60000,)
#shape of one_hot_Y is (10,60000): each column is a one-hot representation of a label
Y_train = (dataTrain[0])
one_hot_Y = one_hot(Y_train)
#divide all pixel values by 255 so each pixel value is between 0 and 1
X_train = dataTrain[1:]/255
Y_test = (dataTest[0])
X_test = dataTest[1:]/255
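#optional sanity check, assuming the standard MNIST split of 60,000 training
#and 10,000 test images; a shape mismatch here usually means the header row
#was handled incorrectly above
assert X_train.shape == (784, 60000) and X_test.shape == (784, 10000)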
#initialize weights and biases
W1, W2, B1, B2 = initialize()
#training
#epochs is the number of times we go through the entire dataset
for epoch in range(epochs):
    for j in range(iterations):
        '''DW1 is the partial derivative of the cost function with respect to W1,
        DW2 is the partial derivative of the cost function with respect to W2,
        DB1 is the partial derivative of the cost function with respect to B1,
        DB2 is the partial derivative of the cost function with respect to B2,
        accumulated over the current mini-batch
        '''
        DW1 = np.zeros((firstLayer,784))
        DW2 = np.zeros((secondLayer,firstLayer))
        DB1 = np.zeros((firstLayer,1))
        DB2 = np.zeros((secondLayer,1))
        #iterate over each image and run it through the neural network;
        #after miniBatchSize images, we update the parameters of the network
        #we could feed multiple images through the network at once, but to keep
        #it simple we go one at a time, even though that slows the program down
        for i in range(miniBatchSize):
            img = j*miniBatchSize+i
            #numpy gives X_train[:,img] the shape (784,), so we have to reshape
            #it into a column; same with one_hot_Y
            image = (X_train[:,img]).reshape((784,1))
            label = one_hot_Y[:,img].reshape(10,1)
            #forward propagation
            z1,a1,z2,a2 = forwardPropagation(image, W1, B1, W2, B2)
            #backpropagation
            #this is dL/dz2
            error2 = (a2 - label) * sigmoidPrime(z2)
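            #this delta corresponds to the quadratic cost L = 0.5*||a2 - label||^2:
            #dL/da2 = a2 - label, and the chain rule through a2 = sigmoid(z2)
            #multiplies by sigmoidPrime(z2)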
            DB2 = DB2 + error2
            DW2 = DW2 + np.dot(error2,a1.T)
            #this is dL/dz1: propagate the error backwards through W2, then
            #through the sigmoid of the hidden layer
            error1 = np.dot(W2.T,error2) * sigmoidPrime(z1)
            DB1 = DB1 + error1
            DW1 = DW1 + np.dot(error1,image.T)
        W1, W2, B1, B2 = updateParameters(DW1, DW2, DB1, DB2, W1, W2, B1, B2, learningRate, miniBatchSize)
    testAccuracy(W1, W2, B1, B2)