
Commit 9632728

3-layer MLP for the Kaggle digit recognizer

File tree

3 files changed: +368 -0 lines changed

classifier.py

+29

@@ -0,0 +1,29 @@
import numpy as np


class Classifier(object):

    def _normalize_labels(self, y):
        '''
        Returns a new set of labels (mapped to integers starting from zero).
        Also builds a dictionary for converting the new labels back to the
        original ones.

        Input: y - an N-dimensional array of labels.
        Returns: labels mapped to { 0, 1, ..., |y.unique()|-1 }
        '''
        self.from_index_label_to_raw = dict((i, l) for i, l in enumerate(np.unique(y)))
        from_label_to_index = dict((l, i)
                                   for (i, l) in self.from_index_label_to_raw.items())

        return np.vectorize(lambda l: from_label_to_index[l])(y)

    def to_label(self, index):
        '''
        Maps an index (output by the 'predict' method) back to the 'raw' label.

        Input: index - an integer.
        Returns: label - an integer.
        '''
        return self.from_index_label_to_raw[index]
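A minimal round-trip sketch of how these two methods compose (illustrative only, not part of the commit; the raw labels are made up):

import numpy as np
from classifier import Classifier

clf = Classifier()
y_raw = np.array([5, 9, 5, 2])        # arbitrary raw labels
y_idx = clf._normalize_labels(y_raw)  # np.unique gives [2, 5, 9], so y_idx == [1, 2, 1, 0]
print y_idx
print clf.to_label(2)                 # -> 9, the original label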

digit_recognizer.py

+116

@@ -0,0 +1,116 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-

import os
import re
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# NOTE: in scikit-learn >= 0.18 train_test_split lives in sklearn.model_selection
from sklearn.cross_validation import train_test_split
import itertools

from neural_network import Neural_Network


def digit_recognizer(layer_one_size = 500, layer_two_size = 250):

    # load the training data
    print "loading data : "
    train_filename = '/home/fuyan/kaggle/digit_recognizer/data/train.csv'
    all_data = np.array(pd.read_csv(train_filename))
    # features: change to float
    feature_data = all_data[ : , 1 : ].astype(float)
    # labels: change to int
    label_data = all_data[ : , 0].astype(int)

    # remove the per-feature average
    train_average = np.mean(feature_data, axis = 0)
    feature_data -= train_average

    # split into train set and test set
    print "getting train set and test set : "
    train_data, test_data, train_label, test_label = train_test_split(feature_data, label_data, test_size = 0.2, random_state = 42)

    # set the layer sizes, count the classes
    all_data_num, feature_num = train_data.shape
    layer_1_size = layer_one_size
    layer_2_size = layer_two_size
    classifications_num = len(np.unique(label_data))
    print "total " + str(classifications_num) + " types"

    # set the main hyperparameters
    learning_rates = [0.002]            # more values can be added for comparison
    regularization_strengths = [0.02]   # more values can be added for comparison
    num_iters = 50000
    batch_size = 100
    learning_rate_decay_num = 0.98

    # init the best net
    best_net = None
    best_loss_history = None
    best_accuracy = 0.0

    # start
    print "training start : "
    for rate_temp, reg_temp in itertools.product(learning_rates, regularization_strengths):
        print "learning_rate : " + str(rate_temp) + "\t" + "regularization_strengths : " + str(reg_temp)
        net_temp = Neural_Network(feature_num, layer_1_size, layer_2_size, classifications_num)

        # NOTE: train on the labels of the training split (train_label),
        # not on the full label_data array
        loss_history_temp = net_temp.train( train_set = train_data,
                                            label_set = train_label,
                                            learning_rate = rate_temp,
                                            regularization_strengths = reg_temp,
                                            iters_number = num_iters,
                                            batch_number = batch_size,
                                            learning_rate_decay_number = learning_rate_decay_num,
                                            n = 1000,
                                            verbose = True )
        # output accuracy
        train_data_accuracy = np.mean(net_temp.predict(train_data) == train_label)
        test_data_accuracy = np.mean(net_temp.predict(test_data) == test_label)
        print "\ttrain set accuracy : " + str(train_data_accuracy)
        print "\ttest set accuracy : " + str(test_data_accuracy)

        # update the best net
        if test_data_accuracy > best_accuracy:
            best_accuracy = test_data_accuracy
            best_net = net_temp
            best_loss_history = loss_history_temp

    # output the best net
    print "the best neural network accuracy is : "
    print "\ttrain set accuracy : " + str(np.mean(best_net.predict(train_data) == train_label))
    print "\ttest set accuracy : " + str(np.mean(best_net.predict(test_data) == test_label))

    # plot the loss history
    print "plot loss history : "
    plt.plot(best_loss_history)
    plt.xlabel('iteration')
    plt.ylabel('loss')
    plt.title('loss history')
    plt.xscale('log')
    plt.yscale('log')
    plt.show()

    # # recognize the kaggle test set and write the results to a file
    # print "loading kaggle test set : "
    # kaggle_test_filename = '/home/fuyan/kaggle/dight_recognizer/data/test.csv'
    # kaggle_test_data = pd.read_csv(kaggle_test_filename)


if __name__ == '__main__':
    digit_recognizer(500, 250)
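The commented-out block above stops after loading the Kaggle test set. A hypothetical completion (illustrative only: it assumes the code runs inside digit_recognizer() after best_net is selected, so best_net and train_average are in scope; ImageId/Label is the Kaggle digit-recognizer submission format):

kaggle_test_filename = '/home/fuyan/kaggle/dight_recognizer/data/test.csv'
kaggle_test_data = np.array(pd.read_csv(kaggle_test_filename)).astype(float)
kaggle_test_data -= train_average         # apply the same centering as the training data
predictions = best_net.predict(kaggle_test_data)
submission = pd.DataFrame({'ImageId' : np.arange(1, len(predictions) + 1),
                           'Label' : predictions})
submission.to_csv('submission.csv', index = False)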

neural_network.py

+223

@@ -0,0 +1,223 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-

# first neural network

from classifier import Classifier

import numpy as np


class Neural_Network(Classifier):

    # 3-layer neural network with ReLU activation functions
    def __init__(self, feature_num, layer_1_size, layer_2_size, classifications_num):
        self.feature_number = feature_num
        self.hidden_1_size = layer_1_size
        self.hidden_2_size = layer_2_size
        self.classifications_number = classifications_num

        # init the network parameters: normally distributed weights scaled by
        # sqrt(2 / fan_in) (He initialization), small constant biases
        self.parameter = {}
        self.parameter['W1'] = np.random.randn(self.feature_number, self.hidden_1_size) * np.sqrt(2.0 / self.feature_number)
        self.parameter['W2'] = np.random.randn(self.hidden_1_size, self.hidden_2_size) * np.sqrt(2.0 / self.hidden_1_size)
        self.parameter['W3'] = np.random.randn(self.hidden_2_size, self.classifications_number) * np.sqrt(2.0 / self.hidden_2_size)
        self.parameter['B1'] = np.ones(self.hidden_1_size) * 0.01
        self.parameter['B2'] = np.ones(self.hidden_2_size) * 0.01
        self.parameter['B3'] = np.ones(self.classifications_number) * 0.01

    def copy(self):
        # NOTE: returns a freshly initialized network with the same
        # architecture, not a copy of the trained parameters
        nn = Neural_Network(self.feature_number, self.hidden_1_size, self.hidden_2_size, self.classifications_number)
        return nn

    def train( self,
               train_set,
               label_set,
               learning_rate,
               learning_rate_decay_number,
               regularization_strengths,
               iters_number,
               batch_number = -1,
               n = 1000,
               verbose = False ):

        label_set = self._normalize_labels(label_set)

        train_number, feature_number = train_set.shape

        loss_history = []

        # start training
        print "neural network is training : "
        for it in xrange(1, iters_number + 1):

            # sample a minibatch for SGD; replace = True means an example
            # may appear more than once within a batch
            if batch_number != -1:
                indices = np.random.choice(train_number, size = batch_number, replace = True)
                train_batch = train_set[indices]
                label_batch = label_set[indices]
            else:
                train_batch = train_set
                label_batch = label_set

            loss, grads = self.loss(train_batch, label_batch, regularization_strengths)
            loss_history.append(loss)

            # update W and B
            for parameter_temp in self.parameter.keys():
                self.parameter[parameter_temp] -= learning_rate * grads[parameter_temp]

            # print the loss at iterations 1000, 2000, 3000, ...
            if verbose and it % 1000 == 0:
                print "iteration " + str(it) + " loss is : " + str(loss)

            # decay the learning rate every n iterations
            if it % n == 0:
                learning_rate *= learning_rate_decay_number

        return np.array(loss_history)

    def predict(self, data):
        data_scores = self.predict_scores(data)

        label_predict = np.argmax(data_scores, axis = 1)
        return np.vectorize(self.to_label)(label_predict)

    def predict_scores(self, data):
        W1, W2, W3, B1, B2, B3 = ( self.parameter['W1'],
                                   self.parameter['W2'],
                                   self.parameter['W3'],
                                   self.parameter['B1'],
                                   self.parameter['B2'],
                                   self.parameter['B3'], )
        # two ReLU hidden layers followed by a linear output layer
        scores = np.maximum(0,
                            np.maximum(0,
                                       data.dot(W1) + B1).dot(W2) + B2).dot(W3) + B3
        return scores
    def loss(self, X, y, reg = 0):
        N, _ = X.shape

        W1, b1, W2, b2, W3, b3 = (self.parameter['W1'],
                                  self.parameter['B1'],
                                  self.parameter['W2'],
                                  self.parameter['B2'],
                                  self.parameter['W3'],
                                  self.parameter['B3'])

        # forward pass: compute the class scores
        h1_scores = X.dot(W1) + b1
        h1_relu = np.maximum(0, h1_scores)
        h2_scores = h1_relu.dot(W2) + b2
        h2_relu = np.maximum(0, h2_scores)
        scores = h2_relu.dot(W3) + b3

        # softmax cross-entropy loss with L2 regularization
        unnormalized_probs = np.exp(scores)
        normalizer = np.sum( unnormalized_probs, axis = 1 ).reshape(-1, 1)
        probs = unnormalized_probs / normalizer
        correct_label_probs = probs[np.arange(N), y]

        loss = np.sum( -np.log(correct_label_probs) )
        loss /= N
        loss += 0.5 * reg * ( np.sum(W1*W1) + np.sum(W2*W2) + np.sum(W3*W3) )

        # backward pass: for softmax cross-entropy,
        # d(loss)/d(scores) = (probs - one_hot(y)) / N
        dscores = probs
        dscores[np.arange(N), y] -= 1
        dscores /= N

        db3 = np.sum(dscores, axis = 0)
        dW3 = h2_relu.T.dot(dscores)
        dW3 += reg * W3

        dh2_relu = dscores.dot(W3.T)
        dh2_scores = (h2_scores > 0).astype(float) * dh2_relu   # ReLU gate

        db2 = np.sum(dh2_scores, axis = 0)
        dW2 = h1_relu.T.dot(dh2_scores)
        dW2 += reg * W2

        dh1_relu = dh2_scores.dot(W2.T)
        dh1_scores = (h1_scores > 0).astype(float) * dh1_relu   # ReLU gate

        db1 = np.sum(dh1_scores, axis = 0)
        dW1 = X.T.dot(dh1_scores)
        dW1 += reg * W1

        grads = {'W1' : dW1,
                 'W2' : dW2,
                 'W3' : dW3,
                 'B1' : db1,
                 'B2' : db2,
                 'B3' : db3 }

        return loss, grads
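A quick sanity check for the backward pass above is a numerical gradient check on a tiny network: each analytic parameter gradient from loss() can be compared against a centered finite difference of the loss. A minimal sketch (illustrative only, not part of the commit; the sizes, seed, and step h are arbitrary):

import numpy as np
from neural_network import Neural_Network

np.random.seed(0)
net = Neural_Network(10, 7, 5, 3)       # tiny net: 10 features, 3 classes
X = np.random.randn(20, 10)
y = np.random.randint(0, 3, size = 20)

_, grads = net.loss(X, y, reg = 0.1)    # analytic gradients

h = 1e-5
for name in ['W1', 'W2', 'W3', 'B1', 'B2', 'B3']:
    param = net.parameter[name]
    idx = tuple(np.random.randint(d) for d in param.shape)
    old = param[idx]
    param[idx] = old + h
    loss_plus, _ = net.loss(X, y, reg = 0.1)
    param[idx] = old - h
    loss_minus, _ = net.loss(X, y, reg = 0.1)
    param[idx] = old                    # restore the parameter
    numeric = (loss_plus - loss_minus) / (2 * h)
    print name, numeric, grads[name][idx]   # the two numbers should agree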

0 commit comments