nn.cpp

#include "nn.h"
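
// Layer base class: holds the weight matrix _W and bias vector _b.
// Weights use Glorot (Xavier) uniform initialisation,
//   W_ij ~ U(-limit, limit),  limit = sqrt(6 / (fan_in + fan_out)),
// which keeps activation and gradient magnitudes roughly comparable across layers;
// biases start at zero.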
Layer::Layer( unsigned int input_size, unsigned int output_size ) : _training(false)
{
    double limit = sqrt(6. / ((double)input_size + (double)output_size)); // glorot uniform
    _W = Eigen::MatrixXd::Random(input_size, output_size).array() * limit;
    _b = Eigen::VectorXd::Zero(output_size);
}

// Apply an additive update (e.g. a gradient-descent step) to the selected parameter.
void Layer::update( LayerParams lp, const Eigen::MatrixXd & update )
{
    switch ( lp ) {
    case LayerParams::WEIGHTS:
        _W.array() += update.array();
        break;
    case LayerParams::BIAS:
        _b += update;
        break;
    }
}

Eigen::MatrixXd Hidden::forward_pass( const Eigen::MatrixXd & input ) const
{
    return ((input * W()).rowwise() + b().transpose()).array().max(0);
}

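// Backward pass for the hidden layer. With H = relu(X * W + b) and upstream
// gradient dL/dH, the ReLU zeroes gradients wherever the output was clamped to 0:
//   dM    = dL/dH masked by (H > 0)
//   dL/db = column sums of dM
//   dL/dW = X^T * dM
//   dL/dX = dM * W^T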
LayerGradients Hidden::backward_pass( const Eigen::MatrixXd & input, const Eigen::MatrixXd & output, const Eigen::MatrixXd & upstream_gradient ) const
{
    LayerGradients gradients;
    Eigen::MatrixXd dM = (output.array() > 0).select(upstream_gradient, 0); // backprop relu
    gradients.b = dM.colwise().sum();
    gradients.W = input.transpose() * dM;
    gradients.input = dM * W().transpose();
    return gradients;
}

Eigen::MatrixXd Softmax::forward_pass( const Eigen::MatrixXd & input ) const
{
    Eigen::MatrixXd M = (input * W()).rowwise() + b().transpose();
    // subtract each row's maximum score before exponentiating for numerical
    // stability; softmax is invariant to a per-row shift
    Eigen::MatrixXd exp_scores = (M.colwise() - M.rowwise().maxCoeff()).array().exp();
    return exp_scores.array().colwise() / exp_scores.rowwise().sum().array();
}

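// Backward pass for the softmax output layer, fused with the cross-entropy loss.
// For probabilities p = softmax(X * W + b) and one-hot targets y, the gradient of
// the mean cross-entropy loss with respect to the pre-softmax scores is
//   dL/dscores = (p - y) / N,   N = batch size,
// which then propagates to W, b and the layer input exactly as in a linear layer.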
LayerGradients Softmax::backward_pass( const Eigen::MatrixXd & input, const Eigen::MatrixXd & probs, const Eigen::MatrixXd & true_probs ) const
{
    LayerGradients gradients;
    // gradient of the cross-entropy loss with respect to the layer output
    // values before the softmax activation
    Eigen::MatrixXd dscores = (probs - true_probs).array() / true_probs.rows();
    gradients.b = dscores.colwise().sum();
    gradients.W = input.transpose() * dscores;
    gradients.input = dscores * W().transpose();
    return gradients;
}

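// Inverted dropout. At training time each activation is kept with probability
// (1 - _drop_rate) and the survivors are scaled by 1 / (1 - _drop_rate), so the
// expected activation matches inference, where the layer is the identity.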
Eigen::MatrixXd Dropout::forward_pass( const Eigen::MatrixXd & input ) const
{
    if ( _training )
    {
        // uniform random values in [0, 1]; entries above _drop_rate keep their activations
        Eigen::MatrixXd keep_probs(input.rows(), input.cols());
        keep_probs = keep_probs.setRandom().array().abs();
        return (keep_probs.array() > _drop_rate).select(input, 0) / (1 - _drop_rate);
    }
    return input;
}

LayerGradients Dropout::backward_pass( const Eigen::MatrixXd & input, const Eigen::MatrixXd & output, const Eigen::MatrixXd & upstream_gradient ) const
{
    LayerGradients gradients;
    if ( _training )
    {
        // pass gradients only through the units that survived the dropout mask,
        // with the same 1 / (1 - _drop_rate) scaling as the forward pass
        gradients.input = (output.array() != 0).select(upstream_gradient, 0) / (1 - _drop_rate);
        return gradients;
    }
    gradients.input = upstream_gradient;
    return gradients;
}

// Run every layer's forward pass in order and return the network output (class probabilities).
Eigen::MatrixXd NeuralNet::probs( const Eigen::MatrixXd & input, bool training ) const
{
    Eigen::MatrixXd output = input;
    for ( auto layer = _layers.begin(); layer != _layers.end(); ++layer )
    {
        (**layer).training(training);
        output = (**layer).forward_pass(output);
    }
    return output;
}

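// Full backpropagation pass. The forward pass caches every layer's input so the
// backward pass can revisit the layers in reverse order: the softmax layer is
// handed the one-hot targets, and each earlier layer receives the input gradient
// produced by the layer above it.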
void NeuralNet::gradients( const Eigen::MatrixXd & input, const Eigen::MatrixXd & true_probs )
{
    // forward pass, caching each layer's input
    std::vector<Eigen::MatrixXd> inputs;
    inputs.push_back(input);
    for ( auto layer = _layers.begin(); layer != _layers.end(); ++layer )
    {
        (**layer).training(true);
        inputs.push_back((**layer).forward_pass(inputs.back()));
    }
    // backward pass
    size_t layer_index = _layers.size() - 1;
    // softmax output layer
    _gradients.at(layer_index) = _layers.at(layer_index)->
        backward_pass(inputs.at(layer_index), inputs.back(), true_probs);
    _layers.at(layer_index)->training(false);
    // hidden layers
    while ( layer_index != 0 )
    {
        layer_index -= 1;
        _gradients.at(layer_index) = _layers.at(layer_index)->
            backward_pass(inputs.at(layer_index),
                          inputs.at(layer_index + 1),
                          _gradients.at(layer_index + 1).input);
        _layers.at(layer_index)->training(false);
    }
}

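// Mean cross-entropy loss over the batch:
//   L = -(1/N) * sum_i sum_k y_ik * log(p_ik)
// where p are the predicted probabilities and y the one-hot (or soft) targets.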
double NeuralNet::loss( const Eigen::MatrixXd & probs, const Eigen::MatrixXd & true_probs ) const
{
    return (probs.array().log() * -true_probs.array()).rowwise().sum().mean();
}

// Apply one update per layer to both weights and biases.
void NeuralNet::update( const std::vector<LayerUpdate> & updates )
{
    for ( size_t layer_index = 0; layer_index < _layers.size(); ++layer_index )
    {
        _layers.at(layer_index)->update(LayerParams::WEIGHTS, updates.at(layer_index).W);
        _layers.at(layer_index)->update(LayerParams::BIAS, updates.at(layer_index).b);
    }
}

// Apply an update to a single parameter of a single layer.
void NeuralNet::update( const size_t layer_index, LayerParams lp, const Eigen::MatrixXd & update )
{
    _layers.at(layer_index)->update(lp, update);
}
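
// Hypothetical usage sketch (not part of nn.cpp; nn.h is not shown here, so the
// construction API and gradient accessors below are assumptions):
//
//   NeuralNet net = /* build a Hidden -> Dropout -> Softmax stack as nn.h allows */;
//   for ( int epoch = 0; epoch < n_epochs; ++epoch )
//   {
//       net.gradients(X_batch, Y_onehot);               // forward + backward pass
//       std::vector<LayerUpdate> updates;                // one entry per layer,
//       /* filled with -learning_rate * the corresponding gradient */
//       net.update(updates);                             // vanilla gradient-descent step
//       double l = net.loss(net.probs(X_batch, false), Y_onehot);
//   }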