nn.h
// Classes for creating layers and a neural network.
#ifndef _NN
#define _NN
#include <ctime>
#include <Eigen/Core>
#include <iostream>
#include <random>
#include <vector>
enum class LayerParams { WEIGHTS, BIAS };
struct LayerGradients
{
Eigen::MatrixXd W;
Eigen::VectorXd b;
Eigen::MatrixXd input;
};
struct LayerUpdate
{
Eigen::MatrixXd W;
Eigen::VectorXd b;
};
// Base class for dense layers.
// Weights are initialized with Glorot uniform initializer.
// Bias is initialized to zeros.
class Layer
{
Eigen::MatrixXd _W;
Eigen::VectorXd _b;
Layer();
protected:
bool _training;
Layer( unsigned int input_size, unsigned int output_size );
public:
virtual ~Layer() {}
const Eigen::MatrixXd & W() const { return _W; }
const Eigen::VectorXd & b() const { return _b; }
// Return layer output for given input.
virtual Eigen::MatrixXd forward_pass( const Eigen::MatrixXd &) const = 0;
// Return gradients with respect to layer weights, bias, and input.
virtual LayerGradients backward_pass( const Eigen::MatrixXd & , const Eigen::MatrixXd &, const Eigen::MatrixXd & ) const = 0;
// Add update to WEIGHTS or BIAS.
void update( LayerParams lp, const Eigen::MatrixXd & update );
// Set flag to designate layer is in training phase.
void training( bool flag ) { _training = flag; }
};
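// Illustrative sketch only (not part of this header): one way the Glorot
// uniform initialization described above could be written with Eigen. The
// weight shape (output_size x input_size), the RNG choice, and the helper
// name are assumptions; the Glorot limit is sqrt(6 / (fan_in + fan_out)).
// Requires <cmath> for std::sqrt.
#if 0
inline Eigen::MatrixXd glorot_uniform( unsigned int input_size, unsigned int output_size )
{
    const double limit = std::sqrt( 6.0 / (input_size + output_size) );
    std::mt19937 gen( static_cast<unsigned int>( std::time(nullptr) ) );
    std::uniform_real_distribution<double> dist( -limit, limit );
    // Fill an output_size x input_size matrix with draws from U(-limit, limit).
    return Eigen::MatrixXd::NullaryExpr( output_size, input_size,
                                         [&]() { return dist(gen); } );
}
#endif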
// Dense layer with relu activation.
class Hidden : public Layer
{
Hidden();
public:
Hidden( unsigned int input_size, unsigned int output_size ) :
    Layer(input_size, output_size) {}
// Return layer output for given input.
Eigen::MatrixXd forward_pass( const Eigen::MatrixXd & input ) const;
// Return gradients with respect to layer weights, bias, and input.
LayerGradients backward_pass( const Eigen::MatrixXd & input, const Eigen::MatrixXd & output, const Eigen::MatrixXd & upstream_gradient ) const;
};
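// Illustrative sketch only (not part of this header): the ReLU forward and
// backward passes a Hidden layer like the one above might perform. Assumes
// samples are stored as columns of `input`, W is (output_size x input_size),
// and b is broadcast over columns; the layout actually used by this header
// may differ.
#if 0
// Forward: affine transform followed by ReLU.
Eigen::MatrixXd pre_activation = (W * input).colwise() + b;
Eigen::MatrixXd output = pre_activation.cwiseMax(0.0);

// Backward: zero the upstream gradient where the ReLU was inactive, then
// form the gradients returned in LayerGradients.
Eigen::MatrixXd mask = ((output.array() > 0.0).cast<double>()).matrix();
Eigen::MatrixXd delta = upstream_gradient.cwiseProduct(mask);
LayerGradients grads;
grads.W = delta * input.transpose();   // dL/dW
grads.b = delta.rowwise().sum();       // dL/db
grads.input = W.transpose() * delta;   // dL/dinput, passed to the previous layer
#endif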
// Dense layer with softmax activation.
class Softmax : public Layer
{
Softmax();
public:
Softmax( unsigned int input_size, unsigned int output_size ) :
    Layer(input_size, output_size) {}
// Return softmax conditional probabilities for given input.
Eigen::MatrixXd forward_pass( const Eigen::MatrixXd & input ) const;
// Return gradients of cross entropy loss function with respect to
// layer weights, bias, and input.
LayerGradients backward_pass( const Eigen::MatrixXd & input, const Eigen::MatrixXd & probs, const Eigen::MatrixXd & true_probs ) const;
};
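// Illustrative sketch only (not part of this header): a numerically stable
// softmax and the cross entropy gradient a Softmax layer like the one above
// could use. Assumes samples are stored as columns; with cross entropy loss
// the gradient at the pre-activation simplifies to (probs - true_probs).
// Averaging over the batch is an assumption.
#if 0
// Forward: shift logits by the per-column max before exponentiating.
Eigen::MatrixXd logits = (W * input).colwise() + b;
Eigen::RowVectorXd col_max = logits.colwise().maxCoeff();
Eigen::MatrixXd shifted = logits.rowwise() - col_max;
Eigen::MatrixXd exp_scores = shifted.array().exp().matrix();
Eigen::RowVectorXd col_sum = exp_scores.colwise().sum();
Eigen::MatrixXd probs = (exp_scores.array().rowwise() / col_sum.array()).matrix();

// Backward: softmax combined with cross entropy gives a simple gradient at the logits.
double batch = static_cast<double>(input.cols());
Eigen::MatrixXd delta = (probs - true_probs) / batch;
LayerGradients grads;
grads.W = delta * input.transpose();
grads.b = delta.rowwise().sum();
grads.input = W.transpose() * delta;
#endif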
// Dropout layer that applies inverted dropout if training flag is set
// (see `Layer::training()`).
class Dropout : public Layer
{
float _drop_rate;
Dropout();
public:
Dropout( float drop_rate ) : Layer(0, 0), _drop_rate(drop_rate) {}
Eigen::MatrixXd forward_pass( const Eigen::MatrixXd & input ) const;
LayerGradients backward_pass( const Eigen::MatrixXd & input, const Eigen::MatrixXd & output, const Eigen::MatrixXd & upstream_gradient ) const;
};
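// Illustrative sketch only (not part of this header): the inverted dropout
// referred to above. During training each unit is dropped with probability
// _drop_rate and survivors are scaled by 1 / (1 - _drop_rate), so inference
// needs no rescaling. Building the mask from Eigen::ArrayXXd::Random
// (uniform on [-1, 1]) is an assumption.
#if 0
Eigen::MatrixXd output;
if (_training)
{
    double keep_rate = 1.0 - _drop_rate;
    // Map Random()'s [-1, 1] samples to [0, 1] keep probabilities.
    Eigen::ArrayXXd uniform01 =
        (Eigen::ArrayXXd::Random(input.rows(), input.cols()) + 1.0) / 2.0;
    // Keep a unit with probability keep_rate and rescale it in one step.
    Eigen::MatrixXd mask = ((uniform01 < keep_rate).cast<double>() / keep_rate).matrix();
    output = input.cwiseProduct(mask);
}
else
{
    output = input;   // inverted dropout: the inference pass is the identity
}
#endif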
// Neural network for classification.
// Uses cross entropy loss function.
class NeuralNet
{
std::vector<Layer *> _layers;
std::vector<LayerGradients> _gradients;
NeuralNet();
friend class Optimizer;
public:
// There can be any number of non-softmax layers as long as input and
// output dimensions match. The last layer should always be of type Softmax.
// Layers are joined in the same order as arguments:
// `NeuralNet( &h1, &h2, &softmax )` results in h1 -> h2 -> softmax.
// All layers should have a base class of type Layer.
template<typename... L>
NeuralNet( L... layers ) : _layers { layers... } { _gradients.resize(_layers.size()); }
// Perform forward pass and return probability of input belonging to each class.
Eigen::MatrixXd probs( const Eigen::MatrixXd & input, bool training=false ) const;
// Perform forward pass and backward pass, then update _gradients.
void gradients( const Eigen::MatrixXd & input, const Eigen::MatrixXd & true_probs );
const std::vector<LayerGradients> & gradients() const { return _gradients; }
// Cross entropy loss.
double loss( const Eigen::MatrixXd & probs, const Eigen::MatrixXd & true_probs ) const;
// Update all parameters in all layers.
void update( const std::vector<LayerUpdate> & updates );
// Update one layer parameter.
void update( const size_t layer_index, LayerParams lp, const Eigen::MatrixXd & update );
};
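// Illustrative usage sketch only (not part of this header): how the classes
// above are meant to fit together according to the comments. The layer
// sizes, learning rate, plain gradient descent step, and the placeholder
// data `batch_inputs` / `batch_labels` (one-hot columns) are assumptions.
#if 0
Hidden h1(784, 128);
Hidden h2(128, 64);
Dropout drop(0.5f);
Softmax out(64, 10);
NeuralNet net(&h1, &h2, &drop, &out);   // h1 -> h2 -> drop -> out

// Forward pass on a batch and its cross entropy loss.
Eigen::MatrixXd probs = net.probs(batch_inputs);
double batch_loss = net.loss(probs, batch_labels);

// Forward + backward pass, then a plain gradient descent update per layer.
net.gradients(batch_inputs, batch_labels);
const std::vector<LayerGradients> & grads = net.gradients();
double lr = 0.01;
for (size_t i = 0; i < grads.size(); ++i)
{
    net.update(i, LayerParams::WEIGHTS, -lr * grads[i].W);
    net.update(i, LayerParams::BIAS,    -lr * grads[i].b);
}
#endif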
#endif