Adding SGD, Adam and RMSProp optimizers #42

Merged
2 commits merged on Aug 10, 2017
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -19,6 +19,7 @@ target_sources(afml
src/nn/Modules/Module.cpp
src/nn/Modules/Dropout.cpp
src/nn/Init.cpp
src/optim/Optimizers.cpp
)

target_include_directories(afml
2 changes: 1 addition & 1 deletion examples/CMakeLists.txt
@@ -13,6 +13,6 @@ endfunction(build_example)
# build_example(Activations.cpp)
# build_example(FFNet.cpp)
# build_example(Node.cpp)
build_example(perceptron.cpp)
build_example(xor.cpp)
# build_example(Weights.cpp)
build_example(autograd.cpp)
50 changes: 35 additions & 15 deletions examples/perceptron.cpp → examples/xor.cpp
@@ -9,16 +9,29 @@

#include <af/autograd.h>
#include <af/nn.h>
#include <af/optim.h>

#include <string>
#include <memory>

using namespace af;
using namespace af::nn;
using namespace af::autograd;

int main()
int main(int argc, const char **args)
{
int optim_mode = 0;
std::string optimizer_arg = (argc > 1) ? std::string(args[1]) : ""; // default to SGD when no flag is given
if (optimizer_arg == "--adam") {
optim_mode = 1;
} else if (optimizer_arg == "--rmsprop") {
optim_mode = 2;
}

const int inputSize = 2;
const int outputSize = 1;
const double lr = 0.1;
const double lr = 0.01;
const double mu = 0.1;
const int numSamples = 4;

float hInput[] = {1, 1,
@@ -34,24 +47,35 @@ int main()
auto in = af::array(inputSize, numSamples, hInput);
auto out = af::array(outputSize, numSamples, hOutput);

nn::Sequential perceptron;
nn::Sequential model;

perceptron.add(nn::Linear(inputSize, outputSize));
perceptron.add(nn::Sigmoid());
model.add(nn::Linear(inputSize, outputSize));
model.add(nn::Sigmoid());

auto loss = nn::MeanSquaredError();

std::unique_ptr<optim::Optimizer> optim;

if (optimizer_arg == "--rmsprop") {
optim = std::unique_ptr<optim::Optimizer>(new optim::RMSPropOptimizer(model.parameters(), lr));
} else if (optimizer_arg == "--adam") {
optim = std::unique_ptr<optim::Optimizer>(new optim::AdamOptimizer(model.parameters(), lr));
} else {
optim = std::unique_ptr<optim::Optimizer>(new optim::SGDOptimizer(model.parameters(), lr, mu));
}

Variable result, l;
for (int i = 0; i < 1000; i++) {
for (int j = 0; j < numSamples; j++) {
perceptron.train();
perceptron.zeroGrad();

model.train();
optim->zeroGrad();

af::array in_j = in(af::span, j);
af::array out_j = out(af::span, j);

// Forward propagation
result = perceptron(nn::input(in_j));
result = model(nn::input(in_j));

// Calculate loss
l = loss(result, nn::noGrad(out_j));
@@ -60,18 +84,14 @@ int main()
l.backward();

// Update parameters
// TODO: Should use optimizer
for (auto &param : perceptron.parameters()) {
param.array() -= lr * param.grad().array();
param.array().eval();
}
optim->update();
}

if ((i + 1) % 100 == 0) {
perceptron.eval();
model.eval();

// Forward propagation
result = perceptron(nn::input(in));
result = model(nn::input(in));

// Calculate loss
// TODO: Use loss function
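The reworked example picks its optimizer from the command line: --adam selects Adam, --rmsprop selects RMSProp, and anything else falls back to SGD with momentum. Since all three derive from optim::Optimizer, the selection could also be folded into a small factory helper. The sketch below is a hypothetical convenience wrapper, not part of this PR; its defaults mirror the lr and mu constants used in xor.cpp.

#include <af/autograd.h>
#include <af/optim.h>

#include <memory>
#include <string>
#include <vector>

// Hypothetical helper (not part of this PR): map a CLI flag to an optimizer.
std::unique_ptr<af::optim::Optimizer>
makeOptimizer(const std::string &flag,
              const std::vector<af::autograd::Variable> &params,
              double lr = 0.01, double mu = 0.1)
{
    using namespace af::optim;
    if (flag == "--adam")    return std::unique_ptr<Optimizer>(new AdamOptimizer(params, lr));
    if (flag == "--rmsprop") return std::unique_ptr<Optimizer>(new RMSPropOptimizer(params, lr));
    return std::unique_ptr<Optimizer>(new SGDOptimizer(params, lr, mu));
}

With an Optimizer in hand, the training loop only touches the base interface: optim->zeroGrad() before the forward pass and optim->update() after l.backward(), so swapping optimizers never changes the loop body.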
6 changes: 4 additions & 2 deletions include/af/autograd/Variable.hpp
@@ -9,14 +9,14 @@

#pragma once

#include <arrayfire.h>

#include <cstddef>
#include <functional>
#include <memory>
#include <vector>
#include <unordered_map>

#include <arrayfire.h>

namespace af {
namespace autograd {
class Variable
@@ -62,6 +62,8 @@ namespace af {

af::dim4 dims() const;

af::dtype type() const;

void zeroGrad();

void setCalcGrad(bool calc_grad);
3 changes: 2 additions & 1 deletion include/af/nn/Modules/Container.hpp
@@ -8,10 +8,11 @@
********************************************************/
#pragma once

#include <memory>
#include <af/autograd/Variable.hpp>
#include <af/nn/Modules/Module.hpp>

#include <memory>

namespace af
{
namespace nn
6 changes: 2 additions & 4 deletions include/af/nn/Modules/Module.hpp
@@ -8,11 +8,11 @@
********************************************************/
#pragma once

#include <af/autograd/Variable.hpp>

#include <string>
#include <vector>

#include <af/autograd/Variable.hpp>

namespace af
{
namespace nn
@@ -35,8 +35,6 @@ namespace af

std::vector<autograd::Variable> parameters();

void zeroGrad();

void train();

void eval();
9 changes: 9 additions & 0 deletions include/af/optim.h
@@ -0,0 +1,9 @@
/*******************************************************
* Copyright (c) 2017, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <af/optim/Optimizers.hpp>
90 changes: 90 additions & 0 deletions include/af/optim/Optimizers.hpp
@@ -0,0 +1,90 @@
/*******************************************************
* Copyright (c) 2017, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/

#pragma once

#include <af/autograd/Variable.hpp>
#include <arrayfire.h>

#include <vector>

namespace af
{
namespace optim
{

class Optimizer
{
protected:
std::vector<autograd::Variable> m_parameters;
public:

Optimizer(const std::vector<autograd::Variable> &parameters);

virtual void update() = 0;

void zeroGrad();
};

class SGDOptimizer : public Optimizer
{
bool m_use_nesterov;
double m_lr;
double m_mu;
double m_wd;
std::vector<af::array> m_velocities;
public:
SGDOptimizer(const std::vector<autograd::Variable> &parameters,
double learning_rate, double momentum = 0,
double weight_decay = 0,
bool use_nesterov = false);
void update();
};

class AdamOptimizer : public Optimizer
{
double m_lr;
double m_beta1;
double m_beta2;
double m_eps;
double m_wd;
int m_count;
std::vector<af::array> m_biased_first;
std::vector<af::array> m_biased_second;
public:
AdamOptimizer(const std::vector<autograd::Variable> &parameters,
double learning_rate,
double beta1 = 0.9,
double beta2 = 0.999,
double epsilon = 1E-8,
double weight_decay = 0);
void update();
};

class RMSPropOptimizer : public Optimizer
{
bool m_use_first;
double m_lr;
double m_rho;
double m_eps;
double m_wd;
std::vector<af::array> m_first;
std::vector<af::array> m_second;
public:
RMSPropOptimizer(const std::vector<autograd::Variable> &parameters,
double learning_rate,
double rho = 0.99,
double epsilon = 1E-8,
double weight_decay = 0,
bool use_first = false);
void update();
};

}
}
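The corresponding definitions live in src/optim/Optimizers.cpp, which this view collapses. As orientation, the member names above (velocity, biased first/second moments, rho, count) suggest the standard update rules; the sketch below is a hedged reconstruction under that assumption, not the PR's actual implementation, and it further assumes the state buffers are zero-initialized and m_count starts at 0 in the constructors.

// Hedged sketch only: the real code is in src/optim/Optimizers.cpp (not shown here).
// It would sit inside namespace af { namespace optim { ... } } and include
// <af/optim/Optimizers.hpp> plus <cmath> for std::pow.

void SGDOptimizer::update()
{
    for (size_t i = 0; i < m_parameters.size(); i++) {
        af::array grad = m_parameters[i].grad().array();
        if (m_wd != 0) grad = grad + m_wd * m_parameters[i].array();      // weight decay
        m_velocities[i] = m_mu * m_velocities[i] + grad;                  // momentum buffer
        af::array step = m_use_nesterov ? grad + m_mu * m_velocities[i]   // Nesterov look-ahead
                                        : m_velocities[i];
        m_parameters[i].array() -= m_lr * step;
        m_parameters[i].array().eval();
    }
}

void AdamOptimizer::update()
{
    m_count++;
    double correction1 = 1.0 - std::pow(m_beta1, m_count);
    double correction2 = 1.0 - std::pow(m_beta2, m_count);
    for (size_t i = 0; i < m_parameters.size(); i++) {
        af::array grad = m_parameters[i].grad().array();
        if (m_wd != 0) grad = grad + m_wd * m_parameters[i].array();
        m_biased_first[i]  = m_beta1 * m_biased_first[i]  + (1.0 - m_beta1) * grad;
        m_biased_second[i] = m_beta2 * m_biased_second[i] + (1.0 - m_beta2) * grad * grad;
        // Bias-corrected moment estimates, then the usual Adam step.
        af::array first  = m_biased_first[i]  / correction1;
        af::array second = m_biased_second[i] / correction2;
        m_parameters[i].array() -= m_lr * first / (af::sqrt(second) + m_eps);
        m_parameters[i].array().eval();
    }
}

void RMSPropOptimizer::update()
{
    for (size_t i = 0; i < m_parameters.size(); i++) {
        af::array grad = m_parameters[i].grad().array();
        if (m_wd != 0) grad = grad + m_wd * m_parameters[i].array();
        m_second[i] = m_rho * m_second[i] + (1.0 - m_rho) * grad * grad;
        af::array denom;
        if (m_use_first) {   // "centered" variant: also track the mean gradient
            m_first[i] = m_rho * m_first[i] + (1.0 - m_rho) * grad;
            // eps inside the sqrt guards against the centered variance dipping below zero
            denom = af::sqrt(m_second[i] - m_first[i] * m_first[i] + m_eps);
        } else {
            denom = af::sqrt(m_second[i]) + m_eps;
        }
        m_parameters[i].array() -= m_lr * grad / denom;
        m_parameters[i].array().eval();
    }
}

The m_count member is what drives Adam's bias correction; SGD and RMSProp need no per-step counter, only their per-parameter buffers.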
14 changes: 6 additions & 8 deletions src/autograd/Variable.cpp
@@ -107,6 +107,11 @@ namespace af {
return m_shared->m_data.dims();
}

af::dtype Variable::type() const
{
return m_shared->m_data.type();
}

void Variable::zeroGrad()
{
m_shared->m_grads.clear();
@@ -144,14 +149,7 @@ namespace af {
m_shared->m_grads.resize(1);
}

// Remove the graph if not needed
if (!retain_grad_graph) {
// This can be done by extracting af::array and ignoring everything else
auto grad_data = grad.array();
// Since there's no graph leading this, set calc_grad to false
grad = Variable(grad_data, false);
}

grad.setCalcGrad(retain_grad_graph);
m_shared->m_grads[0] = grad;
}

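The new Variable::type() accessor, together with the existing dims(), is what lets an optimizer allocate its state buffers with the same shape and dtype as the parameters it tracks. A plausible constructor body, again only a sketch since src/optim/Optimizers.cpp is not shown in this diff, could be:

// Hypothetical sketch: one zero-filled velocity buffer per parameter,
// matching shape via dims() and dtype via the new type() accessor.
SGDOptimizer::SGDOptimizer(const std::vector<autograd::Variable> &parameters,
                           double learning_rate, double momentum,
                           double weight_decay, bool use_nesterov)
    : Optimizer(parameters),
      m_use_nesterov(use_nesterov),
      m_lr(learning_rate), m_mu(momentum), m_wd(weight_decay)
{
    m_velocities.reserve(m_parameters.size());
    for (const auto &param : m_parameters) {
        m_velocities.push_back(af::constant(0, param.dims(), param.type()));
        m_velocities.back().eval();
    }
}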
7 changes: 0 additions & 7 deletions src/nn/Modules/Module.cpp
@@ -54,13 +54,6 @@ namespace af
return m_parameters;
}

void Module::zeroGrad()
{
for (auto &parameter : m_parameters) {
parameter.zeroGrad();
}
}

Variable Module::operator()(const Variable &input)
{
return this->forward(input);
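With Module::zeroGrad() removed, clearing gradients is now the optimizer's job (the declaration appears in Optimizers.hpp above), so the parameter list that gets cleared is the same one that update() steps. The definition is not visible in this diff; a minimal sketch, assuming it simply forwards to Variable::zeroGrad() the way the removed Module method did, would be:

// Minimal sketch (actual definition lives in src/optim/Optimizers.cpp, not shown):
void Optimizer::zeroGrad()
{
    for (auto &parameter : m_parameters) {
        parameter.zeroGrad();
    }
}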