-
Notifications
You must be signed in to change notification settings - Fork 82
/
Copy pathrmsprop_async.py
38 lines (28 loc) · 1.12 KB
/
rmsprop_async.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import numpy
from chainer import cuda
from chainer import optimizer
class RMSpropAsync(optimizer.GradientMethod):
    """RMSprop for asynchronous methods.

    The only difference from chainer.optimizers.RMSprop is that the epsilon
    is inside the square root: each step is ``lr * grad / sqrt(ms + eps)``
    instead of ``lr * grad / (sqrt(ms) + eps)``, where ``ms`` is the running
    mean of squared gradients kept per parameter.

    Args:
        lr: Learning rate that scales every step.
        alpha: Decay rate of the running mean of squared gradients.
        eps: Small constant added to ``ms`` before the square root is taken.
    """

    def __init__(self, lr=0.01, alpha=0.99, eps=1e-8):
        self.lr = lr
        self.alpha = alpha
        self.eps = eps

    def init_state(self, param, state):
        """Create the per-parameter state entry ``'ms'``, filled with zeros.

        The array is allocated with the array module that matches
        ``param.data`` and has the same shape and dtype as ``param.data``.
        """
        xp = cuda.get_array_module(param.data)
        state['ms'] = xp.zeros_like(param.data)

    def update_one_cpu(self, param, state):
        """Apply one update step to ``param`` using NumPy.

        Both ``state['ms']`` and ``param.data`` are modified in place.
        """
        ms = state['ms']
        grad = param.grad

        # ms <- alpha * ms + (1 - alpha) * grad^2, written as two in-place
        # operations so that the array stored in ``state`` itself is updated.
        ms *= self.alpha
        ms += (1 - self.alpha) * grad * grad
        # eps is added to ms *before* the square root (see class docstring).
        param.data -= self.lr * grad / numpy.sqrt(ms + self.eps)

    def update_one_gpu(self, param, state):
        """Apply one update step to ``param`` with an elementwise GPU kernel.

        The kernel computes the same formulas as ``update_one_cpu`` and
        writes its results to ``param.data`` and ``state['ms']``.
        """
        cuda.elementwise(
            'T grad, T lr, T alpha, T eps',
            'T param, T ms',
            '''ms = alpha * ms + (1 - alpha) * grad * grad;
               param -= lr * grad / sqrt(ms + eps);''',
            'rmsprop')(param.grad, self.lr, self.alpha, self.eps,
                       param.data, state['ms'])