activations.py
import torch
from torch import nn
import torch.nn.functional as F

'''
Applies the mish function element-wise:
    mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + exp(x)))
See the Mish class below.
'''
'''
# Memory-efficient variant kept for reference: instead of letting autograd
# store every intermediate, it saves only the input and recomputes the
# gradient analytically in backward().
class MishImplementation(torch.autograd.Function):
    @staticmethod
    def forward(ctx, i):
        result = i * torch.tanh(F.softplus(i))
        ctx.save_for_backward(i)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        i, = ctx.saved_tensors  # ctx.saved_variables is deprecated
        s_i = F.softplus(i)
        e_i = torch.exp(i)
        # d/dx [x * tanh(softplus(x))] = tanh(sp) + x * sigmoid(x) * sech(sp)**2,
        # with sigmoid(x) = e^x / (e^x + 1) and sech(sp)**2 = 1 - tanh(sp)**2
        tanh_s = torch.tanh(s_i)
        return grad_output * (tanh_s + (i * e_i * (1 - tanh_s ** 2)) / (e_i + 1))


class MemoryEfficientMish(nn.Module):
    def forward(self, x):
        return MishImplementation.apply(x)
'''
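
# If the MemoryEfficientMish block above is re-enabled, its hand-written
# backward can be checked against autograd's numerical gradient. A minimal
# sketch (assuming double-precision inputs, which gradcheck's default
# tolerances require):
#   x = torch.randn(4, dtype=torch.double, requires_grad=True)
#   torch.autograd.gradcheck(MishImplementation.apply, (x,))
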
class Mish(nn.Module):
    def forward(self, x):
        return x * torch.tanh(F.softplus(x))
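

# Minimal usage sketch (illustrative only, not part of the original file):
# Mish is a drop-in replacement wherever nn.ReLU() would appear. The layer
# sizes below are arbitrary.
if __name__ == '__main__':
    model = nn.Sequential(nn.Linear(8, 16), Mish(), nn.Linear(16, 4))
    out = model(torch.randn(2, 8))
    print(out.shape)  # torch.Size([2, 4])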