diff --git a/eureka/nn.py b/eureka/nn.py
index 4f4d2e2..69b46a9 100644
--- a/eureka/nn.py
+++ b/eureka/nn.py
@@ -124,7 +124,7 @@ def __init__(self, num_features, epsilon=1e-8, affine=False):
         # For the most part of BatchNorm, you don't actually need to implement these gradient variables
         if (self.affine):
-            self.dw = np.zeros((1, num_features)), np.zeros((1, num_features))
+            self.dw, self.db = None, None
             self.vw, self.vb = np.zeros((1, num_features)), np.zeros((1, num_features))
             self.sw, self.sb = np.zeros((1, num_features)), np.zeros((1, num_features))
@@ -149,8 +149,8 @@ def forward(self, x):
     def backward(self, d_bn_out):
         # Gradient with respect to affine parameters
         if (self.affine):
-            self.dbeta = np.sum(d_bn_out, axis=0)
-            self.dgamma = np.sum(d_bn_out*self.x_hat, axis=0)
+            self.dw = np.sum(d_bn_out*self.x_hat, axis=0)
+            self.db = np.sum(d_bn_out, axis=0)
 
         # Gradient of loss with respect to BN-layer input x
         dx_hat = d_bn_out * self.w
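For context, the renamed fields hold the standard BatchNorm affine-parameter gradients: `self.dw` is the gradient with respect to the scale (gamma, the old `dgamma`) and `self.db` the gradient with respect to the shift (beta, the old `dbeta`). Below is a minimal standalone sketch of just that step; the function name, shapes, and example data are illustrative and not part of eureka/nn.py.

```python
import numpy as np

def batchnorm_affine_backward(d_bn_out, x_hat):
    """Gradients of the affine transform out = w * x_hat + b.

    d_bn_out: upstream gradient, shape (batch, num_features)
    x_hat:    normalized input saved in forward, same shape
    """
    dw = np.sum(d_bn_out * x_hat, axis=0)  # scale (gamma) gradient
    db = np.sum(d_bn_out, axis=0)          # shift (beta) gradient
    return dw, db

# Example: batch of 4 samples, 3 features
rng = np.random.default_rng(0)
x_hat = rng.standard_normal((4, 3))
d_bn_out = rng.standard_normal((4, 3))
dw, db = batchnorm_affine_backward(d_bn_out, x_hat)
print(dw.shape, db.shape)  # (3,) (3,)
```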