From 9864d657120a735e6d247b529f0a61ffb97a08ef Mon Sep 17 00:00:00 2001
From: Eljan Mahammadli
Date: Sat, 18 Nov 2023 20:39:57 -0500
Subject: [PATCH] support for BatchNorm1d

---
 gradipy/nn/__init__.py  |  2 +-
 gradipy/nn/modules.py   | 60 +++++++++++++++++++++++++++++++++--------
 test/test_tensor_ops.py | 35 ++++++++++++++++++++++++
 3 files changed, 85 insertions(+), 12 deletions(-)

diff --git a/gradipy/nn/__init__.py b/gradipy/nn/__init__.py
index 8544ffc..045ea05 100644
--- a/gradipy/nn/__init__.py
+++ b/gradipy/nn/__init__.py
@@ -6,4 +6,4 @@
     init_xavier_uniform,
 )
 from . import optim
-from .modules import Linear, Conv2d
+from .modules import Linear, Conv2d, BatchNorm1d
diff --git a/gradipy/nn/modules.py b/gradipy/nn/modules.py
index 8ec2785..32a298b 100644
--- a/gradipy/nn/modules.py
+++ b/gradipy/nn/modules.py
@@ -6,13 +6,16 @@
 class Module(ABC):
     def __init__(self) -> None:
-        self.parameters = []
         self.y = None
 
     @abstractmethod
     def forward(self) -> Tensor:
         pass
 
+    @abstractmethod
+    def parameters(self) -> list:
+        pass
+
     def backward(self) -> Tensor:
         self.y.backward()
 
@@ -25,17 +28,20 @@ def __call__(self, *args) -> Tensor:
 
 
 class Linear(Module):
-    def __init__(self, in_features: int, out_features: int) -> None:
+    def __init__(self, in_features: int, out_features: int, bias: bool = False) -> None:
         super().__init__()
         self.in_features = in_features
         self.out_features = out_features
+        self.bias = bias
+        # TODO: this init is hard-coded for the ReLU gain; consider using PyTorch's default init
         self.weight = init_kaiming_normal(in_features, out_features)
-        self.parameters = [self.weight]
-        self.y = None
 
     def forward(self, x: Tensor) -> Tensor:
-        self.y = x.matmul(self.weight)
-        return self.y
+        # TODO: implement bias
+        return x.matmul(self.weight)
+
+    def parameters(self) -> list:
+        return [self.weight] + ([] if self.bias is False else [self.bias])
 
 
 class Conv2d(Module):
@@ -44,8 +50,9 @@ def __init__(
         self,
         in_channels: int,
         out_channels: int,
         kernel_size: int,
-        stride: int,
-        padding: int,
+        stride: int = 1,
+        padding: int = 0,
+        bias: bool = False,
     ) -> None:
         super().__init__()
         self.in_channels = in_channels
@@ -53,10 +60,41 @@ def __init__(
         self.kernel_size = kernel_size
         self.stride = stride
         self.padding = padding
+        self.bias = bias
         # TODO: implement better init for conv2d. Is kaiming normal good enough?
+        # TODO: implement bias
         self.weight = Tensor(np.random.randn(out_channels, in_channels, kernel_size, kernel_size))
-        self.parameters = [self.weight]
 
     def forward(self, x: Tensor) -> Tensor:
-        self.y = x.conv2d(self.weight, None, self.stride, self.padding)
-        return self.y
+        return x.conv2d(self.weight, None, self.stride, self.padding)
+
+    def parameters(self) -> list:
+        return [self.weight] + ([] if self.bias is False else [self.bias])
+
+
+class BatchNorm1d(Module):
+    def __init__(self, num_features: int, eps: float = 1e-5, momentum: float = 0.1) -> None:
+        super().__init__()
+        self.eps = eps
+        self.momentum = momentum
+        self.weight = np.ones(num_features, dtype=np.float32)
+        self.bias = np.zeros(num_features, dtype=np.float32)
+        self.running_mean = np.zeros(num_features, dtype=np.float32)
+        self.running_var = np.ones(num_features, dtype=np.float32)
+        self.training = True
+
+    def forward(self, x: Tensor) -> Tensor:
+        if self.training:
+            # training: normalize with batch statistics and update the running estimates
+            xmean = x.data.mean(axis=0)
+            xvar = x.data.var(axis=0)
+            # NOTE: PyTorch updates running_var with the unbiased (ddof=1) batch variance,
+            # so running_var will differ slightly from torch.nn.BatchNorm1d.
+            self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * xmean
+            self.running_var = (1 - self.momentum) * self.running_var + self.momentum * xvar
+        else:
+            # inference: use the running estimates
+            xmean = self.running_mean
+            xvar = self.running_var
+        out = self.weight * ((x.data - xmean) / np.sqrt(xvar + self.eps)) + self.bias
+        # TODO: out is computed in numpy, so the returned Tensor has no children
+        # and gradients will not flow back to weight and bias.
+        return Tensor(out)
+
+    def parameters(self) -> list:
+        # TODO: weight and bias are plain numpy arrays; wrapping them here does not
+        # put them on the autograd graph, so they cannot be trained yet.
+        return [Tensor(self.weight), Tensor(self.bias)]
diff --git a/test/test_tensor_ops.py b/test/test_tensor_ops.py
index 563147e..8467e86 100644
--- a/test/test_tensor_ops.py
+++ b/test/test_tensor_ops.py
@@ -218,3 +218,38 @@ def test_gradipy():
 
     for x, y in zip(test_pytorch(), test_gradipy()):
         np.testing.assert_allclose(x, y)
+
+
+def test_BatchNorm1d():
+    ii = np.random.randn(32, 10).astype(np.float32)
+    num_features = ii.shape[-1]
+
+    def test_pytorch():
+        i = torch.from_numpy(ii)
+        bn = ptnn.BatchNorm1d(num_features)
+        o = bn(i)
+        return (
+            o.detach().numpy(),
+            bn.weight.detach().numpy(),
+            bn.bias.detach().numpy(),
+            bn.running_mean.numpy(),
+            # bn.running_var.numpy(),
+        )
+
+    def test_gradipy():
+        i = Tensor(ii)
+        bn = nn.BatchNorm1d(num_features)
+        o = bn(i)
+        return (
+            o.data,
+            bn.weight.data,
+            bn.bias.data,
+            bn.running_mean,
+            # bn.running_var,  # left out: PyTorch updates running_var with the unbiased
+            # (ddof=1) batch variance, while this implementation uses the biased one.
+        )
+
+    for x, y in zip(test_pytorch(), test_gradipy()):
+        np.testing.assert_allclose(x, y, atol=1e-5)
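
Usage sketch (not part of the patch): assuming the API shown above -- nn.BatchNorm1d constructed with the feature count, called directly on a Tensor, and switched to inference through its training flag -- a layer could be wired up roughly like this. The import paths, the layer sizes, and the fc/bn names are illustrative assumptions, not taken from the repository.

    import numpy as np
    import gradipy.nn as nn            # assumed import style (the tests call nn.BatchNorm1d)
    from gradipy.tensor import Tensor  # assumed import path for Tensor

    # Hypothetical layer sizes, for illustration only.
    fc = nn.Linear(784, 128)
    bn = nn.BatchNorm1d(128)           # normalizes each of the 128 features over the batch

    x = Tensor(np.random.randn(32, 784).astype(np.float32))

    # Training mode (the default): normalize with batch statistics
    # and update running_mean / running_var.
    h = bn(fc(x))

    # Inference: flip the flag by hand (this patch adds no eval() helper),
    # so forward() uses the running estimates instead of batch statistics.
    bn.training = False
    h_eval = bn(fc(x))

Note that in this patch the Tensor returned by BatchNorm1d is detached from the autograd graph (see the TODO in forward), so the sketch only exercises the forward and inference behavior.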