support for BatchNorm1d
eljanmahammadli committed Nov 19, 2023
1 parent 6067fda commit 9864d65
Showing 3 changed files with 85 additions and 12 deletions.
2 changes: 1 addition & 1 deletion gradipy/nn/__init__.py
@@ -6,4 +6,4 @@
    init_xavier_uniform,
)
from . import optim
from .modules import Linear, Conv2d
from .modules import Linear, Conv2d, BatchNorm1d
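
With this re-export, the new layer sits next to Linear and Conv2d on the package surface. A minimal sketch of the resulting import, assuming the package is installed as gradipy (nothing beyond the re-export above is confirmed by this diff):

```python
# Sketch only: imports assume the package layout shown in this commit.
from gradipy.nn import Linear, Conv2d, BatchNorm1d

bn = BatchNorm1d(10)  # constructed the same way as in the new test below
```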
60 changes: 49 additions & 11 deletions gradipy/nn/modules.py
@@ -6,13 +6,16 @@

class Module(ABC):
    def __init__(self) -> None:
        self.parameters = []
        self.y = None

    @abstractmethod
    def forward(self) -> Tensor:
        pass

    @abstractmethod
    def parameters(self) -> list:
        pass

    def backward(self) -> Tensor:
        self.y.backward()
@@ -25,17 +25,20 @@ def __call__(self, *args) -> Tensor:


class Linear(Module):
    def __init__(self, in_features: int, out_features: int) -> None:
    def __init__(self, in_features: int, out_features: int, bias: bool = False) -> None:
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.bias = bias
        # TODO: this is fixed for ReLU; consider using PyTorch's init
        self.weight = init_kaiming_normal(in_features, out_features)
        self.parameters = [self.weight]
        self.y = None

    def forward(self, x: Tensor) -> Tensor:
        self.y = x.matmul(self.weight)
        return self.y
        # TODO: implement bias
        return x.matmul(self.weight)

    def parameters(self) -> list:
        return [self.weight] + ([] if self.bias is False else [self.bias])


class Conv2d(Module):
@@ -44,19 +44,50 @@ def __init__(
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: int,
        padding: int,
        stride: int = 1,
        padding: int = 0,
        bias: bool = False,
    ) -> None:
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.bias = bias
        # TODO: implement better init for conv2d. Is kaiming normal good enough?
        # TODO: implement bias
        self.weight = Tensor(np.random.randn(out_channels, in_channels, kernel_size, kernel_size))
        self.parameters = [self.weight]

    def forward(self, x: Tensor) -> Tensor:
        self.y = x.conv2d(self.weight, None, self.stride, self.padding)
        return self.y
        return x.conv2d(self.weight, None, self.stride, self.padding)

    def parameters(self) -> list:
        return [self.weight] + ([] if self.bias is False else [self.bias])


class BatchNorm1d(Module):
    def __init__(self, num_features: int, eps: float = 1e-5, momentum: float = 0.1) -> None:
        super().__init__()
        self.eps = eps
        self.momentum = momentum
        self.weight = np.ones(num_features, dtype=np.float32)
        self.bias = np.zeros(num_features, dtype=np.float32)
        self.running_mean = np.zeros(num_features, dtype=np.float32)
        self.running_var = np.ones(num_features, dtype=np.float32)
        self.training = True

    def forward(self, x: Tensor) -> Tensor:
        if self.training:
            xmean = x.data.mean(axis=0)
            xvar = x.data.var(axis=0)  # biased variance (np.var default ddof=0)
        else:
            xmean = self.running_mean
            xvar = self.running_var
        if self.training:
            self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * xmean
            self.running_var = (1 - self.momentum) * self.running_var + self.momentum * xvar  # PyTorch tracks the unbiased variance here
        out = self.weight * ((x.data - xmean) / np.sqrt(xvar + self.eps)) + self.bias
        return Tensor(out)  # what are the children of this tensor?

    def parameters(self) -> list:
        return [Tensor(self.weight), Tensor(self.bias)]
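
Since the diff carries no prose, here is a hedged usage sketch of the new BatchNorm1d, mirroring the construction in the test file below. The import paths and the manual bn.training = False toggle are assumptions read off this diff (no eval() helper is shown), not a documented API:

```python
# Hedged sketch of using the BatchNorm1d added in this commit.
import numpy as np
from gradipy.tensor import Tensor  # assumed location of Tensor
import gradipy.nn as nn

x = Tensor(np.random.randn(32, 10).astype(np.float32))
bn = nn.BatchNorm1d(10)

# Training mode (the default): normalize with batch statistics and
# update running_mean / running_var with momentum 0.1.
out = bn(x)
print(out.data.mean(axis=0))  # roughly zero per feature
print(bn.running_mean[:3])    # nudged toward the batch mean

# Inference: no eval() exists in this diff, so the flag is flipped directly
# and the stored running statistics are used instead of the batch ones.
bn.training = False
out_eval = bn(x)
```

The forward returns Tensor(out) built from raw NumPy data, which is presumably what the in-code question about the tensor's children refers to.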
35 changes: 35 additions & 0 deletions test/test_tensor_ops.py
@@ -218,3 +218,38 @@ def test_gradipy():

    for x, y in zip(test_pytorch(), test_gradipy()):
        np.testing.assert_allclose(x, y)


def test_BatchNorm1d():
    ii = np.random.randn(32, 10).astype(np.float32)
    num_features = ii.shape[-1]

    def test_pytorch():
        i = torch.from_numpy(ii)
        bn = ptnn.BatchNorm1d(num_features)
        o = bn(i)
        return (
            o.detach().numpy(),
            bn.weight.detach().numpy(),
            bn.bias.detach().numpy(),
            bn.running_mean.numpy(),
            # bn.running_var.numpy(),
        )

    def test_gradipy():
        i = Tensor(ii)
        bn = nn.BatchNorm1d(num_features)
        o = bn(i)
        return (
            o.data,
            bn.weight.data,
            bn.bias.data,
            bn.running_mean,
            # bn.running_var,
            # running_var differs: PyTorch updates it with the unbiased batch variance.
        )

    for x, y in zip(test_pytorch(), test_gradipy()):
        np.testing.assert_allclose(x, y, atol=1e-5)
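
The commented-out running_var comparison is the one place the two implementations knowingly disagree: PyTorch normalizes with the biased batch variance but updates running_var with the unbiased (ddof=1) estimate, while the gradipy forward reuses the biased np.var value for both. A small standalone check of that fact (the 32×10 shape just mirrors the test above):

```python
# Why the running_var assertion is commented out: PyTorch's BatchNorm1d tracks
# the unbiased batch variance in running_var, not the biased one used to normalize.
import numpy as np
import torch
import torch.nn as ptnn

ii = np.random.randn(32, 10).astype(np.float32)
bn = ptnn.BatchNorm1d(ii.shape[-1])
bn(torch.from_numpy(ii))  # one forward pass in training mode

momentum = 0.1  # PyTorch's default, same as the gradipy layer
unbiased = ii.var(axis=0, ddof=1)
biased = ii.var(axis=0)  # what the gradipy forward feeds into its running_var

# PyTorch's running_var matches the unbiased update...
np.testing.assert_allclose(
    bn.running_var.numpy(), (1 - momentum) * 1.0 + momentum * unbiased, atol=1e-5
)
# ...and not the biased one, hence the skipped assertion in the test.
assert not np.allclose(bn.running_var.numpy(), (1 - momentum) * 1.0 + momentum * biased)
```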
