From abe9d0232780efa6f68d743425e2da5d59f07641 Mon Sep 17 00:00:00 2001
From: Eljan Mahammadli <eljanmahammadli@gmail.com>
Date: Sat, 25 Nov 2023 17:34:36 -0500
Subject: [PATCH] refactored alexnet and resnet

---
 examples/alexnet.py                      | 159 +++++------------------
 examples/resnet.py                       |  41 ++++++
 extra/helpers.py                         |  36 +++++
 {examples => extra}/imagenet.py          |   0
 models/alexnet.py                        |  61 +++++++++
 examples/resnet50.py => models/resnet.py | 121 +++--------------
 6 files changed, 187 insertions(+), 231 deletions(-)
 create mode 100644 examples/resnet.py
 create mode 100644 extra/helpers.py
 rename {examples => extra}/imagenet.py (100%)
 create mode 100644 models/alexnet.py
 rename examples/resnet50.py => models/resnet.py (70%)

diff --git a/examples/alexnet.py b/examples/alexnet.py
index 58d4e26..f0b682f 100644
--- a/examples/alexnet.py
+++ b/examples/alexnet.py
@@ -1,132 +1,41 @@
-import os, requests, argparse
-from typing import Sequence
-from PIL import Image
-from io import BytesIO
+import argparse, timeit
 import numpy as np
-import torch
-from torchvision import models, transforms
 from gradipy.tensor import Tensor
-import gradipy.nn as nn
-from .imagenet import IMAGENET_CATEGORIES
-
-
-class AlexNet(nn.Module):
-    """pure gradipy implementation of AlexNet."""
-
-    def __init__(self, num_classes: int = 1000, dropout: float = 0.5) -> None:
-        super().__init__()
-        self.features = nn.Sequential(
-            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
-            nn.ReLU(),
-            nn.MaxPool2d(kernel_size=3, stride=2),
-            nn.Conv2d(64, 192, kernel_size=5, padding=2),
-            nn.ReLU(),
-            nn.MaxPool2d(kernel_size=3, stride=2),
-            nn.Conv2d(192, 384, kernel_size=3, padding=1),
-            nn.ReLU(),
-            nn.Conv2d(384, 256, kernel_size=3, padding=1),
-            nn.ReLU(),
-            nn.Conv2d(256, 256, kernel_size=3, padding=1),
-            nn.ReLU(),
-            nn.MaxPool2d(kernel_size=3, stride=2),
-        )
-        self.avgpool = nn.AdaptiveAvgPool2d(6)
-        self.classifier = nn.Sequential(
-            # nn.Dropout(p=dropout),
-            nn.Linear(256 * 6 * 6, 4096),
-            nn.ReLU(),
-            # nn.Dropout(p=dropout),
-            nn.Linear(4096, 4096),
-            nn.ReLU(),
-            nn.Linear(4096, num_classes),
-        )
-
-    def forward(self, x: Tensor) -> Tensor:
-        x = self.features(x)
-        x = self.avgpool(x)
-        x = x.flatten()
-        x = self.classifier(x)
-        return x
-
-    def from_pretrained(self, weights: Sequence[Tensor]) -> None:
-        index = 0
-        trainable_layers = [
-            l for l in self.features.layers + self.classifier.layers if l.name in ["Conv2d", "Linear"]
-        ]
-        for layer in trainable_layers:
-            layer.weight, layer.bias = weights[index], weights[index + 1]
-            index += 2  # weight + bias
-
-
-def load_weights_from_pytorch(save_directory="./weights"):
-    # download the file if it doesn't exist
-    url = "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth"
-    os.makedirs(save_directory, exist_ok=True)
-    filename = url.split("/")[-1]
-    save_path = os.path.join(save_directory, filename)
-    if not os.path.exists(save_path):
-        response = requests.get(url)
-        with open(save_path, "wb") as file:
-            file.write(response.content)
-    # parse it into a list of Tensors using torch
-    weights: list = []
-    state_dict = torch.load(save_path)
-    for key, value in state_dict.items():
-        # print(f"Key: {key}, Tensor Shape: {value.shape}")
-        if "bias" in key and "features" in key:
-            weights.append(Tensor(value.detach().numpy().reshape(-1, 1)))
-        elif "weight" in key and "classifier" in key:
-            weights.append(Tensor(value.detach().numpy().transpose(1, 0)))
-        else:
-            weights.append(Tensor(value.detach().numpy()))
-    return weights
-
-
-def load_and_preprocess_image(url):
-    preprocess = transforms.Compose(
-        [
-            transforms.Resize((224, 224)),
-            transforms.ToTensor(),
-            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-        ]
-    )
-    response = requests.get(url)
-    img = Image.open(BytesIO(response.content))
-    img = preprocess(img)
-    img = img.unsqueeze(0)  # Add batch dimension
-    return img
-
-
-def pytorch_alexnet(img):
-    alexnet = models.alexnet(pretrained=True)
-    alexnet.eval()  # Set the model to evaluation mode
-    with torch.no_grad():
-        logits = alexnet(img)
-    idx = torch.argmax(logits).item()
-    value = torch.max(logits).item()
-    cls = IMAGENET_CATEGORIES[idx]
-    return logits, idx, value, cls
-
-
-def gradipy_alexnet(img):
-    alexnet = AlexNet()
-    weights = load_weights_from_pytorch()
-    alexnet.from_pretrained(weights)
-    logits = alexnet(Tensor(img.numpy()))
-    idx = np.argmax(logits.data, axis=1)[0]
-    value = np.max(logits.data, axis=1)[0]
-    cls = IMAGENET_CATEGORIES[idx]
-    return logits, idx, value, cls
+from models.alexnet import AlexNet
+from extra.imagenet import IMAGENET_CATEGORIES
+from extra.helpers import load_and_preprocess_image
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Enter the URL of an image to classify.")
-    parser.add_argument("url", help="URL of image to classify.")
+    parser.add_argument("url", nargs="?", help="URL of image to classify.")
+    parser.add_argument("--test", action="store_true", help="Use a predefined URL for testing.")
     args = parser.parse_args()
-    img = load_and_preprocess_image(args.url)
-    logits, idx, value, cls = pytorch_alexnet(img)
-    logits_, idx_, value_, cls_ = gradipy_alexnet(img)
-    print(f"PyTorch: {idx=}, {value=}, {cls=}")
-    print(f"GradiPy: {idx_=}, {value_=}, {cls_=}")
-    np.testing.assert_allclose(logits.data, logits_.data, atol=1e-4)
-    assert idx == idx_ and int(value) == int(value_) and cls == cls_
+    if args.test:
+        # --test replaces any URL given on the command line with this fixed reference image
+        args.url = "https://images.theconversation.com/files/86272/original/image-20150624-31498-1med6rz.jpg?ixlib=rb-1.1.0&q=45&auto=format&w=926&fit=clip"
+    elif not args.url:
+        parser.error("Please provide the URL of an image to classify.")
+
+    img = Tensor(load_and_preprocess_image(args.url))
+    st = timeit.default_timer()  # timed: model construction, weight loading and the forward pass
+    alexnet = AlexNet()
+    alexnet.from_pretrained()
+    logits = alexnet(img)
+    idx = np.argmax(logits.data, axis=1)[0]
+    value = np.max(logits.data, axis=1)[0]
+    cls = IMAGENET_CATEGORIES[idx]
+    et = timeit.default_timer()
+    print(f"Predicted in {et-st:.3f} seconds. Idx: {idx}, Logit: {value:.3f}, Category: {cls}")
+
+    if args.test:  # check the prediction against the values expected for the reference image
+        expected_idx = 36
+        expected_value = 24.387
+        expected_cls = "terrapin"
+
+        assert idx == expected_idx, f"Expected index: {expected_idx}, Actual index: {idx}"
+        assert np.isclose(
+            value, expected_value, atol=1e-3
+        ), f"Expected value: {expected_value}, Actual value: {value}"
+        assert cls == expected_cls, f"Expected category: {expected_cls}, Actual category: {cls}"
+        print("Test passed.")
diff --git a/examples/resnet.py b/examples/resnet.py
new file mode 100644
index 0000000..ac6c525
--- /dev/null
+++ b/examples/resnet.py
@@ -0,0 +1,41 @@
+import argparse, timeit
+import numpy as np
+from gradipy.tensor import Tensor
+from models.resnet import ResNet, Bottleneck
+from extra.helpers import load_and_preprocess_image
+from extra.imagenet import IMAGENET_CATEGORIES
+
+
+if __name__ == "__main__":
+    # Classify the image at the given URL, or a built-in test image when --test is passed.
+    cli = argparse.ArgumentParser(description="Enter the URL of an image to classify.")
+    cli.add_argument("url", nargs="?", help="URL of image to classify.")
+    cli.add_argument("--test", action="store_true", help="Use a predefined URL for testing.")
+    args = cli.parse_args()
+    if not (args.test or args.url):
+        cli.error("Please provide the URL of an image to classify.")
+    if args.test:
+        # the predefined image wins over any URL supplied on the command line
+        args.url = "https://us.feliway.com/cdn/shop/articles/7_Reasons_Why_Humans_Cats_Are_A_Match_Made_In_Heaven-9.webp?v=1667409797"
+
+    img = Tensor(load_and_preprocess_image(args.url))
+    # The timer spans model construction, weight loading and the forward pass.
+    st = timeit.default_timer()
+    model = ResNet(block=Bottleneck, layers=[3, 4, 6, 3])
+    model.from_pretrained()
+    logits = model(img)
+    idx = np.argmax(logits.data, axis=1)[0]
+    value = np.max(logits.data, axis=1)[0]
+    cls = IMAGENET_CATEGORIES[idx]
+    et = timeit.default_timer()
+    print(f"Predicted in {et-st:.3f} seconds. Idx: {idx}, Logit: {value:.3f}, Category: {cls}")
+
+    if args.test:
+        # values expected for the predefined test image
+        expected_idx, expected_value, expected_cls = 281, 10.254, "tabby"
+
+        assert idx == expected_idx, f"Expected index: {expected_idx}, Actual index: {idx}"
+        value_matches = np.isclose(value, expected_value, atol=1e-3)
+        assert value_matches, f"Expected value: {expected_value}, Actual value: {value}"
+        assert cls == expected_cls, f"Expected category: {expected_cls}, Actual category: {cls}"
+        print("Test passed.")
diff --git a/extra/helpers.py b/extra/helpers.py
new file mode 100644
index 0000000..22f2d92
--- /dev/null
+++ b/extra/helpers.py
@@ -0,0 +1,36 @@
+import os, requests
+from gradipy.tensor import Tensor
+
+
+def fetch(url: str, save_directory="./weights"):
+    """Download `url` into `save_directory` (skipped when already cached) and `torch.load` the file."""
+    from torch import load  # local import: torch is only imported when fetch() is called
+
+    os.makedirs(save_directory, exist_ok=True)
+    save_path = os.path.join(save_directory, url.split("/")[-1])  # cache key = last URL segment
+    if not os.path.exists(save_path):
+        response = requests.get(url)
+        response.raise_for_status()  # never cache an HTTP error page as the weights file
+        with open(save_path, "wb") as file:
+            file.write(response.content)
+    return load(save_path)
+
+
+def load_and_preprocess_image(url) -> Tensor:
+    """Download the image at `url` and apply ImageNet preprocessing; returns a 1x3x224x224 batch."""
+    from torchvision import transforms
+    from PIL import Image
+    from io import BytesIO
+
+    # NOTE(review): this returns torchvision's torch tensor, not the annotated Tensor; callers wrap it.
+    preprocess = transforms.Compose(
+        [
+            transforms.Resize((224, 224)),
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+        ]
+    )
+    response = requests.get(url)
+    response.raise_for_status()  # surface HTTP errors instead of an opaque image-decoding failure
+    img = Image.open(BytesIO(response.content)).convert("RGB")  # Normalize needs exactly 3 channels
+    return preprocess(img).unsqueeze(0)  # add the batch dimension
diff --git a/examples/imagenet.py b/extra/imagenet.py
similarity index 100%
rename from examples/imagenet.py
rename to extra/imagenet.py
diff --git a/models/alexnet.py b/models/alexnet.py
new file mode 100644
index 0000000..4cd49ab
--- /dev/null
+++ b/models/alexnet.py
@@ -0,0 +1,61 @@
+import gradipy.nn as nn
+from gradipy.tensor import Tensor
+from extra.helpers import fetch
+
+
+class AlexNet(nn.Module):
+    """AlexNet assembled from gradipy layers, with weights loadable from the PyTorch checkpoint."""
+
+    def __init__(self, num_classes: int = 1000, dropout: float = 0.5) -> None:
+        """Build the feature extractor, the 6x6 adaptive average pool and the classifier head."""
+        super().__init__()
+        # NOTE: `dropout` is accepted but unused -- the nn.Dropout layers below are commented out.
+        self.features = nn.Sequential(
+            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=3, stride=2),
+            nn.Conv2d(64, 192, kernel_size=5, padding=2),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=3, stride=2),
+            nn.Conv2d(192, 384, kernel_size=3, padding=1),
+            nn.ReLU(),
+            nn.Conv2d(384, 256, kernel_size=3, padding=1),
+            nn.ReLU(),
+            nn.Conv2d(256, 256, kernel_size=3, padding=1),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=3, stride=2),
+        )
+        self.avgpool = nn.AdaptiveAvgPool2d(6)
+        self.classifier = nn.Sequential(
+            # nn.Dropout(p=dropout),
+            nn.Linear(256 * 6 * 6, 4096),
+            nn.ReLU(),
+            # nn.Dropout(p=dropout),
+            nn.Linear(4096, 4096),
+            nn.ReLU(),
+            nn.Linear(4096, num_classes),
+        )
+
+    def forward(self, x: Tensor) -> Tensor:
+        """Run features -> average pool -> flatten -> classifier and return the result."""
+        pooled = self.avgpool(self.features(x))
+        return self.classifier(pooled.flatten())
+
+    def from_pretrained(self) -> None:
+        """Fetch the PyTorch AlexNet checkpoint and copy its tensors into the Conv2d/Linear layers."""
+        url = "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth"
+        weights = []
+        # Convert every checkpoint tensor to a gradipy Tensor, reshaping/transposing where needed.
+        for key, value in fetch(url).items():
+            array = value.detach().numpy()
+            if "bias" in key and "features" in key:
+                array = array.reshape(-1, 1)  # biases of the `features` layers become columns
+            elif "weight" in key and "classifier" in key:
+                array = array.transpose(1, 0)  # weights of the `classifier` layers are transposed
+            weights.append(Tensor(array))
+
+        # Assumes the checkpoint lists weight then bias for each trainable layer, in layer order.
+        candidates = self.features.layers + self.classifier.layers
+        trainable_layers = [layer for layer in candidates if layer.name in ["Conv2d", "Linear"]]
+        for position, layer in enumerate(trainable_layers):
+            layer.weight, layer.bias = weights[2 * position], weights[2 * position + 1]
diff --git a/examples/resnet50.py b/models/resnet.py
similarity index 70%
rename from examples/resnet50.py
rename to models/resnet.py
index 4ffd89f..af5ebc9 100644
--- a/examples/resnet50.py
+++ b/models/resnet.py
@@ -1,66 +1,16 @@
-import os, requests, sys
-from typing import Optional, Callable, Type, Union, List, Any, Sequence
-from PIL import Image
-from io import BytesIO
-import numpy as np
-import torch
-from torchvision import transforms
+from typing import Optional, Callable, Type, Union, List
 from gradipy.tensor import Tensor
 import gradipy.nn as nn
 import gradipy.nn.functional as F
-from .imagenet import IMAGENET_CATEGORIES
-
-
-def load_weights_from_pytorch(save_directory="./weights"):
-    # download the file if it doesn't exist
-    url = "https://download.pytorch.org/models/resnet50-0676ba61.pth"
-    os.makedirs(save_directory, exist_ok=True)
-    filename = url.split("/")[-1]
-    save_path = os.path.join(save_directory, filename)
-    if not os.path.exists(save_path):
-        response = requests.get(url)
-        with open(save_path, "wb") as file:
-            file.write(response.content)
-    weights: list = []
-    state_dict = torch.load(save_path)
-    for key, value in state_dict.items():
-        # print(f"Key: {key}, --> Tensor Shape: {value.shape}")
-        if "fc.weight" in key:
-            weights.append(Tensor(value.detach().numpy().transpose(1, 0)))
-        else:
-            weights.append(Tensor(value.detach().numpy()))
-    return weights
-
-
-def load_and_preprocess_image(url):
-    preprocess = transforms.Compose(
-        [
-            transforms.Resize((224, 224)),
-            transforms.ToTensor(),
-            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-        ]
-    )
-    response = requests.get(url)
-    img = Image.open(BytesIO(response.content))
-    img = preprocess(img)
-    img = img.unsqueeze(0)  # Add batch dimension
-    return Tensor(img.data)
+from extra.helpers import fetch
 
 
 def conv3x3(
     in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1
 ) -> nn.Conv2d:
     """3x3 convolution with padding"""
-    return nn.Conv2d(
-        in_planes,
-        out_planes,
-        kernel_size=3,
-        stride=stride,
-        padding=dilation,
-        # groups=groups,
-        # bias=False,
-        # dilation=dilation,
-    )
+    # TODO: implement dilation and groups; for now `groups` is ignored and `dilation` only sets padding
+    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=dilation, bias=False)
 
 
 def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
@@ -100,11 +50,9 @@ def __init__(
 
     def forward(self, x: Tensor) -> Tensor:
         identity = x
-
         out = self.conv1(x)
         out = self.bn1(out)
         out = self.relu(out)
-
         out = self.conv2(out)
         out = self.bn2(out)
 
@@ -118,12 +66,6 @@ def forward(self, x: Tensor) -> Tensor:
 
 
 class Bottleneck(nn.Module):
-    # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
-    # while original implementation places the stride at the first 1x1 convolution(self.conv1)
-    # according to "Deep residual learning for image recognition" https://arxiv.org/abs/1512.03385.
-    # This variant is also known as ResNet V1.5 and improves accuracy according to
-    # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
-
     expansion: int = 4
 
     def __init__(
@@ -144,7 +86,7 @@ def __init__(
         # Both self.conv2 and self.downsample layers downsample the input when stride != 1
         self.conv1 = conv1x1(inplanes, width)
         self.bn1 = norm_layer(width)
-        self.conv2 = conv3x3(width, width, stride)  # , groups, dilation
+        self.conv2 = conv3x3(width, width, stride)
         self.bn2 = norm_layer(width)
         self.conv3 = conv1x1(width, planes * self.expansion)
         self.bn3 = norm_layer(planes * self.expansion)
@@ -223,6 +165,7 @@ def __init__(
         self.avgpool = nn.AdaptiveAvgPool2d(1)
         self.fc = nn.Linear(512 * block.expansion, num_classes)
 
+        # TODO: align with torch here -- port the weight initialization that is commented out below
         # for m in self.modules():
         #     if isinstance(m, nn.Conv2d):
         #         nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
@@ -288,7 +231,6 @@ def _make_layer(
         return nn.Sequential(*layers)
 
     def forward(self, x: Tensor) -> Tensor:
-        # See note [TorchScript super()]
         x = self.conv1(x)
         x = self.bn1(x)
         x = self.relu(x)
@@ -304,11 +246,15 @@ def forward(self, x: Tensor) -> Tensor:
         x = self.fc(x)
         return x
 
-    def from_pretrained(self, weights: Sequence[Tensor]) -> None:
-        index = 0
-        layer_names = ["Conv2d", "BatchNorm2d", "Linear"]
-        trainable_layers = [l for l in self.modules() if l.name in layer_names]
-        for layer in trainable_layers:
+    def from_pretrained(self) -> None:
+        weights, index = [], 0
+        url = "https://download.pytorch.org/models/resnet50-0676ba61.pth"
+        for key, value in fetch(url).items():
+            if "fc.weight" in key:
+                weights.append(Tensor(value.detach().numpy().transpose(1, 0)))
+            else:
+                weights.append(Tensor(value.detach().numpy()))
+        for layer in self.modules():
             if layer.name == "Conv2d":
                 layer.weight = weights[index]
                 index += 1  # weight
@@ -324,40 +270,3 @@ def from_pretrained(self, weights: Sequence[Tensor]) -> None:
                 )
                 layer.training = False  # in inference mode this has to be False
                 index += 4  # running_mean + running_var + weight, bias
-
-
-def _resnet(
-    block: Type[Union[BasicBlock, Bottleneck]],
-    layers: List[int],
-    # weights: Optional[WeightsEnum],
-    # progress: bool,
-    **kwargs: Any,
-) -> ResNet:
-    model = ResNet(block, layers, **kwargs)
-
-    return model
-
-
-if __name__ == "__main__":
-    url = "https://us.feliway.com/cdn/shop/articles/7_Reasons_Why_Humans_Cats_Are_A_Match_Made_In_Heaven-9.webp?v=1667409797"
-    img = load_and_preprocess_image(url)
-    print(img.shape)
-    resnet50 = _resnet(block=Bottleneck, layers=[3, 4, 6, 3])
-    weights = load_weights_from_pytorch()
-    resnet50.from_pretrained(weights)
-    from pprint import pprint
-
-    # pprint([i.name for i in resnet50.modules()])
-    # sys.exit()
-    logits = resnet50(img)
-    debug = True
-    if debug:
-        import matplotlib.pyplot as plt
-
-        plt.plot(logits.transpose().data)
-        plt.show()
-    idx = np.argmax(logits.data, axis=1)[0]
-    value = np.max(logits.data, axis=1)[0]
-    cls = IMAGENET_CATEGORIES[idx]
-    print(logits.shape)
-    print(idx, value, cls)