From abe9d0232780efa6f68d743425e2da5d59f07641 Mon Sep 17 00:00:00 2001 From: Eljan Mahammadli Date: Sat, 25 Nov 2023 17:34:36 -0500 Subject: [PATCH] refactored alexnet and resnet --- examples/alexnet.py | 159 +++++------------------ examples/resnet.py | 41 ++++++ extra/helpers.py | 36 +++++ {examples => extra}/imagenet.py | 0 models/alexnet.py | 61 +++++++++ examples/resnet50.py => models/resnet.py | 121 +++-------------- 6 files changed, 187 insertions(+), 231 deletions(-) create mode 100644 examples/resnet.py create mode 100644 extra/helpers.py rename {examples => extra}/imagenet.py (100%) create mode 100644 models/alexnet.py rename examples/resnet50.py => models/resnet.py (70%) diff --git a/examples/alexnet.py b/examples/alexnet.py index 58d4e26..f0b682f 100644 --- a/examples/alexnet.py +++ b/examples/alexnet.py @@ -1,132 +1,41 @@ -import os, requests, argparse -from typing import Sequence -from PIL import Image -from io import BytesIO +import argparse, timeit import numpy as np -import torch -from torchvision import models, transforms from gradipy.tensor import Tensor -import gradipy.nn as nn -from .imagenet import IMAGENET_CATEGORIES - - -class AlexNet(nn.Module): - """pure gradipy implementation of AlexNet.""" - - def __init__(self, num_classes: int = 1000, dropout: float = 0.5) -> None: - super().__init__() - self.features = nn.Sequential( - nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), - nn.ReLU(), - nn.MaxPool2d(kernel_size=3, stride=2), - nn.Conv2d(64, 192, kernel_size=5, padding=2), - nn.ReLU(), - nn.MaxPool2d(kernel_size=3, stride=2), - nn.Conv2d(192, 384, kernel_size=3, padding=1), - nn.ReLU(), - nn.Conv2d(384, 256, kernel_size=3, padding=1), - nn.ReLU(), - nn.Conv2d(256, 256, kernel_size=3, padding=1), - nn.ReLU(), - nn.MaxPool2d(kernel_size=3, stride=2), - ) - self.avgpool = nn.AdaptiveAvgPool2d(6) - self.classifier = nn.Sequential( - # nn.Dropout(p=dropout), - nn.Linear(256 * 6 * 6, 4096), - nn.ReLU(), - # nn.Dropout(p=dropout), - nn.Linear(4096, 4096), - nn.ReLU(), - nn.Linear(4096, num_classes), - ) - - def forward(self, x: Tensor) -> Tensor: - x = self.features(x) - x = self.avgpool(x) - x = x.flatten() - x = self.classifier(x) - return x - - def from_pretrained(self, weights: Sequence[Tensor]) -> None: - index = 0 - trainable_layers = [ - l for l in self.features.layers + self.classifier.layers if l.name in ["Conv2d", "Linear"] - ] - for layer in trainable_layers: - layer.weight, layer.bias = weights[index], weights[index + 1] - index += 2 # weight + bias - - -def load_weights_from_pytorch(save_directory="./weights"): - # download the file if it doesn't exist - url = "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" - os.makedirs(save_directory, exist_ok=True) - filename = url.split("/")[-1] - save_path = os.path.join(save_directory, filename) - if not os.path.exists(save_path): - response = requests.get(url) - with open(save_path, "wb") as file: - file.write(response.content) - # parse it into a list of Tensors using torch - weights: list = [] - state_dict = torch.load(save_path) - for key, value in state_dict.items(): - # print(f"Key: {key}, Tensor Shape: {value.shape}") - if "bias" in key and "features" in key: - weights.append(Tensor(value.detach().numpy().reshape(-1, 1))) - elif "weight" in key and "classifier" in key: - weights.append(Tensor(value.detach().numpy().transpose(1, 0))) - else: - weights.append(Tensor(value.detach().numpy())) - return weights - - -def load_and_preprocess_image(url): - preprocess = transforms.Compose( - [ - transforms.Resize((224, 224)), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ) - response = requests.get(url) - img = Image.open(BytesIO(response.content)) - img = preprocess(img) - img = img.unsqueeze(0) # Add batch dimension - return img - - -def pytorch_alexnet(img): - alexnet = models.alexnet(pretrained=True) - alexnet.eval() # Set the model to evaluation mode - with torch.no_grad(): - logits = alexnet(img) - idx = torch.argmax(logits).item() - value = torch.max(logits).item() - cls = IMAGENET_CATEGORIES[idx] - return logits, idx, value, cls - - -def gradipy_alexnet(img): - alexnet = AlexNet() - weights = load_weights_from_pytorch() - alexnet.from_pretrained(weights) - logits = alexnet(Tensor(img.numpy())) - idx = np.argmax(logits.data, axis=1)[0] - value = np.max(logits.data, axis=1)[0] - cls = IMAGENET_CATEGORIES[idx] - return logits, idx, value, cls +from models.alexnet import AlexNet +from extra.imagenet import IMAGENET_CATEGORIES +from extra.helpers import load_and_preprocess_image if __name__ == "__main__": parser = argparse.ArgumentParser(description="Enter the URL of an image to classify.") - parser.add_argument("url", help="URL of image to classify.") + parser.add_argument("url", nargs="?", help="URL of image to classify.") + parser.add_argument("--test", action="store_true", help="Use a predefined URL for testing.") args = parser.parse_args() - img = load_and_preprocess_image(args.url) - logits, idx, value, cls = pytorch_alexnet(img) - logits_, idx_, value_, cls_ = gradipy_alexnet(img) - print(f"PyTorch: {idx=}, {value=}, {cls=}") - print(f"GradiPy: {idx_=}, {value_=}, {cls_=}") - np.testing.assert_allclose(logits.data, logits_.data, atol=1e-4) - assert idx == idx_ and int(value) == int(value_) and cls == cls_ + if args.test: + test_url = "https://images.theconversation.com/files/86272/original/image-20150624-31498-1med6rz.jpg?ixlib=rb-1.1.0&q=45&auto=format&w=926&fit=clip" + args.url = test_url + elif not args.url: + parser.error("Please provide the URL of an image to classify.") + + img = Tensor(load_and_preprocess_image(args.url)) + st = timeit.default_timer() + resnet50 = AlexNet() + resnet50.from_pretrained() + logits = resnet50(img) + idx = np.argmax(logits.data, axis=1)[0] + value = np.max(logits.data, axis=1)[0] + cls = IMAGENET_CATEGORIES[idx] + et = timeit.default_timer() + print(f"Predicted in {et-st:.3f} seconds. Idx: {idx}, Logit: {value:.3f}, Category: {cls}") + + if args.test: + expected_idx = 36 + expected_value = 24.387 + expected_cls = "terrapin" + + assert idx == expected_idx, f"Expected index: {expected_idx}, Actual index: {idx}" + assert np.isclose( + value, expected_value, atol=1e-3 + ), f"Expected value: {expected_value}, Actual value: {value}" + assert cls == expected_cls, f"Expected category: {expected_cls}, Actual category: {cls}" + print("Test passed.") diff --git a/examples/resnet.py b/examples/resnet.py new file mode 100644 index 0000000..ac6c525 --- /dev/null +++ b/examples/resnet.py @@ -0,0 +1,41 @@ +import argparse, timeit +import numpy as np +from gradipy.tensor import Tensor +from models.resnet import ResNet, Bottleneck +from extra.helpers import load_and_preprocess_image +from extra.imagenet import IMAGENET_CATEGORIES + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Enter the URL of an image to classify.") + parser.add_argument("url", nargs="?", help="URL of image to classify.") + parser.add_argument("--test", action="store_true", help="Use a predefined URL for testing.") + args = parser.parse_args() + if args.test: + test_url = "https://us.feliway.com/cdn/shop/articles/7_Reasons_Why_Humans_Cats_Are_A_Match_Made_In_Heaven-9.webp?v=1667409797" + args.url = test_url + elif not args.url: + parser.error("Please provide the URL of an image to classify.") + + img = Tensor(load_and_preprocess_image(args.url)) + st = timeit.default_timer() + resnet50 = ResNet(block=Bottleneck, layers=[3, 4, 6, 3]) + resnet50.from_pretrained() + logits = resnet50(img) + idx = np.argmax(logits.data, axis=1)[0] + value = np.max(logits.data, axis=1)[0] + cls = IMAGENET_CATEGORIES[idx] + et = timeit.default_timer() + print(f"Predicted in {et-st:.3f} seconds. Idx: {idx}, Logit: {value:.3f}, Category: {cls}") + + if args.test: + expected_idx = 281 + expected_value = 10.254 + expected_cls = "tabby" + + assert idx == expected_idx, f"Expected index: {expected_idx}, Actual index: {idx}" + assert np.isclose( + value, expected_value, atol=1e-3 + ), f"Expected value: {expected_value}, Actual value: {value}" + assert cls == expected_cls, f"Expected category: {expected_cls}, Actual category: {cls}" + print("Test passed.") diff --git a/extra/helpers.py b/extra/helpers.py new file mode 100644 index 0000000..22f2d92 --- /dev/null +++ b/extra/helpers.py @@ -0,0 +1,36 @@ +import os, requests +from gradipy.tensor import Tensor + + +def fetch(url: str, save_directory="./weights"): + """downloads pytorch triained weights""" + from torch import load + + os.makedirs(save_directory, exist_ok=True) + filename = url.split("/")[-1] + save_path = os.path.join(save_directory, filename) + if not os.path.exists(save_path): + response = requests.get(url) + with open(save_path, "wb") as file: + file.write(response.content) + return load(save_path) + + +def load_and_preprocess_image(url) -> Tensor: + """preprocess imaganet example""" + from torchvision import transforms + from PIL import Image + from io import BytesIO + + preprocess = transforms.Compose( + [ + transforms.Resize((224, 224)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + ) + response = requests.get(url) + img = Image.open(BytesIO(response.content)) + img = preprocess(img) + img = img.unsqueeze(0) # Add batch dimension + return img diff --git a/examples/imagenet.py b/extra/imagenet.py similarity index 100% rename from examples/imagenet.py rename to extra/imagenet.py diff --git a/models/alexnet.py b/models/alexnet.py new file mode 100644 index 0000000..4cd49ab --- /dev/null +++ b/models/alexnet.py @@ -0,0 +1,61 @@ +import gradipy.nn as nn +from gradipy.tensor import Tensor +from extra.helpers import fetch + + +class AlexNet(nn.Module): + """pure gradipy implementation of AlexNet.""" + + def __init__(self, num_classes: int = 1000, dropout: float = 0.5) -> None: + super().__init__() + self.features = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), + nn.ReLU(), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(64, 192, kernel_size=5, padding=2), + nn.ReLU(), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(192, 384, kernel_size=3, padding=1), + nn.ReLU(), + nn.Conv2d(384, 256, kernel_size=3, padding=1), + nn.ReLU(), + nn.Conv2d(256, 256, kernel_size=3, padding=1), + nn.ReLU(), + nn.MaxPool2d(kernel_size=3, stride=2), + ) + self.avgpool = nn.AdaptiveAvgPool2d(6) + self.classifier = nn.Sequential( + # nn.Dropout(p=dropout), + nn.Linear(256 * 6 * 6, 4096), + nn.ReLU(), + # nn.Dropout(p=dropout), + nn.Linear(4096, 4096), + nn.ReLU(), + nn.Linear(4096, num_classes), + ) + + def forward(self, x: Tensor) -> Tensor: + x = self.features(x) + x = self.avgpool(x) + x = x.flatten() + x = self.classifier(x) + return x + + def from_pretrained(self) -> None: + index, weights = 0, [] + url = "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" + for key, value in fetch(url).items(): + # print(f"Key: {key}, Tensor Shape: {value.shape}") + if "bias" in key and "features" in key: + weights.append(Tensor(value.detach().numpy().reshape(-1, 1))) + elif "weight" in key and "classifier" in key: + weights.append(Tensor(value.detach().numpy().transpose(1, 0))) + else: + weights.append(Tensor(value.detach().numpy())) + + trainable_layers = [ + l for l in self.features.layers + self.classifier.layers if l.name in ["Conv2d", "Linear"] + ] + for layer in trainable_layers: + layer.weight, layer.bias = weights[index], weights[index + 1] + index += 2 # weight + bias diff --git a/examples/resnet50.py b/models/resnet.py similarity index 70% rename from examples/resnet50.py rename to models/resnet.py index 4ffd89f..af5ebc9 100644 --- a/examples/resnet50.py +++ b/models/resnet.py @@ -1,66 +1,16 @@ -import os, requests, sys -from typing import Optional, Callable, Type, Union, List, Any, Sequence -from PIL import Image -from io import BytesIO -import numpy as np -import torch -from torchvision import transforms +from typing import Optional, Callable, Type, Union, List from gradipy.tensor import Tensor import gradipy.nn as nn import gradipy.nn.functional as F -from .imagenet import IMAGENET_CATEGORIES - - -def load_weights_from_pytorch(save_directory="./weights"): - # download the file if it doesn't exist - url = "https://download.pytorch.org/models/resnet50-0676ba61.pth" - os.makedirs(save_directory, exist_ok=True) - filename = url.split("/")[-1] - save_path = os.path.join(save_directory, filename) - if not os.path.exists(save_path): - response = requests.get(url) - with open(save_path, "wb") as file: - file.write(response.content) - weights: list = [] - state_dict = torch.load(save_path) - for key, value in state_dict.items(): - # print(f"Key: {key}, --> Tensor Shape: {value.shape}") - if "fc.weight" in key: - weights.append(Tensor(value.detach().numpy().transpose(1, 0))) - else: - weights.append(Tensor(value.detach().numpy())) - return weights - - -def load_and_preprocess_image(url): - preprocess = transforms.Compose( - [ - transforms.Resize((224, 224)), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ) - response = requests.get(url) - img = Image.open(BytesIO(response.content)) - img = preprocess(img) - img = img.unsqueeze(0) # Add batch dimension - return Tensor(img.data) +from extra.helpers import fetch def conv3x3( in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1 ) -> nn.Conv2d: """3x3 convolution with padding""" - return nn.Conv2d( - in_planes, - out_planes, - kernel_size=3, - stride=stride, - padding=dilation, - # groups=groups, - # bias=False, - # dilation=dilation, - ) + # TODO: implement dilation and groups + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=dilation, bias=False) def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d: @@ -100,11 +50,9 @@ def __init__( def forward(self, x: Tensor) -> Tensor: identity = x - out = self.conv1(x) out = self.bn1(out) out = self.relu(out) - out = self.conv2(out) out = self.bn2(out) @@ -118,12 +66,6 @@ def forward(self, x: Tensor) -> Tensor: class Bottleneck(nn.Module): - # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) - # while original implementation places the stride at the first 1x1 convolution(self.conv1) - # according to "Deep residual learning for image recognition" https://arxiv.org/abs/1512.03385. - # This variant is also known as ResNet V1.5 and improves accuracy according to - # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. - expansion: int = 4 def __init__( @@ -144,7 +86,7 @@ def __init__( # Both self.conv2 and self.downsample layers downsample the input when stride != 1 self.conv1 = conv1x1(inplanes, width) self.bn1 = norm_layer(width) - self.conv2 = conv3x3(width, width, stride) # , groups, dilation + self.conv2 = conv3x3(width, width, stride) self.bn2 = norm_layer(width) self.conv3 = conv1x1(width, planes * self.expansion) self.bn3 = norm_layer(planes * self.expansion) @@ -223,6 +165,7 @@ def __init__( self.avgpool = nn.AdaptiveAvgPool2d(1) self.fc = nn.Linear(512 * block.expansion, num_classes) + # TODO: align with torch here # for m in self.modules(): # if isinstance(m, nn.Conv2d): # nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") @@ -288,7 +231,6 @@ def _make_layer( return nn.Sequential(*layers) def forward(self, x: Tensor) -> Tensor: - # See note [TorchScript super()] x = self.conv1(x) x = self.bn1(x) x = self.relu(x) @@ -304,11 +246,15 @@ def forward(self, x: Tensor) -> Tensor: x = self.fc(x) return x - def from_pretrained(self, weights: Sequence[Tensor]) -> None: - index = 0 - layer_names = ["Conv2d", "BatchNorm2d", "Linear"] - trainable_layers = [l for l in self.modules() if l.name in layer_names] - for layer in trainable_layers: + def from_pretrained(self) -> None: + weights, index = [], 0 + url = "https://download.pytorch.org/models/resnet50-0676ba61.pth" + for key, value in fetch(url).items(): + if "fc.weight" in key: + weights.append(Tensor(value.detach().numpy().transpose(1, 0))) + else: + weights.append(Tensor(value.detach().numpy())) + for layer in self.modules(): if layer.name == "Conv2d": layer.weight = weights[index] index += 1 # weight @@ -324,40 +270,3 @@ def from_pretrained(self, weights: Sequence[Tensor]) -> None: ) layer.training = False # in inference mode this has to be False index += 4 # running_mean + running_var + weight, bias - - -def _resnet( - block: Type[Union[BasicBlock, Bottleneck]], - layers: List[int], - # weights: Optional[WeightsEnum], - # progress: bool, - **kwargs: Any, -) -> ResNet: - model = ResNet(block, layers, **kwargs) - - return model - - -if __name__ == "__main__": - url = "https://us.feliway.com/cdn/shop/articles/7_Reasons_Why_Humans_Cats_Are_A_Match_Made_In_Heaven-9.webp?v=1667409797" - img = load_and_preprocess_image(url) - print(img.shape) - resnet50 = _resnet(block=Bottleneck, layers=[3, 4, 6, 3]) - weights = load_weights_from_pytorch() - resnet50.from_pretrained(weights) - from pprint import pprint - - # pprint([i.name for i in resnet50.modules()]) - # sys.exit() - logits = resnet50(img) - debug = True - if debug: - import matplotlib.pyplot as plt - - plt.plot(logits.transpose().data) - plt.show() - idx = np.argmax(logits.data, axis=1)[0] - value = np.max(logits.data, axis=1)[0] - cls = IMAGENET_CATEGORIES[idx] - print(logits.shape) - print(idx, value, cls)