# hw1_training_p1.py

# -*- coding: utf-8 -*-
"""hw1_p1.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1SUuKkJY0o3ZUER5J1hhlNgKSnfVKjjJ7
"""
# Commented out IPython magic to ensure Python compatibility.
# Import necessary packages.
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
import os
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder
from torchvision import models

# ---- Data layout and loader settings ----
batch_size = 50                    # samples per mini-batch for both loaders
data_path = './hw1_data/p1_data'   # root folder holding the train/val images
num_class = 50                     # number of class ids used in the file names
train_num_sample = 450             # training images read per class
val_num_sample = 50                # validation images read per class

# Target size handed to transforms.Resize for every pipeline below.
size = (224, 224)


def _augmented_pipeline(candidates):
    """Build a training pipeline around one list of candidate augmentations.

    For each image, ``RandomChoice`` applies a single transform picked from
    ``candidates``; the result is then resized to ``size`` and converted to a
    tensor.
    """
    return transforms.Compose([
        transforms.RandomChoice(candidates),
        transforms.Resize(size),
        transforms.ToTensor(),
    ])


# Candidate augmentations: small rotation, horizontal flip, or colour jitter.
transform_0 = [
    transforms.RandomRotation((-10, 10)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(contrast=(1, 1.5), saturation=(1, 2)),
]
# Candidate augmentations: colour jitter only.
transform_1 = [
    transforms.ColorJitter(contrast=(1, 1.5), saturation=(1, 2)),
]
# Candidate augmentations: affine warp, colour jitter, or horizontal flip.
transform_2 = [
    transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(1, 1.5)),
    transforms.ColorJitter(contrast=(1, 1.5), saturation=(1, 2)),
    transforms.RandomHorizontalFlip(p=0.5),
]

train_tfm = _augmented_pipeline(transform_0)
train_tfm_1 = _augmented_pipeline(transform_1)
train_tfm_2 = _augmented_pipeline(transform_2)

# No augmentation for validation/testing: only resize the PIL image and
# convert it to a tensor.
test_tfm = transforms.Compose([
    transforms.Resize(size),
    transforms.ToTensor(),
])

class P1Dataset(Dataset):
    """Dataset over in-memory samples ``X`` with optional labels ``Y``.

    When labels are present, indexing returns ``(transformed_sample, label)``,
    using the module-level ``train_tfm`` if ``isTrain`` is truthy and
    ``test_tfm`` otherwise. When ``Y`` is ``None``, indexing returns the raw,
    untransformed sample on its own.
    """

    def __init__(self, X, Y, isTrain):
        self.data, self.label, self.isTrain = X, Y, isTrain

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data[idx]
        # Unlabeled data is handed back untouched (no transform applied).
        if self.label is None:
            return sample
        pipeline = train_tfm if self.isTrain else test_tfm
        return pipeline(sample), self.label[idx]

class Classifier(nn.Module):
    """Residual-style CNN that maps a batch of RGB images to class logits.

    The network has four stages. Each stage widens the channels with a
    conv block (3 -> 64 -> 128 -> 256 -> 512) and then adds a two-conv
    residual branch onto that block's output, followed by ReLU. The first two
    stages end with a 2x2 max-pool. Global average pooling and a three-layer
    fully-connected head produce the logits.

    Submodule names and the layer order inside every ``nn.Sequential`` are
    kept stable so that ``state_dict`` keys do not change.

    Args:
        num_classes: Number of output logits. Defaults to 50.
    """

    def __init__(self, num_classes=50):
        super().__init__()
        self.activation = nn.ReLU()
        self.max_pool_1 = nn.MaxPool2d(2, 2, 0)

        self.cnn_layer0 = self._widen(3, 64, 0.1)
        self.cnn_layer0to1 = self._branch(64, 0.1)

        self.cnn_layer1 = self._widen(64, 128, 0.2)
        self.cnn_layer2 = self._branch(128, 0.2)

        self.cnn_layer3 = self._widen(128, 256, 0.2)
        # This branch ends with Dropout2d *before* BatchNorm2d, unlike the
        # others. The order is kept on purpose: swapping it would move the
        # BatchNorm from index 6 to index 5 and rename its state_dict keys.
        self.cnn_layer4 = self._branch(256, 0.2, dropout_before_norm=True)

        self.cnn_layer5 = self._widen(256, 512, 0.2)
        self.cnn_layer6 = self._branch(512, 0.2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc_layers = nn.Sequential(
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.Dropout(0.2),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.Dropout(0.2),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    @staticmethod
    def _widen(in_channels, out_channels, p_drop):
        """Return Conv3x3 -> BatchNorm -> Dropout2d -> ReLU (changes channel count)."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, 1, 1),
            nn.BatchNorm2d(out_channels),
            nn.Dropout2d(p_drop),
            nn.ReLU(),
        )

    @staticmethod
    def _branch(channels, p_drop, dropout_before_norm=False):
        """Return the two-conv residual branch (no trailing ReLU).

        The final ReLU is applied in ``forward`` after the skip connection is
        added. ``dropout_before_norm`` flips the order of the last two layers.
        """
        layers = [
            nn.Conv2d(channels, channels, 3, 1, 1),
            nn.BatchNorm2d(channels),
            nn.Dropout2d(p_drop),
            nn.ReLU(),
            nn.Conv2d(channels, channels, 3, 1, 1),
        ]
        if dropout_before_norm:
            layers += [nn.Dropout2d(p_drop), nn.BatchNorm2d(channels)]
        else:
            layers += [nn.BatchNorm2d(channels), nn.Dropout2d(p_drop)]
        return nn.Sequential(*layers)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Image tensor of shape ``(batch, 3, H, W)``. The spatial size is
                not fixed: it is halved by each of the two max-pools and then
                collapsed to 1x1 by adaptive average pooling.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding raw logits.
        """
        # Stage 1: 64 channels, then down-sample.
        out = self.cnn_layer0(x)
        out = self.activation(self.cnn_layer0to1(out) + out)
        out = self.max_pool_1(out)

        # Stage 2: 128 channels, then down-sample.
        out = self.cnn_layer1(out)
        out = self.activation(self.cnn_layer2(out) + out)
        out = self.max_pool_1(out)

        # Stage 3: 256 channels (no pooling).
        out = self.cnn_layer3(out)
        out = self.activation(self.cnn_layer4(out) + out)

        # Stage 4: 512 channels (no pooling).
        out = self.cnn_layer5(out)
        out = self.activation(self.cnn_layer6(out) + out)

        # Collapse the spatial dimensions and flatten to (batch, 512) for the head.
        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        return self.fc_layers(out)

def readfile():
    """Load the training and validation images into memory.

    Files are read from ``<data_path>/train_50/<class>_<index>.png`` and
    ``<data_path>/val_50/<class>_<index>.png``. For every class id in
    ``range(num_class)``, training indices run from 0 to
    ``train_num_sample - 1`` and validation indices continue from
    ``train_num_sample`` for ``val_num_sample`` files.

    Prints the number of training and validation images that were loaded.

    Returns:
        tuple: ``(train_x, train_y, val_x, val_y)`` where the ``*_x`` lists
        hold PIL images and the ``*_y`` lists hold the matching class ids.

    Raises:
        FileNotFoundError: If an expected image file is missing.
    """

    def _load_split(subdir, first_index, count):
        """Read ``count`` images per class from ``subdir``, starting at ``first_index``."""
        images, labels = [], []
        for class_id in range(num_class):
            for sample_id in range(first_index, first_index + count):
                img_path = os.path.join(data_path, subdir, f'{class_id}_{sample_id}.png')
                # Image.open is lazy and keeps the file handle open. Copying
                # inside the context manager decodes the pixels now and lets
                # the handle close, so thousands of files never stay open.
                with Image.open(img_path) as img:
                    images.append(img.copy())
                labels.append(class_id)
        return images, labels

    train_x, train_y = _load_split('train_50', 0, train_num_sample)
    # Validation file indices continue where the training indices stop.
    val_x, val_y = _load_split('val_50', train_num_sample, val_num_sample)
    print(len(train_x))
    print(len(val_x))
    return train_x, train_y, val_x, val_y

# ---------- Data ----------
train_x, train_y, val_x, val_y = readfile()
train_set = P1Dataset(train_x, train_y, True)
valid_set = P1Dataset(val_x, val_y, False)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
# Validation metrics are averaged over the whole set, so shuffling adds nothing.
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False)

# ---------- Model ----------
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)
# Fine-tune an ImageNet-pretrained ResNet-101 with a new classification head.
model = models.resnet101(pretrained=True)
fc_inputs = model.fc.in_features
# The head stays wrapped in nn.Sequential so its state_dict keys remain
# "fc.0.weight" / "fc.0.bias".
model.fc = nn.Sequential(
    nn.Linear(fc_inputs, num_class),
)
# Move to the detected device; a bare .cuda() would crash on CPU-only machines.
model = model.to(device)

# ---------- Optimisation ----------
criterion = nn.CrossEntropyLoss()
# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
n_epochs = 400
best_acc = 0.0

# ---------- Checkpoints ----------
# NOTE(review): the notebook had a commented-out "%cd .." here, so these
# relative paths may assume a particular working directory - confirm.
best_model_path = './drive/MyDrive/DLCV/hw1/new_best_model_hw1_1.ckpt'
model_path = './drive/MyDrive/DLCV/hw1/new_model_hw1_1.ckpt'
optimizer_path = './drive/MyDrive/DLCV/hw1/new_optimizer_hw1_1.ckpt'
# Uncomment to resume from the latest checkpoint:
#model.load_state_dict(torch.load(model_path))
#optimizer.load_state_dict(torch.load(optimizer_path))

# Main loop: one training pass and one validation pass per epoch. The latest
# model/optimizer state is saved every epoch, and the model is additionally
# saved to best_model_path whenever validation accuracy improves.
for epoch in range(n_epochs):
    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # Per-batch records, stored as plain Python floats.
    train_loss = []
    train_accs = []

    for imgs, labels in train_loader:
        # Move the batch to the model's device once and reuse it below.
        imgs = imgs.to(device)
        labels = labels.to(device)

        logits = model(imgs)

        # CrossEntropyLoss applies log-softmax itself, so raw logits go in.
        loss = criterion(logits, labels)

        # Clear gradients left over from the previous step, then backpropagate.
        optimizer.zero_grad()
        loss.backward()

        # Clip the gradient norms for stable training.
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        optimizer.step()

        acc = (logits.argmax(dim=-1) == labels).float().mean()

        # .item() turns the 0-dim tensors into floats so no device tensors
        # are kept alive in the lists.
        train_loss.append(loss.item())
        train_accs.append(acc.item())

    # Epoch metrics are the mean of the per-batch values.
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Eval mode disables dropout and makes batch norm use its running statistics.
    model.eval()

    valid_loss = []
    valid_accs = []

    for imgs, labels in valid_loader:
        imgs = imgs.to(device)
        labels = labels.to(device)

        # No gradients are needed for validation.
        with torch.no_grad():
            logits = model(imgs)
            loss = criterion(logits, labels)

        acc = (logits.argmax(dim=-1) == labels).float().mean()

        valid_loss.append(loss.item())
        valid_accs.append(acc.item())

    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Keep a separate copy of the best model seen so far.
    if valid_acc > best_acc:
        best_acc = valid_acc
        torch.save(model.state_dict(), best_model_path)
        print('saving model with acc {:.3f}'.format(best_acc))

    # Always save the latest state so training can be resumed.
    torch.save(optimizer.state_dict(), optimizer_path)
    torch.save(model.state_dict(), model_path)

    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")