Merge branch 'main' into qol_updates

UCSD-E4E · Jul 5, 2023 · 523700f
2 parents e6a6f7c + 8f4c312
commit 523700f
Show file tree

Hide file tree

Showing 2 changed files with 52 additions and 33 deletions.
diff --git a/classification/dataset.py b/classification/dataset.py
@@ -310,10 +310,13 @@ def get_datasets(CONFIG=None):
 
     #train = train.reset_index().rename(columns={"level_1": "index"}).set_index("index").drop(columns="level_0")
     valid = data[~data.index.isin(train.index)]
-    return (
-        PyhaDF_Dataset(train, csv_file="train.csv", CONFIG=CONFIG),
-        PyhaDF_Dataset(valid, csv_file="valid.csv",train=False, CONFIG=CONFIG)
-    )
+
+    train_ds = PyhaDF_Dataset(train, csv_file="train.csv", CONFIG=CONFIG)
+    species = train_ds.get_classes()
+
+    valid_ds = PyhaDF_Dataset(valid, csv_file="valid.csv",train=False, species=species, CONFIG=CONFIG)
+    return train_ds, valid_ds
+
 
 def main():
     """

diff --git a/classification/train.py b/classification/train.py
@@ -23,6 +23,7 @@
 import torch
 import torch.nn.functional as F
 from torch.optim import Adam
+import numpy as np
 from dataset import PyhaDF_Dataset, get_datasets
 from model import BirdCLEFModel
 from utils import set_seed, print_verbose
@@ -37,6 +38,16 @@
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 wandb_run = None
 
+def check_shape(outputs, labels):
+    """
+    Raises RuntimeError if outputs.shape != labels.shape, printing both shapes first
+    """
+    if outputs.shape != labels.shape:
+        print(outputs.shape)
+        print(labels.shape)
+        raise RuntimeError("Shape diff between output of models and labels, see above and debug")
+
+
 def train(model: BirdCLEFModel,
         data_loader: PyhaDF_Dataset,
         optimizer: torch.optim.Optimizer,
@@ -57,39 +68,66 @@ def train(model: BirdCLEFModel,
     log_loss = 0
     correct = 0
     total = 0
+    mAP = 0
+
 
     for i, (mels, labels) in enumerate(data_loader):
         optimizer.zero_grad()
         mels = mels.to(device)
         labels = labels.to(device)
 
         outputs = model(mels)
-        loss = model.loss_fn(outputs, labels)
+
 
+        check_shape(outputs, labels)
+
+        loss = model.loss_fn(outputs, labels)
+
         loss.backward()
         optimizer.step()
 
         if scheduler is not None:
             scheduler.step()
 
         running_loss += loss.item()
-        total += labels.size(0)
+
+
+
+
+        metric = MultilabelAveragePrecision(num_labels=CONFIG.num_classes, average="macro")
+        batch_mAP = metric(outputs.detach().cpu(), labels.detach().cpu().long()).item()
+        # https://forums.fast.ai/t/nan-values-when-using-precision-in-multi-classification/59767/2
+        # Could be possible when model is untrained so we only have FNs
+        if np.isnan(batch_mAP):
+            batch_mAP = 0
+        mAP += batch_mAP
+
+        out_max_inx = torch.round(outputs)
+        lab_max_inx = torch.round(labels)
+        correct += (out_max_inx == lab_max_inx).sum().item()
+        total += labels.shape[0] * labels.shape[1]
 
-        correct += torch.all(torch.round(outputs).eq(labels), dim=-1).sum().item()
         log_loss += loss.item()
         log_n += 1
 
-        if i % (CONFIG.logging_freq) == 0 or i == len(data_loader) - 1:
+
+
+        if (i != 0 and i % (CONFIG.logging_freq) == 0) or i == len(data_loader) - 1:
             #Log to Weights and Biases
             wandb.log({
                 "train/loss": log_loss / log_n,
-                "train/accuracy": correct / total * 100.,
+                "train/mAP": mAP / log_n,
+                "train/accuracy": correct / total,
+
             })
-            print("Loss:", log_loss / log_n, "Accuracy:", correct / total * 100.)
+            print("Loss:", log_loss / log_n, "Accuracy:", correct / total, "mAP", mAP / log_n)
             log_loss = 0
             log_n = 0
             correct = 0
             total = 0
+            mAP = 0
+
+
         step += 1
     return running_loss/len(data_loader)
 
@@ -120,6 +158,7 @@ def valid(model: BirdCLEFModel,
 
             # argmax
             outputs = model(mels)
+            check_shape(outputs, labels)
 
             loss = model.loss_fn(outputs, labels)
 
@@ -150,27 +189,6 @@ def valid(model: BirdCLEFModel,
     return running_loss/len(data_loader), valid_map
 
 
-def test_loop(model: BirdCLEFModel,
-          data_loaders: PyhaDF_Dataset):
-    """
-    Checks to make sure shapes are correct before training
-    """
-
-    model.eval()
-    for dl in data_loaders:
-        (mels, labels) = next(iter(dl))
-
-        out = model(mels)
-
-        if out.shape != labels.shape:
-            print("Out shape:", out.shape)
-            print("Labels shape:", labels.shape)
-            raise RuntimeError("Shape diff between output of models and labels, see above and debug")
-
-    print("Test loop ran successfully")
-    del mels, out, labels
-
-
 def init_wandb(CONFIG: Dict[str, Any]):
     """ 
     Initialize the weights and biases logging
@@ -242,8 +260,6 @@ def main():
     step = 0
     best_valid_cmap = 0
 
-    test_loop(model_for_run, [train_dataloader, val_dataloader])
-
     for epoch in range(CONFIG.epochs):
         print("Epoch " + str(epoch))