Skip to content

Commit

Permalink
DCGAN Improvements + Cleanup (#6)
Browse files Browse the repository at this point in the history
  • Loading branch information
shuklabhay authored Aug 7, 2024
1 parent 785880b commit 1b8c29d
Show file tree
Hide file tree
Showing 11 changed files with 284 additions and 247 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -164,4 +164,5 @@ cython_debug/
# User directories
data/
.vscode/
.DS_Store
.DS_Store
DCGAN_generated_audio.wav
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Deep Convolution Audio Generation (WIP)

Implementing Deep Convolution to generate audio using generative and variational networks
Implementing Deep Convolution to generate audio using generative and autoencoding networks

## Directories

Expand Down
Binary file not shown.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ nbconvert==7.16.4
nbformat==5.10.4
networkx==3.3
numba==0.60.0
numpy==2.0.1
numpy==1.26.4  # pinned below 2.0 for librosa compatibility
packaging==24.1
pandocfilters==1.5.1
parso==0.8.4
Expand Down
54 changes: 54 additions & 0 deletions src/dcgan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""Entry script: trains a DCGAN on pre-compiled audio data.

Loads the compiled .npy dataset, splits it 80/20 into train/validation,
builds the generator/discriminator pair, and hands everything to
`training_loop` from `dcgan_train`.
"""
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split

from dcgan_architecture import (
    BATCH_SIZE,
    Discriminator,
    Generator,
)
from dcgan_train import training_loop
from utils.helpers import (
    compiled_data_path,
    get_device,
    load_npy_data,
)

# Constants
# Generator learns at twice the discriminator rate (2e-3 vs 1e-3),
# presumably to keep the discriminator from dominating — TODO confirm tuning.
LR_G = 0.002
LR_D = 0.001

# Load data
# assumes load_npy_data returns a numeric array whose first axis indexes
# samples — verify against utils.helpers.
audio_data = load_npy_data(compiled_data_path)
audio_data = torch.FloatTensor(audio_data)
# 80/20 random train/validation split (unseeded, so the split differs per run).
train_size = int(0.8 * len(audio_data))
val_size = len(audio_data) - train_size
train_dataset, val_dataset = random_split(
    TensorDataset(audio_data), [train_size, val_size]
)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Initialize models and optimizers
generator = Generator()
discriminator = Discriminator()
# BCEWithLogitsLoss: both discriminator outputs and labels are raw logits/targets,
# so the networks must NOT apply a final sigmoid.
criterion = nn.BCEWithLogitsLoss()
# betas=(0.5, 0.999) is the customary DCGAN Adam setting.
optimizer_G = optim.Adam(generator.parameters(), lr=LR_G, betas=(0.5, 0.999))  # type: ignore
optimizer_D = optim.Adam(discriminator.parameters(), lr=LR_D, betas=(0.5, 0.999))  # type: ignore

# Module.to() moves parameters in place, so creating the optimizers first
# still leaves them holding the device-resident parameters.
device = get_device()
generator.to(device)
discriminator.to(device)


training_loop(
    generator,
    discriminator,
    train_loader,
    val_loader,
    criterion,
    optimizer_G,
    optimizer_D,
    device,
)
123 changes: 3 additions & 120 deletions src/dcgan_architecture.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
import torch
import torch.nn as nn

from utils.helpers import save_model
# sys.path.append("../utils") # get this working & fix file structure
from utils.helpers import N_CHANNELS, N_FRAMES, N_FREQ_BINS

# Constants Constants
BATCH_SIZE = 16
LATENT_DIM = 100
N_EPOCHS = 20

N_CHANNELS = 2 # Left, right
N_FRAMES = 173
N_FREQ_BINS = 513
N_EPOCHS = 10

VALIDATION_INTERVAL = int(N_EPOCHS / 2)
SAVE_INTERVAL = int(N_EPOCHS / 1)
Expand Down Expand Up @@ -94,117 +91,3 @@ def forward(self, x):
x = torch.squeeze(x)
x = torch.unsqueeze(x, 1)
return x


# Training
# Training functions
def train_epoch(
    generator,
    discriminator,
    dataloader,
    criterion,
    optimizer_G,
    optimizer_D,
    device,
    epoch,
):
    """Run one DCGAN training epoch.

    The generator is updated on every batch; the discriminator is updated
    only on every second epoch (when ``(epoch + 1) % 2 == 0``).

    Args:
        generator: network mapping latent noise to fake audio tensors.
        discriminator: network producing one logit per sample.
        dataloader: yields 1-tuples of real audio batches.
        criterion: logit-based loss (paired with BCEWithLogitsLoss at the call site).
        optimizer_G: optimizer over the generator's parameters.
        optimizer_D: optimizer over the discriminator's parameters.
        device: torch device the batches and noise are moved to.
        epoch: 0-based epoch index; gates the discriminator update.

    Returns:
        (mean generator loss, mean discriminator loss) over all batches.
        On epochs where the discriminator is skipped the second value is 0.
    """
    generator.train()
    discriminator.train()
    total_g_loss, total_d_loss = 0, 0

    for i, (real_audio_data,) in enumerate(dataloader):
        batch_size = real_audio_data.size(0)
        real_audio_data = real_audio_data.to(device)

        def smooth_labels(tensor, amount=0.1):
            # Label smoothing: add uniform noise in [0, amount) so the
            # discriminator never trains on hard 0/1 targets.
            return tensor + amount * torch.rand_like(tensor)

        real_labels = smooth_labels(torch.ones(batch_size, 1).to(device))
        fake_labels = smooth_labels(torch.zeros(batch_size, 1).to(device))

        # Train generator: maximize the discriminator's "real" response to fakes.
        optimizer_G.zero_grad()
        z = torch.randn(batch_size, LATENT_DIM, 1, 1).to(device)
        fake_audio_data = generator(z)
        g_loss = criterion(discriminator(fake_audio_data), real_labels)
        g_loss.backward()
        optimizer_G.step()
        total_g_loss += g_loss.item()

        # Train discriminator (every other epoch only, to slow it down).
        if (epoch + 1) % 2 == 0:
            optimizer_D.zero_grad()
            real_loss = criterion(discriminator(real_audio_data), real_labels)
            # detach() keeps discriminator gradients out of the generator.
            fake_loss = criterion(discriminator(fake_audio_data.detach()), fake_labels)
            d_loss = (real_loss + fake_loss) / 2
            d_loss.backward()
            optimizer_D.step()
            total_d_loss += d_loss.item()

    return total_g_loss / len(dataloader), total_d_loss / len(dataloader)


def validate(generator, discriminator, dataloader, criterion, device):
    """Compute generator/discriminator losses on a held-out loader.

    Runs both networks in eval mode under ``torch.no_grad()``; no
    parameters are updated. Unlike training, the labels here are hard
    0/1 (no smoothing).

    Returns:
        (mean generator loss, mean discriminator loss) over all batches.
    """
    generator.eval()
    discriminator.eval()
    total_g_loss, total_d_loss = 0, 0

    with torch.no_grad():
        for (real_audio_data,) in dataloader:
            batch_size = real_audio_data.size(0)
            real_audio_data = real_audio_data.to(device)

            real_labels = torch.ones(batch_size, 1).to(device)
            fake_labels = torch.zeros(batch_size, 1).to(device)

            z = torch.randn(batch_size, LATENT_DIM, 1, 1).to(device)
            fake_audio_data = generator(z)

            # Generator loss: how strongly the discriminator rates fakes as real.
            g_loss = criterion(discriminator(fake_audio_data), real_labels)
            total_g_loss += g_loss.item()
            # Discriminator loss: average of its errors on real and fake batches.
            real_loss = criterion(discriminator(real_audio_data), real_labels)
            fake_loss = criterion(discriminator(fake_audio_data), fake_labels)
            d_loss = (real_loss + fake_loss) / 2
            total_d_loss += d_loss.item()

    return total_g_loss / len(dataloader), total_d_loss / len(dataloader)


def training_loop(
    generator,
    discriminator,
    train_loader,
    val_loader,
    criterion,
    optimizer_G,
    optimizer_D,
    device,
):
    """Drive the full DCGAN training run.

    For each of ``N_EPOCHS`` epochs: train one epoch, print the training
    losses, run validation every ``VALIDATION_INTERVAL`` epochs, and save
    the generator every ``SAVE_INTERVAL`` epochs (only the generator is
    persisted — the discriminator is discarded).
    """
    for epoch in range(N_EPOCHS):
        train_g_loss, train_d_loss = train_epoch(
            generator,
            discriminator,
            train_loader,
            criterion,
            optimizer_G,
            optimizer_D,
            device,
            epoch,
        )

        print(
            f"[{epoch+1}/{N_EPOCHS}] Train - G Loss: {train_g_loss:.4f}, D Loss: {train_d_loss:.4f}"
        )

        # Validate periodically
        if (epoch + 1) % VALIDATION_INTERVAL == 0:
            val_g_loss, val_d_loss = validate(
                generator, discriminator, val_loader, criterion, device
            )
            print(
                f"------ Val ------ G Loss: {val_g_loss:.4f}, D Loss: {val_d_loss:.4f}"
            )

        # Save models periodically
        if (epoch + 1) % SAVE_INTERVAL == 0:
            save_model(generator)
2 changes: 1 addition & 1 deletion src/dcgan_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

# Initialize Generator
device = get_device()
model_path = "model/generator_final_model.pth"
model_path = "model/DCGAN_final_model.pth"
generator = Generator()
generator.load_state_dict(
torch.load(model_path, map_location=torch.device(device), weights_only=False)
Expand Down
157 changes: 112 additions & 45 deletions src/dcgan_train.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,87 @@
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split

from dcgan_architecture import (
BATCH_SIZE,
Discriminator,
Generator,
training_loop,
)
from utils.helpers import (
compiled_data_path,
get_device,
load_npy_data,
)

# Constants
LR_G = 0.002
LR_D = 0.001

# Load data
audio_data = load_npy_data(compiled_data_path)
audio_data = torch.FloatTensor(audio_data)
train_size = int(0.8 * len(audio_data))
val_size = len(audio_data) - train_size
train_dataset, val_dataset = random_split(
TensorDataset(audio_data), [train_size, val_size]
)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Initialize models and optimizers
generator = Generator()
discriminator = Discriminator()
criterion = nn.BCEWithLogitsLoss()
optimizer_G = optim.Adam(generator.parameters(), lr=LR_G, betas=(0.5, 0.999)) # type: ignore
optimizer_D = optim.Adam(discriminator.parameters(), lr=LR_D, betas=(0.5, 0.999)) # type: ignore

device = get_device()
generator.to(device)
discriminator.to(device)


training_loop(

from utils.helpers import save_model
from dcgan_architecture import LATENT_DIM


# Training schedule constants.
N_EPOCHS = 10
# Validate twice per run: halfway through and at the end.
VALIDATION_INTERVAL = int(N_EPOCHS / 2)
# Save once, after the final epoch (N_EPOCHS / 1 == N_EPOCHS).
SAVE_INTERVAL = int(N_EPOCHS / 1)


def train_epoch(
    generator,
    discriminator,
    dataloader,
    criterion,
    optimizer_G,
    optimizer_D,
    device,
    epoch,
):
    """Run one DCGAN training epoch.

    The generator is updated on every batch; the discriminator is updated
    only on every second epoch (when ``(epoch + 1) % 2 == 0``).

    Args:
        generator: network mapping latent noise to fake audio tensors.
        discriminator: network producing one logit per sample.
        dataloader: yields 1-tuples of real audio batches.
        criterion: logit-based loss (paired with BCEWithLogitsLoss at the call site).
        optimizer_G: optimizer over the generator's parameters.
        optimizer_D: optimizer over the discriminator's parameters.
        device: torch device the batches and noise are placed on.
        epoch: 0-based epoch index; gates the discriminator update.

    Returns:
        (mean generator loss, mean discriminator loss) over all batches.
        On epochs where the discriminator is skipped the second value is 0.0.
    """

    # Hoisted out of the batch loop (it was redefined every iteration).
    # Label smoothing: add uniform noise in [0, amount) so the
    # discriminator never trains on hard 0/1 targets.
    def smooth_labels(tensor, amount=0.1):
        return tensor + amount * torch.rand_like(tensor)

    generator.train()
    discriminator.train()
    total_g_loss, total_d_loss = 0.0, 0.0

    for (real_audio_data,) in dataloader:  # unused index dropped
        batch_size = real_audio_data.size(0)
        real_audio_data = real_audio_data.to(device)

        # Create label/noise tensors directly on the target device instead
        # of allocating on CPU and copying with .to(device).
        real_labels = smooth_labels(torch.ones(batch_size, 1, device=device))
        fake_labels = smooth_labels(torch.zeros(batch_size, 1, device=device))

        # Train generator: maximize the discriminator's "real" response to fakes.
        optimizer_G.zero_grad()
        z = torch.randn(batch_size, LATENT_DIM, 1, 1, device=device)
        fake_audio_data = generator(z)
        g_loss = criterion(discriminator(fake_audio_data), real_labels)
        g_loss.backward()
        optimizer_G.step()
        total_g_loss += g_loss.item()

        # Train discriminator (every other epoch only, to slow it down).
        if (epoch + 1) % 2 == 0:
            optimizer_D.zero_grad()
            real_loss = criterion(discriminator(real_audio_data), real_labels)
            # detach() keeps discriminator gradients out of the generator.
            fake_loss = criterion(discriminator(fake_audio_data.detach()), fake_labels)
            d_loss = (real_loss + fake_loss) / 2
            d_loss.backward()
            optimizer_D.step()
            total_d_loss += d_loss.item()

    return total_g_loss / len(dataloader), total_d_loss / len(dataloader)


def validate(generator, discriminator, dataloader, criterion, device):
    """Compute generator/discriminator losses on a held-out loader.

    Both networks run in eval mode under ``torch.no_grad()``; nothing is
    updated. Labels here are hard 0/1 (no smoothing, unlike training).

    Returns:
        (mean generator loss, mean discriminator loss) over all batches.
    """
    generator.eval()
    discriminator.eval()
    g_running = 0.0
    d_running = 0.0

    with torch.no_grad():
        for (real_batch,) in dataloader:
            n = real_batch.size(0)
            real_batch = real_batch.to(device)

            ones = torch.ones(n, 1).to(device)
            zeros = torch.zeros(n, 1).to(device)

            noise = torch.randn(n, LATENT_DIM, 1, 1).to(device)
            generated = generator(noise)

            # Generator: how strongly the discriminator rates fakes as real.
            g_running += criterion(discriminator(generated), ones).item()
            # Discriminator: average of its errors on real and fake batches.
            loss_on_real = criterion(discriminator(real_batch), ones)
            loss_on_fake = criterion(discriminator(generated), zeros)
            d_running += ((loss_on_real + loss_on_fake) / 2).item()

    n_batches = len(dataloader)
    return g_running / n_batches, d_running / n_batches


def training_loop(
generator,
discriminator,
train_loader,
Expand All @@ -51,4 +90,32 @@
optimizer_G,
optimizer_D,
device,
)
):
for epoch in range(N_EPOCHS):
train_g_loss, train_d_loss = train_epoch(
generator,
discriminator,
train_loader,
criterion,
optimizer_G,
optimizer_D,
device,
epoch,
)

print(
f"[{epoch+1}/{N_EPOCHS}] Train - G Loss: {train_g_loss:.4f}, D Loss: {train_d_loss:.4f}"
)

# Validate periodically
if (epoch + 1) % VALIDATION_INTERVAL == 0:
val_g_loss, val_d_loss = validate(
generator, discriminator, val_loader, criterion, device
)
print(
f"------ Val ------ G Loss: {val_g_loss:.4f}, D Loss: {val_d_loss:.4f}"
)

# Save models periodically
if (epoch + 1) % SAVE_INTERVAL == 0:
save_model(generator, "DCGAN_final_model")
Loading

0 comments on commit 1b8c29d

Please sign in to comment.