Skip to content

Commit

Permalink
file structuring
Browse files Browse the repository at this point in the history
  • Loading branch information
shuklabhay committed Sep 22, 2024
1 parent 21a564d commit 06a8dee
Show file tree
Hide file tree
Showing 11 changed files with 82 additions and 47 deletions.
2 changes: 1 addition & 1 deletion src/data_processing/audio_processing_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import os
import random
from training_audio_information import compiled_data_path
from usage.usage_specs import compiled_data_path
from utils.signal_helpers import (
stft_and_istft,
)
Expand Down
2 changes: 1 addition & 1 deletion src/data_processing/encode_audio_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
load_loudness_data,
)
from utils.signal_helpers import encode_sample_directory
from training_audio_information import training_audio_dir, compiled_data_path
from usage.usage_specs import training_audio_dir, compiled_data_path

# Encode audio samples
if len(sys.argv) > 1:
Expand Down
3 changes: 0 additions & 3 deletions src/data_processing/training_audio_information.py

This file was deleted.

32 changes: 0 additions & 32 deletions src/generate.py

This file was deleted.

2 changes: 1 addition & 1 deletion src/stereo_sample_gan.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
load_loudness_data,
)

from data_processing.training_audio_information import compiled_data_path
from usage.usage_specs import compiled_data_path

# Constants
LR_G = 0.003
Expand Down
1 change: 1 addition & 0 deletions src/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
)
from utils.signal_helpers import graph_spectrogram


# Constants
N_EPOCHS = 8
VALIDATION_INTERVAL = 4
Expand Down
Empty file added src/usage/__init__.py
Empty file.
50 changes: 50 additions & 0 deletions src/usage/generate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import sys
import os

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


import torch
from architecture import Generator, LATENT_DIM
from usage.usage_specs import (
model_to_generate_with,
outputs_dir,
generated_audio_name,
audio_generation_count,
visualize_generated,
)
from utils.file_helpers import get_device, save_audio
from utils.signal_helpers import audio_to_norm_db, graph_spectrogram, norm_db_to_audio

# Script body: load the trained generator weights, sample a batch of latent
# vectors, and write each generated sample to a .wav file in outputs_dir
# (optionally graphing the spectrogram of every sample).

# Initialize Generator
device = get_device()

generator = Generator()
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary objects.
# Only point model_to_generate_with at checkpoints you trust; a plain
# state_dict should also load with weights_only=True -- confirm and tighten.
generator.load_state_dict(
    torch.load(
        model_to_generate_with, map_location=torch.device(device), weights_only=False
    )
)
generator.eval()

# Generate audio
z = torch.randn(audio_generation_count, LATENT_DIM, 1, 1)
with torch.no_grad():
    generated_output = generator(z)


generated_output = generated_output.squeeze()
if audio_generation_count == 1:
    # squeeze() drops every size-1 axis, including a batch axis of length 1.
    # Put the batch axis back so generated_output[i] below still selects a
    # whole sample instead of slicing into the single sample.
    generated_output = generated_output.unsqueeze(0)
generated_output = generated_output.numpy()
print("Generated output shape:", generated_output.shape)

# Visualize and save audio
for i in range(audio_generation_count):
    current_sample = generated_output[i]

    audio_info = norm_db_to_audio(current_sample)
    # Output files are numbered from 1: <generated_audio_name>-1.wav, ...
    audio_save_path = os.path.join(outputs_dir, f"{generated_audio_name}-{i + 1}.wav")

    save_audio(audio_save_path, audio_info)

    if visualize_generated:
        # Re-encode the reconstructed audio so the graph shows what was
        # actually written to disk rather than the raw generator output.
        vis_signal_after_istft = audio_to_norm_db(audio_info)
        graph_spectrogram(vis_signal_after_istft, "generated audio (after istft)")
16 changes: 16 additions & 0 deletions src/usage/usage_specs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# User-editable settings imported by the data-processing scripts, the model
# code, the utils helpers and usage/generate.py.

# Processing training data
training_audio_dir = "data/one_shots" # Your training data path
compiled_data_path = "data/compiled_data.npy" # Your compiled data/output path
training_sample_length = 0.6 # Length of each training sample, in seconds

# Saving model
outputs_dir = "outputs" # Where to save your generated audio & model
model_save_name = "StereoSampleGAN-OldKick" # What to name your model save
model_save_path = f"{outputs_dir}/{model_save_name}.pth" # Full path of the saved model weights

# Generating audio
model_to_generate_with = model_save_path # Generation model path
audio_generation_count = 2 # Audio examples to generate
generated_audio_name = "generated_audio" # Output file name (a "-<n>.wav" suffix is appended)
generated_sample_length = 0.6 # Match model training data audio length
visualize_generated = True # Show generated audio spectrograms
5 changes: 3 additions & 2 deletions src/utils/file_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import torch
import soundfile as sf

from usage.usage_specs import model_save_path

# Constants
outputs_dir = "outputs"
GLOBAL_SR = 44100


Expand All @@ -25,7 +26,7 @@ def save_model(model, name):
# Save model
torch.save(
model.state_dict(),
f"{outputs_dir}/{name}.pth",
model_save_path,
)
print(f"Model Saved")

Expand Down
16 changes: 9 additions & 7 deletions src/utils/signal_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@
import plotly.subplots as sp
import scipy

from data_processing.training_audio_information import audio_sample_length
from usage.usage_specs import (
training_sample_length,
outputs_dir,
)
from utils.file_helpers import (
GLOBAL_SR,
outputs_dir,
delete_DSStore,
save_audio,
save_loudness_data,
Expand All @@ -25,7 +27,7 @@

# STFT Helpers
GLOBAL_WIN = 510
GLOBAL_HOP = int(audio_sample_length * GLOBAL_SR) // (DATA_SHAPE - 1)
GLOBAL_HOP = int(training_sample_length * GLOBAL_SR) // (DATA_SHAPE - 1)
window = scipy.signal.windows.kaiser(GLOBAL_WIN, beta=12)


Expand Down Expand Up @@ -85,7 +87,7 @@ def griffin_lim_istft(channel_magnitudes):
center=True,
)
y = librosa.util.fix_length(
y, size=int(audio_sample_length * GLOBAL_SR), axis=0
y, size=int(training_sample_length * GLOBAL_SR), axis=0
)

if i > 0:
Expand Down Expand Up @@ -124,7 +126,7 @@ def load_audio(path):
y, sr = librosa.load(path, sr=GLOBAL_SR, mono=False)
if y.ndim == 1:
y = np.stack((y, y), axis=0)
y = librosa.util.fix_length(y, size=int(audio_sample_length * GLOBAL_SR), axis=1)
y = librosa.util.fix_length(y, size=int(training_sample_length * GLOBAL_SR), axis=1)
return y


Expand Down Expand Up @@ -194,10 +196,10 @@ def graph_spectrogram(audio_data, sample_name):
def generate_sine_impulses(num_impulses=1, outPath="model"):
amplitude = 1
for i in range(num_impulses):
t = np.arange(0, audio_sample_length, 1 / GLOBAL_SR)
t = np.arange(0, training_sample_length, 1 / GLOBAL_SR)
freq = np.random.uniform(0, 20000)
audio_wave = amplitude * np.sin(2 * np.pi * freq * t)
num_samples = int(audio_sample_length * GLOBAL_SR)
num_samples = int(training_sample_length * GLOBAL_SR)
audio_signal = np.zeros(num_samples)

audio_wave = audio_wave[:num_samples]
Expand Down

0 comments on commit 06a8dee

Please sign in to comment.