Skip to content

Commit

Permalink
lots of file organization
Browse files Browse the repository at this point in the history
  • Loading branch information
shuklabhay committed Sep 22, 2024
1 parent 06a8dee commit 264ec73
Show file tree
Hide file tree
Showing 11 changed files with 41 additions and 31 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -165,5 +165,6 @@ cython_debug/
data/
.vscode/
.DS_Store
generated_audio.wav
outputs/generated_audio.wav
outputs/generated_audio_[0-9]*
test.wav
34 changes: 26 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,35 @@
# StereoSampleGAN (WIP)
# StereoSampleGAN

[![On Push](https://github.com/shuklabhay/stereo-sample-gan/actions/workflows/push.yml/badge.svg)](https://github.com/shuklabhay/stereo-sample-gan/actions/workflows/push.yml/badge.svg)

StereoSampleGAN: A lightweight approach high fidelity stereo audio sample generation. Generate a kick drum by running `generate.py`.
StereoSampleGAN: A lightweight approach to high-fidelity stereo audio sample generation.

Generated audio spectrogram examples:
![Audio Example 1](paper/static/generated_audio_example_1.png)
![Audio Example 2](paper/static/generated_audio_example_2.png)
![Audio Example 3](paper/static/generated_audio_example_3.png)
## Model Usage

1. Prereqs

- Optional but highly recommended: Set up a [Python virtual environment.](https://www.youtube.com/watch?v=e5GL1obY_sI)
- Audio loader package `librosa` requires an outdated version of NumPy.
- Install requirements by running `pip3 install -r requirements.txt`

2. Generate Audio

- Specify usage parameters in `usage_params.py`
- For `outputs/StereoSampleGAN-DiverseKick.pth`, `training_sample_length = 0.6`
- For `outputs/StereoSampleGAN-Kick.pth`, `training_sample_length = 0.6`
- Generate audio by running `python3 generate.py`

3. Train model

- Specify training data parameters in `usage_params.py`
- Process training data by running `python3 encode_audio_data.py`
- Train model by running `python3 stereo_sample_gan.py`

## Directories

- `paper`: Research paper and static images
- `model`: Trained model and generated audio
- `paper`: Research paper / model writeup
- `static`: Static images
- `outputs`: Trained model and generated audio
- `src`: Model source code
- `utils`: Model and data utilities
- `data_processing`: Training data processing scripts
File renamed without changes.
2 changes: 1 addition & 1 deletion src/data_processing/audio_processing_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import os
import random
from usage.usage_specs import compiled_data_path
from usage_params import compiled_data_path
from utils.signal_helpers import (
stft_and_istft,
)
Expand Down
2 changes: 1 addition & 1 deletion src/data_processing/encode_audio_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
load_loudness_data,
)
from utils.signal_helpers import encode_sample_directory
from usage.usage_specs import training_audio_dir, compiled_data_path
from usage_params import training_audio_dir, compiled_data_path

# Encode audio samples
if len(sys.argv) > 1:
Expand Down
9 changes: 2 additions & 7 deletions src/usage/generate.py → src/generate.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
import sys
import os

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


import torch
from architecture import Generator, LATENT_DIM
from usage.usage_specs import (
from usage_params import (
model_to_generate_with,
outputs_dir,
generated_audio_name,
Expand Down Expand Up @@ -41,7 +36,7 @@
current_sample = generated_output[i]

audio_info = norm_db_to_audio(current_sample)
audio_save_path = os.path.join(outputs_dir, f"{generated_audio_name}-{i + 1}.wav")
audio_save_path = os.path.join(outputs_dir, f"{generated_audio_name}_{i + 1}.wav")

save_audio(audio_save_path, audio_info)

Expand Down
8 changes: 1 addition & 7 deletions src/stereo_sample_gan.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
import sys
import os

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


import torch
from torch.utils.data import DataLoader, TensorDataset, random_split

Expand All @@ -16,7 +10,7 @@
load_loudness_data,
)

from usage.usage_specs import compiled_data_path
from usage_params import compiled_data_path

# Constants
LR_G = 0.003
Expand Down
Empty file removed src/usage/__init__.py
Empty file.
10 changes: 6 additions & 4 deletions src/usage/usage_specs.py → src/usage_params.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
# Processing training data
training_audio_dir = "data/one_shots" # Your training data path
training_audio_dir = "data/kick_samples_diverse" # Your training data path
compiled_data_path = "data/compiled_data.npy" # Your compiled data/output path
training_sample_length = 0.6 # seconds

# Saving model
outputs_dir = "outputs" # Where to save your generated audio & model
model_save_name = "StereoSampleGAN-OldKick" # What to name your model save
model_save_name = "StereoSampleGAN-DiverseKick" # What to name your model save
model_save_path = f"{outputs_dir}/{model_save_name}.pth"

# Generating audio
model_to_generate_with = model_save_path # Generation model path
audio_generation_count = 2 # Audio examples to generate
generated_audio_name = "generated_audio" # Output file name
generated_sample_length = 0.6 # Match model training data audio length
visualize_generated = True # SHow generated audio spectrogra,s
generated_sample_length = (
training_sample_length # Match model training data audio length
)
visualize_generated = True # Show generated audio spectrograms
2 changes: 1 addition & 1 deletion src/utils/file_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import torch
import soundfile as sf

from usage.usage_specs import model_save_path
from usage_params import model_save_path

# Constants
GLOBAL_SR = 44100
Expand Down
2 changes: 1 addition & 1 deletion src/utils/signal_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import plotly.subplots as sp
import scipy

from usage.usage_specs import (
from usage_params import (
training_sample_length,
outputs_dir,
)
Expand Down

0 comments on commit 264ec73

Please sign in to comment.