Skip to content

Commit

Permalink
implement lws
Browse files Browse the repository at this point in the history
  • Loading branch information
shuklabhay committed Aug 7, 2024
1 parent a6f4205 commit 5fcef62
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 14 deletions.
Binary file modified model/test.wav
Binary file not shown.
2 changes: 1 addition & 1 deletion src/scaling testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
scale_normalized_db_to_amplis,
)

sample = "/Users/abhayshukla/Documents/GitHub/deep-convolution-audio-generation/data/kick_samples/(OS) kick doorknocker.wav"
sample = "/Users/abhayshukla/Documents/GitHub/deep-convolution-audio-generation/data/kick_samples/Cymatics - Sanctuary Kick 4 - G.wav"
loudness = encode_sample(sample)
graph_spectrogram(loudness, "before istft")
print(loudness.shape)
Expand Down
50 changes: 37 additions & 13 deletions src/utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
# Initialize STFT Object
GLOBAL_WIN = 2**9  # analysis window length: 512 points
GLOBAL_HOP = 2**6  # hop between successive frames: 64 samples
# Kaiser window (beta=14). A Hann window used to be assigned to `win` right
# before this line and was overwritten without ever being used, so that dead
# assignment is dropped.
win = scipy.signal.windows.kaiser(GLOBAL_WIN, beta=14)
# Shared transform object used for both stft() and istft() in this module.
# GLOBAL_SR is defined elsewhere in the file (not visible in this hunk).
STFT = scipy.signal.ShortTimeFFT(
    win=win, hop=GLOBAL_HOP, fs=GLOBAL_SR, scale_to="magnitude"
)
Expand Down Expand Up @@ -195,35 +195,59 @@ def scale_normalized_db_to_amplis(normalized_loudness):
return channel_amplis # Amplitudes


def istft_with_griffin_lim_reconstruction(amplitudes):
iterations = 100
def lws(amplitudes):
    """Estimate a phase for every bin of a magnitude spectrogram.

    Starting from random phases, each bin's phase is repeatedly replaced by
    the phase of the magnitude-weighted sum of the bins in a small frequency
    neighbourhood (``lookback`` bins below to ``lookahead`` bins above) of the
    same frame. (The name presumably stands for "local weighted sums".)

    Args:
        amplitudes: 2-D real array indexed as ``[frame, frequency_bin]``.
            The loop bounds are taken from this array's own shape, so any
            2-D size is accepted.

    Returns:
        Complex array of the same shape: ``amplitudes`` multiplied by the
        estimated unit-modulus phase factors.
    """
    iterations = 50
    lookback, lookahead = 1, 1
    n_freq_bins = amplitudes.shape[1]
    angles = np.exp(2j * np.pi * np.random.rand(*amplitudes.shape))

    for _ in range(iterations):
        # Snapshot of the weighted spectrum for this pass: phase updates made
        # below must not feed back into the sums until the next pass.
        weighted = (amplitudes * angles) * amplitudes
        for i in range(n_freq_bins):
            # Half-open window [start, stop): exactly `lookback` bins below
            # and `lookahead` bins above bin i, clipped at the edges.
            start = max(0, i - lookback)
            stop = min(n_freq_bins, i + lookahead + 1)
            E = weighted[:, start:stop].sum(axis=1)  # one sum per frame
            magnitude = np.abs(E)
            # A zero sum carries no phase information; keep the previous
            # phase there instead of dividing 0/0 and producing NaN.
            usable = magnitude > 0
            angles[usable, i] = E[usable] / magnitude[usable]

    return amplitudes * angles


def istft_with_griffin_lim_reconstruction(amplitudes, preserve_signal_angles=False):
    """Reconstruct audio from a spectrogram with damped Griffin-Lim iterations.

    Each iteration inverts the current complex spectrogram, re-analyses the
    resulting audio, and blends the re-analysed phase into the running phase
    estimate while the magnitudes stay fixed.

    Args:
        amplitudes: 2-D array indexed as ``[frame, frequency_bin]`` (it is
            transposed before being handed to ``STFT.istft``). May be complex
            when ``preserve_signal_angles`` is true.
        preserve_signal_angles: When true, start from the phase already
            carried by ``amplitudes`` instead of from random phases.

    Returns:
        Whatever ``STFT.istft`` returns for the final spectrogram (the
        reconstructed signal).
    """
    iterations = 100

    # Only the magnitudes are the fixed constraint. Taking them explicitly
    # keeps a complex input from being multiplied by its own phase (which
    # would double that phase); for non-negative real input this is a no-op.
    magnitudes = np.abs(amplitudes)

    if preserve_signal_angles:
        angles = np.exp(1j * np.angle(amplitudes))
    else:
        angles = np.exp(2j * np.pi * np.random.rand(*amplitudes.shape))

    for i in range(iterations):
        full = magnitudes * angles
        audio = STFT.istft(full.T)
        stft = STFT.stft(audio)

        if stft.shape[1] != N_FRAMES:  # preserve shape
            stft = stft[:, :N_FRAMES]

        new_angles = np.exp(1j * np.angle(stft.T))
        # Running blend of the new and previous phase estimates.
        # NOTE(review): at i == 0 the new estimate gets weight 0, and the
        # blended factors are not renormalised to unit modulus — confirm
        # both are intended.
        angles = new_angles * (i / (i + 1)) + angles * (1 / (i + 1))
    return STFT.istft((magnitudes * angles).T)


def istft_with_weiner_reconstruction(amplitudes):
    """Wiener-filter ``amplitudes`` and invert the filtered array.

    Args:
        amplitudes: 2-D array; it is transposed before ``STFT.istft``.

    Returns:
        The result of ``STFT.istft`` on the transposed filtered array.
    """
    filtered = scipy.signal.wiener(amplitudes, mysize=None, noise=0.01)
    transposed = filtered.T
    return STFT.istft(transposed)
def istft_hybrid(amplitudes):
    """Two-stage reconstruction: ``lws`` phase estimate, then Griffin-Lim.

    The complex spectrogram returned by ``lws`` is handed to
    ``istft_with_griffin_lim_reconstruction`` with
    ``preserve_signal_angles=True`` so that its phases are used as the
    starting point rather than random ones.

    Args:
        amplitudes: Spectrogram array accepted by ``lws``.

    Returns:
        The value returned by ``istft_with_griffin_lim_reconstruction``.
    """
    return istft_with_griffin_lim_reconstruction(
        lws(amplitudes), preserve_signal_angles=True
    )


def amplitudes_to_wav(amplitudes, name):
audio_channels = []
for channel_loudness in amplitudes:
channel_amplitudes = scale_normalized_db_to_amplis(channel_loudness)

# audio_signal = STFT.istft(channel_amplitudes.T)
# audio_signal = istft_with_griffin_lim_reconstruction(channel_amplitudes)
audio_signal = istft_with_weiner_reconstruction(channel_amplitudes)
audio_signal = istft_hybrid(channel_amplitudes)
audio_channels.append(audio_signal)

audio_stereo = np.vstack(audio_channels)
Expand Down

0 comments on commit 5fcef62

Please sign in to comment.