Skip to content

Commit

Permalink
noise fix
Browse files Browse the repository at this point in the history
  • Loading branch information
shuklabhay committed Sep 15, 2024
1 parent 00a17d3 commit 29074df
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 31 deletions.
Binary file modified outputs/StereoSampleGAN-Kick.pth
Binary file not shown.
4 changes: 2 additions & 2 deletions src/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@
generated_output = generated_output.squeeze().numpy()
print("Generated output shape:", generated_output.shape)

graph_spectrogram(generated_output, "generated output")
# graph_spectrogram(generated_output, "generated output")
audio_info = norm_db_to_audio(generated_output)
audio_save_path = os.path.join(outputs_dir, "generated_audio.wav")

save_audio(audio_save_path, audio_info)

vis_signal_after_istft = audio_to_norm_db(audio_info)
# graph_spectrogram(vis_signal_after_istft, "generated audio (after istft)")
graph_spectrogram(vis_signal_after_istft, "generated audio (after istft)")
31 changes: 2 additions & 29 deletions src/utils/signal_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def norm_db_to_audio(loudness_info):

for i in range(N_CHANNELS):
data = scale_data_to_range(loudness_info[i], -40, 40)
data[data < -38] = -40 # Noise gate
data[data < -35] = -40 # Noise gate
magnitudes = librosa.db_to_amplitude(data)
istft = griffin_lim_istft(magnitudes)
stereo_audio.append(istft)
Expand Down Expand Up @@ -101,11 +101,11 @@ def griffin_lim_istft(channel_magnitudes):
)

stft = stft[:DATA_SHAPE, :DATA_SHAPE] # preserve shape
# stft = noise_spectral_mask(stft) # mask noise
new_angles = np.exp(1j * np.angle(stft.T))

stft = channel_magnitudes * new_angles

channel_magnitudes[channel_magnitudes < 0.05] = 0 # Noise gate
complex_istft = librosa.istft(
(channel_magnitudes * angles).T,
hop_length=GLOBAL_HOP,
Expand Down Expand Up @@ -154,33 +154,6 @@ def scale_data_to_range(data, new_min, new_max):
return scaled_data


def noise_spectral_mask(
    channel_magnitudes,
    *,
    n_std=2.0,
    time_smoothing=15,
    freq_smoothing=6,
    noise_frames=20,
):
    """Attenuate bins of a 2D magnitude array that sit at or below a noise floor.

    A per-row noise floor is estimated from the last ``noise_frames`` columns
    (axis 1, the axis the smoothing kernel treats as time). The input is
    box-smoothed, compared against ``mean + n_std * std`` of that noise
    estimate, and a soft gain is applied to the *unsmoothed* input.

    Args:
        channel_magnitudes: 2D array; axis 0 is smoothed with
            ``freq_smoothing`` taps and axis 1 with ``time_smoothing`` taps.
        n_std: Number of noise standard deviations added to the noise mean to
            form the per-row threshold.
        time_smoothing: Box-filter length along axis 1.
        freq_smoothing: Box-filter length along axis 0.
        noise_frames: Number of trailing columns used to estimate the noise
            mean and standard deviation.

    Returns:
        Array shaped like ``channel_magnitudes``: the input multiplied by a
        gain of ``1 - threshold / smoothed`` where the smoothed value exceeds
        the threshold, and by 0 elsewhere.

    Raises:
        ValueError: If ``time_smoothing``, ``freq_smoothing`` or
            ``noise_frames`` is smaller than 1.
    """
    if time_smoothing < 1 or freq_smoothing < 1 or noise_frames < 1:
        raise ValueError(
            "time_smoothing, freq_smoothing and noise_frames must be >= 1"
        )

    # Estimate noise profile from the trailing columns (sliced once, used twice)
    noise_tail = channel_magnitudes[:, -noise_frames:]
    noise_profile = np.mean(noise_tail, axis=1)
    noise_std = np.std(noise_tail, axis=1)

    # Apply 2D smoothing with a separable box kernel (freq rows x time columns)
    freq_filter = np.ones(freq_smoothing) / freq_smoothing
    time_filter = np.ones(time_smoothing) / time_smoothing
    smoothing_filter = np.outer(freq_filter, time_filter)
    smoothed_magnitudes = convolve2d(
        channel_magnitudes, smoothing_filter, mode="same", boundary="symm"
    )

    # Apply thresholding: one threshold per row, broadcast across columns
    threshold = (noise_profile + n_std * noise_std)[:, np.newaxis]
    mask = smoothed_magnitudes > threshold

    # Soft gain above the threshold, hard zero below; the epsilon keeps the
    # division finite because np.where evaluates both branches everywhere.
    gain = np.where(mask, 1 - threshold / (smoothed_magnitudes + 1e-10), 0)

    return channel_magnitudes * gain


# Validation helpers
def graph_spectrogram(audio_data, sample_name):
fig = sp.make_subplots(rows=2, cols=1)
Expand Down

0 comments on commit 29074df

Please sign in to comment.