noise fix

shuklabhay · Sep 15, 2024 · 29074df
1 parent 00a17d3
commit 29074df
Show file tree

Hide file tree

Showing 3 changed files with 4 additions and 31 deletions.
diff --git a/outputs/StereoSampleGAN-Kick.pth b/outputs/StereoSampleGAN-Kick.pth
diff --git a/src/generate.py b/src/generate.py
@@ -22,11 +22,11 @@
 generated_output = generated_output.squeeze().numpy()
 print("Generated output shape:", generated_output.shape)
 
-graph_spectrogram(generated_output, "generated output")
+# graph_spectrogram(generated_output, "generated output")
 audio_info = norm_db_to_audio(generated_output)
 audio_save_path = os.path.join(outputs_dir, "generated_audio.wav")
 
 save_audio(audio_save_path, audio_info)
 
 vis_signal_after_istft = audio_to_norm_db(audio_info)
-# graph_spectrogram(vis_signal_after_istft, "generated audio (after istft)")
+graph_spectrogram(vis_signal_after_istft, "generated audio (after istft)")
diff --git a/src/utils/signal_helpers.py b/src/utils/signal_helpers.py
@@ -57,7 +57,7 @@ def norm_db_to_audio(loudness_info):
 
     for i in range(N_CHANNELS):
         data = scale_data_to_range(loudness_info[i], -40, 40)
-        data[data < -38] = -40  # Noise gate
+        data[data < -35] = -40  # Noise gate
         magnitudes = librosa.db_to_amplitude(data)
         istft = griffin_lim_istft(magnitudes)
         stereo_audio.append(istft)
@@ -101,11 +101,11 @@ def griffin_lim_istft(channel_magnitudes):
         )
 
         stft = stft[:DATA_SHAPE, :DATA_SHAPE]  # preserve shape
-        # stft = noise_spectral_mask(stft)  # mask noise
         new_angles = np.exp(1j * np.angle(stft.T))
 
         stft = channel_magnitudes * new_angles
 
+    channel_magnitudes[channel_magnitudes < 0.05] = 0  # Noise gate
     complex_istft = librosa.istft(
         (channel_magnitudes * angles).T,
         hop_length=GLOBAL_HOP,
@@ -154,33 +154,6 @@ def scale_data_to_range(data, new_min, new_max):
     return scaled_data
 
 
-def noise_spectral_mask(channel_magnitudes):
-    # Parameters
-    n_std = 2.0
-    time_smoothing = 15
-    freq_smoothing = 6  #
-
-    # Estimate noise profile
-    noise_profile = np.mean(channel_magnitudes[:, -20:], axis=1)
-    noise_std = np.std(channel_magnitudes[:, -20:], axis=1)
-
-    # Apply 2D smoothing
-    time_filter = np.ones((1, time_smoothing)) / time_smoothing
-    freq_filter = np.ones((freq_smoothing, 1)) / freq_smoothing
-    smoothing_filter = np.outer(freq_filter, time_filter)
-    smoothed_magnitudes = convolve2d(
-        channel_magnitudes, smoothing_filter, mode="same", boundary="symm"
-    )
-
-    # Apply thresholding
-    threshold = noise_profile[:, np.newaxis] + n_std * noise_std[:, np.newaxis]
-    mask = smoothed_magnitudes > threshold
-
-    gain = np.where(mask, 1 - threshold / (smoothed_magnitudes + 1e-10), 0)
-
-    return channel_magnitudes * gain
-
-
 # Validation helpers
 def graph_spectrogram(audio_data, sample_name):
     fig = sp.make_subplots(rows=2, cols=1)