Lightning-AI · veera-puthiran-14082 · Sep 17, 2024 · Sep 17, 2024 · Sep 17, 2024 · Sep 17, 2024
@@ -83,6 +83,8 @@ def perceptual_evaluation_speech_quality(
         )
     import pesq as pesq_backend
 
+    _filter_error_msg = np.vectorize(lambda x: np.issubdtype(type(x), np.number))
+
     if fs not in (8000, 16000):
         raise ValueError(f"Expected argument `fs` to either be 8000 or 16000 but got {fs}")
     if mode not in ("wb", "nb"):
@@ -103,8 +105,8 @@ def perceptual_evaluation_speech_quality(
             pesq_val_np = np.empty(shape=(preds_np.shape[0]))
             for b in range(preds_np.shape[0]):
                 pesq_val_np[b] = pesq_backend.pesq(fs, target_np[b, :], preds_np[b, :], mode)
-        pesq_val = torch.from_numpy(pesq_val_np)
-        pesq_val = pesq_val.reshape(preds.shape[:-1])
+        pesq_val = torch.from_numpy(pesq_val_np[_filter_error_msg(pesq_val_np)].astype(np.float32))
+        pesq_val = pesq_val.reshape(len(pesq_val))
 
     if keep_same_device:
         return pesq_val.to(preds.device)