From 9e69bc2beb10987784df23b2c02c98cfffb51606 Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 10 May 2021 17:03:03 +0200 Subject: [PATCH 1/5] replaced resample function by new implementation (tensorflow version of torchaudio resample) --- tensorflow_io/core/python/ops/audio_ops.py | 130 ++++++++++++++------- 1 file changed, 90 insertions(+), 40 deletions(-) diff --git a/tensorflow_io/core/python/ops/audio_ops.py b/tensorflow_io/core/python/ops/audio_ops.py index 00f9a4778..b560d4bba 100644 --- a/tensorflow_io/core/python/ops/audio_ops.py +++ b/tensorflow_io/core/python/ops/audio_ops.py @@ -371,56 +371,106 @@ def fade(input, fade_in, fade_out, mode, name=None): return factor_in * factor_out * input - -def resample(input, rate_in, rate_out, name=None): - """Resample audio. +def _get_sinc_resample_kernel(rate_in, rate_out, lowpass_filter_width): + assert lowpass_filter_width > 0 + base_freq = min(rate_in, rate_out) + # This will perform antialiasing filtering by removing the highest frequencies. + # At first I thought I only needed this when downsampling, but when upsampling + # you will get edge artifacts without this, as the edge is equivalent to zero padding, + # which will add high freq artifacts. + base_freq *= 0.99 + + # The key idea of the algorithm is that x(t) can be exactly reconstructed from x[i] (tensor) + # using the sinc interpolation formula: + # x(t) = sum_i x[i] sinc(pi * rate_in * (i / rate_in - t)) + # We can then sample the function x(t) with a different sample rate: + # y[j] = x(j / rate_out) + # or, + # y[j] = sum_i x[i] sinc(pi * rate_in * (i / rate_in - j / rate_out)) + + # We see here that y[j] is the convolution of x[i] with a specific filter, for which + # we take an FIR approximation, stopping when we see at least `lowpass_filter_width` zeros crossing. + # But y[j+1] is going to have a different set of weights and so on, until y[j + rate_out]. + # Indeed: + # y[j + rate_out] = sum_i x[i] sinc(pi * rate_in * ((i / rate_in - (j + rate_out) / rate_out)) + # = sum_i x[i] sinc(pi * rate_in * ((i - rate_in) / rate_in - j / rate_out)) + # = sum_i x[i + rate_in] sinc(pi * rate_in * (i / rate_in - j / rate_out)) + # so y[j+rate_out] uses the same filter as y[j], but on a shifted version of x by `rate_in`. + # This will explain the F.conv1d after, with a stride of rate_in. + width = math.ceil(lowpass_filter_width * rate_in / base_freq) + # If rate_in is still big after GCD reduction, most filters will be very unbalanced, i.e., + # they will have a lot of almost zero values to the left or to the right... + # There is probably a way to evaluate those filters more efficiently, but this is kept for + # future work. + idx = tf.range(-width, width + rate_in,dtype=tf.float32) + idx=tf.repeat(tf.expand_dims(idx,axis=-1),rate_out,axis=-1) + aux_i=tf.expand_dims(tf.range(rate_out,dtype=tf.float32),axis=0) + kernels = (-aux_i / rate_out + idx / rate_in) * base_freq + + kernels = tf.clip_by_value(kernels,-lowpass_filter_width, lowpass_filter_width) + kernels *= math.pi + + window = tf.math.cos(kernels / lowpass_filter_width / 2)**2 + kernels = tf.where(kernels == 0, tf.ones_like(kernels), tf.math.sin(kernels) / kernels) + kernels*=window + + scale = base_freq / rate_in + return tf.expand_dims(kernels,axis=1)*scale, width + +def resample(input, rate_in, rate_out, lowpass_filter_width= 6): + """Resamples the waveform at the new frequency. 
This matches Kaldi’s OfflineFeatureTpl ResampleWaveform which uses a LinearResample (resample a signal at linearly spaced intervals to upsample/downsample a signal). LinearResample (LR) means that the output signal is at linearly spaced intervals (i.e the output signal has a frequency of rate_out). It uses sinc/bandlimited interpolation to upsample/downsample the signal. Args: - input: A 1-D (`[samples]`) or 2-D (`[samples, channels]`) or 3-D - (`[batch, samples, channels]`) `Tensor` of type - `int16` or `float`. Audio input. + waveform: A 1-D (`[samples]`) or 2-D (`[samples, channels]`) or 3-D (`[batch, samples, channels]`) `Tensor` of type `float`. Audio input. rate_in: The rate of the audio input. rate_out: The rate of the audio output. - name: A name for the operation (optional). + lowpass_filter_width: Controls the sharpness of the filter, more == sharper but less efficient. We suggest around 4 to 10 for normal use. (Default: 6) Returns: output: Resampled audio. """ - rank = tf.rank(input) - - def f1(): - return tf.expand_dims(tf.expand_dims(input, -1), 0) - - def f2(): - return tf.expand_dims(input, 0) - - def f3(): - return input - - input = tf.case( - [(tf.math.equal(rank, 1), f1), (tf.math.equal(rank, 2), f2)], default=f3 - ) - - def f(i): - return core_ops.io_audio_resample( - i, rate_in=rate_in, rate_out=rate_out, name=name + waveform = input + + if rate_in==rate_out: + return waveform + + rate_in = int(rate_in) + rate_out = int(rate_out) + gcd = math.gcd(rate_in, rate_out) + rate_in = rate_in // gcd + rate_out = rate_out // gcd + + kernel, width = _get_sinc_resample_kernel(rate_in, rate_out, lowpass_filter_width) + + ori_shape=waveform.shape + ori_shape_len=len(ori_shape) + if ori_shape_len==1: + waveform = tf.expand_dims(waveform,axis=0) + elif ori_shape_len==2: + waveform = tf.transpose(waveform,[1,0]) + elif ori_shape_len==3: + waveform = tf.transpose(waveform,[0,2,1]) + waveform = tf.reshape(waveform,[ori_shape[0]*ori_shape[2],ori_shape[1]]) + + waveform = tf.expand_dims(waveform,axis=-1) + + num_wavs, length, _ = waveform.shape + + waveform = tf.pad(waveform,[[0,0],[width, width + rate_in],[0,0]]) + resampled = tf.nn.conv1d(waveform, kernel, stride=rate_in,padding='VALID') + resampled = tf.reshape(resampled,[num_wavs, -1]) + target_length = int(math.ceil(rate_out * length / rate_in)) + if ori_shape_len==1: + return resampled[0, :target_length] + elif ori_shape_len==2: + return tf.transpose(resampled[:, :target_length],[1,0]) + elif ori_shape_len==3: + return tf.transpose( + tf.reshape( + resampled[:, :target_length],[ori_shape[0],ori_shape[2],target_length] + ),[0,2,1] ) - value = tf.vectorized_map(f, input) - - def g1(): - return tf.squeeze(value, [0, -1]) - - def g2(): - return tf.squeeze(value, [0]) - - def g3(): - return value - - return tf.case( - [(tf.math.equal(rank, 1), g1), (tf.math.equal(rank, 2), g2)], default=g3 - ) - def decode_wav( input, shape=None, dtype=None, name=None From fc80e028b841da8b03ae04af3bb0480e4b07acbb Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 10 May 2021 17:05:49 +0200 Subject: [PATCH 2/5] fixing documentation --- tensorflow_io/core/python/ops/audio_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow_io/core/python/ops/audio_ops.py b/tensorflow_io/core/python/ops/audio_ops.py index b560d4bba..d6898d983 100644 --- a/tensorflow_io/core/python/ops/audio_ops.py +++ b/tensorflow_io/core/python/ops/audio_ops.py @@ -418,10 +418,10 @@ def _get_sinc_resample_kernel(rate_in, rate_out, lowpass_filter_width): 
return tf.expand_dims(kernels,axis=1)*scale, width def resample(input, rate_in, rate_out, lowpass_filter_width= 6): - """Resamples the waveform at the new frequency. This matches Kaldi’s OfflineFeatureTpl ResampleWaveform which uses a LinearResample (resample a signal at linearly spaced intervals to upsample/downsample a signal). LinearResample (LR) means that the output signal is at linearly spaced intervals (i.e the output signal has a frequency of rate_out). It uses sinc/bandlimited interpolation to upsample/downsample the signal. + """Resamples the input at the new frequency. This matches Kaldi’s OfflineFeatureTpl ResampleWaveform which uses a LinearResample (resample a signal at linearly spaced intervals to upsample/downsample a signal). LinearResample (LR) means that the output signal is at linearly spaced intervals (i.e the output signal has a frequency of rate_out). It uses sinc/bandlimited interpolation to upsample/downsample the signal. Args: - waveform: A 1-D (`[samples]`) or 2-D (`[samples, channels]`) or 3-D (`[batch, samples, channels]`) `Tensor` of type `float`. Audio input. + input: A 1-D (`[samples]`) or 2-D (`[samples, channels]`) or 3-D (`[batch, samples, channels]`) `Tensor` of type `float`. Audio input. rate_in: The rate of the audio input. rate_out: The rate of the audio output. lowpass_filter_width: Controls the sharpness of the filter, more == sharper but less efficient. We suggest around 4 to 10 for normal use. (Default: 6) From 3a4cafb3ac2e3a4b17abc2b81e5b8330463649c6 Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 10 May 2021 18:50:56 +0200 Subject: [PATCH 3/5] added math dependency --- tensorflow_io/core/python/ops/audio_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow_io/core/python/ops/audio_ops.py b/tensorflow_io/core/python/ops/audio_ops.py index d6898d983..3d4acc3ab 100644 --- a/tensorflow_io/core/python/ops/audio_ops.py +++ b/tensorflow_io/core/python/ops/audio_ops.py @@ -17,7 +17,7 @@ import sys import tensorflow as tf - +import math from tensorflow_io.core.python.ops import core_ops From ce9aa58c336e8da879f4fdaf7699bb7c2be477e3 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 11 May 2021 09:51:30 +0200 Subject: [PATCH 4/5] reformatted audio_ops --- tensorflow_io/core/python/ops/audio_ops.py | 72 ++++++++++++---------- 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/tensorflow_io/core/python/ops/audio_ops.py b/tensorflow_io/core/python/ops/audio_ops.py index 3d4acc3ab..32ee6c3ee 100644 --- a/tensorflow_io/core/python/ops/audio_ops.py +++ b/tensorflow_io/core/python/ops/audio_ops.py @@ -371,6 +371,7 @@ def fade(input, fade_in, fade_out, mode, name=None): return factor_in * factor_out * input + def _get_sinc_resample_kernel(rate_in, rate_out, lowpass_filter_width): assert lowpass_filter_width > 0 base_freq = min(rate_in, rate_out) @@ -402,22 +403,25 @@ def _get_sinc_resample_kernel(rate_in, rate_out, lowpass_filter_width): # they will have a lot of almost zero values to the left or to the right... # There is probably a way to evaluate those filters more efficiently, but this is kept for # future work. 
- idx = tf.range(-width, width + rate_in,dtype=tf.float32) - idx=tf.repeat(tf.expand_dims(idx,axis=-1),rate_out,axis=-1) - aux_i=tf.expand_dims(tf.range(rate_out,dtype=tf.float32),axis=0) + idx = tf.range(-width, width + rate_in, dtype=tf.float32) + idx = tf.repeat(tf.expand_dims(idx, axis=-1), rate_out, axis=-1) + aux_i = tf.expand_dims(tf.range(rate_out, dtype=tf.float32), axis=0) kernels = (-aux_i / rate_out + idx / rate_in) * base_freq - - kernels = tf.clip_by_value(kernels,-lowpass_filter_width, lowpass_filter_width) + + kernels = tf.clip_by_value(kernels, -lowpass_filter_width, lowpass_filter_width) kernels *= math.pi - window = tf.math.cos(kernels / lowpass_filter_width / 2)**2 - kernels = tf.where(kernels == 0, tf.ones_like(kernels), tf.math.sin(kernels) / kernels) - kernels*=window - + window = tf.math.cos(kernels / lowpass_filter_width / 2) ** 2 + kernels = tf.where( + kernels == 0, tf.ones_like(kernels), tf.math.sin(kernels) / kernels + ) + kernels *= window + scale = base_freq / rate_in - return tf.expand_dims(kernels,axis=1)*scale, width + return tf.expand_dims(kernels, axis=1) * scale, width -def resample(input, rate_in, rate_out, lowpass_filter_width= 6): + +def resample(input, rate_in, rate_out, lowpass_filter_width=6): """Resamples the input at the new frequency. This matches Kaldi’s OfflineFeatureTpl ResampleWaveform which uses a LinearResample (resample a signal at linearly spaced intervals to upsample/downsample a signal). LinearResample (LR) means that the output signal is at linearly spaced intervals (i.e the output signal has a frequency of rate_out). It uses sinc/bandlimited interpolation to upsample/downsample the signal. Args: @@ -431,9 +435,9 @@ def resample(input, rate_in, rate_out, lowpass_filter_width= 6): """ waveform = input - if rate_in==rate_out: + if rate_in == rate_out: return waveform - + rate_in = int(rate_in) rate_out = int(rate_out) gcd = math.gcd(rate_in, rate_out) @@ -442,33 +446,35 @@ def resample(input, rate_in, rate_out, lowpass_filter_width= 6): kernel, width = _get_sinc_resample_kernel(rate_in, rate_out, lowpass_filter_width) - ori_shape=waveform.shape - ori_shape_len=len(ori_shape) - if ori_shape_len==1: - waveform = tf.expand_dims(waveform,axis=0) - elif ori_shape_len==2: - waveform = tf.transpose(waveform,[1,0]) - elif ori_shape_len==3: - waveform = tf.transpose(waveform,[0,2,1]) - waveform = tf.reshape(waveform,[ori_shape[0]*ori_shape[2],ori_shape[1]]) + ori_shape = waveform.shape + ori_shape_len = len(ori_shape) + if ori_shape_len == 1: + waveform = tf.expand_dims(waveform, axis=0) + elif ori_shape_len == 2: + waveform = tf.transpose(waveform, [1, 0]) + elif ori_shape_len == 3: + waveform = tf.transpose(waveform, [0, 2, 1]) + waveform = tf.reshape(waveform, [ori_shape[0] * ori_shape[2], ori_shape[1]]) - waveform = tf.expand_dims(waveform,axis=-1) + waveform = tf.expand_dims(waveform, axis=-1) num_wavs, length, _ = waveform.shape - - waveform = tf.pad(waveform,[[0,0],[width, width + rate_in],[0,0]]) - resampled = tf.nn.conv1d(waveform, kernel, stride=rate_in,padding='VALID') - resampled = tf.reshape(resampled,[num_wavs, -1]) + + waveform = tf.pad(waveform, [[0, 0], [width, width + rate_in], [0, 0]]) + resampled = tf.nn.conv1d(waveform, kernel, stride=rate_in, padding="VALID") + resampled = tf.reshape(resampled, [num_wavs, -1]) target_length = int(math.ceil(rate_out * length / rate_in)) - if ori_shape_len==1: + if ori_shape_len == 1: return resampled[0, :target_length] - elif ori_shape_len==2: - return tf.transpose(resampled[:, 
:target_length],[1,0]) - elif ori_shape_len==3: + elif ori_shape_len == 2: + return tf.transpose(resampled[:, :target_length], [1, 0]) + elif ori_shape_len == 3: return tf.transpose( tf.reshape( - resampled[:, :target_length],[ori_shape[0],ori_shape[2],target_length] - ),[0,2,1] + resampled[:, :target_length], + [ori_shape[0], ori_shape[2], target_length], + ), + [0, 2, 1], ) From ea3c7c29e1a9bd777308cb17408209ccbbc83e1e Mon Sep 17 00:00:00 2001 From: Alex Fuster Date: Fri, 11 Jun 2021 15:00:46 +0200 Subject: [PATCH 5/5] added support for tensor rates --- tensorflow_io/core/python/ops/audio_ops.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/tensorflow_io/core/python/ops/audio_ops.py b/tensorflow_io/core/python/ops/audio_ops.py index 32ee6c3ee..a44a76ddd 100644 --- a/tensorflow_io/core/python/ops/audio_ops.py +++ b/tensorflow_io/core/python/ops/audio_ops.py @@ -374,7 +374,9 @@ def fade(input, fade_in, fade_out, mode, name=None): def _get_sinc_resample_kernel(rate_in, rate_out, lowpass_filter_width): assert lowpass_filter_width > 0 - base_freq = min(rate_in, rate_out) + rate_in=tf.cast(rate_in,tf.float32) + rate_out=tf.cast(rate_out,tf.float32) + base_freq = tf.minimum(rate_in, rate_out) # This will perform antialiasing filtering by removing the highest frequencies. # At first I thought I only needed this when downsampling, but when upsampling # you will get edge artifacts without this, as the edge is equivalent to zero padding, @@ -398,13 +400,13 @@ def _get_sinc_resample_kernel(rate_in, rate_out, lowpass_filter_width): # = sum_i x[i + rate_in] sinc(pi * rate_in * (i / rate_in - j / rate_out)) # so y[j+rate_out] uses the same filter as y[j], but on a shifted version of x by `rate_in`. # This will explain the F.conv1d after, with a stride of rate_in. - width = math.ceil(lowpass_filter_width * rate_in / base_freq) + width = tf.experimental.numpy.ceil(lowpass_filter_width * rate_in / base_freq) # If rate_in is still big after GCD reduction, most filters will be very unbalanced, i.e., # they will have a lot of almost zero values to the left or to the right... # There is probably a way to evaluate those filters more efficiently, but this is kept for # future work. 
idx = tf.range(-width, width + rate_in, dtype=tf.float32) - idx = tf.repeat(tf.expand_dims(idx, axis=-1), rate_out, axis=-1) + idx = tf.repeat(tf.expand_dims(idx, axis=-1), tf.cast(rate_out,tf.int32), axis=-1) aux_i = tf.expand_dims(tf.range(rate_out, dtype=tf.float32), axis=0) kernels = (-aux_i / rate_out + idx / rate_in) * base_freq @@ -437,14 +439,14 @@ def resample(input, rate_in, rate_out, lowpass_filter_width=6): if rate_in == rate_out: return waveform - - rate_in = int(rate_in) - rate_out = int(rate_out) - gcd = math.gcd(rate_in, rate_out) + rate_in = tf.cast(rate_in,tf.int32) + rate_out = tf.cast(rate_out,tf.int32) + gcd = tf.experimental.numpy.gcd(rate_in, rate_out) rate_in = rate_in // gcd rate_out = rate_out // gcd kernel, width = _get_sinc_resample_kernel(rate_in, rate_out, lowpass_filter_width) + width=tf.cast(width,tf.int32) ori_shape = waveform.shape ori_shape_len = len(ori_shape) @@ -459,11 +461,11 @@ def resample(input, rate_in, rate_out, lowpass_filter_width=6): waveform = tf.expand_dims(waveform, axis=-1) num_wavs, length, _ = waveform.shape - + waveform = tf.pad(waveform, [[0, 0], [width, width + rate_in], [0, 0]]) - resampled = tf.nn.conv1d(waveform, kernel, stride=rate_in, padding="VALID") + resampled = tf.nn.conv1d(waveform, kernel, stride=tf.reshape(rate_in,[1,]), padding="VALID") resampled = tf.reshape(resampled, [num_wavs, -1]) - target_length = int(math.ceil(rate_out * length / rate_in)) + target_length = tf.cast(tf.experimental.numpy.ceil(rate_out * length / rate_in),tf.int32) if ori_shape_len == 1: return resampled[0, :target_length] elif ori_shape_len == 2:
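
For reference, a minimal sketch (not part of the patches) of how the resampler introduced by this series might be exercised end to end. It assumes the patched build exposes the function as tfio.audio.resample with the lowpass_filter_width argument added above; the 440 Hz test tone and the shape check are illustrative only.

    import math

    import numpy as np
    import tensorflow as tf
    import tensorflow_io as tfio

    rate_in, rate_out = 44100, 16000

    # One second of a 440 Hz sine at the input rate, shaped [samples, channels].
    t = np.arange(rate_in, dtype=np.float32) / rate_in
    waveform = tf.constant(np.sin(2 * np.pi * 440.0 * t)[:, np.newaxis])

    resampled = tfio.audio.resample(
        waveform, rate_in=rate_in, rate_out=rate_out, lowpass_filter_width=6
    )

    # The sinc kernel is built from the GCD-reduced rates and applied with
    # tf.nn.conv1d at stride rate_in, so the output length should come out to
    # ceil(samples * rate_out / rate_in).
    expected = math.ceil(int(waveform.shape[0]) * rate_out / rate_in)
    print(resampled.shape)  # expected: (16000, 1)
    assert int(resampled.shape[0]) == expected

Because the GCD reduction preserves the rate ratio, the same length check holds whether the rates are passed as Python ints or, after the last patch in this series, as tensors.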