Skip to content

Commit

Permalink
Merge pull request #165 from sensein/windowing
Browse files Browse the repository at this point in the history
adjusting windowing by returning an Audio instead of a waveform
  • Loading branch information
fabiocat93 authored Sep 26, 2024
2 parents 68a842d + 1c12fc1 commit c37842b
Show file tree
Hide file tree
Showing 2 changed files with 183 additions and 192 deletions.
40 changes: 32 additions & 8 deletions src/senselab/audio/data_structures/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,28 +103,52 @@ def __eq__(self, other: object) -> bool:
return self.id() == other.id()
return False

def window_generator(self, window_size: int, step_size: int) -> Generator[torch.Tensor, None, None]:
"""Creates a sliding window generator for the audio waveform.
def window_generator(self, window_size: int, step_size: int) -> Generator["Audio", None, None]:
"""Creates a sliding window generator for the audio.
Creates a generator that yields Audio objects corresponding to each window of the waveform
using a sliding window. The window size and step size are specified in number of samples.
If the audio waveform doesn't contain an exact number of windows, the trailing window(s) are
truncated to the remaining samples and are therefore shorter than window_size.
Args:
window_size: Size of each window (number of samples).
step_size: Step size for sliding the window (number of samples).
Warns:
UserWarning: If step_size is greater than window_size (the samples between consecutive windows are skipped; no exception is raised).
Yields:
Audio: Audio objects corresponding to each window of the waveform.
"""
if step_size > window_size:
warnings.warn(
"Step size is greater than window size. \
Some of audio will not be included in the windows."
Some of the audio will not be included in the windows."
)

num_samples = self.waveform.size(-1)
current_position = 0

while current_position < num_samples - window_size:
window = self.waveform[:, current_position : current_position + window_size]
yield window
while current_position < num_samples:
# Calculate the end position of the window
end_position = current_position + window_size

# Clamp end_position to the number of samples so the last window holds only the remaining samples.
# Tensor slicing would clamp the range automatically, but the clamped value is also used
# in the window's orig_path_or_id below, so it must reflect the real end of the window.
if end_position > num_samples:
end_position = num_samples

# Get the windowed waveform
window_waveform = self.waveform[:, current_position:end_position]

# Create a new Audio instance for this window
window_audio = Audio(
waveform=window_waveform,
sampling_rate=self.sampling_rate,
orig_path_or_id=f"{self.orig_path_or_id}_{current_position}_{end_position}",
metadata=self.metadata,
)

yield window_audio
current_position += step_size


Expand Down
Loading

0 comments on commit c37842b

Please sign in to comment.