Release 0.1.2 #47
Changes from 34 commits
pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "senselab"
-version = "0.1.2.dev23+c952ded"
+version = "0.0.1"
 description = "Senselab is a Python package that simplifies building pipelines for speech and voice analysis."
 authors = [
     "Fabio Catania <[email protected]>",
@@ -47,6 +47,8 @@ opensmile = "^2.5.0"
 audiomentations = "^0.35.0"
 torch-audiomentations = "^0.11.1"
 sentence-transformers = "^2.7.0"
+jiwer = "^3.0.4"
+speechbrain = "^1.0.0"

 [tool.poetry.group.dev]
 optional = true
@@ -79,6 +81,9 @@ testpaths = [

 [tool.mypy]
 ignore_missing_imports = true
+plugins = [
+    "pydantic.mypy"
+]

 [tool.ruff]
 exclude = [
@@ -104,7 +109,7 @@ exclude = [
     "node_modules",
     "venv"
 ]
-line-length = 80
+line-length = 120
 indent-width = 4
 src = ["src"]
 target-version = "py310"
@@ -140,10 +145,10 @@ pattern = "default-unprefixed"

 [tool.codespell]
 skip = [
-    "./poetry.lock",
-    "./docs_style/pdoc-theme/syntax-highlighting.css"
+    "poetry.lock",
+    "docs_style/pdoc-theme/syntax-highlighting.css"
 ]
-ignore-words-list = ["senselab"]
+ignore-words-list = ["senselab", "nd", "astroid", "wil"]

 [build-system]
 requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning>=1.0.0,<2.0.0"]
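Note on the version change: since the build system requires poetry-dynamic-versioning (see the [build-system] block and the "default-unprefixed" pattern above), the version field in pyproject.toml is presumably just a placeholder that gets overwritten from the git tag at build time, which would explain resetting the accidentally committed "0.1.2.dev23+c952ded" value back to "0.0.1".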
(audio data augmentation module)

@@ -1,44 +1,92 @@
 """This module implements some utilities for audio data augmentation."""

-from typing import Any, Dict
+from typing import Any, Dict, List, Union

 import torch
 from datasets import Dataset
 from torch_audiomentations import Compose

+from senselab.utils.data_structures.audio import (
+    Audio,
+    batch_audios,
+    unbatch_audios,
+)
+from senselab.utils.device import DeviceType, _select_device_and_dtype
 from senselab.utils.tasks.input_output import (
     _from_dict_to_hf_dataset,
     _from_hf_dataset_to_dict,
 )


-def augment_hf_dataset(
-    dataset: Dict[str, Any], augmentation: Compose, audio_column: str = "audio"
-) -> Dict[str, Any]:
+def augment_audio_dataset(
+    audios: List[Audio], augmentation: Compose, device_options: Union[DeviceType, List[DeviceType]] = [DeviceType.CPU]
+) -> List[Audio]:
+    """Augments all provided audios with a given augmentation, either individually or all batched together.
+
+    Augment all audios with a user-defined augmentation that can be a composition of multiple augmentations. This
+    augmentation is either performed on each audio individually or all of the audios provided are batched together
+    and run at once. NOTE: if batching, all audios must have the same sampling rate.
+
+    Args:
+        audios: List of Audios whose data will be augmented with the given augmentations
+        augmentation: A Composition of augmentations to run on each audio (uses torch-audiomentations); should
+            have its output_type set to "dict"
+        device_options: The device, or a List of possible devices, to use for augmenting. If the chosen device
+            is MPS or CUDA then the audios are all batched together, so for optimal performance, batching should
+            be done by passing a batch_size worth of audios at a time
+
+    Returns:
+        List of audios produced by passing all of the input audios through the provided augmentation. This does
+        not necessarily mean that the augmentation has been run on every audio. For more information,
+        see the torch-audiomentations documentation.
+    """
+    augmentation.output_type = "dict"
+    new_audios = []
+    device_type, dtype = _select_device_and_dtype(
+        device_options if isinstance(device_options, List) else [device_options]
+    )
+    if device_type == DeviceType.CPU:
+        for audio in audios:
+            audio_to_augment = audio.waveform.unsqueeze(0)
+            augmented_audio = augmentation(audio_to_augment, sample_rate=audio.sampling_rate).samples
+            new_audios.append(
+                Audio(
+                    waveform=torch.squeeze(augmented_audio),
+                    sampling_rate=audio.sampling_rate,
+                    metadata=audio.metadata.copy(),
+                    orig_path_or_id=audio.orig_path_or_id,
+                )
+            )
+    else:
+        batched_audios, sampling_rates, metadatas = batch_audios(audios)
+
+        batched_audios = batched_audios.to(device=torch.device(str(device_type)), dtype=dtype)
+        sampling_rate = sampling_rates[0] if isinstance(sampling_rates, List) else sampling_rates
+        augmented_audio = augmentation(batched_audios, sample_rate=sampling_rate).samples
+
+        augmented_audio = augmented_audio.detach().cpu()
+        return unbatch_audios(augmented_audio, sampling_rates, metadatas)
+
+    return new_audios

Review comment (on the _select_device_and_dtype call): How do you manage the scenario when the developer wants to use a device which is not supported? This is the question you asked me.

Reply: I don't.
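The thread above asks about fallback behavior when a requested device is unavailable. For illustration only, here is a minimal sketch of how a device-selection helper could degrade gracefully. The real _select_device_and_dtype is not part of this diff; the CUDA/MPS enum members are inferred from the docstring above, and the half-precision choice on CUDA is purely an assumption.

    from typing import List, Tuple

    import torch

    from senselab.utils.device import DeviceType  # enum referenced in the diff above


    def select_device_and_dtype_sketch(options: List[DeviceType]) -> Tuple[DeviceType, torch.dtype]:
        """Hypothetical fallback logic: return the first requested device that is usable."""
        for option in options:
            if option == DeviceType.CUDA and torch.cuda.is_available():
                return DeviceType.CUDA, torch.float16  # assumption: half precision on GPU
            if option == DeviceType.MPS and torch.backends.mps.is_available():
                return DeviceType.MPS, torch.float32
            if option == DeviceType.CPU:
                return DeviceType.CPU, torch.float32
        # none of the requested devices is supported on this machine: fall back to CPU
        return DeviceType.CPU, torch.float32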
(diff continues)

+
+
+def augment_hf_dataset(dataset: Dict[str, Any], augmentation: Compose, audio_column: str = "audio") -> Dict[str, Any]:
     """Resamples a Hugging Face `Dataset` object."""
     hf_dataset = _from_dict_to_hf_dataset(dataset)

-    def _augment_hf_row(
-        row: Dataset, augmentation: Compose, audio_column: str
-    ) -> Dict[str, Any]:
+    def _augment_hf_row(row: Dataset, augmentation: Compose, audio_column: str) -> Dict[str, Any]:
         waveform = row[audio_column]["array"]
         sampling_rate = row[audio_column]["sampling_rate"]

         # Ensure waveform is a PyTorch tensor
         if not isinstance(waveform, torch.Tensor):
             waveform = torch.tensor(waveform)
         if waveform.dim() == 1:
-            waveform = waveform.unsqueeze(0).unsqueeze(
-                0
-            )  # [num_samples] -> [1, 1, num_samples]
+            waveform = waveform.unsqueeze(0).unsqueeze(0)  # [num_samples] -> [1, 1, num_samples]
         elif waveform.dim() == 2:
-            waveform = waveform.unsqueeze(
-                1
-            )  # [batch_size, num_samples] -> [batch_size, 1, num_samples]
+            waveform = waveform.unsqueeze(1)  # [batch_size, num_samples] -> [batch_size, 1, num_samples]

-        augmented_hf_row = augmentation(
-            waveform, sample_rate=sampling_rate
-        ).squeeze()
+        augmented_hf_row = augmentation(waveform, sample_rate=sampling_rate).squeeze()

         return {
             "augmented_audio": {
@@ -47,8 +95,6 @@ def _augment_hf_row(
             }
         }

-    augmented_hf_dataset = hf_dataset.map(
-        lambda x: _augment_hf_row(x, augmentation, audio_column)
-    )
+    augmented_hf_dataset = hf_dataset.map(lambda x: _augment_hf_row(x, augmentation, audio_column))
     augmented_hf_dataset = augmented_hf_dataset.remove_columns([audio_column])
     return _from_hf_dataset_to_dict(augmented_hf_dataset)
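For readers skimming the diff, a minimal usage sketch of the new augment_audio_dataset. The Audio constructor arguments mirror the fields used in the diff above, but the module's import path is not visible in this PR, so treat the setup as illustrative.

    import torch
    from torch_audiomentations import Compose, Gain, PolarityInversion

    from senselab.utils.data_structures.audio import Audio
    from senselab.utils.device import DeviceType

    # One second of random single-channel audio at 16 kHz
    audio = Audio(
        waveform=torch.rand(1, 16000),  # [num_channels, num_samples]
        sampling_rate=16000,
        metadata={},
        orig_path_or_id="example.wav",
    )

    # Compose comes from torch-audiomentations; output_type="dict" is what the
    # function expects (it also sets this attribute itself, defensively).
    augmentation = Compose(
        transforms=[Gain(p=0.5), PolarityInversion(p=0.5)],
        output_type="dict",
    )

    # import augment_audio_dataset from the module shown in the diff above
    augmented = augment_audio_dataset([audio], augmentation, device_options=[DeviceType.CPU])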
Review comment: In general, if we create some workflows, it makes sense to have a _pydra version of the scripts. Otherwise, we can probably remove marking functions as pydra tasks, or we can do it in the same .py file. Will leave this here for our discussion later today @wilke0818
(pydra preprocessing API module)

@@ -1,6 +1,17 @@
 """This module defines a pydra API for the preprocessing task."""

 import pydra

-from senselab.audio.tasks.preprocessing import resample_hf_dataset
+from senselab.audio.tasks.preprocessing import (
+    chunk_audios,
+    downmix_audios_to_mono,
+    resample_audios,
+    resample_hf_dataset,
+    select_channel_from_audios,
+)

+resample_audios_pt = pydra.mark.task(resample_audios)
+downmix_audios_to_mono_pt = pydra.mark.task(downmix_audios_to_mono)
+chunk_audios_pt = pydra.mark.task(chunk_audios)
 resample_hf_dataset_pt = pydra.mark.task(resample_hf_dataset)
+select_channel_from_audios_pt = pydra.mark.task(select_channel_from_audios)
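For context, pydra.mark.task (used above to build the *_pt wrappers) turns a plain function into a lazily executed task. A generic, self-contained illustration, not senselab code:

    import pydra


    @pydra.mark.task
    def add(a: int, b: int) -> int:
        return a + b


    task = add(a=1, b=2)  # building the task does not run it yet
    task()  # execute
    print(task.result().output.out)  # -> 3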
Review comment: Can we say "audios" instead of "audio_dataset" since it's a list of audios for how it is rn?

Review comment: Also, not urgent: for the very good documentation that we will write, it's a good idea to include some suggestions for parameters to use for the different types of augmentations.
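To make that last suggestion concrete, such documentation could pair each augmentation with a starting parameter range. The values below are plausible starting points for speech, assumed for illustration rather than taken from this PR:

    from torch_audiomentations import AddColoredNoise, Compose, Gain, PitchShift

    augmentation = Compose(
        transforms=[
            # mild loudness jitter, roughly +/- 6 dB
            Gain(min_gain_in_db=-6.0, max_gain_in_db=6.0, p=0.5),
            # small pitch shifts keep speech intelligible
            PitchShift(min_transpose_semitones=-2.0, max_transpose_semitones=2.0, p=0.3, sample_rate=16000),
            # additive noise at moderate signal-to-noise ratios
            AddColoredNoise(min_snr_in_db=10.0, max_snr_in_db=30.0, p=0.3),
        ],
        output_type="dict",
    )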