forked from PawanSuryavanshi95/aasist
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_utils.py
113 lines (92 loc) · 3.35 KB
/
data_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import numpy as np
import soundfile as sf
import torch
from torch import Tensor
from torch.utils.data import Dataset
# fix: was ___author__ (three leading underscores), which does not follow the
# __author__ dunder convention and would be invisible to tools that look it up.
__author__ = "Hemlata Tak, Jee-weon Jung"
__email__ = "[email protected], [email protected]"
def genSpoof_list(dir_meta, is_train=False, is_eval=False):
    """Parse an ASVspoof-style metadata file.

    Each line is five space-separated fields:
        speaker_id  utt_key  field3  field4  label

    Args:
        dir_meta: path to the metadata text file.
        is_train: if True, return labels and keys for the training split.
        is_eval:  if True, return keys only (labels are ignored).

    Returns:
        is_eval=True  -> file_list (list of utterance keys)
        otherwise     -> (d_meta, file_list) where d_meta maps
                         key -> 1 for "bonafide", 0 for any other label.
    """
    with open(dir_meta, "r") as f:
        l_meta = f.readlines()

    if is_eval:
        file_list = []
        for line in l_meta:
            # strict 5-field unpack keeps the original format validation
            _, key, _, _, _ = line.strip().split(" ")
            file_list.append(key)
        return file_list

    # The original train and dev branches were byte-identical; merged here.
    d_meta = {}
    file_list = []
    for line in l_meta:
        _, key, _, _, label = line.strip().split(" ")
        file_list.append(key)
        d_meta[key] = 1 if label == "bonafide" else 0
    return d_meta, file_list
def pad(x, max_len=64600):
    """Deterministically fit a 1-D waveform to exactly max_len samples.

    Longer inputs are truncated from the start; shorter inputs are
    repeat-tiled and then truncated.

    Args:
        x: 1-D numpy array of audio samples (must be non-empty).
        max_len: target length in samples (default 64600, ~4 s at 16 kHz).

    Returns:
        1-D numpy array of length max_len.
    """
    x_len = x.shape[0]
    if x_len >= max_len:
        return x[:max_len]
    # Tile in 1-D directly, matching pad_random; the original detoured
    # through a 2-D tile + slice + [0] for the identical result.
    num_repeats = int(max_len / x_len) + 1
    return np.tile(x, num_repeats)[:max_len]
def pad_random(x: np.ndarray, max_len: int = 64600):
    """Fit a 1-D waveform to exactly max_len samples with a random crop.

    Longer inputs keep a random contiguous max_len window; shorter inputs
    are repeat-tiled and truncated (no randomness in that case).

    Args:
        x: 1-D numpy array of audio samples (must be non-empty).
        max_len: target length in samples (default 64600, ~4 s at 16 kHz).

    Returns:
        1-D numpy array of length max_len.
    """
    x_len = x.shape[0]
    if x_len >= max_len:
        # Bug fix: the original called np.random.randint(x_len - max_len),
        # which raises ValueError ("low >= high") whenever x_len == max_len.
        # randint(high) samples from [0, high), so use high = diff + 1 to
        # allow every valid start offset including 0.
        stt = np.random.randint(x_len - max_len + 1)
        return x[stt:stt + max_len]
    # too short: repeat-tile, then truncate
    num_repeats = int(max_len / x_len) + 1
    return np.tile(x, num_repeats)[:max_len]
class Dataset_ASVspoof2019_train(Dataset):
    """Training dataset: one ~4 s waveform clip plus its binary label.

    Bonafide (label 1) and spoofed (label 0) utterances are read from two
    different directory roots under base_dir.
    """

    def __init__(self, list_IDs, labels, base_dir):
        """list_IDs: utterance keys (strings);
        labels: dict mapping key -> label integer (1 bonafide, 0 spoof);
        base_dir: root directory containing the audio files."""
        self.list_IDs = list_IDs
        self.labels = labels
        self.base_dir = base_dir
        self.cut = 64600  # take ~4 sec audio (64600 samples)

    def __len__(self):
        return len(self.list_IDs)

    def __getitem__(self, index):
        key = self.list_IDs[index]
        label = self.labels[key]
        # bonafide recordings and generated spoofs live under different roots
        if label == 1:
            file_path = self.base_dir + f"/Recorded/new/converted/{key}"
        else:
            file_path = self.base_dir + f"/Generated/English/converted/{key}"
        waveform, _ = sf.read(str(file_path))
        # random crop / repeat-pad to a fixed-length input
        clip = pad_random(waveform, self.cut)
        return Tensor(clip), label
class Dataset_ASVspoof2019_devNeval(Dataset):
    """Dev/eval dataset: deterministic fixed-length clip, label, and key.

    Unlike the training set, padding/truncation is deterministic (pad, not
    pad_random) so repeated evaluation passes see identical inputs.
    """

    def __init__(self, list_IDs, labels, base_dir):
        """list_IDs: utterance keys (strings);
        labels: dict mapping key -> label integer (1 bonafide, 0 spoof);
        base_dir: root directory containing the audio files."""
        self.list_IDs = list_IDs
        self.labels = labels
        self.base_dir = base_dir
        self.cut = 64600  # take ~4 sec audio (64600 samples)

    def __len__(self):
        return len(self.list_IDs)

    def __getitem__(self, index):
        key = self.list_IDs[index]
        # bonafide recordings and generated spoofs live under different roots
        if self.labels[key] == 1:
            file_path = self.base_dir + f"/Recorded/new/converted/{key}"
        else:
            file_path = self.base_dir + f"/Generated/English/converted/{key}"
        waveform, _ = sf.read(str(file_path))
        clip = pad(waveform, self.cut)
        return Tensor(clip), self.labels[key], key