forked from laiguokun/SWaveNet
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmake_blizzard_mp3_npy.py
48 lines (43 loc) · 1.46 KB
/
make_blizzard_mp3_npy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from scipy.io import wavfile
import cPickle
import fnmatch
import os
from subprocess import Popen, PIPE
import numpy as np
def decode(fname):
    """Decode an audio file to 16 kHz, 16-bit PCM samples using ffmpeg.

    ffmpeg is asked to convert ``fname`` to WAV at a 16000 Hz sampling rate
    and to write the result to stdout. The payload of the WAV ``data`` chunk
    is then reinterpreted as int16 samples.

    Args:
        fname: Path of the input audio file.

    Returns:
        A writable 1-D ``np.int16`` array holding the decoded samples. The
        array is empty when the ffmpeg output contains no ``data`` chunk
        (for example because ffmpeg could not decode the file).

    Raises:
        OSError: If the ``ffmpeg`` executable cannot be started.
    """
    # If you are on Windows use the full path to ffmpeg.exe.
    # NOTE(review): no "-ac" option is passed, so a multi-channel input
    # presumably yields interleaved samples -- confirm against the data set.
    cmd = ["ffmpeg", "-i", fname, "-f", "wav", "-ar", "16000", "-"]
    # If you are on Windows, add the argument creationflags=0x8000000 to
    # Popen to prevent another console window from popping up.
    p = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
    data = p.communicate()[0]

    # Bytes needle so the search works on Python 2 and Python 3 alike.
    tag = data.find(b"data")
    if tag < 0:
        # No data chunk at all: return "no samples" instead of decoding
        # header bytes (or nothing) from a bogus offset.
        return np.zeros(0, dtype=np.int16)

    # A RIFF chunk is a 4-byte id followed by a 4-byte size field; the
    # samples start after both. Skipping only the id would decode the size
    # field as two spurious samples.
    payload = data[tag + 8:]
    # Drop a dangling odd byte so the buffer is a whole number of samples.
    usable = len(payload) - (len(payload) % 2)
    # frombuffer replaces the deprecated binary mode of np.fromstring; the
    # copy keeps the result writable, as it was before.
    return np.frombuffer(payload[:usable], dtype=np.int16).copy()
# Root directory that is searched (recursively) for the Blizzard 2013 MP3s.
data_dir = '/usr0/home/bohanl1/datasets/blizzard2013/Lessac_Blizzard2013_CatherineByers_train/unsegmented'
# Number of decoded files collected before a chunk is written to disk.
list_len = 200


def _dump_chunk(arrays, index):
    """Pickle the list ``arrays`` to ``data_<index>.npy`` in the working dir.

    Despite the ``.npy`` suffix the file is a pickled Python list of arrays,
    so it has to be read back with ``cPickle.load`` rather than ``np.load``
    defaults.
    """
    # The with-block closes the handle (and flushes the data) right away
    # instead of leaving that to garbage collection.
    with open("data_%i.npy" % index, mode="wb") as out:
        cPickle.dump(arrays, out)


# Collect every *.mp3 below data_dir, descending into sub-directories.
files = []
for root, dir_names, file_names in os.walk(data_dir):
    files.extend(os.path.join(root, filename)
                 for filename in fnmatch.filter(file_names, '*.mp3'))

chunk_idx = 0  # index used in the name of the next output file
chunk = []     # decoded arrays waiting to be written
n_frame = 0    # running total of decoded samples over all files
for n, f in enumerate(files):
    d = decode(f)
    chunk.append(d)
    n_frame += len(d)
    # NOTE(review): the 32000 and 64000 divisors are kept as they were;
    # decode() requests 16000 Hz output, so confirm which units the
    # 'n_samples' and 'hours' figures are meant to be in.
    # Without `from __future__ import print_function`, Python 2 prints the
    # parenthesised arguments below as a single tuple.
    print('n_frames', n_frame, 'n_samples', n_frame/32000., 'length', '%f hours' % (1.*n_frame/64000/3600))
    if len(chunk) >= list_len:
        print("Dumping at file %i of %i" % (n, len(files)))
        _dump_chunk(chunk, chunk_idx)
        chunk_idx += 1
        chunk = []

# Dump the last, partially filled chunk. Skip it when it is empty (no input
# files, or a file count that is an exact multiple of list_len) so that no
# empty output file is written.
if chunk:
    _dump_chunk(chunk, chunk_idx)