-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathLoadSong.py
84 lines (63 loc) · 3.62 KB
/
LoadSong.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import librosa
import numpy as np
def LoadSong(path, WeightFeatures = True):
'''To compute and load the features of the given clip'''
# Load the file
y, sr = librosa.load(path,sr=16000,mono=True,offset=10)
y = y[:y.shape[0]-160000]
# Divide the audio time series into 0.64 second frames
noOfFrames = int(y.shape[0]/10240) - 1
frames = np.ndarray(shape=(noOfFrames,10240),dtype=float)
for i in range(noOfFrames):
frames[i,:] = np.transpose(y[i*10240:(i+1)*10240])
# Initialize all feature vectors
RMSEvec = np.ndarray(shape=(noOfFrames,1),dtype=float)
SpCenVec = np.ndarray(shape=(noOfFrames,3),dtype=float)
SpCoVec = np.ndarray(shape=(noOfFrames,14),dtype=float)
SpRoVec = np.ndarray(shape=(noOfFrames,3),dtype=float)
SpBanVec = np.ndarray(shape=(noOfFrames,3),dtype=float)
MFFCsVec = np.ndarray(shape=(noOfFrames,13),dtype=float)
ZCRvec = np.ndarray(shape=(noOfFrames,1),dtype=float)
TempoVec = np.ndarray(shape=(noOfFrames,1),dtype=float)
for i in range(noOfFrames):
# Compute the RMS energy
RMSEvec[i] = librosa.feature.rmse(frames[i,:],frame_length=10240,center=False)
# Compute the Spectral Centroid
SpCenVec[i,:] = np.reshape(librosa.feature.spectral_centroid(frames[i,:],sr=16000,n_fft=10240,hop_length=5120),(3,))
# Compute the Spectral Contrast
SpCoVec[i,:] = np.reshape(librosa.feature.spectral_contrast(frames[i,:],sr=16000,n_fft=10240,hop_length=10240),(14,))
# Compute the Spectral Rolloff
SpRoVec[i,:] = np.reshape(librosa.feature.spectral_rolloff(frames[i,:],sr=16000,n_fft=10240,hop_length=5120),(3,))
# Compute the Spectral Bandwidth
SpBanVec[i,:] = np.reshape(librosa.feature.spectral_bandwidth(frames[i,:],sr=16000,n_fft=10240,hop_length=5120),(3,))
# Compute the MFFCs
MFFCsVec[i,:] = np.reshape(librosa.feature.mfcc(frames[i,:],sr=16000,n_mfcc=13,n_fft=10240,hop_length=20480),(13,))
# Compute the Zero Crossing Rate
ZCRvec[i,:] = librosa.feature.zero_crossing_rate(frames[i,:],frame_length=10240,center=False)
# Compute the tempo
TempoVec[i,:] = librosa.beat.tempo(y=frames[i,:], sr=22050, hop_length=20480, start_bpm=30, std_bpm=1.0, ac_size=8.0, max_tempo=500)
# Concatenate all feature vectors into a feature matrix
FeatureMatrix = np.concatenate((RMSEvec,SpCenVec,SpCoVec,SpRoVec,SpBanVec,MFFCsVec,ZCRvec,TempoVec),axis=1)
if WeightFeatures:
# To reduce the noise in data and to take the neighbouring frames into account
FeatureMatrix = WAverageFeatures(FeatureMatrix)
return FeatureMatrix
def WAverageFeatures(FeatureMatrix):
'''To compute the weighted average of each frames with the neighbouring frames'''
AveragedFeatureMatrix = np.ndarray(shape=FeatureMatrix.shape,dtype=float)
noOfFrames = FeatureMatrix.shape[0]
# Weight vector for the first 7 frames
WeightVector1 = np.array([0.6, 0.2, 0.1, 0.05, 0.025])
# Weight vector for the last 7 frames
WeightVector2 = np.array([0.025, 0.05, 0.1, 0.2, 0.6])
# Weight vector for the middle frames
WeightVector3 = np.array([0.025, 0.05, 0.1, 0.6, 0.1, 0.05, 0.025])
# Computing the weighted average and storing it
for i in range(noOfFrames):
if i < 7:
AveragedFeatureMatrix[i,:] = np.matmul(WeightVector1, FeatureMatrix[i:i+5,:])
elif i > noOfFrames - 7:
AveragedFeatureMatrix[i,:] = np.matmul(WeightVector2, FeatureMatrix[i-4:i+1,:])
else:
AveragedFeatureMatrix[i,:] = np.matmul(WeightVector3, FeatureMatrix[i-3:i+4,:])
return AveragedFeatureMatrix