-
Notifications
You must be signed in to change notification settings - Fork 1
/
show_NMF.py
executable file
·119 lines (90 loc) · 3.15 KB
/
show_NMF.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/python
"""Factorize a recording with harmonically initialised NMF and plot the result.

The script loads an audio file, takes the magnitude of its STFT, seeds one
NMF component per semitone between ``pitch_min`` and ``pitch_max`` with a
harmonic comb, runs ``NMF.factorize`` on it and finally shows the input
spectrogram, the learned components and the (optionally filtered)
activations.

The code is written to run unchanged on Python 2 and Python 3.
"""
import sys

import numpy as np

usage = '''
Usage: show_NMF.py filename.wav [pitch_min pitch_max filtering]
Mandatory argument : file to factorize
Optional arguments : pitch_min (smallest pitch considered), pitch_max (biggest pitch considered), filtering (true or false)
'''

N_FFT = 2048
# Big hop (3/4 of the window). Floor division keeps the value an int on
# Python 3 too; plain "/" would hand a float hop length to stft().
HOP_LENGTH = N_FFT * 3 // 4


def parse_filtering(value):
    """Translate the command-line ``filtering`` argument into a bool.

    Only the literal strings "true" and "false" are recognised; anything
    else is reported on stdout and treated as "true".
    """
    if value == "false":
        return False
    if value != "true":
        print("Error reading filtering argument. Assuming true.")
    return True


def build_harmonic_templates(fundamentals, n_bins, n_fft, sr, threshold=0.1):
    """Build the initial NMF dictionary: one harmonic comb per fundamental.

    For every fundamental frequency, each harmonic ``h`` whose (truncated)
    centre bin lies below ``n_bins`` gets weight ``1.0 / h`` on the bins
    ``int(centre * 2**-threshold)`` up to, but not including,
    ``int(centre * 2**threshold)``.  Where the bands of two harmonics
    overlap, the higher harmonic (smaller weight) wins because it is written
    last.

    Parameters:
        fundamentals: sequence of fundamental frequencies in Hz (all > 0).
        n_bins: number of frequency bins (rows) of the spectrogram.
        n_fft: FFT size used for the spectrogram.
        sr: sampling rate in Hz.
        threshold: half-width of each harmonic band, in octaves.

    Returns:
        Array of shape ``(n_bins, len(fundamentals))``.

    Raises:
        ValueError: if a fundamental is not strictly positive (the harmonic
            loop could never reach ``n_bins`` and would not terminate).
    """
    templates = np.zeros((n_bins, len(fundamentals)))
    band_low = 2 ** (-threshold)
    band_high = 2 ** threshold
    for column, fund_freq in enumerate(fundamentals):
        fund_freq = float(fund_freq)
        if not fund_freq > 0:
            raise ValueError("fundamental frequencies must be positive")
        h = 1
        while True:
            centre = fund_freq * h * n_fft / sr
            if int(centre) >= n_bins:
                break
            first = int(centre * band_low)
            # The upper edge is exclusive and clipped to the spectrum size.
            last = min(int(centre * band_high), n_bins)
            templates[first:last, column] = 1.0 / h
            h += 1
    return templates


def filter_activations(acts, ratio=5):
    """Merge adjacent-row activations and drop weak ones, in place.

    The cut-off is ``max(acts) / ratio``.  Rows are visited in ascending
    order; wherever the previous row is above the cut-off and larger than the
    current row, the current row's value is added to the previous row and
    then zeroed.  Afterwards every value below the cut-off is set to zero.

    Parameters:
        acts: 2-D activation array (components x frames); modified in place.
        ratio: divisor applied to the global maximum to obtain the cut-off.

    Returns:
        The same ``acts`` array, for convenience.
    """
    if acts.size == 0:
        return acts
    filter_threshold = np.max(acts) / float(ratio)
    for i in range(1, acts.shape[0]):
        previous, current = acts[i - 1], acts[i]  # views into acts
        absorb = (previous > filter_threshold) & (previous > current)
        previous[absorb] += current[absorb]
        current[absorb] = 0
    acts[acts < filter_threshold] = 0
    return acts


def show_results(V, comps, acts, sr, hop_length, fmin):
    """Plot the input spectrogram, learned components and activations.

    Parameters:
        V: magnitude spectrogram that was factorized.
        comps: learned components returned by the factorization.
        acts: activations returned by the factorization.
        sr: sampling rate of the recording.
        hop_length: STFT hop length used to compute ``V``.
        fmin: frequency in Hz of the lowest component, used for the note axis
            of the activation plot.
    """
    # Plotting libraries are only needed here, so import them late.
    import matplotlib.pyplot as plt
    from librosa.display import specshow

    plt.close('all')

    plt.subplot2grid((2, 2), (0, 0), colspan=2)
    specshow(V, sr=sr, hop_length=hop_length, n_yticks=25,
             x_axis='time', y_axis='linear')
    plt.colorbar()
    plt.title('Input power spectrogram')

    plt.subplot2grid((2, 2), (1, 0))
    specshow(comps, sr=sr, hop_length=hop_length, n_yticks=25, n_xticks=25,
             x_axis='frames', y_axis='linear')
    plt.xlabel('Components')
    plt.title('Learned Components')

    plt.subplot2grid((2, 2), (1, 1))
    specshow(acts, sr=sr, hop_length=hop_length, n_yticks=25,
             y_axis='cqt_note', x_axis='time', fmin=fmin)
    plt.colorbar()
    plt.ylabel('Components')
    plt.title('Determined Activations')

    plt.tight_layout()
    plt.show()


def main(argv=None):
    """Run the script with ``argv`` (defaults to ``sys.argv``).

    Exits with status -1 after printing a message when no input file is
    given or when ``pitch_min`` is higher than ``pitch_max``.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) <= 1:
        print(usage)
        sys.exit(-1)

    # Imported only after the argument check so the usage message does not
    # have to wait for (or depend on) the audio stack.
    from librosa import load, stft, note_to_midi, midi_to_hz

    filename = argv[1]
    pitch_min = note_to_midi(argv[2] if len(argv) > 2 else 'C1')
    pitch_max = note_to_midi(argv[3] if len(argv) > 3 else 'C7')
    if pitch_min > pitch_max:
        print("Error: pitch_min must not be higher than pitch_max.")
        sys.exit(-1)
    pitches = range(pitch_min, pitch_max + 1)
    filtering = parse_filtering(argv[4]) if len(argv) > 4 else True

    ### main program ###
    x, sr = load(filename)
    X = stft(x, n_fft=N_FFT, hop_length=HOP_LENGTH)

    ### NMF ###
    V = np.abs(X)
    n_components = len(pitches)
    fundamentals = [midi_to_hz(pitch) for pitch in pitches]
    W_zero = build_harmonic_templates(fundamentals, V.shape[0], N_FFT, sr)
    H_zero = np.ones((n_components, V.shape[1]))

    # Project-local module; it is used here as returning the pair
    # (components, activations).
    from NMF import factorize
    comps, acts = factorize(V, W_zero, H_zero)

    if filtering:
        filter_activations(acts)

    show_results(V, comps, acts, sr, HOP_LENGTH, midi_to_hz(pitch_min))


if __name__ == "__main__":
    main()