-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchromaLuma.py
150 lines (115 loc) · 5.1 KB
/
chromaLuma.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>
# <codecell>
'''
Functions for computing 2D chroma features, i.e. chroma with luma.
'''
# <codecell>
import numpy as np
import scipy.weave
import scipy.signal
import librosa
# <codecell>
def apdiff(x, y):
    '''Compute an all-pairs difference matrix: D[i,j] = x[i] - y[j]

    Implemented with a NumPy outer subtraction rather than inline C, so it
    needs no compiler and accepts any array-like (lists, tuples, ndarrays).

    Input:
        x - vector, arbitrary size
        y - vector, arbitrary size
    Output:
        D - difference matrix, size x.shape[0] x y.shape[0].  The dtype
            follows NumPy's usual promotion of the two inputs, so an
            integer x combined with a float y is not truncated.
    '''
    # ufunc.outer applies the subtraction to every (x[i], y[j]) pair
    return np.subtract.outer(x, y)
# <codecell>
def logFrequencySpectrum( audioData, fs, **kwargs ):
    '''
    Compute log-frequency spectrum.  Based on code by Dan Ellis.

    The magnitude FFT of the (Hann-windowed) signal is mapped onto a
    log-spaced frequency axis with Gaussian-shaped bands, optionally
    divided by a smoothed spectral envelope, optionally A-weighted and
    compressed, truncated to the requested number of octaves and finally
    rescaled to the range [0, 1].

    Input:
        audioData - vector of audio samples
        fs - sampling rate
        minNote - minimum note number to consider, default 35.5
        binsPerOctave - number of magnitude values to compute per octave, default 48
        nOctaves - number of octaves, default 4
        smoothingWindow - window to use to smooth the spectrum, None = don't smooth, default np.hanning( binsPerOctave )
        smoothingPower - power to raise spectral envelope to, default 3.0, ignored if smoothingWindow=None
        aWeight - whether or not to a-weight the spectrum, default False
        takeLog - whether or not to compress the spectrum (implemented as a cube root rather than a true log), default False
        tuningOffset - apply this tuning offset (in semitones), default 0
    Output:
        spectrum - log-frequency spectrum, at most binsPerOctave*nOctaves bins, min-max normalized
    '''
    minNote = kwargs.get( 'minNote', 35.5 )
    binsPerOctave = kwargs.get( 'binsPerOctave', 48 )
    nOctaves = kwargs.get( 'nOctaves', 4 )
    smoothingWindow = kwargs.get( 'smoothingWindow', np.hanning( binsPerOctave ) )
    smoothingPower = kwargs.get( 'smoothingPower', 3.0 )
    aWeight = kwargs.get( 'aWeight', False )
    takeLog = kwargs.get( 'takeLog', False )
    tuningOffset = kwargs.get( 'tuningOffset', 0 )

    minFreq = librosa.feature.midi_to_hz( minNote + tuningOffset )

    # Number of samples (kept as a float so fs/N below is never an integer division)
    N = float(audioData.shape[0])

    # Compute FFT
    X = np.fft.rfft( np.hanning( N ) * audioData )
    X = np.abs( X )

    # Ratio between adjacent frequencies in log-f axis
    frequencyRatio = 2.0**(1.0/binsPerOctave)

    # How many bins in log-f axis
    nBins = np.floor( np.log((fs/2.0)/minFreq)/np.log(frequencyRatio) )

    # Freqs corresponding to each bin in FFT
    fftFreqs = np.arange( X.shape[0] )*(fs/N)

    # Freqs corresponding to each bin in log F output
    logFFTFreqs = minFreq*np.exp( np.log( 2 )*np.arange( nBins )/binsPerOctave)

    # Bandwidths of each bin in log F
    logFreqBandwidths = logFFTFreqs*(frequencyRatio - 1)

    # .. but bandwidth cannot be less than FFT binwidth
    logFreqBandwidths = np.clip( logFreqBandwidths, fs/N, np.inf )

    # Controls how much overlap there is between adjacent bands
    overlapFactor = 0.5475 # Adjusted by hand to make sum(mx'*mx) close to 1.0

    # Weighting matrix mapping energy in FFT bins to logF bins
    # is a set of Gaussian profiles depending on the difference in
    # frequencies, scaled by the bandwidth of that bin
    z = (1.0/(overlapFactor * logFreqBandwidths)).reshape((-1, 1))
    fftDiff = z*apdiff(logFFTFreqs, fftFreqs)
    mx = np.exp( -0.5*(fftDiff)**2 )

    # Normalize rows by sqrt(E), so multiplying by mx' gets approx orig spec back
    z2 = (2*(mx**2).sum(axis=1))**-0.5

    # Perform mapping in magnitude domain
    logFrequencyX = np.sqrt( z2*mx.dot(X) )

    # Compute a spectral envelope for normalizing the spectrum
    if smoothingWindow is not None:
        p = smoothingPower
        # Try to avoid boundary effects
        windowSize = smoothingWindow.shape[0]
        pad = np.ones( windowSize )*np.mean( logFrequencyX )
        paddedX = np.append( pad, np.append( logFrequencyX, pad ) )
        # Compute spectral envelope for normalization, raised to a power to squash.
        # 1.0/p (not 1/p) so an integer smoothingPower does not floor the
        # exponent to 0 under Python 2 integer division.
        normalization = np.power( scipy.signal.fftconvolve( paddedX**p, smoothingWindow, 'same' ), 1.0/p )
        # Remove boundary effects
        normalization = normalization[windowSize:-windowSize]
        logFrequencyX /= normalization

    if aWeight:
        # Compute A-weighting values for the spectrum
        logFFTFreqsSquared = logFFTFreqs**2
        weighting = 12200**2*logFFTFreqsSquared**2
        weighting /= logFFTFreqsSquared + 20.6**2
        weighting /= np.sqrt( (logFFTFreqsSquared + 107.7**2)*(logFFTFreqsSquared + 737.9**2) )
        # High-frequency pole term is f^2 + 12200^2 (squared, matching the numerator)
        weighting /= logFFTFreqsSquared + 12200**2
        logFrequencyX *= weighting

    if takeLog:
        # Actually take the cube root! Yikes!
        logFrequencyX = logFrequencyX**(1/3.0)

    # Truncate by number of octaves requested
    logFrequencyX = logFrequencyX[:binsPerOctave*nOctaves]

    # Normalize to the range [0, 1]
    logFrequencyX = (logFrequencyX - logFrequencyX.min())/(logFrequencyX.max() - logFrequencyX.min())

    return logFrequencyX