-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun.py
151 lines (123 loc) · 5.74 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# -*- coding: iso-8859-15 -*-
import wave
import struct
import numpy
import scipy.fftpack
import sys
import numpy
import bisect
from math import floor
from math import ceil
from math import pow
# from scipy.io import wavfile
# import matplotlib.pyplot as plt
# The script takes one positional argument: the path of the .WAV file to analyse.
# sys.argv[0] is always the script name itself, so a user-supplied path means
# len(sys.argv) is at least 2. (Comparing against 1 could never trigger.)
if len(sys.argv) < 2:
    print('[!] Missing file path argument')
    sys.exit(1)
# Upper bound on the number of frames read from the input file.
MAX_FRAMES = 16384

# Frequency ratio of a single equal-tempered half step (twelfth root of two).
TEMPER = 1.05946309436

# The twelve notes of the chromatic scale, starting from A. A note is
# represented elsewhere by its index into these lists; the three lists are
# alternative spellings of the same twelve notes.
NOTES = 'A AB B C CD D DE E F FG G GA'.split()          # Neutral names
NOTES_SHP = 'A A♯ B C C♯ D D♯ E F F♯ G G♯'.split()      # As sharps
NOTES_FLT = 'A B♭ B C D♭ D E♭ E F G♭ G A♭'.split()      # As flats
class Pitch:
    """A musical pitch: a frequency together with a note index and an octave.

    Attributes:
        frequency: Frequency of the pitch.
        note: Index of the note within the 12-entry chromatic note lists.
        octave: Octave number. It advances by one every time the note index
            wraps past 11 back to 0.
    """

    def __init__(self, frequency, note, octave):
        self.frequency = frequency
        self.note = note
        self.octave = octave

    def Interval(self, ratio, halfsteps):
        """Return a new Pitch `halfsteps` half steps away from this one.

        Args:
            ratio: Factor applied to this pitch's frequency.
            halfsteps: Number of half steps to move (may be negative).

        Returns:
            A new Pitch; this instance is left unchanged.
        """
        # Floor division yields the number of whole octaves crossed. The
        # previous ceil() of a true division rounded every upward step to a
        # full octave, so the octave number grew on each half step.
        octaves, note = divmod(self.note + halfsteps, 12)
        return Pitch(self.frequency * ratio, note, int(self.octave + octaves))

    def HalfStep(self):
        """Return the pitch one equal-tempered half step above this one."""
        return self.Interval(TEMPER, 1)

    # Made these to facilitate some more advanced musical analysis later on,
    # such as identifying the chord itself. They use just-intonation ratios.

    def MinorThird(self):
        """Return the pitch a minor third (3 half steps, ratio 6:5) above."""
        return self.Interval(6.0 / 5, 3)

    def MajorThird(self):
        """Return the pitch a major third (4 half steps, ratio 5:4) above."""
        return self.Interval(5.0 / 4, 4)

    def PerfectFifth(self):
        """Return the pitch a perfect fifth (7 half steps, ratio 3:2) above."""
        return self.Interval(3.0 / 2, 7)

    def Octave(self):
        """Return the pitch one octave above: same note, doubled frequency."""
        return Pitch(self.frequency * 2, self.note, self.octave + 1)
# BASIS is the set of candidate pitches at which harmonic products are
# evaluated: a run of discrete musical notes rather than a continuous sweep.
# It is grown one half step at a time from the starting pitch.
BASIS = [Pitch(164.813778, 7, 2)]  # Starting pitch: 164.8 Hz, note index 7 (E), octave 2
for _ in range(24):
    # The most recently added pitch is always the last element.
    BASIS.append(BASIS[-1].HalfStep())
##
##
##
# Load the file and read its metadata:
# number of channels, sample width in bytes, sample rate, frame count, and the
# compression type/name (read but not used in the visible code).

# struct format code for each supported sample width, in bytes.
# In .WAV, 1-byte samples are unsigned; wider samples are signed.
_SAMPLE_CODES = {1: 'B', 2: 'h', 4: 'i'}

wav_file = wave.open(sys.argv[-1], 'r')
try:
    nchan, swidth, srate, scount, comptype, compname = wav_file.getparams()
    if swidth not in _SAMPLE_CODES:
        print('[!] Unsupported sample width: %d bytes' % swidth)
        sys.exit(1)
    # Cap the number of frames read to MAX_FRAMES
    raw = wav_file.readframes(min(scount, MAX_FRAMES))
finally:
    # Always release the file handle, even if reading fails or we bail out.
    wav_file.close()

# Trust the bytes actually delivered rather than the header's frame count.
nframe = len(raw) // (swidth * nchan)
if nframe == 0:
    print('[!] No audio frames could be read')
    sys.exit(1)

# Some additional calculations
nyquist = srate / 2.0
binwidth = float(srate) / nframe  # [Hz] Resolution of an nframe-point DFT

print('')
print('Sample width: %d bytes' % swidth)
print('Sample rate: %d' % srate)
print('Reading %d samples (%f sec)' % (nframe, (float(nframe) / srate)))
print('Nyquist frequency: %f Hz' % nyquist)
print('Fourier bin width: %f Hz' % binwidth)
print('')

# Decode the raw bytes into numbers.
# swbits is the exponent of the normalisation divisor: the full 8 bits for
# unsigned 1-byte samples, one bit less (the sign bit) for signed samples.
swbits = 8 if swidth == 1 else ((swidth * 8) - 1)
# .WAV is little-endian; one format code per sample, channels interleaved.
packing = '<%d%s' % (nframe * nchan, _SAMPLE_CODES[swidth])
data = struct.unpack(packing, raw[:nframe * nchan * swidth])

# Normalize the data onto [-1, 1). Not strictly necessary, but a good idea.
scale = float(2 ** swbits)
if swidth == 1:
    # Unsigned samples are centred on mid-scale: [0, 255] -> [-1, 1)
    data = [(x / scale) * 2 - 1 for x in data]
else:
    # Signed samples are already centred on zero: [-2^swbits, 2^swbits) -> [-1, 1)
    data = [x / scale for x in data]
# Compute the magnitude spectrum of the first channel.
# .WAV interleaves the channels within each frame, so channel 0 is every
# nchan-th sample starting at index 0.
# numpy.fft.rfft returns only the non-negative-frequency half of the DFT (the
# input is real, so the other half is redundant) as a complex array of
# len(x)//2 + 1 bins; numpy.abs turns each complex bin into its magnitude.
# scipy.fftpack.rfft is deliberately not used: it returns a real array with the
# real and imaginary parts packed side by side, so numpy.abs of that output is
# not a magnitude spectrum.
fourier = numpy.abs(numpy.fft.rfft(data[0::nchan]))
# Frequency <-> spectrum index correspondence.
#
# The spectrum array holds only non-negative frequencies. Its first element is
# the DC (0 Hz) term; its last element is taken to sit at the Nyquist
# frequency, rate / 2 (exact when the transformed signal has an even number of
# samples). Indices in between are spaced evenly in frequency, so
#     index = frequency / nyquist * (number_of_elements - 1)
#
# NOTE: this function shadows the builtin bin(); the name is kept because the
# rest of the script calls it.
def bin(frequency, nbins=None, rate=None):
    """Return the index of the spectrum element closest to `frequency`.

    Args:
        frequency: Frequency to look up, in the same unit as `rate`.
        nbins: Number of elements in the spectrum array. Defaults to the
            length of the module-level `fourier` array.
        rate: Sampling rate. Defaults to the module-level `srate`.

    Returns:
        The integer index nearest to `frequency`. bin(0.0) is 0, the DC term.
        The result is not clamped, so it can exceed the last valid index for
        frequencies above Nyquist; callers must bounds-check.
    """
    if nbins is None:
        nbins = len(fourier)
    if rate is None:
        rate = srate
    nyq = rate / 2.0
    return int(round(frequency / nyq * (nbins - 1)))
# Harmonic Product computation
# It's the geometric mean of the fourier spectrum values over HPDEPTH harmonics.
HPDEPTH = 5


def HarmonicProduct(basebin, spectrum=None, depth=None):
    """Return the geometric mean of the spectrum at the harmonics of `basebin`.

    The harmonics are the indices basebin * 1, basebin * 2, ..., basebin * depth.
    Harmonics that fall beyond the end of the spectrum are left out, and the
    mean is taken over the harmonics that were actually used.

    Args:
        basebin: Spectrum index of the fundamental.
        spectrum: Sequence of spectrum magnitudes. Defaults to the
            module-level `fourier` array.
        depth: Number of harmonics to combine. Defaults to HPDEPTH.

    Returns:
        The geometric mean, or 0.0 when not even the fundamental lies inside
        the spectrum.
    """
    if spectrum is None:
        spectrum = fourier
    if depth is None:
        depth = HPDEPTH

    product = 1.0
    used = 0
    # range end is depth + 1 so that exactly `depth` harmonics are combined.
    for harmonic in range(1, depth + 1):
        index = int(basebin * harmonic)
        # index == len(spectrum) is already out of range, hence >=.
        if index >= len(spectrum):
            break
        product *= spectrum[index]
        used += 1

    if used == 0:
        return 0.0
    # Root by the number of factors multiplied, so a truncated product is
    # still a true geometric mean.
    return pow(product, 1.0 / used)
#
#
#
# Ranking state: two parallel lists kept in ascending order of score, so the
# strongest candidates end up at the tail.
candidateValues = []  # [float] harmonic product of each candidate
candidateNotes = []   # [string] note name of each candidate, same order

# Score every pitch in the search domain and slot it into the ranking.
for pitch in BASIS:
    score = HarmonicProduct(bin(pitch.frequency))
    label = '%s%d' % (NOTES_SHP[pitch.note], pitch.octave)

    # bisect_right finds the insertion point that keeps the scores sorted;
    # equal scores go after the ones already present.
    slot = bisect.bisect_right(candidateValues, score)
    candidateNotes.insert(slot, label)
    candidateValues.insert(slot, score)

    print('---')
    print('%s\t %5f Hz\t %2f' % (label, pitch.frequency, score))

# The five highest-scoring notes, listed from weakest to strongest.
Result = candidateNotes[-5:]

print('')
print('===')
print('Top 5 choices: %s' % ' '.join(Result))