
Commit

Minor cleanup including whitespace.
dpwe committed May 22, 2019
1 parent 2956cd7 commit 13ca86e
Showing 6 changed files with 43 additions and 31 deletions.
2 changes: 0 additions & 2 deletions README.md
@@ -145,5 +145,3 @@ Scaling
The fingerprint database records 2^20 (~1M) distinct fingerprints, with (by default) 100 entries for each fingerprint bucket. When the bucket fills, track entries are dropped at random; since matching depends only on making a minimum number of matches, but no particular match, dropping some of the more popular ones does not prevent matching. The Matlab version has been successfully used for databases of 100k+ tracks. Reducing the hash density (`--density`) leads to smaller reference database size, and the capacity to record more reference items before buckets begin to fill; a density of 7.0 works well.

Times (in units of 256 samples, i.e., 23 ms at the default 11kHz sampling rate) are stored in the bottom 14 bits of each database entry, meaning that times larger than 2^14*0.023 = 380 sec, or about 6 mins, are aliased. If you want to correctly identify time offsets in tracks longer than this, you need to use a larger `--maxtimebits`; e.g. `--maxtimebits 16` increases the time range to 65,536 frames, or about 25 minutes at 11 kHz. The trade-off is that the remaining bits in each 32 bit entry (i.e., 18 bits for the default 14 bit times) are used to store the track ID. Thus, by default, the database can only remember 2^18 = 262k tracks; using a larger `--maxtimebits` will reduce this; similarly, you can increase the number of distinct tracks by reducing `--maxtimebits`, which doesn't prevent matching tracks, but progressively reduces discrimination as the number of distinct time slots reduces (and can make the reported time offsets, and time ranges for `--find-time-ranges`, completely wrong for longer tracks).
-
-
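To make the bit budget in the paragraph above concrete, here is a minimal sketch (illustrative only, not audfprint's actual packing code) of how a 32-bit database entry can hold a track ID above `--maxtimebits` of time:

```python
MAX_TIME_BITS = 14                    # the --maxtimebits default
TIME_MASK = (1 << MAX_TIME_BITS) - 1  # bottom 14 bits hold the frame time

def pack_entry(track_id, frame_time):
    # Frame times beyond 2**14 wrap around (alias), as described above.
    return (track_id << MAX_TIME_BITS) | (frame_time & TIME_MASK)

def unpack_entry(entry):
    return entry >> MAX_TIME_BITS, entry & TIME_MASK

# 32 - 14 = 18 bits remain for track IDs, hence 2**18 = 262,144 tracks.
print(unpack_entry(pack_entry(1234, 20000)))  # (1234, 3616): time has aliased
```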
34 changes: 20 additions & 14 deletions audfprint.py
@@ -10,17 +10,23 @@
"""
from __future__ import division, print_function

-# For reporting progress time
-import time
-# For command line interface
-import docopt
-import os
-# For __main__
-import sys
-# For multiprocessing options
-import multiprocessing
-import joblib
-
-# The actual analyzer class/code
-import audfprint_analyze
-# Access to match functions, used in command line interface
-import audfprint_match
-# My hash_table implementation
-import hash_table
+import multiprocessing  # for new/add
+import os  # For command line interface
+import sys  # For __main__
+import time  # For reporting progress time
+
+import docopt  # For command line interface
+import joblib  # for match
+
+import audfprint_analyze  # The actual analyzer class/code
+import audfprint_match  # Access to match functions, used in command line interface
+import hash_table  # My hash_table implementation


if sys.version_info[0] >= 3:
@@ -413,13 +419,13 @@ def main(argv):

    # Setup the analyzer if we're using one (i.e., unless "merge")
    analyzer = setup_analyzer(args) if not (
-            cmd is "merge" or cmd is "newmerge"
-            or cmd is "list" or cmd is "remove") else None
+            cmd == "merge" or cmd == "newmerge"
+            or cmd == "list" or cmd == "remove") else None

    precomp_type = 'hashes'

    # Set up the hash table, if we're using one (i.e., unless "precompute")
-    if cmd is not "precompute":
+    if cmd != "precompute":
        # For everything other than precompute, we need a database name
        # Check we have one
        dbasename = args['--dbase']
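A note on the `is` to `==` fixes in this file: `is` tests object identity, and comparing strings with it only appears to work when CPython happens to intern or constant-fold the literals (Python 3.8+ emits a SyntaxWarning for it). A quick illustration:

```python
prefix = "pre"
b = prefix + "compute"  # built at run time: a distinct str object
a = "precompute"

print(a == b)  # True: compares values
print(a is b)  # False (in CPython): compares object identity, not value
```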
25 changes: 16 additions & 9 deletions audfprint_analyze.py
@@ -9,19 +9,23 @@

from __future__ import division, print_function

+import glob  # For glob2hashtable, localtester
+import os
+import struct  # For reading/writing hashes to file
+import time  # For glob2hashtable, localtester
+
import numpy as np

import scipy.signal

-# For reading/writing hashes to file
-import struct
-
-# For glob2hashtable, localtester
-import glob
-import time
-
import audio_read
-# For utility, glob2hashtable
-import hash_table
+import hash_table  # For utility, glob2hashtable
import stft


# ############### Globals ############### #
# Special extension indicating precomputed fingerprint
PRECOMPEXT = '.afpt'
@@ -80,6 +84,9 @@ def landmarks2hashes(landmarks):
the three remaining values.
"""
    landmarks = np.array(landmarks)
+    # Deal with special case of empty landmarks.
+    if landmarks.shape[0] == 0:
+        return np.zeros((0, 2), dtype=np.int32)
    hashes = np.zeros((landmarks.shape[0], 2), dtype=np.int32)
    hashes[:, 0] = landmarks[:, 0]
    hashes[:, 1] = (((landmarks[:, 1] & B1_MASK) << B1_SHIFT)
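Why the new guard helps (my reconstruction of the failure mode, not stated in the commit): an empty landmark list becomes a 1-D array, so the 2-D indexing just below the guard would raise, whereas returning an empty (0, 2) array keeps downstream concatenation working:

```python
import numpy as np

landmarks = np.array([])  # an empty landmark list -> shape (0,), 1-D
# landmarks[:, 0]         # would raise IndexError: too many indices

empty_hashes = np.zeros((0, 2), dtype=np.int32)  # what the guard returns
more_hashes = np.array([[7, 42]], dtype=np.int32)
print(np.concatenate([empty_hashes, more_hashes]))  # [[ 7 42]]: still fine
```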
@@ -182,7 +189,7 @@ def spreadpeaks(self, peaks, npoints=None, width=4.0, base=None):
        self.__sp_width = width
        self.__sp_len = npoints
        self.__sp_vals = np.exp(-0.5 * ((np.arange(-npoints, npoints + 1)
-                                         / width) ** 2))
+                                         / width)**2))
        # Now the actual function
        for pos, val in peaks:
            vec = np.maximum(vec, val * self.__sp_vals[np.arange(npoints)
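For reference, the `__sp_vals` expression touched above builds the Gaussian profile that `spreadpeaks` max-combines around each peak; evaluated on its own:

```python
import numpy as np

npoints, width = 5, 4.0
sp_vals = np.exp(-0.5 * ((np.arange(-npoints, npoints + 1) / width) ** 2))
print(sp_vals.round(3))
# [0.458 0.607 0.755 0.882 0.969 1.    0.969 0.882 0.755 0.607 0.458]
```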
@@ -195,7 +202,7 @@ def _decaying_threshold_fwd_prune(self, sgram, a_dec):
"""
(srows, scols) = np.shape(sgram)
sthresh = self.spreadpeaksinvector(
np.max(sgram[:, :np.minimum(10, scols)], axis=1), self.f_sd
np.max(sgram[:, :np.minimum(10, scols)], axis=1), self.f_sd
)
# Store sthresh at each column, for debug
# thr = np.zeros((srows, scols))
@@ -399,7 +406,7 @@ def wavfile2hashes(self, filename):
            query_hashes = []
            for peaklist in peaklists:
                query_hashes.append(landmarks2hashes(
-                    self.peaks2landmarks(peaklist)))
+                        self.peaks2landmarks(peaklist)))
            query_hashes = np.concatenate(query_hashes)
        else:
            query_hashes = landmarks2hashes(self.peaks2landmarks(peaks))
3 changes: 2 additions & 1 deletion audfprint_match.py
@@ -21,10 +21,11 @@
except:
    pass

-import audfprint_analyze  # for localtest and illustrate
+import audfprint_analyze
import audio_read
import stft


def process_info():
rss = usrtime = 0
p = psutil.Process(os.getpid())
2 changes: 1 addition & 1 deletion audio_read.py
@@ -39,7 +39,7 @@

# If ffmpeg is unavailable, you can set HAVE_FFMPEG to False which will cause
# soundfile reads to go via scipy.io.wavfile. However, this means that only
-# *.wav files are supported *and* they must already be resampled to the 
+# *.wav files are supported *and* they must already be resampled to the
# system sampling rate (e.g. 11025 Hz).

HAVE_FFMPEG = True
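A sketch of the fallback path this comment describes (an illustrative helper with assumed names, not audio_read.py's actual internals): with `HAVE_FFMPEG = False`, only WAV files already at the system rate can be read via scipy.io.wavfile:

```python
import scipy.io.wavfile

def read_wav_without_ffmpeg(filename, target_sr=11025):
    """Load a .wav already at the system rate; no resampling available."""
    sr, data = scipy.io.wavfile.read(filename)
    if sr != target_sr:
        raise ValueError("got %d Hz, need %d Hz: resample the file offline"
                         % (sr, target_sr))
    return data, sr
```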
8 changes: 4 additions & 4 deletions stft.py
@@ -1,6 +1,6 @@
"""Provide stft to avoid librosa dependency.
"""Provide stft to avoid librosa dependency.
This implementation is based on routines from
This implementation is based on routines from
https://github.com/tensorflow/models/blob/master/research/audioset/mel_features.py
"""

@@ -71,8 +71,8 @@ def stft(signal, n_fft, hop_length=None, window=None):
values. Defaults to n_fft.
Returns:
-    2D np.array where each column contains the complex values of the 
-    fft_length/2+1 unique values of the FFT for the corresponding frame of 
+    2D np.array where each column contains the complex values of the
+    fft_length/2+1 unique values of the FFT for the corresponding frame of
input samples ("spectrogram transposition").
"""
if window is None:
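As a rough companion to the docstring above, a self-contained frame-and-rFFT sketch of the same recipe (an approximation, not stft.py's exact code; frames are rows here):

```python
import numpy as np

def stft_sketch(signal, n_fft=512, hop_length=256):
    window = np.hanning(n_fft)
    n_frames = 1 + (len(signal) - n_fft) // hop_length
    frames = np.stack([signal[i * hop_length:i * hop_length + n_fft]
                       for i in range(n_frames)])
    # rfft keeps the n_fft/2 + 1 unique complex bins of each frame's FFT.
    return np.fft.rfft(frames * window, n_fft)

spec = stft_sketch(np.random.randn(11025))
print(spec.shape)  # (42, 257): 42 frames x (512/2 + 1) unique bins
```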
