Skip to content

Commit

Permalink
remove obsolete librosa dependency and bump version
Browse files · Browse the repository at this point in the history
  • Loading branch information
matiaslindgren committed May 15, 2020
1 parent 5dcafdd commit 3c0eb10
Show file tree
Hide file tree
Showing 4 changed files with 2 additions and 91 deletions.
75 changes: 0 additions & 75 deletions lidbox/dataset/tf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,78 +193,3 @@ def extract_features(signals, sample_rates, feattype, spec_kwargs, melspec_kwarg
X = features.window_normalization(X, **window_norm_kwargs)
tf.debugging.assert_all_finite(X, "window normalization failed")
return X


#TODO
# for conf in augment_config:
# # prepare noise augmentation
# if conf["type"] == "additive_noise":
# noise_source_dir = conf["noise_source"]
# conf["noise_source"] = {}
# with open(os.path.join(noise_source_dir, "id2label")) as f:
# id2label = dict(l.strip().split() for l in f)
# label2path = collections.defaultdict(list)
# with open(os.path.join(noise_source_dir, "id2path")) as f:
# for noise_id, path in (l.strip().split() for l in f):
# label2path[id2label[noise_id]].append((noise_id, path))
# tmpdir = conf.get("copy_to_tmpdir")
# if tmpdir:
# if os.path.isdir(tmpdir):
# if verbosity:
# print("tmpdir for noise source files given, but it already exists at '{}', so copying will be skipped".format(tmpdir))
# else:
# if verbosity:
# print("copying all noise source wavs to '{}'".format(tmpdir))
# os.makedirs(tmpdir)
# tmp = collections.defaultdict(list)
# for noise_type, noise_paths in label2path.items():
# for noise_id, path in noise_paths:
# new_path = os.path.join(tmpdir, noise_id + ".wav")
# shutil.copy2(path, new_path)
# if verbosity > 3:
# print(" ", path, "->", new_path)
# tmp[noise_type].append((noise_id, new_path))
# label2path = tmp
# for noise_type, noise_paths in label2path.items():
# conf["noise_source"][noise_type] = [path for _, path in noise_paths]
# def chunk_loader(signal, sr, meta, *args):
# chunk_length = int(sr * chunk_len_seconds)
# max_pad = int(sr * max_pad_seconds)
# if signal.size + max_pad < chunk_length:
# if verbosity:
# tf_util.tf_print("skipping too short signal (min chunk length is ", chunk_length, " + ", max_pad, " max_pad): length ", signal.size, ", utt ", meta[0], output_stream=sys.stderr, sep='')
# return
# uttid = meta[0].decode("utf-8")
# yield from chunker(signal, target_sr, (uttid, *meta[1:]))
# rng = np.random.default_rng()
# for conf in augment_config:
# if conf["type"] == "random_resampling":
# # apply naive speed modification by resampling
# ratio = rng.uniform(conf["range"][0], conf["range"][1])
# with contextlib.closing(io.BytesIO()) as buf:
# soundfile.write(buf, signal, int(ratio * target_sr), format="WAV")
# buf.seek(0)
# resampled_signal, _ = librosa.core.load(buf, sr=target_sr, mono=True)
# new_uttid = "{:s}-speed{:.3f}".format(uttid, ratio)
# new_meta = (new_uttid, *meta[1:])
# yield from chunker(resampled_signal, target_sr, new_meta)
# elif conf["type"] == "additive_noise":
# for noise_type, db_min, db_max in conf["snr_def"]:
# noise_signal = np.zeros(0, dtype=signal.dtype)
# noise_paths = []
# while noise_signal.size < signal.size:
# rand_noise_path = rng.choice(conf["noise_source"][noise_type])
# noise_paths.append(rand_noise_path)
# sig, _ = librosa.core.load(rand_noise_path, sr=target_sr, mono=True)
# noise_signal = np.concatenate((noise_signal, sig))
# noise_begin = rng.integers(0, noise_signal.size - signal.size, endpoint=True)
# noise_signal = noise_signal[noise_begin:noise_begin+signal.size]
# snr_db = rng.integers(db_min, db_max, endpoint=True)
# clean_and_noise = audio_feat.numpy_snr_mixer(signal, noise_signal, snr_db)[2]
# new_uttid = "{:s}-{:s}_snr{:d}".format(uttid, noise_type, snr_db)
# if not np.all(np.isfinite(clean_and_noise)):
# if verbosity:
# tf_util.tf_print("warning: snr_mixer failed, augmented signal ", new_uttid, " has non-finite values and will be skipped. Utterance was ", uttid, ", and chosen noise signals were\n ", tf.strings.join(noise_paths, separator="\n "), output_stream=sys.stderr, sep='')
# return
# new_meta = (new_uttid, *meta[1:])
# yield from chunker(clean_and_noise, target_sr, new_meta)
2 changes: 1 addition & 1 deletion lidbox/features/audio.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
Audio feature extraction.
Some functions are simply one-to-one TensorFlow math conversions from https://github.com/librosa.
Many functions have been inspired by https://github.com/librosa and https://github.com/kaldi-asr/kaldi.
"""
import os
import wave
Expand Down
13 changes: 0 additions & 13 deletions lidbox/features/librosa_audio.py

This file was deleted.

3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="lidbox",
version="0.4.0",
version="0.5.0",
description="End-to-end spoken language identification (LID) on TensorFlow",
long_description=readmefile_contents,
long_description_content_type="text/markdown",
Expand All @@ -17,7 +17,6 @@
"PyYAML ~= 5.1",
"jsonschema",
"kaldiio ~= 2.13",
"librosa ~= 0.7",
"matplotlib ~= 3.1",
"scikit-learn ~= 0.22.2",
"webrtcvad ~= 2.0.10",
Expand Down

0 comments on commit 3c0eb10

Please sign in to comment.