Skip to content

Commit

Permalink
remove obsolete librosa dependency and bump version
Browse files · Browse the repository at this point in the history
  • Loading branch information
matiaslindgren committed May 15, 2020
1 parent 5dcafdd commit 3c0eb10
Show file tree
Hide file tree
Showing 4 changed files with 2 additions and 91 deletions.
75 changes: 0 additions & 75 deletions lidbox/dataset/tf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,78 +193,3 @@ def extract_features(signals, sample_rates, feattype, spec_kwargs, melspec_kwarg
X = features.window_normalization(X, **window_norm_kwargs)
tf.debugging.assert_all_finite(X, "window normalization failed")
return X


#TODO
# for conf in augment_config:
# # prepare noise augmentation
# if conf["type"] == "additive_noise":
# noise_source_dir = conf["noise_source"]
# conf["noise_source"] = {}
# with open(os.path.join(noise_source_dir, "id2label")) as f:
# id2label = dict(l.strip().split() for l in f)
# label2path = collections.defaultdict(list)
# with open(os.path.join(noise_source_dir, "id2path")) as f:
# for noise_id, path in (l.strip().split() for l in f):
# label2path[id2label[noise_id]].append((noise_id, path))
# tmpdir = conf.get("copy_to_tmpdir")
# if tmpdir:
# if os.path.isdir(tmpdir):
# if verbosity:
# print("tmpdir for noise source files given, but it already exists at '{}', so copying will be skipped".format(tmpdir))
# else:
# if verbosity:
# print("copying all noise source wavs to '{}'".format(tmpdir))
# os.makedirs(tmpdir)
# tmp = collections.defaultdict(list)
# for noise_type, noise_paths in label2path.items():
# for noise_id, path in noise_paths:
# new_path = os.path.join(tmpdir, noise_id + ".wav")
# shutil.copy2(path, new_path)
# if verbosity > 3:
# print(" ", path, "->", new_path)
# tmp[noise_type].append((noise_id, new_path))
# label2path = tmp
# for noise_type, noise_paths in label2path.items():
# conf["noise_source"][noise_type] = [path for _, path in noise_paths]
# def chunk_loader(signal, sr, meta, *args):
# chunk_length = int(sr * chunk_len_seconds)
# max_pad = int(sr * max_pad_seconds)
# if signal.size + max_pad < chunk_length:
# if verbosity:
# tf_util.tf_print("skipping too short signal (min chunk length is ", chunk_length, " + ", max_pad, " max_pad): length ", signal.size, ", utt ", meta[0], output_stream=sys.stderr, sep='')
# return
# uttid = meta[0].decode("utf-8")
# yield from chunker(signal, target_sr, (uttid, *meta[1:]))
# rng = np.random.default_rng()
# for conf in augment_config:
# if conf["type"] == "random_resampling":
# # apply naive speed modification by resampling
# ratio = rng.uniform(conf["range"][0], conf["range"][1])
# with contextlib.closing(io.BytesIO()) as buf:
# soundfile.write(buf, signal, int(ratio * target_sr), format="WAV")
# buf.seek(0)
# resampled_signal, _ = librosa.core.load(buf, sr=target_sr, mono=True)
# new_uttid = "{:s}-speed{:.3f}".format(uttid, ratio)
# new_meta = (new_uttid, *meta[1:])
# yield from chunker(resampled_signal, target_sr, new_meta)
# elif conf["type"] == "additive_noise":
# for noise_type, db_min, db_max in conf["snr_def"]:
# noise_signal = np.zeros(0, dtype=signal.dtype)
# noise_paths = []
# while noise_signal.size < signal.size:
# rand_noise_path = rng.choice(conf["noise_source"][noise_type])
# noise_paths.append(rand_noise_path)
# sig, _ = librosa.core.load(rand_noise_path, sr=target_sr, mono=True)
# noise_signal = np.concatenate((noise_signal, sig))
# noise_begin = rng.integers(0, noise_signal.size - signal.size, endpoint=True)
# noise_signal = noise_signal[noise_begin:noise_begin+signal.size]
# snr_db = rng.integers(db_min, db_max, endpoint=True)
# clean_and_noise = audio_feat.numpy_snr_mixer(signal, noise_signal, snr_db)[2]
# new_uttid = "{:s}-{:s}_snr{:d}".format(uttid, noise_type, snr_db)
# if not np.all(np.isfinite(clean_and_noise)):
# if verbosity:
# tf_util.tf_print("warning: snr_mixer failed, augmented signal ", new_uttid, " has non-finite values and will be skipped. Utterance was ", uttid, ", and chosen noise signals were\n ", tf.strings.join(noise_paths, separator="\n "), output_stream=sys.stderr, sep='')
# return
# new_meta = (new_uttid, *meta[1:])
# yield from chunker(clean_and_noise, target_sr, new_meta)
2 changes: 1 addition & 1 deletion lidbox/features/audio.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
Audio feature extraction.
Some functions are simply one-to-one TensorFlow math conversions from https://github.com/librosa.
Many functions have been inspired by https://github.com/librosa and https://github.com/kaldi-asr/kaldi.
"""
import os
import wave
Expand Down
13 changes: 0 additions & 13 deletions lidbox/features/librosa_audio.py

This file was deleted.

3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="lidbox",
version="0.4.0",
version="0.5.0",
description="End-to-end spoken language identification (LID) on TensorFlow",
long_description=readmefile_contents,
long_description_content_type="text/markdown",
Expand All @@ -17,7 +17,6 @@
"PyYAML ~= 5.1",
"jsonschema",
"kaldiio ~= 2.13",
"librosa ~= 0.7",
"matplotlib ~= 3.1",
"scikit-learn ~= 0.22.2",
"webrtcvad ~= 2.0.10",
Expand Down

0 comments on commit 3c0eb10

Please sign in to comment.