Add a test for interleaved/concurrent streams with a single model instance
mozilla · Jun 18, 2019 · f12ea5e · f12ea5e
1 parent ea1422d
commit f12ea5e
Show file tree

Hide file tree

Showing 4 changed files with 101 additions and 0 deletions.
diff --git a/data/smoke_test/new-home-in-the-stars-16k.wav b/data/smoke_test/new-home-in-the-stars-16k.wav
diff --git a/native_client/test/concurrent_streams.py b/native_client/test/concurrent_streams.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function
+
+import argparse
+import numpy as np
+import wave
+
+from deepspeech import Model
+
+
+# These constants control the beam search decoder.
+
+# Beam width used in the CTC decoder when building candidate transcriptions.
+# Passed to the Model constructor in main().
+BEAM_WIDTH = 500
+
+# The alpha hyperparameter of the CTC decoder: language model weight.
+# Passed to enableDecoderWithLM() in main().
+LM_ALPHA = 0.75
+
+# The beta hyperparameter of the CTC decoder: word insertion bonus.
+# Passed to enableDecoderWithLM() in main().
+LM_BETA = 1.85
+
+
+# These constants are tied to the shape of the graph used (changing them changes
+# the geometry of the first layer), so make sure you use the same constants that
+# were used during training. Both are passed to the Model constructor in main().
+
+# Number of MFCC features to use.
+N_FEATURES = 26
+
+# Size of the context window used for producing timesteps in the input vector.
+N_CONTEXT = 9
+
+
+def main():
+    parser = argparse.ArgumentParser(description='Running DeepSpeech inference.')
+    parser.add_argument('--model', required=True,
+                        help='Path to the model (protocol buffer binary file)')
+    parser.add_argument('--alphabet', required=True,
+                        help='Path to the configuration file specifying the alphabet used by the network')
+    parser.add_argument('--lm', nargs='?',
+                        help='Path to the language model binary file')
+    parser.add_argument('--trie', nargs='?',
+                        help='Path to the language model trie file created with native_client/generate_trie')
+    parser.add_argument('--audio1', required=True,
+                        help='First audio file to use in interleaved streams')
+    parser.add_argument('--audio2', required=True,
+                        help='Second audio file to use in interleaved streams')
+    args = parser.parse_args()
+
+    ds = Model(args.model, N_FEATURES, N_CONTEXT, args.alphabet, BEAM_WIDTH)
+
+    if args.lm and args.trie:
+        ds.enableDecoderWithLM(args.alphabet, args.lm, args.trie, LM_ALPHA, LM_BETA)
+
+    with wave.open(args.audio1, 'rb') as fin:
+        fs1 = fin.getframerate()
+        audio1 = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)
+
+    with wave.open(args.audio2, 'rb') as fin:
+        fs2 = fin.getframerate()
+        audio2 = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)
+
+    stream1 = ds.setupStream(sample_rate=fs1)
+    stream2 = ds.setupStream(sample_rate=fs2)
+
+    splits1 = np.array_split(audio1, 10)
+    splits2 = np.array_split(audio2, 10)
+
+    for part1, part2 in zip(splits1, splits2):
+        ds.feedAudioContent(stream1, part1)
+        ds.feedAudioContent(stream2, part2)
+
+    print(ds.finishStream(stream1))
+    print(ds.finishStream(stream2))
+
+# Run the test only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+    main()
diff --git a/taskcluster/tc-python-tests-prod.sh b/taskcluster/tc-python-tests-prod.sh
@@ -39,4 +39,6 @@ LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH pip install --verbose --only-bin
 
 run_prod_inference_tests
 
+run_prod_concurrent_stream_tests
+
 virtualenv_deactivate "${pyver}" "${PYENV_NAME}"
diff --git a/taskcluster/tc-tests-utils.sh b/taskcluster/tc-tests-utils.sh
@@ -419,6 +419,26 @@ run_all_inference_tests()
   assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}"
 }
 
+run_prod_concurrent_stream_tests()
+{
+  # Runs concurrent_streams.py against two audio files and checks the two
+  # lines it prints: line 1 for --audio1 (LDC93S1), line 2 for --audio2
+  # (new-home-in-the-stars).
+
+  # Turn errexit off around the run so a failure is captured in ${status}
+  # and reported by the assertions instead of aborting the script here.
+  set +e
+  output=$(python ${TASKCLUSTER_TMP_DIR}/test_sources/concurrent_streams.py \
+             --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} \
+             --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
+             --lm ${TASKCLUSTER_TMP_DIR}/lm.binary \
+             --trie ${TASKCLUSTER_TMP_DIR}/trie \
+             --audio1 ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav \
+             --audio2 ${TASKCLUSTER_TMP_DIR}/new-home-in-the-stars-16k.wav 2>/dev/null)
+  status=$?
+  set -e
+
+  # ${output} must be quoted: unquoted, word splitting replaces the newline
+  # between the two transcripts with a space, so head and tail would both
+  # return one combined line instead of one transcript each.
+  output1=$(echo "${output}" | head -n 1)
+  output2=$(echo "${output}" | tail -n 1)
+
+  assert_correct_ldc93s1_prodmodel "${output1}" "${status}"
+  assert_correct_inference "${output2}" "i must find a new home in the stars" "${status}"
+}
+
 run_prod_inference_tests()
 {
   set +e
@@ -540,6 +560,7 @@ download_data()
   cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/alphabet.txt ${TASKCLUSTER_TMP_DIR}/alphabet.txt
   cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/vocab.pruned.lm ${TASKCLUSTER_TMP_DIR}/lm.binary
   cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/vocab.trie ${TASKCLUSTER_TMP_DIR}/trie
+  cp -R ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/test ${TASKCLUSTER_TMP_DIR}/test_sources
 }
 
 download_material()