-
Notifications
You must be signed in to change notification settings - Fork 14
/
speech2text
executable file
·122 lines (104 loc) · 4.78 KB
/
speech2text
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/data/data/com.termux/files/usr/bin/bash
# --- Configuration ---------------------------------------------------------
# DEEPSPEECH_VERSION: DeepSpeech release used in every download URL below.
# SPEECH_TIME: handed to the recorder's -l option and to sleep (seconds).
DEEPSPEECH_VERSION=0.9.3
SPEECH_TIME=2

# Machine name as reported by uname, rewritten to the spelling used in the
# native_client archive name; any other value is kept as reported.
ARCH="$(uname -m)"
case "$ARCH" in
  aarch64) ARCH="arm64" ;;
  armv7l) ARCH="armv7" ;;
esac

# Directory the script was started from; the script cds back to it at the end.
SOURCE_DIR="$(pwd)"
#######################################
# Change the working directory to the directory that holds this script,
# following symlinks so that a linked copy still resolves to the real file.
# Globals:   BASH_SOURCE (read)
# Arguments: none
# Outputs:   an error message on stderr if the directory cannot be resolved
# Returns:   0 on success. Exits the script with status 1 on failure,
#            because the rest of the script removes and downloads files
#            relative to the current directory.
#######################################
enterScriptDir() {
  local script_path="${BASH_SOURCE[0]}"
  local script_dir

  # Resolve script_path until it no longer names a symlink.
  while [[ -h "$script_path" ]]; do
    if ! script_dir="$(cd -P "$(dirname "$script_path")" >/dev/null 2>&1 && pwd)"; then
      printf 'speech2text: cannot resolve the directory of %s\n' "$script_path" >&2
      exit 1
    fi
    script_path="$(readlink "$script_path")"
    # A relative link target is relative to the directory holding the link.
    if [[ "$script_path" != /* ]]; then
      script_path="$script_dir/$script_path"
    fi
  done

  # Checking the substitution's status also catches an empty result, which
  # a plain `cd ""` would silently accept.
  if ! script_dir="$(cd -P "$(dirname "$script_path")" >/dev/null 2>&1 && pwd)" \
    || ! cd "$script_dir"; then
    printf 'speech2text: cannot enter the directory of %s\n' "$script_path" >&2
    exit 1
  fi
}
# Switch to the script's own directory; every ./ path below is relative to it.
enterScriptDir

# Add ./deepspeech-bin to the dynamic linker's search path for child processes.
LD_LIBRARY_PATH="$(pwd)/deepspeech-bin/"
export LD_LIBRARY_PATH

# Setup: when no `speech2text` command can be found, link this script into
# the usr/bin directory that sits next to $HOME.
if ! command -v speech2text > /dev/null; then
  ln -s "$(pwd)/speech2text" "$HOME/../usr/bin/speech2text"
fi
#######################################
# Install, via `pkg i -y`, the package behind each helper command that is
# not currently available.
# Arguments: none
# Returns:   status of the last check (or of the last install it triggered)
#######################################
ensure_requirements_installed() {
  # Each entry reads "<command to look for>:<package to install>".
  local -a wanted=(
    "ffmpeg:ffmpeg"
    #"mpv:mpv"
    "wget:wget"
    "termux-microphone-record:termux-api"
  )
  local entry

  for entry in "${wanted[@]}"; do
    command -v "${entry%%:*}" > /dev/null || pkg i -y "${entry#*:}"
  done
}
#######################################
# Replace ./deepspeech-bin with a fresh copy of the DeepSpeech native client
# archive for DEEPSPEECH_VERSION and ARCH.
# Globals:   DEEPSPEECH_VERSION (read), ARCH (read)
# Arguments: none
# Returns:   0 when the archive was downloaded and extracted, non-zero
#            otherwise. The caller's working directory is never changed.
#######################################
download_deepspeech() {
  local archive="./native_client.tar.xz"
  local url="https://github.com/mozilla/DeepSpeech/releases/download/v${DEEPSPEECH_VERSION}/native_client.${ARCH}.cpu.android.tar.xz"
  local status

  rm -rf ./deepspeech-bin
  mkdir ./deepspeech-bin || return 1

  # Work inside a subshell so the directory change cannot leak: a failed
  # `cd` followed by `cd ..` would otherwise move the script out of its own
  # directory before later steps run `rm -rf` on relative paths.
  (
    cd ./deepspeech-bin || exit 1
    # Only extract an archive that was actually downloaded.
    if wget "$url" -O "$archive" -q --show-progress && tar xf "$archive"; then
      status=0
    else
      status=1
    fi
    rm -f "$archive"
    exit "$status"
  )
}
#######################################
# Replace ./audio with the example audio archive published for
# DEEPSPEECH_VERSION.
# Globals:   DEEPSPEECH_VERSION (read)
# Arguments: none
# Returns:   0 when the archive was downloaded and extracted, non-zero
#            otherwise.
#######################################
download_example_audio() {
  local archive="./example-audio.tar.gz"
  local url="https://github.com/mozilla/DeepSpeech/releases/download/v${DEEPSPEECH_VERSION}/audio-${DEEPSPEECH_VERSION}.tar.gz"
  local status=0

  rm -rf ./audio
  # Skip extraction when the download failed instead of feeding tar an
  # empty or partial file.
  if ! { wget "$url" -O "$archive" -q --show-progress && tar xf "$archive"; }; then
    status=1
  fi
  rm -f "$archive"
  return "$status"
}
#######################################
# Fetch the DeepSpeech models for DEEPSPEECH_VERSION.
# First tries the stand-alone .tflite and .scorer files; if either download
# fails, falls back to the models tarball, which is unpacked into ./models.
# Globals:   DEEPSPEECH_VERSION (read)
# Arguments: none
# Outputs:   a progress line on stdout before the tarball is extracted
# Returns:   0 when one of the two model layouts is in place, non-zero
#            otherwise.
#######################################
download_models() {
  local base_url="https://github.com/mozilla/DeepSpeech/releases/download/v${DEEPSPEECH_VERSION}"
  local stem="deepspeech-${DEEPSPEECH_VERSION}-models"
  local status=0

  rm -rf ./models

  if wget "${base_url}/${stem}.tflite" -O "./${stem}.tflite" -q --show-progress \
    && wget "${base_url}/${stem}.scorer" -O "./${stem}.scorer" -q --show-progress; then
    return 0
  fi

  # wget -O creates its output file even when the download fails. Remove
  # the leftovers so a zero-byte or partial model file cannot satisfy the
  # `-f` checks elsewhere in the script and hide the ./models fallback.
  rm -f "./${stem}.tflite" "./${stem}.scorer"

  if ! wget "${base_url}/${stem}.tar.gz" -O ./models.tar.gz -q --show-progress; then
    rm -f ./models.tar.gz
    return 1
  fi

  echo "Extracting ./models.tar.gz ..."
  if ! { tar xf ./models.tar.gz && mv ./deepspeech-*-models ./models; }; then
    status=1
  fi
  rm -f ./models.tar.gz
  # Drop "._*" files that may have been unpacked next to the models.
  rm -f ./._*
  rm -f ./models/._*
  return "$status"
}
# Make sure the helper commands are installed before anything else runs.
ensure_requirements_installed

#if [ ! -f ./deepspeech-bin/deepspeech ]; then
# download_deepspeech
#fi

# Version reported by the local deepspeech binary: keep the output line that
# mentions "DeepSpeech", take the text after the first "v" up to the first
# "-", and strip a leading "DeepSpeech " label. If nothing matches (for
# instance when the binary is not there yet) the result is empty, which
# counts as a mismatch below.
DETECTED_DEEPSPEECH_VERSION=$(
  LD_LIBRARY_PATH="$LD_LIBRARY_PATH" ./deepspeech-bin/deepspeech --version 2>&1 \
    | grep DeepSpeech \
    | cut -d "v" -f2 \
    | cut -d "-" -f1 \
    | sed -e 's/DeepSpeech //'
)

# A different (or undetectable) version means everything is fetched again.
if [[ "$DETECTED_DEEPSPEECH_VERSION" != "$DEEPSPEECH_VERSION" ]]; then
  download_deepspeech
  download_example_audio
  download_models
fi

# Fetch the example audio when no ./audio/*.wav file is present.
if ! ls ./audio/*.wav &> /dev/null; then
  download_example_audio
fi

# Fetch the models when neither known model layout is present.
if ! [[ -f ./models/output_graph.tflite || -f "./deepspeech-${DEEPSPEECH_VERSION}-models.tflite" ]]; then
  download_models
fi
# Important part of the script

#######################################
# Remove recordings left over from an earlier run and ask
# termux-microphone-record to record into ./tmp.mp4.
# Globals:   SPEECH_TIME (read) - passed as the recorder's -l value
# Arguments: none
# Returns:   0 when the recorder did not report an error. If its output
#            contains "Recording error: null", a notification is posted and
#            the script exits with status 1, so callers can tell the run
#            failed.
#######################################
mic_listen() {
  local recorder_output

  rm -f ./tmp.mp4
  rm -f ./tmp.wav
  recorder_output="$(termux-microphone-record -f ./tmp.mp4 -l "$SPEECH_TIME" -r 16000 -c 1 -e amr_nb)"
  if [[ "$recorder_output" == *"Recording error: null"* ]]; then
    termux-notification --sound -t "Speech Recognition Error" -c "Can't access the mic. Make sure not other app is using the mic."
    # A bare `exit` would reuse the notification command's status (normally
    # 0) and report success; signal the failure explicitly.
    exit 1
  fi
}
#######################################
# Convert the recording to WAV, play it back, and print the transcription.
# Globals:   DEEPSPEECH_VERSION (read)
# Arguments: none
# Outputs:   the last line of deepspeech's combined stdout/stderr, or an
#            error line when no model files are found
# Returns:   status of the final cleanup command
#######################################
recognize() {
  local -a model_args=()

  # NOTE(review): -q presumably stops the running recording - confirm
  # against the termux-api documentation.
  termux-microphone-record -q > /dev/null
  ffmpeg -loglevel panic -i ./tmp.mp4 -f wav -bitexact -acodec pcm_s16le -ar 16000 -ac 1 ./tmp.wav
  rm -f ./tmp.mp4
  termux-media-player play ./tmp.wav > /dev/null & #play recording back before deepspeech

  # Choose the model flags; the order of the checks decides which layout
  # wins when more than one is present.
  if [[ "$DEEPSPEECH_VERSION" == "0.5.1" ]]; then
    model_args=(
      --model ./models/output_graph.tflite
      --alphabet ./models/alphabet.txt
      --lm ./models/lm.binary
      --trie ./models/trie
    )
  elif [[ -f "./deepspeech-${DEEPSPEECH_VERSION}-models.tflite" ]]; then
    model_args=(
      --model "./deepspeech-${DEEPSPEECH_VERSION}-models.tflite"
      --scorer "./deepspeech-${DEEPSPEECH_VERSION}-models.scorer"
    )
  elif [[ -f ./models/output_graph.tflite ]]; then
    model_args=(
      --model ./models/output_graph.tflite
      --lm ./models/lm.binary
      --trie ./models/trie
    )
  fi

  if (( ${#model_args[@]} > 0 )); then
    # Only the final output line is kept.
    ./deepspeech-bin/deepspeech "${model_args[@]}" --audio ./tmp.wav 2>&1 | tail -1
  else
    echo "Error: Couldn't find the deepspeech models!"
  fi

  rm -f ./tmp.wav
}
# Main sequence: start the recording, wait SPEECH_TIME seconds, then convert
# and transcribe what was recorded.
mic_listen
sleep "${SPEECH_TIME}"
recognize

# Go back to the directory the script was started from.
cd "${SOURCE_DIR}"