-
Notifications
You must be signed in to change notification settings - Fork 5
/
stt_example.py
92 lines (79 loc) · 2.94 KB
/
stt_example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import deepspeech
import numpy as np
import os
import pyaudio
import time
# Cuda for deepspeech is controlled at the pip package level
# pip install deepspeech-gpu
# DeepSpeech parameters
BEAM_WIDTH = 700
LM_ALPHA = 0.75
LM_BETA = 1.85
MODEL_FILE_PATH = os.path.join('models', 'deepspeech-0.9.3-models.pbmm')
SCORER_PATH = os.path.join('models', 'deepspeech-0.9.3-models.scorer')
class Transcriber:
def __init__(self, model):
self.model = model
self.model.enableExternalScorer(SCORER_PATH)
self.model.setScorerAlphaBeta(LM_ALPHA, LM_BETA)
self.model.setBeamWidth(BEAM_WIDTH)
# self.model.enableDecoderWithLM(LM_FILE_PATH, TRIE_FILE_PATH, LM_ALPHA, LM_BETA)
# Create a Streaming session
self.ds_stream = self.model.createStream()
# Encapsulate DeepSpeech audio feeding into a callback for PyAudio
self.text_so_far = ''
self.t_start = time.time()
self.t_wait = .5
self.final_text = None
def process_audio(self, in_data, frame_count, time_info, status):
data16 = np.frombuffer(in_data, dtype=np.int16)
self.ds_stream.feedAudioContent(data16)
text = self.ds_stream.intermediateDecode()
try:
if text != self.text_so_far:
if text not in ["i ", "he ", "the "]:
print('Interim text = {};'.format(text))
self.text_so_far = text
self.t_start = time.time()
elif text != '' and (time.time() - self.t_start > self.t_wait):
if text not in ["i ", "he ", "the "]:
print("Finishing stream")
text = self.ds_stream.finishStream()
print('Final text = {}.\n'.format(text))
self.final_text = text
self.ds_stream = self.model.createStream()
except Exception as e:
print(f"Text: '{text}'; So far: '{self.text_so_far}")
print(self.t_start)
raise e
return (in_data, pyaudio.paContinue)
def listen(self):
print("setting up to listen")
# Feed audio to deepspeech in a callback to PyAudio
self.audio = pyaudio.PyAudio()
self.stream = self.audio.open(
format=pyaudio.paInt16,
channels=1,
rate=16000,
input=True,
frames_per_buffer=1024,
stream_callback=self.process_audio
)
print('Please start speaking, when done press Ctrl-C ...')
self.stream.start_stream()
print("listening now")
return
if __name__ == '__main__':
# Make DeepSpeech Model
model = deepspeech.Model(MODEL_FILE_PATH)
stt = Transcriber(model)
stt.listen()
try:
while stt.stream.is_active():
time.sleep(0.05)
except KeyboardInterrupt:
# PyAudio
stt.stream.stop_stream()
stt.stream.close()
stt.audio.terminate()
print('Finished recording.')