diff --git a/README.md b/README.md
index 4375d98..68c89b2 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,11 @@ Next, download required nltk data
 ```
 $ python bin/download.py
 ```
 
+Start the `vosk` server for speech recognition
+```
+$ docker run -d -p 2700:2700 alphacep/kaldi-en:latest
+```
+
 ## TODO
 - [x] properly load intents data from json
diff --git a/config.py b/config.py
index faf6bf9..e42d987 100644
--- a/config.py
+++ b/config.py
@@ -9,3 +9,6 @@
 
 # api
 API_URL = "http://localhost:8000"
+
+# voice stream
+VOSK_URL = "ws://localhost:2700"
diff --git a/requirements.txt b/requirements.txt
index 49b7318..cd7f0ac 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,4 +13,5 @@ torch==1.11.0
 tqdm==4.64.0
 typing_extensions==4.2.0
 urllib3==1.26.9
+websockets==10.3
 zipp==3.8.0
diff --git a/stream.py b/stream.py
new file mode 100644
index 0000000..b746b7c
--- /dev/null
+++ b/stream.py
@@ -0,0 +1,49 @@
+# Stream microphone audio to a vosk speech-recognition server over a websocket.
+import asyncio
+import websockets
+import sounddevice as sd
+
+import config
+
+
+def callback(indata, frames, time, status):
+    # sounddevice invokes this from its own capture thread for every block.
+    # TODO push audio block (bytes(indata)) onto the asyncio queue
+    pass
+
+
+def open_device(device_num):
+    # Open a raw 16-bit mono capture stream on the given input device.
+    print(sd.query_devices())
+    samplerate = 16000  # Hz; must match the rate the vosk model expects
+    blocksize = 8000
+    return sd.RawInputStream(
+        samplerate=samplerate,
+        blocksize=blocksize,
+        device=device_num,
+        dtype='int16',
+        channels=1,
+        callback=callback
+    )
+
+
+async def run(uri):
+    # Connect to the vosk server, announce the sample rate, then relay
+    # audio blocks from the queue and read back recognition results.
+    audio_queue = asyncio.Queue()
+    with open_device(1) as device:
+        async with websockets.connect(uri) as sock:
+            await sock.send('{ "config": {"sample_rate": %d } }' % (device.samplerate))
+
+            try:
+                while True:
+                    data = await audio_queue.get()
+                    await sock.send(data)
+                    await sock.recv()
+            finally:
+                # Tell the server the stream ended and collect the final result.
+                await sock.send('{"eof": 1}')
+                await sock.recv()
+
+
+if __name__ == "__main__":
+    asyncio.run(run(config.VOSK_URL))