forked from synesthesiam/hassio-addons
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ebad05d
commit 2143e02
Showing
8 changed files
with
214 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Image for the snowboy wake word listener (main.py started through run.sh).
FROM python:3.6
LABEL maintainer="Michael Hansen <[email protected]>"

# jq is used by run.sh to read the add-on options file. The remaining -dev
# packages and swig are presumably needed to build the snowboy and
# nanomsg-python packages installed below (TODO confirm).
# The apt package index is removed in the same layer so it does not end up
# in the final image.
RUN apt-get update && apt-get install -y python3-pip python3-dev \
    jq \
    libasound2-dev swig \
    portaudio19-dev \
    libatlas-base-dev \
    libnanomsg-dev \
    && rm -rf /var/lib/apt/lists/*

# Python packages are installed from archives shipped next to this Dockerfile.
COPY snowboy-1.3.0.tar.gz /
COPY nanomsg-python-master.zip /

RUN pip3 install --no-cache-dir wheel
RUN pip3 install --no-cache-dir /snowboy-1.3.0.tar.gz
RUN pip3 install --no-cache-dir /nanomsg-python-master.zip

COPY main.py /
COPY run.sh /
ENTRYPOINT ["/run.sh"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Neither target produces a file of the same name, so mark both as phony.
.PHONY: all docker

# Default target: build the Docker image.
all: docker

# Build the Docker image (the README refers to this as `make docker`).
docker:
	docker build . -t synesthesiam/snowboy:1.3.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
Snowboy Wake Listener | ||
========================= | ||
|
||
Small service that listens for a wake word with [snowboy](https://snowboy.kitt.ai). | ||
Audio data is streamed in from [Rhasspy](https://github.com/synesthesiam/rhasspy-hassio-addon) via [nanomsg](https://nanomsg.org). | ||
|
||
|
||
Building | ||
---------- | ||
|
||
To build the Docker image, run `make` in the project root. | ||
|
||
To create a local virtual environment, run the `create-venv.sh` shell script (expects a Debian distribution). | ||
|
||
|
||
Running | ||
--------- | ||
|
||
To run with Docker: | ||
|
||
docker run -it --network host synesthesiam/snowboy:1.3.0 | ||
|
||
To run in a virtual environment (after running `create-venv.sh`): | ||
|
||
./run-venv.sh | ||
|
||
This will connect to ports 5000 (PUB) and 5001 (PULL) on localhost. By default, the hotword is "snowboy". | ||
|
||
Passing `--feedback` will let you see when audio data is being received and when the hotword is detected. | ||
See `--help` for additional command-line arguments. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
{ | ||
"name": "Snowboy Wake System for Rhasspy", | ||
"slug": "snowboy-rhasspy", | ||
"version": "1.3.0-1", | ||
"description": "Snowboy wake word detection (Kitt.ai) for Rhasspy voice assistant", | ||
"startup": "application", | ||
"boot": "auto", | ||
"options": { | ||
"pub_address": "tcp://75f2ff60-rhasspy:5000", | ||
"pull_address": "tcp://75f2ff60-rhasspy:5001", | ||
"model": "snowboy", | ||
"sensitivity": 0.5, | ||
"audio_gain": 1.0 | ||
}, | ||
"schema": { | ||
"model": "str", | ||
"sensitivity": "float", | ||
"audio_gain": "float", | ||
"pub_address": "str", | ||
"pull_address": "str" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
#!/usr/bin/env python3 | ||
import os | ||
import argparse | ||
import subprocess | ||
import logging | ||
logging.basicConfig(level=logging.DEBUG) | ||
|
||
from snowboy import snowboydetect, snowboydecoder | ||
from nanomsg import Socket, SUB, SUB_SUBSCRIBE, PUSH | ||
|
||
def _parse_args(umdl_models):
    """Parse the command line and fill in defaults for the list options.

    ``umdl_models`` maps bundled model names to their file paths; the names
    are only used here to build the ``--model`` help text.

    Returns the ``argparse.Namespace`` with ``model`` defaulting to
    ``['snowboy']`` and ``sensitivity`` to ``[0.5]`` when not given.
    """
    parser = argparse.ArgumentParser(description='snowboy')
    parser.add_argument(
        '--pub-address',
        help='nanomsg address of PUB socket (default=tcp://127.0.0.1:5000)',
        type=str, default='tcp://127.0.0.1:5000')

    parser.add_argument(
        '--pull-address',
        help='nanomsg address of PULL socket (default=tcp://127.0.0.1:5001)',
        type=str, default='tcp://127.0.0.1:5001')

    parser.add_argument(
        '--payload',
        help='Payload string to send when wake word is detected (default=OK)',
        type=str, default='OK')

    parser.add_argument(
        '--model',
        action='append',
        type=str,
        help='Path to snowboy model file or one of %s (default=snowboy)'
             % list(umdl_models.keys()),
        default=[])

    parser.add_argument(
        '--sensitivity',
        action='append',
        help='Model sensitivity (default=0.5)',
        type=float,
        default=[])

    parser.add_argument('--gain', help='Audio gain (default=1.0)',
                        type=float, default=1.0)

    parser.add_argument('--feedback', help='Show printed feedback',
                        action='store_true')
    args = parser.parse_args()

    # The defaults are applied after parsing: with action='append', a
    # non-empty default list would be extended by user values, not replaced.
    if not args.model:
        args.model = ['snowboy']

    if not args.sensitivity:
        args.sensitivity = [0.5]

    return args


def _create_detectors(args, umdl_models):
    """Create one snowboy detector per entry in ``args.model``.

    A model is either the name of a bundled model (a key of ``umdl_models``)
    or a path to a model file. The i-th sensitivity is applied to the i-th
    model; models without a matching sensitivity keep the detector's own
    default.
    """
    detectors = []

    for i, model in enumerate(args.model):
        model_path = umdl_models.get(model, model)
        detector = snowboydetect.SnowboyDetect(
            snowboydecoder.RESOURCE_FILE.encode(), model_path.encode())

        if i < len(args.sensitivity):
            # The sensitivity is passed to snowboy as an encoded string
            detector.SetSensitivity(str(args.sensitivity[i]).encode())

        detector.SetAudioGain(args.gain)
        detectors.append(detector)

    return detectors


def main():
    """Listen for wake words in audio received over nanomsg.

    Audio chunks are read from a SUB socket connected to ``--pub-address``
    and run through every configured detector. Each time a detector reports
    a hotword, the payload is pushed to ``--pull-address``. Runs until
    interrupted with Ctrl-C.
    """
    # Find available universal models (.umdl)
    resource_dir = os.path.dirname(snowboydecoder.RESOURCE_FILE)
    umdl_dir = os.path.join(resource_dir, 'models')

    umdl_models = {
        os.path.splitext(name)[0]: os.path.join(umdl_dir, name)
        for name in os.listdir(umdl_dir)
    }

    args = _parse_args(umdl_models)
    logging.debug(args)

    detectors = _create_detectors(args, umdl_models)
    payload = args.payload.encode()

    # True once the "receiving audio" message has been logged; reset after
    # each detection so the message is logged again for the next chunk.
    receiving_logged = False

    # Do detection
    try:
        # Receive raw audio data via nanomsg
        with Socket(SUB) as sub_socket:
            sub_socket.connect(args.pub_address)
            # Empty prefix: subscribe to every message
            sub_socket.set_string_option(SUB, SUB_SUBSCRIBE, '')
            logging.info('Connected to PUB socket at %s', args.pub_address)

            with Socket(PUSH) as push_socket:
                # Response is sent via nanomsg
                push_socket.connect(args.pull_address)
                logging.info('Connected to PULL socket at %s',
                             args.pull_address)

                while True:
                    data = sub_socket.recv()  # audio data
                    if args.feedback:
                        print('.', end='', flush=True)

                    if not receiving_logged:
                        logging.debug('Receiving audio data from Rhasspy')
                        receiving_logged = True

                    for detector in detectors:
                        index = detector.RunDetection(data)
                        # Return is:
                        # -2 silence
                        # -1 error
                        # 0 voice
                        # n index n-1
                        if index > 0:
                            # Hotword detected
                            if args.feedback:
                                print('!', end='', flush=True)

                            logging.info('Wake word detected!')

                            push_socket.send(payload)  # response
                            receiving_logged = False
                        elif index == -1:
                            # Report detector errors instead of dropping them
                            logging.warning(
                                'Error while running wake word detection')

    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the listener
        pass
|
||
# ----------------------------------------------------------------------------- | ||
|
||
if __name__ == '__main__': | ||
main() |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#!/usr/bin/env bash
# Container entry point: reads the add-on options with jq and starts main.py
# with the matching command-line arguments. Any arguments given to this
# script (e.g. --feedback) are forwarded to main.py.
DIR="$( cd "$( dirname "$0" )" && pwd )"
CONFIG_PATH="/data/options.json"

args=()
if [[ -f "$CONFIG_PATH" ]]; then
    pub_address="$(jq --raw-output '.pub_address' "$CONFIG_PATH")"
    pull_address="$(jq --raw-output '.pull_address' "$CONFIG_PATH")"
    model="$(jq --raw-output '.model' "$CONFIG_PATH")"
    sensitivity="$(jq --raw-output '.sensitivity' "$CONFIG_PATH")"
    audio_gain="$(jq --raw-output '.audio_gain' "$CONFIG_PATH")"

    args=(
        --pub-address "$pub_address"
        --pull-address "$pull_address"
        --model "$model"
        --sensitivity "$sensitivity"
        --gain "$audio_gain"
    )
else
    # Without an options file, let main.py fall back to its own defaults
    # instead of passing empty strings that argparse would reject.
    echo "$CONFIG_PATH not found; using main.py defaults" >&2
fi

cd "$DIR" || exit 1

# exec replaces the shell so that signals sent to the container
# (e.g. on stop) are delivered directly to the Python process.
exec python3 main.py "${args[@]}" "$@"
Binary file not shown.