Skip to content

Commit

Permalink
Some refactoring and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
paulovcmedeiros committed Nov 15, 2023
1 parent e2ece16 commit 293b874
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 7 deletions.
28 changes: 21 additions & 7 deletions pyrobbot/text_to_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,32 @@
class LiveAssistant:
"""Class for converting text to speech and speech to text."""

# May be any language supported by gTTS
language: str = "en"
# How long the user must be inactive before the assistant stops listening
inactivity_timeout_seconds: int = 2
# Accept audio as speech if the likelihood is above this threshold
speech_likelihood_threshold: float = 0.85
# Params for audio capture
sample_rate: int = 32000 # Hz
frame_duration: int = 30 # milliseconds

def __post_init__(self):
if not _sounddevice_imported:
logger.error(
raise ImportError(
"Module `sounddevice`, needed for audio recording, is not available."
)
logger.error("Cannot continue. Exiting.")
raise SystemExit(1)

webrtcvad_restrictions = {
"sample_rate": [8000, 16000, 32000, 48000],
"frame_duration": [10, 20, 30],
}
for attr, allowed_values in webrtcvad_restrictions.items():
passed_value = getattr(self, attr)
if passed_value not in allowed_values:
raise ValueError(
f"{attr} must be one of: {allowed_values}. Got '{passed_value}'."
)

self.mixer = pygame.mixer
self.vad = webrtcvad.Vad(2)
Expand Down Expand Up @@ -88,9 +102,7 @@ def callback(indata, frames, time, status): # noqa: ARG001
"""This is called (from a separate thread) for each audio block."""
q.put(indata.copy())

# From webrtcvad docs: A frame must be either 10, 20, or 30 ms in duration
frame_duration = 30 # milliseconds
stream_block_size = int((self.sample_rate * frame_duration) / 1000)
stream_block_size = int((self.sample_rate * self.frame_duration) / 1000)
raw_buffer = io.BytesIO()
with sf.SoundFile(
raw_buffer,
Expand All @@ -108,7 +120,9 @@ def callback(indata, frames, time, status): # noqa: ARG001
):
# Recording will stop after self.inactivity_timeout_seconds of silence
voice_activity_detected = deque(
maxlen=int((1000.0 * self.inactivity_timeout_seconds) / frame_duration)
maxlen=int(
(1000.0 * self.inactivity_timeout_seconds) / self.frame_duration
)
)
last_inactivity_checked = datetime.now()
user_is_speaking = True
Expand Down
15 changes: 15 additions & 0 deletions tests/unit/test_text_to_speech.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
import contextlib

import pytest
from sounddevice import PortAudioError

from pyrobbot.text_to_speech import LiveAssistant


def test_cannot_instanciate_assistant_is_soundcard_not_imported(mocker):
    """Check that LiveAssistant refuses to instantiate without `sounddevice`.

    The module-level `_sounddevice_imported` flag in
    `pyrobbot.text_to_speech` is patched to False, after which constructing
    a `LiveAssistant` is expected to raise `ImportError` with a message
    mentioning the missing `sounddevice` module.
    """
    mocker.patch("pyrobbot.text_to_speech._sounddevice_imported", new=False)
    expected_message = "Module `sounddevice`"
    with pytest.raises(ImportError, match=expected_message):
        LiveAssistant()


@pytest.mark.parametrize("param_name", ["sample_rate", "frame_duration"])
def test_cannot_instanciate_assistant_with_invalid_webrtcvad_params(param_name):
    """Check that an unsupported webrtcvad parameter value is rejected.

    For each parametrized attribute name, `LiveAssistant` is constructed with
    that attribute set to 1, and a `ValueError` whose message starts with
    "<param_name> must be one of:" is expected.
    """
    invalid_kwargs = {param_name: 1}
    expected_message = f"{param_name} must be one of:"
    with pytest.raises(ValueError, match=expected_message):
        LiveAssistant(**invalid_kwargs)


def test_speak():
"""Test the speak method."""
assistant = LiveAssistant()
Expand Down

0 comments on commit 293b874

Please sign in to comment.