Some refactoring and tests

paulovcmedeiros · Nov 15, 2023 · 293b874 · 293b874
1 parent e2ece16
commit 293b874
Show file tree

Hide file tree

Showing 2 changed files with 36 additions and 7 deletions.
diff --git a/pyrobbot/text_to_speech.py b/pyrobbot/text_to_speech.py
@@ -31,18 +31,32 @@
 class LiveAssistant:
     """Class for converting text to speech and speech to text."""
 
+    # May be any language supported by gTTS
     language: str = "en"
+    # How long the user must be inactive before the assistant stops listening
     inactivity_timeout_seconds: int = 2
+    # Accept audio as speech if the likelihood is above this threshold
     speech_likelihood_threshold: float = 0.85
+    # Params for audio capture
     sample_rate: int = 32000  # Hz
+    frame_duration: int = 30  # milliseconds
 
     def __post_init__(self):
         if not _sounddevice_imported:
-            logger.error(
+            raise ImportError(
                 "Module `sounddevice`, needed for audio recording, is not available."
             )
-            logger.error("Cannot continue. Exiting.")
-            raise SystemExit(1)
+
+        webrtcvad_restrictions = {
+            "sample_rate": [8000, 16000, 32000, 48000],
+            "frame_duration": [10, 20, 30],
+        }
+        for attr, allowed_values in webrtcvad_restrictions.items():
+            passed_value = getattr(self, attr)
+            if passed_value not in allowed_values:
+                raise ValueError(
+                    f"{attr} must be one of: {allowed_values}. Got '{passed_value}'."
+                )
 
         self.mixer = pygame.mixer
         self.vad = webrtcvad.Vad(2)
@@ -88,9 +102,7 @@ def callback(indata, frames, time, status):  # noqa: ARG001
             """This is called (from a separate thread) for each audio block."""
             q.put(indata.copy())
 
-        # From webrtcvad docs: A frame must be either 10, 20, or 30 ms in duration
-        frame_duration = 30  # milliseconds
-        stream_block_size = int((self.sample_rate * frame_duration) / 1000)
+        stream_block_size = int((self.sample_rate * self.frame_duration) / 1000)
         raw_buffer = io.BytesIO()
         with sf.SoundFile(
             raw_buffer,
@@ -108,7 +120,9 @@ def callback(indata, frames, time, status):  # noqa: ARG001
         ):
             # Recording will stop after self.inactivity_timeout_seconds of silence
             voice_activity_detected = deque(
-                maxlen=int((1000.0 * self.inactivity_timeout_seconds) / frame_duration)
+                maxlen=int(
+                    (1000.0 * self.inactivity_timeout_seconds) / self.frame_duration
+                )
             )
             last_inactivity_checked = datetime.now()
             user_is_speaking = True

diff --git a/tests/unit/test_text_to_speech.py b/tests/unit/test_text_to_speech.py
@@ -1,10 +1,25 @@
 import contextlib
 
+import pytest
 from sounddevice import PortAudioError
 
 from pyrobbot.text_to_speech import LiveAssistant
 
 
+def test_cannot_instanciate_assistant_is_soundcard_not_imported(mocker):
+    """Test that LiveAssistant cannot be instantiated without `sounddevice`."""
+    mocker.patch("pyrobbot.text_to_speech._sounddevice_imported", False)
+    with pytest.raises(ImportError, match="Module `sounddevice`"):
+        LiveAssistant()
+
+
+@pytest.mark.parametrize("param_name", ["sample_rate", "frame_duration"])
+def test_cannot_instanciate_assistant_with_invalid_webrtcvad_params(param_name):
+    """Test that LiveAssistant cannot be instantiated with invalid webrtcvad params."""
+    with pytest.raises(ValueError, match=f"{param_name} must be one of:"):
+        LiveAssistant(**{param_name: 1})
+
+
 def test_speak():
     """Test the speak method."""
     assistant = LiveAssistant()