diff --git a/pyrobbot/text_to_speech.py b/pyrobbot/text_to_speech.py index 092fcf3..ecc8ddb 100644 --- a/pyrobbot/text_to_speech.py +++ b/pyrobbot/text_to_speech.py @@ -112,6 +112,7 @@ def callback(indata, frames, time, status): # noqa: ARG001 ) last_inactivity_checked = datetime.now() user_is_speaking = True + speech_detected = False with contextlib.suppress(KeyboardInterrupt): while user_is_speaking: new_data = q.get() @@ -138,13 +139,12 @@ def callback(indata, frames, time, status): # noqa: ARG001 user_is_speaking = ( speech_likelihood >= self.speech_likelihood_threshold ) + if user_is_speaking: + speech_detected = True last_inactivity_checked = now - # Detect if there was any sound at all, skip the conversion if not - recorded_audio = np.frombuffer(raw_buffer.getvalue(), dtype=np.int16) - max_intensity = np.max(np.absolute(recorded_audio)) - if max_intensity < self.inactivity_sound_intensity_threshold: - logger.debug("No sound detected") + if not speech_detected: + logger.debug("No speech detected") return "" logger.debug("Converting audio to text...")