Use streamlit_mic_recorder for manual mic input

Prevents processing the same audio input multiple times when the page is updated or the server rerun.
paulovcmedeiros · Feb 25, 2024 · d5fe814 · d5fe814
1 parent 11d5603
commit d5fe814
Show file tree

Hide file tree

Showing 2 changed files with 29 additions and 15 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -43,7 +43,7 @@
   sounddevice = "^0.4.6"
   soundfile = "^0.12.1"
   speechrecognition = "^3.10.0"
-  streamlit-audiorecorder = "^0.0.4"
+  streamlit-mic-recorder = "^0.0.4"
   tzlocal = "^5.2"
   unidecode = "^1.3.7"
   webrtcvad-wheels = "^2.0.11.post1"

diff --git a/pyrobbot/app/app_page_templates.py b/pyrobbot/app/app_page_templates.py
@@ -9,10 +9,10 @@
 from typing import TYPE_CHECKING
 
 import streamlit as st
-from audiorecorder import audiorecorder
 from PIL import Image
 from pydub import AudioSegment
 from pydub.exceptions import CouldntDecodeError
+from streamlit_mic_recorder import mic_recorder
 
 from pyrobbot import GeneralDefinitions
 from pyrobbot.chat_configs import VoiceChatConfigs
@@ -105,6 +105,32 @@ def title(self, value: str):
     def render(self):
         """Create the page."""
 
+    def manual_switch_mic_recorder(self):
+        """Record audio from the microphone and return it as a pydub AudioSegment."""
+        studio_microphone = "\U0001F399"  # passed below as the start prompt
+        red_square = "\U0001F7E5"  # passed below as the stop prompt
+
+        recording = mic_recorder(
+            key=f"audiorecorder_widget_{self.page_id}",  # one widget key per page
+            start_prompt=studio_microphone,
+            stop_prompt=red_square,
+            just_once=True,  # NOTE(review): presumably avoids re-delivery on rerun; confirm
+            use_container_width=True,
+            callback=None,
+            args=(),
+            kwargs={},
+        )
+
+        if recording is None:  # the recorder returned nothing on this run
+            return AudioSegment.silent(duration=0)  # zero-length segment, never None
+
+        return AudioSegment(
+            data=recording["bytes"],  # NOTE(review): treated as raw samples; confirm format
+            sample_width=recording["sample_width"],
+            frame_rate=recording["sample_rate"],
+            channels=1,  # NOTE(review): mono is hard-coded; confirm recorder output
+        )
+
     def render_custom_audio_player(
         self,
         audio: AudioSegment,
@@ -252,14 +278,7 @@ def get_chat_input(self):
                     placeholder=placeholder, key=f"text_input_widget_{self.page_id}"
                 )
             with right:
-                studio_microphone = "\U0001F399"
-                red_square = "\U0001F7E5"
-                audio = audiorecorder(
-                    start_prompt=studio_microphone,
-                    stop_prompt=red_square,
-                    pause_prompt="",
-                    key=f"audiorecorder_widget_{self.page_id}",
-                )
+                audio = self.manual_switch_mic_recorder()
                 recorded_prompt = None
                 if audio.duration_seconds > min_audio_duration_seconds:
                     recorded_prompt = self.chat_obj.stt(audio).text
@@ -359,11 +378,6 @@ def _render_chatbot_page(self):
                         self.sidebar_title = title
                         title_container.header(title, divider="rainbow")
 
-        with contextlib.suppress(KeyError):
-            # Need to delete the audiorecorder widget from the session state to prevent
-            # the previous audio from being used as input again
-            del st.session_state[f"audiorecorder_widget_{self.page_id}"]
-
     def render(self):
         """Render the app's chatbot or costs page, depending on user choice."""
         if st.session_state.get("toggle_show_costs"):