diff --git a/pyrobbot/app/app_page_templates.py b/pyrobbot/app/app_page_templates.py index 45317e3..6530cac 100644 --- a/pyrobbot/app/app_page_templates.py +++ b/pyrobbot/app/app_page_templates.py @@ -160,6 +160,7 @@ def render_custom_audio_player( # Adaped from: + autoplay = "autoplay" if autoplay else "" hidden = "hidden" if hidden else "" @@ -299,7 +300,7 @@ def voice_output(self) -> bool: """Return the state of the voice output toggle.""" return st.session_state.get("toggle_voice_output", False) - def play_chime(self, chime_type: str = "correct-answer-tone"): + def play_chime(self, chime_type: str = "correct-answer-tone", parent_element=None): """Sound a chime to send notificatons to the user.""" type2filename = { "correct-answer-tone": "mixkit-correct-answer-tone-2870.wav", @@ -309,7 +310,9 @@ def play_chime(self, chime_type: str = "correct-answer-tone"): chime = AudioSegment.from_file( GeneralDefinitions.APP_DIR / "data" / type2filename[chime_type], format="wav" ) - self.render_custom_audio_player(chime, hidden=True, autoplay=True) + self.render_custom_audio_player( + chime, hidden=True, autoplay=True, parent_element=parent_element + ) def render_chat_input_widgets(self): """Render chat inut widgets and return the user's input.""" @@ -356,35 +359,34 @@ def _render_chatbot_page(self): # noqa: PLR0915 chat_msgs_container = st.container(height=600, border=False) with chat_msgs_container: status_msg_container = st.empty() + self.render_chat_history() self.render_chat_input_widgets() - if not self.state.get("chat_started", False): - self.play_chime() - - logger.debug("Waiting for user text prompt...") with status_msg_container: - st.status("Waiting for user text prompt...") + self.play_chime() + st.status(f"{self.chat_obj.assistant_name} is listening...") + logger.debug("Waiting for user text prompt...") while True: with contextlib.suppress(queue.Empty): if prompt := self.parent.text_prompt_queue.get_nowait(): break + with contextlib.suppress(queue.Empty): + if prompt := self.text_prompt_queue.get_nowait(): + break + logger.trace("Still waiting for user text prompt...") time.sleep(0.1) - if prompt: - self.parent.reply_ongoing.set() - logger.opt(colors=True).debug( - "Got prompt from user: {}", prompt - ) + if prompt := prompt.strip(): self.play_chime("option-select") + self.parent.reply_ongoing.set() + logger.opt(colors=True).debug("Recived prompt: {}", prompt) status_msg_container.empty() - else: + logger.opt(colors=True).debug("Received empty prompt") self.parent.reply_ongoing.clear() - logger.opt(colors=True).debug("No prompt from user.") with chat_msgs_container: - self.render_chat_history() # Process user input if prompt: time_now = datetime.datetime.now().replace(microsecond=0) @@ -407,19 +409,27 @@ def _render_chatbot_page(self): # noqa: PLR0915 text_reply_container = st.empty() audio_reply_container = st.empty() question_answer_chunks_queue = queue.Queue() + partial_audios_queue = queue.Queue() - def answer_question(prompt): - for chunk in self.chat_obj.answer_question(prompt): - question_answer_chunks_queue.put(chunk.content) - question_answer_chunks_queue.put(None) - + # Create separate threads to process text and audio replies answer_question_thread = threading.Thread( - target=answer_question, args=(prompt,) + target=answer_question, + args=(self.chat_obj, prompt, question_answer_chunks_queue), + ) + play_partial_audios_thread = threading.Thread( + target=play_partial_audios, + args=( + partial_audios_queue, + self.render_custom_audio_player, + audio_reply_container, + ), + daemon=False, ) - add_script_run_ctx(answer_question_thread) - answer_question_thread.start() + for thread in (answer_question_thread, play_partial_audios_thread): + add_script_run_ctx(thread) + thread.start() - # Render text reply + # Render the reply chunk = "" full_response = "" current_audio = AudioSegment.empty() @@ -427,6 +437,7 @@ def answer_question(prompt): text_reply_container.markdown("▌") while (chunk is not None) or (current_audio is not None): logger.trace("Waiting for text or audio chunks...") + # Render text with contextlib.suppress(queue.Empty): chunk = question_answer_chunks_queue.get_nowait() if chunk is not None: @@ -437,25 +448,22 @@ def answer_question(prompt): # Render audio (if any) with contextlib.suppress(queue.Empty): current_audio = self.chat_obj.play_speech_queue.get_nowait() - if current_audio is not None: - full_audio += current_audio.speech - self.render_custom_audio_player( - current_audio.speech, - parent_element=audio_reply_container, - ) - audio_reply_container.empty() self.chat_obj.play_speech_queue.task_done() - - text_reply_container.caption( - datetime.datetime.now().replace(microsecond=0) - ) - text_reply_container.markdown(full_response) + if current_audio is None: + partial_audios_queue.put(None) + else: + partial_audios_queue.put(current_audio.speech) + full_audio += current_audio.speech logger.opt(colors=True).debug( "Replied to user prompt '{}': {}", prompt, full_response, ) + text_reply_container.caption( + datetime.datetime.now().replace(microsecond=0) + ) + text_reply_container.markdown(full_response) self.chat_history.append( { @@ -466,6 +474,10 @@ def answer_question(prompt): } ) + while play_partial_audios_thread.is_alive(): + logger.debug("Waiting for partial audios to finish playing...") + time.sleep(0.1) + if full_audio.duration_seconds > 0: self.render_custom_audio_player( full_audio, @@ -491,7 +503,6 @@ def answer_question(prompt): title_container.header(title, divider="rainbow") self.parent.reply_ongoing.clear() - self.play_chime() if not self.parent.reply_ongoing.is_set(): logger.debug("Rerunning the app") @@ -503,3 +514,37 @@ def render(self): self.render_cost_estimate_page() else: self._render_chatbot_page() + + +def answer_question(chat_obj, prompt, question_answer_chunks_queue): + """Get chunks of the reply to the prompt and put them in the queue.""" + for chunk in chat_obj.answer_question(prompt): + question_answer_chunks_queue.put(chunk.content) + question_answer_chunks_queue.put(None) + + +def play_partial_audios( + partial_audios_queue, audio_player_rendering_function, parent_element +): + """Play queued audio segments.""" + logger.debug("Playing partial audios...") + while True: + try: + partial_audio = partial_audios_queue.get() + if partial_audio is None: + partial_audios_queue.task_done() + break + + logger.debug("Playing partial audio...") + audio_player_rendering_function( + partial_audio, + parent_element=parent_element, + autoplay=True, + hidden=True, + ) + parent_element.empty() + partial_audios_queue.task_done() + except Exception as error: # noqa: BLE001 + logger.error(error) + break + logger.debug("Partial audios finished playing.") diff --git a/pyrobbot/app/multipage.py b/pyrobbot/app/multipage.py index 3c2c6a6..4fb562d 100644 --- a/pyrobbot/app/multipage.py +++ b/pyrobbot/app/multipage.py @@ -237,6 +237,7 @@ def handle_continuous_user_prompt(): else: logger.debug( 'Page "{}"\'s audio input too short ({} < {} sec). Discarding.', + app_page.title, concatenated_audio.duration_seconds, min_audio_duration_seconds, ) @@ -325,26 +326,31 @@ def render_continuous_audio_input_widget(self): selected_page = None def audio_frame_callback(frame): - logger.trace("Received raw audio frame from the stream") + try: + logger.trace("Received raw audio frame from the stream") - if selected_page is None: - logger.trace("No page selected. Discardig audio chunk") - return frame + if selected_page is None: + logger.trace("No page selected. Discardig audio chunk") + return frame - if self.reply_ongoing.is_set(): - logger.trace("Reply is ongoing. Discardig audio chunk") - return frame + if self.reply_ongoing.is_set(): + logger.trace("Reply is ongoing. Discardig audio chunk") + return frame - if not self.continuous_user_prompt_queue.empty(): - logger.trace( - "There are still {} items in the audio input queue. Discardig chunk", - self.continuous_user_prompt_queue.qsize(), - ) - return frame + if not self.continuous_user_prompt_queue.empty(): + logger.trace( + "Audio input queue not empty {} items). Discardig chunk", + self.continuous_user_prompt_queue.qsize(), + ) + return frame + except Exception as error: # noqa: BLE001 + logger.opt(exception=True).debug(error) + logger.error(error) + else: + frame_info = {"frame": frame, "page": selected_page} + self.incoming_frame_queue.put(frame_info) + logger.trace("Raw audio frame sent to the processing queue") - frame_info = {"frame": frame, "page": selected_page} - self.incoming_frame_queue.put(frame_info) - logger.trace("Raw audio frame sent to the processing queue") return frame add_script_run_ctx(audio_frame_callback) diff --git a/pyrobbot/voice_chat.py b/pyrobbot/voice_chat.py index fded21d..355364c 100644 --- a/pyrobbot/voice_chat.py +++ b/pyrobbot/voice_chat.py @@ -194,6 +194,9 @@ def answer_question(self, question: str): if sentence_for_tts and not self.reply_only_as_text: self.tts_conversion_queue.put(sentence_for_tts) + # Signal that the current answer is finished + self.tts_conversion_queue.put(None) + def handle_update_audio_history(self, current_answer_audios_queue: queue.Queue): """Handle updating the chat history with the latest reply's audio file path.""" # Merge all AudioSegments in self.current_answer_audios_queue into a single one @@ -203,9 +206,12 @@ def handle_update_audio_history(self, current_answer_audios_queue: queue.Queue): try: new_audio = current_answer_audios_queue.get() if new_audio is not None: + # Reply not yet finished merged_audio += new_audio current_answer_audios_queue.task_done() continue + + # Now the reply has finished if merged_audio.duration_seconds <= min_audio_duration_seconds: current_answer_audios_queue.task_done() continue @@ -413,6 +419,13 @@ def handle_tts_queue(self, text_queue: queue.Queue): while not self.exit_chat.is_set(): try: text = text_queue.get() + if text is None: + # Signal that the current anwer is finished + self.current_answer_audios_queue.put(None) + self.play_speech_queue.put(None) + continue + + text = text.strip() if text and not self.interrupt_reply.is_set(): tts = self.tts(text) logger.debug("Received text '{}' for TTS", text) @@ -420,19 +433,13 @@ def handle_tts_queue(self, text_queue: queue.Queue): # Trigger the TTS conversion _ = tts.speech + logger.debug("Sending TTS for '{}' to the playing queue", text) # Keep track of audios for the current answer (for the history db) self.current_answer_audios_queue.put(tts.speech) - # Dispatch the audio to be played - logger.debug("Sending audio for text '{}' to the playing queue", text) self.play_speech_queue.put(tts) - if text_queue.empty(): - # Signal that the current anwer is finished - self.current_answer_audios_queue.put(None) - self.play_speech_queue.put(None) - - except Exception as error: # noqa: PERF203, BLE001 + except Exception as error: # noqa: BLE001 logger.opt(exception=True).debug(error) logger.error(error) finally: