diff --git a/pyrobbot/app/app_page_templates.py b/pyrobbot/app/app_page_templates.py index a4b3a92..8c46b18 100644 --- a/pyrobbot/app/app_page_templates.py +++ b/pyrobbot/app/app_page_templates.py @@ -302,7 +302,18 @@ def play_chime(self, chime_type: str = "correct-answer-tone", parent_element=Non chime, hidden=True, autoplay=True, parent_element=parent_element ) - def render_chat_input_widgets(self): + def render_title(self): + """Render the title of the chatbot page.""" + with st.container(height=70, border=False): + self.title_container = st.empty() + with st.container(height=50, border=False): + left, _ = st.columns([0.7, 0.3]) + with left: + self.status_msg_container = st.empty() + self.title_container.subheader(self.title, divider="rainbow") + + @property + def direct_text_prompt(self): """Render chat inut widgets and return the user's input.""" placeholder = ( f"Send a message to {self.chat_obj.assistant_name} ({self.chat_obj.model})" @@ -310,30 +321,48 @@ def render_chat_input_widgets(self): with st.container(): left, right = st.columns([0.95, 0.05]) with left: - if text_prompt := st.chat_input( - placeholder=placeholder, key=f"text_input_widget_{self.page_id}" - ): - self.parent.text_prompt_queue.put({"page": self, "text": text_prompt}) - return - + text_from_chat_input_widget = st.chat_input(placeholder=placeholder) with right: - continuous_audio = st.session_state.get( - "toggle_continuous_voice_input", False - ) - continuous_audio = True # TEST - - audio = AudioSegment.empty() - if continuous_audio: - # We won't handle this here. It is handled in listen, ..., sst threads - if not self.parent.listen_thread.is_alive(): - raise ValueError("The listen thread is not alive") + text_from_manual_audio_recorder = "" + if st.session_state.get("toggle_continuous_voice_input"): + st.empty() else: - audio = self.manual_switch_mic_recorder() - if audio and ( - audio.duration_seconds > self.chat_obj.min_speech_duration_seconds - ): - new_input = {"page": self, "text": self.chat_obj.stt(audio).text} - self.parent.text_prompt_queue.put(new_input) + text_from_manual_audio_recorder = self.chat_obj.stt( + self.manual_switch_mic_recorder() + ).text + return text_from_chat_input_widget or text_from_manual_audio_recorder + + @property + def continuous_text_prompt(self): + """Wait until a promp from the continuous stream is ready and return it.""" + if not st.session_state.get("toggle_continuous_voice_input"): + return None + + if not self.parent.continuous_audio_input_engine_is_running: + logger.warning("Continuous audio input engine is not running!!!") + self.status_msg_container.error( + "The continuous audio input engine is not running!!!" + ) + return None + + logger.debug("Running on continuous audio prompt. Waiting user input...") + with self.status_msg_container: + self.play_chime() + with st.spinner(f"{self.chat_obj.assistant_name} is listening..."): + while True: + with contextlib.suppress(queue.Empty): + with self.parent.text_prompt_queue.mutex: + this_page_prompt_queue = filter_page_info_from_queue( + app_page=self, the_queue=self.parent.text_prompt_queue + ) + if prompt := this_page_prompt_queue.get_nowait()["text"]: + this_page_prompt_queue.task_done() + break + logger.trace("Still waiting for user text prompt...") + time.sleep(0.1) + + logger.debug("Done getting user input: {}", prompt) + return prompt def _render_chatbot_page(self): # noqa: PLR0915 """Render a chatbot page. @@ -346,192 +375,187 @@ def _render_chatbot_page(self): # noqa: PLR0915 question_answer_chunks_queue = queue.Queue() partial_audios_queue = queue.Queue() - with st.container(height=70, border=False): - title_container = st.empty() - with st.container(height=35, border=False): - left, _ = st.columns([0.7, 0.3]) - with left: - status_msg_container = st.empty() - - title_container.subheader(self.title, divider="rainbow") + self.render_title() chat_msgs_container = st.container(height=600, border=False) with chat_msgs_container: self.render_chat_history() - self.render_chat_input_widgets() - - with status_msg_container: - logger.debug("Waiting for user text prompt...") - self.play_chime() - with st.spinner(f"{self.chat_obj.assistant_name} is listening..."): - while True: - with contextlib.suppress(queue.Empty): - this_page_prompt_queue = filter_page_info_from_queue( - app_page=self, the_queue=self.parent.text_prompt_queue - ) - if prompt := this_page_prompt_queue.get_nowait()["text"]: - break - logger.trace("Still waiting for user text prompt...") - time.sleep(0.1) - if prompt := prompt.strip(): + direct_text_prompt = self.direct_text_prompt + continuous_stt_prompt = self.continuous_text_prompt + prompt = direct_text_prompt or continuous_stt_prompt + if prompt: + logger.opt(colors=True).debug("Recived prompt: {}", prompt) self.parent.reply_ongoing.set() + # Interrupt any ongoing reply in this page with question_answer_chunks_queue.mutex: question_answer_chunks_queue.queue.clear() with partial_audios_queue.mutex: partial_audios_queue.queue.clear() - logger.opt(colors=True).debug("Recived prompt: {}", prompt) - - self.play_chime("option-select") - status_msg_container.success("Got your message!") - time.sleep(0.5) - else: - status_msg_container.warning( + if continuous_stt_prompt: + self.play_chime("option-select") + self.status_msg_container.success("Got your message!") + time.sleep(0.5) + elif continuous_stt_prompt: + self.status_msg_container.warning( "Could not understand your message. Please try again." ) logger.opt(colors=True).debug("Received empty prompt") self.parent.reply_ongoing.clear() - with chat_msgs_container: - # Process user input - if prompt: - time_now = datetime.datetime.now().replace(microsecond=0) - self.state.update({"chat_started": True}) - # Display user message in chat message container - with st.chat_message("user", avatar=self.avatars["user"]): - st.caption(time_now) - st.markdown(prompt) - self.chat_history.append( - { - "role": "user", - "name": self.chat_obj.username, - "content": prompt, - "timestamp": time_now, - } - ) - - # Display (stream) assistant response in chat message container - with st.chat_message("assistant", avatar=self.avatars["assistant"]): - text_reply_container = st.empty() - audio_reply_container = st.empty() - - # Create threads to process text and audio replies asynchronously - answer_question_thread = threading.Thread( - target=_put_chat_reply_chunks_in_queue, - args=(self.chat_obj, prompt, question_answer_chunks_queue), - ) - play_partial_audios_thread = threading.Thread( - target=_play_queued_audios, - args=( - partial_audios_queue, - self.render_custom_audio_player, - status_msg_container, - ), - daemon=False, - ) - for thread in (answer_question_thread, play_partial_audios_thread): - add_script_run_ctx(thread) - thread.start() - - # Render the reply - chunk = "" - full_response = "" - current_audio = AudioSegment.empty() - text_reply_container.markdown("▌") - status_msg_container.empty() - while (chunk is not None) or (current_audio is not None): - logger.trace("Waiting for text or audio chunks...") - # Render text - with contextlib.suppress(queue.Empty): - chunk = question_answer_chunks_queue.get_nowait() - if chunk is not None: - full_response += chunk - text_reply_container.markdown(full_response + "▌") - question_answer_chunks_queue.task_done() - - # Render audio (if any) - with contextlib.suppress(queue.Empty): - current_audio = self.chat_obj.play_speech_queue.get_nowait() - self.chat_obj.play_speech_queue.task_done() - if current_audio is None: - partial_audios_queue.put(None) - else: - partial_audios_queue.put(current_audio.speech) - - logger.opt(colors=True).debug( - "Replied to user prompt '{}': {}", - prompt, - full_response, - ) - text_reply_container.caption( - datetime.datetime.now().replace(microsecond=0) - ) - text_reply_container.markdown(full_response) - - while play_partial_audios_thread.is_alive(): - logger.trace("Waiting for partial audios to finish playing...") - time.sleep(0.1) - - logger.debug("Getting path to full audio file...") - try: - full_audio_fpath = self.chat_obj.last_answer_full_audio_fpath.get( - timeout=2 - ) - except queue.Empty: - full_audio_fpath = None - logger.warning("Problem getting path to full audio file") - else: - logger.debug("Got path to full audio file: {}", full_audio_fpath) - self.chat_obj.last_answer_full_audio_fpath.task_done() - + if prompt: + with chat_msgs_container: + # Process user input + if prompt: + time_now = datetime.datetime.now().replace(microsecond=0) + self.state.update({"chat_started": True}) + # Display user message in chat message container + with st.chat_message("user", avatar=self.avatars["user"]): + st.caption(time_now) + st.markdown(prompt) self.chat_history.append( { - "role": "assistant", - "name": self.chat_obj.assistant_name, - "content": full_response, - "assistant_reply_audio_file": full_audio_fpath, + "role": "user", + "name": self.chat_obj.username, + "content": prompt, + "timestamp": time_now, } ) - if full_audio_fpath: - self.render_custom_audio_player( - full_audio_fpath, - parent_element=audio_reply_container, - autoplay=False, + # Display (stream) assistant response in chat message container + with st.chat_message("assistant", avatar=self.avatars["assistant"]): + text_reply_container = st.empty() + audio_reply_container = st.empty() + + # Create threads to process text and audio replies asynchronously + answer_question_thread = threading.Thread( + target=_put_chat_reply_chunks_in_queue, + args=(self.chat_obj, prompt, question_answer_chunks_queue), + ) + play_partial_audios_thread = threading.Thread( + target=_play_queued_audios, + args=( + partial_audios_queue, + self.render_custom_audio_player, + self.status_msg_container, + ), + daemon=False, + ) + for thread in ( + answer_question_thread, + play_partial_audios_thread, + ): + add_script_run_ctx(thread) + thread.start() + + # Render the reply + chunk = "" + full_response = "" + current_audio = AudioSegment.empty() + text_reply_container.markdown("▌") + self.status_msg_container.empty() + while (chunk is not None) or (current_audio is not None): + logger.trace("Waiting for text or audio chunks...") + # Render text + with contextlib.suppress(queue.Empty): + chunk = question_answer_chunks_queue.get_nowait() + if chunk is not None: + full_response += chunk + text_reply_container.markdown(full_response + "▌") + question_answer_chunks_queue.task_done() + + # Render audio (if any) + with contextlib.suppress(queue.Empty): + current_audio = ( + self.chat_obj.play_speech_queue.get_nowait() + ) + self.chat_obj.play_speech_queue.task_done() + if current_audio is None: + partial_audios_queue.put(None) + else: + partial_audios_queue.put(current_audio.speech) + + logger.opt(colors=True).debug( + "Replied to user prompt '{}': {}", + prompt, + full_response, + ) + text_reply_container.caption( + datetime.datetime.now().replace(microsecond=0) + ) + text_reply_container.markdown(full_response) + + while play_partial_audios_thread.is_alive(): + logger.trace( + "Waiting for partial audios to finish playing..." + ) + time.sleep(0.1) + + logger.debug("Getting path to full audio file...") + try: + full_audio_fpath = ( + self.chat_obj.last_answer_full_audio_fpath.get(timeout=2) + ) + except queue.Empty: + full_audio_fpath = None + logger.warning("Problem getting path to full audio file") + else: + logger.debug( + "Got path to full audio file: {}", full_audio_fpath + ) + self.chat_obj.last_answer_full_audio_fpath.task_done() + + self.chat_history.append( + { + "role": "assistant", + "name": self.chat_obj.assistant_name, + "content": full_response, + "assistant_reply_audio_file": full_audio_fpath, + } ) - # Reset title according to conversation initial contents - min_history_len_for_summary = 3 - if ( - "page_title" not in self.state - and len(self.chat_history) > min_history_len_for_summary - ): - logger.debug("Working out conversation topic...") - prompt = "Summarize the previous messages in max 4 words" - title = "".join(self.chat_obj.respond_system_prompt(prompt)) - self.chat_obj.metadata["page_title"] = title - self.chat_obj.metadata["sidebar_title"] = title - self.chat_obj.save_cache() - - self.title = title - self.sidebar_title = title - title_container.header(title, divider="rainbow") - - # Clear the prompt queue for this page, to remove old prompts - with self.parent.continuous_user_prompt_queue.mutex: - filter_page_info_from_queue( - app_page=self, the_queue=self.parent.continuous_user_prompt_queue - ) - with self.parent.text_prompt_queue.mutex: - filter_page_info_from_queue( - app_page=self, the_queue=self.parent.text_prompt_queue - ) + if full_audio_fpath: + self.render_custom_audio_player( + full_audio_fpath, + parent_element=audio_reply_container, + autoplay=False, + ) + + # Reset title according to conversation initial contents + min_history_len_for_summary = 3 + if ( + "page_title" not in self.state + and len(self.chat_history) > min_history_len_for_summary + ): + logger.debug("Working out conversation topic...") + prompt = "Summarize the previous messages in max 4 words" + title = "".join(self.chat_obj.respond_system_prompt(prompt)) + self.chat_obj.metadata["page_title"] = title + self.chat_obj.metadata["sidebar_title"] = title + self.chat_obj.save_cache() + + self.title = title + self.sidebar_title = title + self.title_container.header(title, divider="rainbow") + + # Clear the prompt queue for this page, to remove old prompts + with self.parent.continuous_user_prompt_queue.mutex: + filter_page_info_from_queue( + app_page=self, + the_queue=self.parent.continuous_user_prompt_queue, + ) + with self.parent.text_prompt_queue.mutex: + filter_page_info_from_queue( + app_page=self, the_queue=self.parent.text_prompt_queue + ) - self.parent.reply_ongoing.clear() + self.parent.reply_ongoing.clear() - if not self.parent.reply_ongoing.is_set(): - logger.debug("Rerunning the app") + if continuous_stt_prompt and not self.parent.reply_ongoing.is_set(): + logger.opt(colors=True).debug( + "Rerunning the app to wait for new input..." + ) st.rerun() def render(self): @@ -555,6 +579,7 @@ def _trim_page_padding(): self.render_cost_estimate_page() else: self._render_chatbot_page() + logger.debug("Reached the end of the chatbot page.") def filter_page_info_from_queue(app_page: AppPage, the_queue: queue.Queue): diff --git a/pyrobbot/app/multipage.py b/pyrobbot/app/multipage.py index 1539b7c..a3b74ce 100644 --- a/pyrobbot/app/multipage.py +++ b/pyrobbot/app/multipage.py @@ -254,6 +254,11 @@ def handle_stt(): if audio.duration_seconds >= chat_obj.min_speech_duration_seconds: recorded_prompt_as_txt = chat_obj.stt(audio).text if recorded_prompt_as_txt: + logger.debug( + "Audio from page '{}' transcribed '{}'. Input ready to fetch.", + info_for_stt["page"].title, + recorded_prompt_as_txt, + ) text_prompt_queue.put( {"page": info_for_stt["page"], "text": recorded_prompt_as_txt} ) @@ -285,9 +290,24 @@ def __init__(self, **kwargs) -> None: """Initialise streamlit page configs.""" st.set_page_config(**kwargs) + hide_zero_height_elements = """ + + """ + st.markdown(hide_zero_height_elements, unsafe_allow_html=True) + self.listen_thread = listen_thread self.continuous_user_prompt_thread = continuous_user_prompt_thread - if not listen_thread.is_alive(): + self.handle_stt_thread = handle_stt_thread + if ( + st.session_state.get("toggle_continuous_voice_input") + and not self.continuous_audio_input_engine_is_running + ): for thread in [ listen_thread, continuous_user_prompt_thread, @@ -304,6 +324,15 @@ def __init__(self, **kwargs) -> None: self.text_prompt_queue = text_prompt_queue self.reply_ongoing = reply_ongoing + @property + def continuous_audio_input_engine_is_running(self): + """Return whether the continuous audio input engine is running.""" + return ( + self.listen_thread.is_alive() + and self.continuous_user_prompt_thread.is_alive() + and self.handle_stt_thread.is_alive() + ) + def render_continuous_audio_input_widget(self): """Render the continuous audio input widget.""" # Definitions related to webrtc_streamer @@ -346,19 +375,26 @@ def audio_frame_callback(frame): add_script_run_ctx(audio_frame_callback) - self.stream_audio_context = streamlit_webrtc.component.webrtc_streamer( - key="sendonly-audio", - mode=WebRtcMode.SENDONLY, - rtc_configuration=rtc_configuration, - media_stream_constraints={"audio": True, "video": False}, - desired_playing_state=True, - audio_frame_callback=audio_frame_callback, - ) - - logger.debug("Waiting for the audio stream to start...") - while not self.stream_audio_context.state.playing: - time.sleep(1) - logger.debug("Audio stream started") + logger.debug("Initialising input audio stream...") + try: + with st.container(height=0, border=False): + self.stream_audio_context = streamlit_webrtc.component.webrtc_streamer( + key="sendonly-audio", + mode=WebRtcMode.SENDONLY, + rtc_configuration=rtc_configuration, + media_stream_constraints={"audio": True, "video": False}, + desired_playing_state=True, + audio_frame_callback=audio_frame_callback, + ) + except TypeError: + logger.opt(exception=True).error("Failed to initialise audio stream") + logger.error("Failed to initialise audio stream") + self.stream_audio_context = None + else: + logger.debug("Audio stream initialised. Waiting for it to start...") + while not self.stream_audio_context.state.playing: + time.sleep(1) + logger.debug("Audio stream started") return self.stream_audio_context @@ -588,11 +624,18 @@ def render(self, **kwargs): divider="rainbow", help="https://github.com/paulovcmedeiros/pyRobBot", ) + self.create_api_key_element() + # Create a sidebar with tabs for chats and settings tab1, tab2 = st.tabs(["Chats", "Settings for Current Chat"]) - self.sidebar_tabs = {"chats": tab1, "settings": tab2} + with tab1: + tab1_visible_container = st.container() + tab1_invisible_container = st.container(height=0, border=False) + + self.sidebar_tabs = {"chats": tab1_visible_container, "settings": tab2} + with tab1_visible_container: left, center, right = st.columns(3) with left: # Add button to show the costs table @@ -616,13 +659,9 @@ def render(self, **kwargs): key="toggle_continuous_voice_input", label=":microphone:", help="Toggle continuous voice input", - value=True, + value=False, ) - # Create a container for the continuous audio input widget, which will - # be rendered only later - continuous_audio_input_widget_container = st.container() - # Add button to create a new chat new_chat_button = st.button(label=":heavy_plus_sign: New Chat") @@ -657,7 +696,10 @@ def render(self, **kwargs): if new_chat_button or not self.pages: self.add_page() - with continuous_audio_input_widget_container: + # We'l hide the webrtc input buttom because I don't know how to customise it. + # I'll use the component "toggle_continuous_voice_input" to toggle it + if st.session_state["toggle_continuous_voice_input"]: + with tab1_invisible_container: self.render_continuous_audio_input_widget() return super().render(**kwargs) diff --git a/pyrobbot/sst_and_tts.py b/pyrobbot/sst_and_tts.py index caf0079..a0f741d 100644 --- a/pyrobbot/sst_and_tts.py +++ b/pyrobbot/sst_and_tts.py @@ -2,6 +2,7 @@ import io import socket +import uuid from dataclasses import dataclass, field from typing import Literal @@ -69,7 +70,10 @@ def _stt(self) -> str: fallback_stt_function = self._stt_openai fallback_name = "openai" - logger.debug("Converting audio to text ({} STT)...", self.engine) + conversion_id = uuid.uuid4() + logger.debug( + "Converting audio to text ({} STT). Process {}.", self.engine, conversion_id + ) try: rtn = stt_function() except ( @@ -79,16 +83,24 @@ def _stt(self) -> str: ) as error: logger.error(error) logger.error( - "Can't communicate with `{}` speech-to-text API right now", + "{}: Can't communicate with `{}` speech-to-text API right now", + conversion_id, self.engine, ) - logger.warning("Trying to use `{}` STT instead", fallback_name) + logger.warning( + "{}: Trying to use `{}` STT instead", conversion_id, fallback_name + ) rtn = fallback_stt_function() except sr.exceptions.UnknownValueError: - logger.opt(colors=True).debug("Can't understand audio") + logger.opt(colors=True).debug( + "{}: Can't understand audio", conversion_id + ) rtn = "" self._text = rtn.strip() + logger.opt(colors=True).debug( + "{}: Done with STT: {}", conversion_id, self._text + ) return self._text diff --git a/pyrobbot/voice_chat.py b/pyrobbot/voice_chat.py index 1ced0a3..0b67f44 100644 --- a/pyrobbot/voice_chat.py +++ b/pyrobbot/voice_chat.py @@ -234,6 +234,9 @@ def handle_update_audio_history(self, current_answer_audios_queue: queue.Queue): merged_audio.export(audio_file_path, format="mp3") logger.debug("File {} stored", audio_file_path) self.last_answer_full_audio_fpath.put(audio_file_path) + logger.debug( + "File {} sent to last_answer_full_audio_fpath queue", audio_file_path + ) merged_audio = AudioSegment.empty() current_answer_audios_queue.task_done()