From e92d84b74b86f4fbb52b1791ad96e39222d11df2 Mon Sep 17 00:00:00 2001 From: Paulo V C Medeiros Date: Wed, 21 Feb 2024 23:18:49 +0100 Subject: [PATCH 01/38] Minor change to AI instructions --- pyrobbot/chat.py | 27 +++++++++++++++------------ pyrobbot/chat_configs.py | 2 +- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/pyrobbot/chat.py b/pyrobbot/chat.py index 9659788..1d84e26 100644 --- a/pyrobbot/chat.py +++ b/pyrobbot/chat.py @@ -87,13 +87,14 @@ def base_directive(self): f"\nYou must follow all directives by {self.system_name} " f"unless otherwise instructed by {self.username}\n" f"You must reply in {self.language} unless otherwise instructed\n" - "> If asked to look up online, web internet etc, *you MUST ALWAYS AGREE*\n" + "> If asked to look up online, web, internet etc, *you MUST ALWAYS AGREE*\n" "\n*IMPORTANT*: If you can't find information neither in your training data " - "nor in the the conversation context, then, unless you have already looked up" - "online, do as follows:\n" - " > Do *NOT* apologise. Do NOT say you are sorry.\n" - " > Do NOT give any excuses. Do NOT ask permission to lookup online.\n" - " > STATE CLEARLY that you will look it up online\n" + "nor in the conversation context then, unless you have either already " + f"looked up online or not understood {self.username}'s prompt, you must " + "do as follows:\n" + " > Do *NOT* apologise nor say you are sorry nor give any excuses.\n" + " > Do *NOT* ask for permission to lookup online.\n" + " > STATE CLEARLY that you will look it up online.\n" ) return {"role": "system", "name": self.system_name, "content": msg_content} @@ -318,19 +319,21 @@ def _yield_response_from_msg( if not skip_check: last_msg_exchange = ( f"`user` says: {prompt_msg['content']}\n" - f"`you` reply: {full_reply_content}" + f"`you` replies: {full_reply_content}" ) system_check_msg = ( - "Consider the following dialogue AND NOTHING MORE:\n\n" + "Consider the following dialogue between `user` and `you` " + "AND NOTHING MORE:\n\n" f"{last_msg_exchange}\n\n" "Now answer the following question using only 'yes' or 'no':\n" - "Were `you` unable to fully answer the request made by `user`, " - "or have either of you asked or implied the need for a web search?\n" + "Were `you` able to provide a good answer the `user`s prompt, without " + "neither `you` nor `user` asking or implying the need or intention to " + "perform a search or lookup online, on the web or the internet?\n" ) reply = "".join(self.respond_system_prompt(prompt=system_check_msg)) reply = reply.strip(".' ").lower() - if ("yes" in reply) or (self._translate("yes") in reply): + if ("no" in reply) or (self._translate("no") in reply): instructions_for_web_search = ( "You are a professional web searcher. You will be presented with a " "dialogue between `user` and `you`. Considering the dialogue and " @@ -389,7 +392,7 @@ def _yield_response_from_msg( yield chunk else: yield self._translate( - "I couldn't find anything on the web this time. Sorry." + "Sorry, but I couldn't find anything on the web this time." ) if add_to_history: diff --git a/pyrobbot/chat_configs.py b/pyrobbot/chat_configs.py index 36e40c5..4af084a 100644 --- a/pyrobbot/chat_configs.py +++ b/pyrobbot/chat_configs.py @@ -127,7 +127,7 @@ class ChatOptions(OpenAiApiCallOptions): ai_instructions: tuple[str, ...] = Field( default=( "You answer correctly.", - "You do not lie.", + "You do not lie or make up information unless explicitly asked to do so.", ), description="Initial instructions for the AI", ) From 6e804f351f3da8e7375c3f9ede60f2860ed69f36 Mon Sep 17 00:00:00 2001 From: Paulo V C Medeiros Date: Fri, 23 Feb 2024 14:41:03 +0100 Subject: [PATCH 02/38] Support to voice reply in web app --- pyrobbot/app/app_page_templates.py | 56 ++++++++++++++++++++---------- pyrobbot/chat.py | 18 ++++++++-- pyrobbot/chat_configs.py | 9 +++-- pyrobbot/voice_chat.py | 34 +++--------------- 4 files changed, 61 insertions(+), 56 deletions(-) diff --git a/pyrobbot/app/app_page_templates.py b/pyrobbot/app/app_page_templates.py index d9cce8c..96a0481 100644 --- a/pyrobbot/app/app_page_templates.py +++ b/pyrobbot/app/app_page_templates.py @@ -1,5 +1,6 @@ """Utilities for creating pages in a streamlit app.""" +import base64 import contextlib import datetime import uuid @@ -9,6 +10,7 @@ import streamlit as st from audiorecorder import audiorecorder from PIL import Image +from pydub import AudioSegment from pyrobbot import GeneralDefinitions from pyrobbot.chat import Chat @@ -28,6 +30,21 @@ _RecoveredChat = object() +def autoplay_audio(audio: AudioSegment): + """Autoplay an audio segment in the streamlit app.""" + # Adaped from: + data = audio.export(format="mp3").read() + b64 = base64.b64encode(data).decode() + md = f""" + + """ + st.markdown(md, unsafe_allow_html=True) + + class AppPage(ABC): """Abstract base class for a page within a streamlit application.""" @@ -203,11 +220,12 @@ def _render_chatbot_page(self): f"Send a message to {self.chat_obj.assistant_name} ({self.chat_obj.model})" ) - use_microphone_input = st.session_state.get("toggle_mic_input", False) - if use_microphone_input: - prompt = self.state.pop("recorded_prompt", None) - else: - prompt = st.chat_input(placeholder=placeholder) + mic_input = st.session_state.get("toggle_mic_input", False) + prompt = ( + self.state.pop("recorded_prompt", None) + if mic_input + else st.chat_input(placeholder=placeholder) + ) with st.container(height=600, border=False): self.render_chat_history() @@ -229,16 +247,18 @@ def _render_chatbot_page(self): ) # Display (stream) assistant response in chat message container - with st.chat_message( - "assistant", avatar=self.avatars["assistant"] - ), st.empty(): - st.markdown("▌") - full_response = "" - for chunk in self.chat_obj.respond_user_prompt(prompt): - full_response += chunk - st.markdown(full_response + "▌") - st.caption(datetime.datetime.now().replace(microsecond=0)) - st.markdown(full_response) + with st.chat_message("assistant", avatar=self.avatars["assistant"]): + with st.empty(): + st.markdown("▌") + full_response = "" + for chunk in self.chat_obj.respond_user_prompt(prompt): + full_response += chunk + st.markdown(full_response + "▌") + st.caption(datetime.datetime.now().replace(microsecond=0)) + st.markdown(full_response) + if mic_input: + autoplay_audio(self.chat_obj.tts(full_response).speech) + prompt = None self.chat_history.append( { @@ -265,7 +285,7 @@ def _render_chatbot_page(self): self.sidebar_title = title title_container.header(title, divider="rainbow") - if use_microphone_input and ("recorded_prompt" not in self.state): + if mic_input and ("recorded_prompt" not in self.state): _left, center, _right = st.columns([1, 1, 1]) with center: audio = audiorecorder( @@ -274,11 +294,9 @@ def _render_chatbot_page(self): pause_prompt="", key="audiorecorder_widget", ) - min_audio_duration_seconds = 0.1 if audio.duration_seconds > min_audio_duration_seconds: - self.state["recorded_prompt"] = self.chat_obj.stt(audio) - self.state.update({"chat_started": True}) + self.state["recorded_prompt"] = self.chat_obj.stt(audio).text del st.session_state["audiorecorder_widget"] st.rerun() diff --git a/pyrobbot/chat.py b/pyrobbot/chat.py index 1d84e26..19eac49 100644 --- a/pyrobbot/chat.py +++ b/pyrobbot/chat.py @@ -20,7 +20,7 @@ from .general_utils import AlternativeConstructors, ReachedMaxNumberOfAttemptsError from .internet_utils import websearch from .openai_utils import OpenAiClientWrapper, make_api_chat_completion_call -from .sst_and_tts import SpeechToText +from .sst_and_tts import SpeechToText, TextToSpeech from .tokens import PRICE_PER_K_TOKENS_EMBEDDINGS, TokenUsageDatabase @@ -295,11 +295,25 @@ def stt(self, speech: AudioSegment): return SpeechToText( speech=speech, openai_client=self.openai_client, + engine=self.stt_engine, + language=self.language, + timeout=self.timeout, general_token_usage_db=self.general_token_usage_db, token_usage_db=self.token_usage_db, + ) + + def tts(self, text: str): + """Convert text to audio.""" + return TextToSpeech( + text=text, + openai_client=self.openai_client, language=self.language, + engine=self.tts_engine, + openai_tts_voice=self.openai_tts_voice, timeout=self.timeout, - ).text + general_token_usage_db=self.general_token_usage_db, + token_usage_db=self.token_usage_db, + ) def _yield_response_from_msg( self, prompt_msg: dict, add_to_history: bool = True, skip_check: bool = False diff --git a/pyrobbot/chat_configs.py b/pyrobbot/chat_configs.py index 4af084a..b95b89d 100644 --- a/pyrobbot/chat_configs.py +++ b/pyrobbot/chat_configs.py @@ -157,11 +157,6 @@ class ChatOptions(OpenAiApiCallOptions): description="Initial language adopted by the assistant. Use either the ISO-639-1 " "format (e.g. 'pt'), or an RFC5646 language tag (e.g. 'pt-br').", ) - - -class VoiceAssistantConfigs(BaseConfigModel): - """Model for the text-to-speech assistant's configuration options.""" - tts_engine: Literal["openai", "google"] = Field( default="openai", description="The text-to-speech engine to use. The `google` engine is free " @@ -177,6 +172,10 @@ class VoiceAssistantConfigs(BaseConfigModel): Field(default="onyx", description="Voice to use for OpenAI's TTS") ) + +class VoiceAssistantConfigs(BaseConfigModel): + """Model for the text-to-speech assistant's configuration options.""" + exit_expressions: list[str] = Field( default=["bye-bye", "ok bye-bye", "okay bye-bye"], description="Expression(s) to use in order to exit the chat", diff --git a/pyrobbot/voice_chat.py b/pyrobbot/voice_chat.py index fb92428..95af37f 100644 --- a/pyrobbot/voice_chat.py +++ b/pyrobbot/voice_chat.py @@ -20,7 +20,7 @@ from .chat import Chat from .chat_configs import VoiceChatConfigs from .general_utils import _get_lower_alphanumeric, str2_minus_str1 -from .sst_and_tts import SpeechToText, TextToSpeech +from .sst_and_tts import TextToSpeech try: import sounddevice as sd @@ -217,15 +217,7 @@ def check_for_interrupt_expressions_handler( while not self.exit_chat.is_set(): try: msgs_to_compare = check_for_interrupt_expressions_queue.get() - recorded_prompt = SpeechToText( - openai_client=self.openai_client, - speech=msgs_to_compare["user_audio"], - engine=self.stt_engine, - language=self.language, - timeout=self.timeout, - general_token_usage_db=self.general_token_usage_db, - token_usage_db=self.token_usage_db, - ).text + recorded_prompt = self.stt(speech=msgs_to_compare["user_audio"]).text recorded_prompt = _get_lower_alphanumeric(recorded_prompt).strip() assistant_msg = _get_lower_alphanumeric( @@ -349,15 +341,7 @@ def handle_question_listening(self, questions_queue: queue.Queue): if audio.duration_seconds < minimum_prompt_duration_seconds: continue - question = SpeechToText( - openai_client=self.openai_client, - speech=audio, - engine=self.stt_engine, - language=self.language, - timeout=self.timeout, - general_token_usage_db=self.general_token_usage_db, - token_usage_db=self.token_usage_db, - ).text + question = self.stt(speech=audio).text # Check for the exit expressions if any( @@ -393,17 +377,7 @@ def handle_tts_queue(self, text_queue: queue.Queue): try: text = text_queue.get() if text and not self.interrupt_reply.is_set(): - tts = TextToSpeech( - openai_client=self.openai_client, - text=text, - engine=self.tts_engine, - openai_tts_voice=self.openai_tts_voice, - language=self.language, - timeout=self.timeout, - general_token_usage_db=self.general_token_usage_db, - token_usage_db=self.token_usage_db, - ) - + tts = self.tts(text) # Trigger the TTS conversion _ = tts.speech From 54ed8c8cc83fedd5879eb76626f1fff51393b460 Mon Sep 17 00:00:00 2001 From: Paulo V C Medeiros Date: Fri, 23 Feb 2024 21:54:50 +0100 Subject: [PATCH 03/38] Refactor for more reusability --- pyrobbot/app/app_page_templates.py | 61 ++++++++++++++---------- pyrobbot/app/multipage.py | 31 +++++++------ pyrobbot/chat.py | 41 +++++++++++++---- pyrobbot/chat_configs.py | 3 ++ pyrobbot/voice_chat.py | 74 +++++++++++++----------------- 5 files changed, 121 insertions(+), 89 deletions(-) diff --git a/pyrobbot/app/app_page_templates.py b/pyrobbot/app/app_page_templates.py index 96a0481..22d0434 100644 --- a/pyrobbot/app/app_page_templates.py +++ b/pyrobbot/app/app_page_templates.py @@ -3,6 +3,7 @@ import base64 import contextlib import datetime +import time import uuid from abc import ABC, abstractmethod from typing import TYPE_CHECKING @@ -10,11 +11,11 @@ import streamlit as st from audiorecorder import audiorecorder from PIL import Image -from pydub import AudioSegment from pyrobbot import GeneralDefinitions -from pyrobbot.chat import Chat -from pyrobbot.chat_configs import ChatOptions +from pyrobbot.chat_configs import VoiceChatConfigs +from pyrobbot.sst_and_tts import TextToSpeech +from pyrobbot.voice_chat import VoiceChat if TYPE_CHECKING: from pyrobbot.app.multipage import MultipageChatbotApp @@ -30,19 +31,28 @@ _RecoveredChat = object() -def autoplay_audio(audio: AudioSegment): - """Autoplay an audio segment in the streamlit app.""" - # Adaped from: - data = audio.export(format="mp3").read() - b64 = base64.b64encode(data).decode() - md = f""" +class WebAppChat(VoiceChat): + """A chat object for web apps.""" + + def __init__(self, **kwargs): + """Initialize a new instance of the WebAppChat class.""" + super().__init__(**kwargs) + self.tts_conversion_watcher_thread.start() + + def speak(self, tts: TextToSpeech): + """Autoplay an audio segment in the streamlit app.""" + # Adaped from: + data = tts.speech.export(format="mp3").read() + b64 = base64.b64encode(data).decode() + md = f""" """ - st.markdown(md, unsafe_allow_html=True) + st.markdown(md, unsafe_allow_html=True) + time.sleep(tts.speech.duration_seconds) class AppPage(ABC): @@ -115,15 +125,15 @@ class ChatBotPage(AppPage): def __init__( self, parent: "MultipageChatbotApp", - chat_obj: Chat = None, + chat_obj: WebAppChat = None, sidebar_title: str = "", page_title: str = "", ): - """Initialize new instance of the ChatBotPage class with an optional Chat object. + """Initialize new instance of the ChatBotPage class with an opt WebAppChat object. Args: parent (MultipageChatbotApp): The parent app of the page. - chat_obj (Chat): The chat object. Defaults to None. + chat_obj (WebAppChat): The chat object. Defaults to None. sidebar_title (str): The sidebar title for the chatbot page. Defaults to an empty string. page_title (str): The title for the chatbot page. @@ -139,29 +149,29 @@ def __init__( self.avatars = {"assistant": _ASSISTANT_AVATAR_IMAGE, "user": _USER_AVATAR_IMAGE} @property - def chat_configs(self) -> ChatOptions: + def chat_configs(self) -> VoiceChatConfigs: """Return the configs used for the page's chat object.""" if "chat_configs" not in self.state: self.state["chat_configs"] = self.parent.state["chat_configs"] return self.state["chat_configs"] @chat_configs.setter - def chat_configs(self, value: ChatOptions): - self.state["chat_configs"] = ChatOptions.model_validate(value) + def chat_configs(self, value: VoiceChatConfigs): + self.state["chat_configs"] = VoiceChatConfigs.model_validate(value) if "chat_obj" in self.state: del self.state["chat_obj"] @property - def chat_obj(self) -> Chat: + def chat_obj(self) -> WebAppChat: """Return the chat object responsible for the queries on this page.""" if "chat_obj" not in self.state: - self.chat_obj = Chat( + self.chat_obj = WebAppChat( configs=self.chat_configs, openai_client=self.parent.openai_client ) return self.state["chat_obj"] @chat_obj.setter - def chat_obj(self, new_chat_obj: Chat): + def chat_obj(self, new_chat_obj: WebAppChat): current_chat = self.state.get("chat_obj") if current_chat: current_chat.save_cache() @@ -221,6 +231,7 @@ def _render_chatbot_page(self): ) mic_input = st.session_state.get("toggle_mic_input", False) + self.chat_obj.reply_only_as_text = not mic_input prompt = ( self.state.pop("recorded_prompt", None) if mic_input @@ -251,13 +262,15 @@ def _render_chatbot_page(self): with st.empty(): st.markdown("▌") full_response = "" - for chunk in self.chat_obj.respond_user_prompt(prompt): - full_response += chunk + for chunk in self.chat_obj.answer_question(prompt): + full_response += chunk.content st.markdown(full_response + "▌") st.caption(datetime.datetime.now().replace(microsecond=0)) st.markdown(full_response) if mic_input: - autoplay_audio(self.chat_obj.tts(full_response).speech) + while not self.chat_obj.play_speech_queue.empty(): + self.chat_obj.speak(self.chat_obj.play_speech_queue.get()) + self.chat_obj.play_speech_queue.task_done() prompt = None self.chat_history.append( diff --git a/pyrobbot/app/multipage.py b/pyrobbot/app/multipage.py index d83959f..597d506 100644 --- a/pyrobbot/app/multipage.py +++ b/pyrobbot/app/multipage.py @@ -12,15 +12,16 @@ from pydantic import ValidationError from pyrobbot import GeneralDefinitions -from pyrobbot.app.app_page_templates import ( +from pyrobbot.chat_configs import VoiceChatConfigs +from pyrobbot.openai_utils import OpenAiClientWrapper + +from .app_page_templates import ( _ASSISTANT_AVATAR_IMAGE, AppPage, ChatBotPage, + WebAppChat, _RecoveredChat, ) -from pyrobbot.chat import Chat -from pyrobbot.chat_configs import ChatOptions -from pyrobbot.openai_utils import OpenAiClientWrapper class AbstractMultipageApp(ABC): @@ -137,15 +138,17 @@ def openai_client(self) -> OpenAiClientWrapper: return self.state["openai_client"] @property - def chat_configs(self) -> ChatOptions: + def chat_configs(self) -> VoiceChatConfigs: """Return the configs used for the page's chat object.""" if "chat_configs" not in self.state: try: chat_options_file_path = sys.argv[-1] - self.state["chat_configs"] = ChatOptions.from_file(chat_options_file_path) + self.state["chat_configs"] = VoiceChatConfigs.from_file( + chat_options_file_path + ) except (FileNotFoundError, JSONDecodeError): logger.warning("Could not retrieve cli args. Using default chat options.") - self.state["chat_configs"] = ChatOptions() + self.state["chat_configs"] = VoiceChatConfigs() return self.state["chat_configs"] def create_api_key_element(self): @@ -208,9 +211,9 @@ def handle_ui_page_selection(self): # Present the user with the model and instructions fields first field_names = ["model", "ai_instructions", "context_model"] - field_names += list(ChatOptions.model_fields) + field_names += list(VoiceChatConfigs.model_fields) field_names = list(dict.fromkeys(field_names)) - model_fields = {k: ChatOptions.model_fields[k] for k in field_names} + model_fields = {k: VoiceChatConfigs.model_fields[k] for k in field_names} updates_to_chat_configs = self._handle_chat_configs_value_selection( current_chat_configs, model_fields @@ -219,7 +222,7 @@ def handle_ui_page_selection(self): if updates_to_chat_configs: new_chat_configs = current_chat_configs.model_dump() new_chat_configs.update(updates_to_chat_configs) - new_chat = Chat.from_dict(new_chat_configs) + new_chat = WebAppChat.from_dict(new_chat_configs) self.selected_page.chat_obj = new_chat def render(self, **kwargs): @@ -264,7 +267,7 @@ def render(self, **kwargs): self.state["saved_chats_reloaded"] = True for cache_dir_path in self.openai_client.saved_chat_cache_paths: try: - chat = Chat.from_cache( + chat = WebAppChat.from_cache( cache_dir=cache_dir_path, openai_client=self.openai_client ) except ValidationError: @@ -357,9 +360,9 @@ def _handle_chat_configs_value_selection(self, current_chat_configs, model_field updates_to_chat_configs = {} for field_name, field in model_fields.items(): title = field_name.replace("_", " ").title() - choices = ChatOptions.get_allowed_values(field=field_name) - description = ChatOptions.get_description(field=field_name) - field_type = ChatOptions.get_type(field=field_name) + choices = VoiceChatConfigs.get_allowed_values(field=field_name) + description = VoiceChatConfigs.get_description(field=field_name) + field_type = VoiceChatConfigs.get_type(field=field_name) # Check if the field is frozen and disable corresponding UI element if so chat_started = self.selected_page.state.get("chat_started", False) diff --git a/pyrobbot/chat.py b/pyrobbot/chat.py index 19eac49..3d419f6 100644 --- a/pyrobbot/chat.py +++ b/pyrobbot/chat.py @@ -10,6 +10,7 @@ from typing import Optional import openai +from attr import dataclass from loguru import logger from pydub import AudioSegment from tzlocal import get_localzone @@ -24,6 +25,14 @@ from .tokens import PRICE_PER_K_TOKENS_EMBEDDINGS, TokenUsageDatabase +@dataclass +class AssistantResponseChunk: + """A chunk of the assistant's response.""" + + content: str + chunk_type: str = "text" + + class Chat(AlternativeConstructors): """Manages conversations with an AI chat model. @@ -52,6 +61,8 @@ def __init__( self.id = str(uuid.uuid4()) logger.debug("Init chat {}", self.id) + self._code_marker = "\uE001" # TEST + self._passed_configs = configs for field in self._passed_configs.model_fields: setattr(self, field, self._passed_configs[field]) @@ -74,13 +85,14 @@ def __init__( @property def base_directive(self): """Return the base directive for the LLM.""" + code_marker = self._code_marker local_datetime = datetime.now(get_localzone()).isoformat(timespec="seconds") msg_content = ( f"Your name is {self.assistant_name}. Your model is {self.model}\n" f"You are a helpful assistant to {self.username}\n" f"You have internet access\n" - + "\n".join([f"{instruct.strip(' .')}." for instruct in self.ai_instructions]) - + "\n" + f"You MUST ALWAYS write {code_marker} before AND after code blocks. Example: " + f"```foo ... ``` MUST become {code_marker}```foo ... ```{code_marker}\n" f"The current city is {GeneralDefinitions.IPINFO['city']} in " f"{GeneralDefinitions.IPINFO['country_name']}\n" f"The local datetime is {local_datetime}\n" @@ -95,6 +107,7 @@ def base_directive(self): " > Do *NOT* apologise nor say you are sorry nor give any excuses.\n" " > Do *NOT* ask for permission to lookup online.\n" " > STATE CLEARLY that you will look it up online.\n" + "\n".join([f"{instruct.strip(' .')}." for instruct in self.ai_instructions]) ) return {"role": "system", "name": self.system_name, "content": msg_content} @@ -223,22 +236,34 @@ def respond_system_prompt( self, prompt: str, add_to_history=False, skip_check=True, **kwargs ): """Respond to a system prompt.""" - yield from self._respond_prompt( + for response_chunk in self._respond_prompt( prompt=prompt, role="system", add_to_history=add_to_history, skip_check=skip_check, **kwargs, - ) + ): + yield response_chunk.content def yield_response_from_msg( self, prompt_msg: dict, add_to_history: bool = True, **kwargs ): """Yield response from a prompt message.""" + code_marker = self._code_marker try: - yield from self._yield_response_from_msg( + inside_code_block = False + for answer_chunk in self._yield_response_from_msg( prompt_msg=prompt_msg, add_to_history=add_to_history, **kwargs - ) + ): + code_marker_detected = code_marker in answer_chunk + inside_code_block = (code_marker_detected and not inside_code_block) or ( + inside_code_block and not code_marker_detected + ) + yield AssistantResponseChunk( + content=answer_chunk.strip(code_marker), + chunk_type="code" if inside_code_block else "text", + ) + except (ReachedMaxNumberOfAttemptsError, openai.OpenAIError) as error: yield self.response_failure_message(error=error) @@ -253,7 +278,7 @@ def start(self): continue print(f"{self.assistant_name}> ", end="", flush=True) for chunk in self.respond_user_prompt(prompt=question): - print(chunk, end="", flush=True) + print(chunk.content, end="", flush=True) print() print() except (KeyboardInterrupt, EOFError): @@ -288,7 +313,7 @@ def response_failure_message(self, error: Optional[Exception] = None): msg += f" The reason seems to be: {error} " msg += "Please check your connection or OpenAI API key." logger.opt(exception=True).debug(error) - return msg + return AssistantResponseChunk(msg) def stt(self, speech: AudioSegment): """Convert audio to text.""" diff --git a/pyrobbot/chat_configs.py b/pyrobbot/chat_configs.py index b95b89d..22bca08 100644 --- a/pyrobbot/chat_configs.py +++ b/pyrobbot/chat_configs.py @@ -206,6 +206,9 @@ class VoiceAssistantConfigs(BaseConfigModel): frame_duration: Literal[10, 20, 30] = Field( default=30, description="Frame duration for audio recording, in milliseconds." ) + reply_only_as_text: Optional[bool] = Field( + default=None, description="Reply only as text. The assistant will not speak." + ) skip_initial_greeting: Optional[bool] = Field( default=None, description="Skip initial greeting." ) diff --git a/pyrobbot/voice_chat.py b/pyrobbot/voice_chat.py index 95af37f..7efd33d 100644 --- a/pyrobbot/voice_chat.py +++ b/pyrobbot/voice_chat.py @@ -51,9 +51,9 @@ class VoiceChat(Chat): default_configs = VoiceChatConfigs() - def __init__(self, configs: VoiceChatConfigs = default_configs): + def __init__(self, configs: VoiceChatConfigs = default_configs, **kwargs): """Initializes a chat instance.""" - super().__init__(configs=configs) + super().__init__(configs=configs, **kwargs) _check_needed_imports() self.block_size = int((self.sample_rate * self.frame_duration) / 1000) @@ -82,7 +82,7 @@ def __init__(self, configs: VoiceChatConfigs = default_configs): ) self.play_speech_thread = threading.Thread( target=self.handle_speech_queue, args=(self.play_speech_queue,), daemon=True - ) + ) # TODO: Do not start this in webchat # 3. Watching for expressions that cancel the reply or exit the chat self.check_for_interrupt_expressions_queue = queue.Queue() self.check_for_interrupt_expressions_thread = threading.Thread( @@ -129,16 +129,27 @@ def start(self): self.interrupt_reply.clear() logger.debug(f"{self.assistant_name}> Waiting for user input...") question = self.questions_queue.get() + self.questions_queue.task_done() if question is None: self.exit_chat.set() else: chime.success() - self.answer_question(question) - except (KeyboardInterrupt, EOFError): # noqa: PERF203 + info_printed = False + for chunk in self.answer_question(question): + if chunk.chunk_type != "code": + continue + if not info_printed: + msg = self._translate( + "I'll write the code in the text output." + ) + self.tts_conversion_queue.put(msg) + info_printed = True + print(chunk.content, end="", flush=True) + if info_printed: + print("\n") + except (KeyboardInterrupt, EOFError): self.exit_chat.set() - finally: - self.questions_queue.task_done() chime.info() logger.debug("Leaving chat") @@ -146,52 +157,29 @@ def start(self): def answer_question(self, question: str): """Answer a question.""" logger.debug("{}> Getting response to '{}'...", self.assistant_name, question) - sentence = "" - inside_code_block = False - at_least_one_code_line_written = False + sentence_for_tts = "" for answer_chunk in self.respond_user_prompt(prompt=question): if self.interrupt_reply.is_set() or self.exit_chat.is_set(): - return - - fmtd_chunk = answer_chunk.strip(" \n") - code_block_start_detected = fmtd_chunk.startswith("``") - - if code_block_start_detected and not inside_code_block: - # Toggle the code block state - inside_code_block = True - - if inside_code_block: - code_chunk = answer_chunk - if at_least_one_code_line_written: - inside_code_block = not fmtd_chunk.endswith("``") # Code block ends - if not inside_code_block: - code_chunk = answer_chunk.rstrip("`") + "```\n" - print( - code_chunk, - end="" if inside_code_block else "\n", - flush=True, - ) - at_least_one_code_line_written = True - else: + raise StopIteration + yield answer_chunk + + if answer_chunk.chunk_type == "text" and not self.reply_only_as_text: # The answer chunk is to be spoken - sentence += answer_chunk - stripd_chunk = answer_chunk.strip() + sentence_for_tts += answer_chunk.content + stripd_chunk = answer_chunk.content.strip() if stripd_chunk.endswith(("?", "!", ".")): # Check if second last character is a number, to avoid splitting if stripd_chunk.endswith("."): with contextlib.suppress(IndexError): - previous_char = sentence.strip()[-2] + previous_char = sentence_for_tts.strip()[-2] if previous_char.isdigit(): continue # Send sentence for TTS even if the request hasn't finished - self.tts_conversion_queue.put(sentence) - sentence = "" - if sentence: - self.tts_conversion_queue.put(sentence) - if at_least_one_code_line_written: - spoken_info_to_user = "The code has been written to the console" - spoken_info_to_user = self._translate(spoken_info_to_user) - self.tts_conversion_queue.put(spoken_info_to_user) + self.tts_conversion_queue.put(sentence_for_tts) + sentence_for_tts = "" + + if sentence_for_tts and not self.reply_only_as_text: + self.tts_conversion_queue.put(sentence_for_tts) def speak(self, tts: TextToSpeech): """Reproduce audio from a pygame Sound object.""" From c150df111b3b88cff9bcea9b1898fb31cd02e53a Mon Sep 17 00:00:00 2001 From: Paulo V C Medeiros Date: Sat, 24 Feb 2024 00:56:43 +0100 Subject: [PATCH 04/38] Exploring ways to keep audios in chat history --- pyrobbot/app/app_page_templates.py | 18 ++++++++++++------ pyrobbot/chat_context.py | 6 ++++-- pyrobbot/embeddings_database.py | 19 ++++++++++++++++++- pyrobbot/voice_chat.py | 30 ++++++++++++++++++++++++++++++ 4 files changed, 64 insertions(+), 9 deletions(-) diff --git a/pyrobbot/app/app_page_templates.py b/pyrobbot/app/app_page_templates.py index 22d0434..37dd926 100644 --- a/pyrobbot/app/app_page_templates.py +++ b/pyrobbot/app/app_page_templates.py @@ -39,15 +39,16 @@ def __init__(self, **kwargs): super().__init__(**kwargs) self.tts_conversion_watcher_thread.start() - def speak(self, tts: TextToSpeech): + def speak(self, tts: TextToSpeech, autoplay: bool = True): """Autoplay an audio segment in the streamlit app.""" # Adaped from: + autoplay = "true" if autoplay else "false" data = tts.speech.export(format="mp3").read() b64 = base64.b64encode(data).decode() md = f""" -