Support to STT in webapp. Misc fixes.

paulovcmedeiros · Feb 20, 2024 · 5df7073 · 5df7073
2 parents 4508afd + 3dafae5
commit 5df7073
Show file tree

Hide file tree

Showing 13 changed files with 357 additions and 241 deletions.
diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@
 [![Tests](https://github.com/paulovcmedeiros/pyRobBot/actions/workflows/tests.yaml/badge.svg)](https://github.com/paulovcmedeiros/pyRobBot/actions/workflows/tests.yaml)
 [![codecov](https://codecov.io/gh/paulovcmedeiros/pyRobBot/graph/badge.svg?token=XI8G1WH9O6)](https://codecov.io/gh/paulovcmedeiros/pyRobBot)
 
-# pyRobBot: Talk and Chat with GPT LLMs
+# pyRobBot: Chat with GPT LLMs over voice, UI & terminal, with access to internet
 
 A python package that uses OpenAI's [GPT large language models (LLMs)](https://platform.openai.com/docs/models) to implement:
 * A fully configurable personal assistant that can speak and listen to you

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,10 +1,10 @@
 [tool.poetry]
   authors = ["Paulo V C Medeiros <[email protected]>"]
-  description = "GPT chatbot using OpenAI API"
+  description = "Chat with GPT LLMs over voice, UI & terminal, with access to internet. Uses the OpenAI API."
   license = "MIT"
   name = "pyrobbot"
   readme = "README.md"
-  version = "0.6.0"
+  version = "0.6.1"
 
 [build-system]
   build-backend = "poetry.core.masonry.api"
@@ -43,6 +43,7 @@
   sounddevice = "^0.4.6"
   soundfile = "^0.12.1"
   speechrecognition = "^3.10.0"
+  streamlit-audiorecorder = "^0.0.4"
   tzlocal = "^5.2"
   unidecode = "^1.3.7"
   webrtcvad-wheels = "^2.0.11.post1"

diff --git a/pyrobbot/__init__.py b/pyrobbot/__init__.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """Unofficial OpenAI API UI and CLI tool."""
-import hashlib
+import contextlib
 import os
 import sys
 import tempfile
@@ -13,7 +13,6 @@
 import ipinfo
 import requests
 from loguru import logger
-from openai import OpenAI
 
 logger.remove()
 logger.add(
@@ -47,15 +46,8 @@ class GeneralDefinitions:
     PARSED_ARGS_FILE = PACKAGE_TMPDIR / f"parsed_args_{RUN_ID}.pkl"
 
     # Location info
-    try:
+    IPINFO = defaultdict(lambda: "unknown")
+    with contextlib.suppress(
+        requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError
+    ):
         IPINFO = ipinfo.getHandler().getDetails().all
-    except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError):
-        IPINFO = defaultdict(lambda: "unknown")
-
-    @classmethod
-    def get_openai_client_cache_dir(cls, openai_client: OpenAI = None):
-        """Return the directory where chats using openai_client will be stored."""
-        if openai_client is None:
-            return cls.PACKAGE_CACHE_DIRECTORY / "user_demo"
-        key_hash = hashlib.sha256(openai_client.api_key.encode("utf-8")).hexdigest()
-        return cls.PACKAGE_CACHE_DIRECTORY / f"user_{key_hash}"
diff --git a/pyrobbot/app/app.py b/pyrobbot/app/app.py
@@ -7,7 +7,9 @@
 def run_app():
     """Create and run an instance of the package's app."""
     MultipageChatbotApp(
-        page_title=GeneralDefinitions.APP_NAME, page_icon=":speech_balloon:"
+        page_title=GeneralDefinitions.APP_NAME,
+        page_icon=":speech_balloon:",
+        layout="wide",
     ).render()
 
 

diff --git a/pyrobbot/app/app_page_templates.py b/pyrobbot/app/app_page_templates.py
@@ -2,14 +2,12 @@
 
 import contextlib
 import datetime
-import sys
 import uuid
 from abc import ABC, abstractmethod
-from json.decoder import JSONDecodeError
 from typing import TYPE_CHECKING
 
 import streamlit as st
-from loguru import logger
+from audiorecorder import audiorecorder
 from PIL import Image
 
 from pyrobbot import GeneralDefinitions
@@ -127,12 +125,7 @@ def __init__(
     def chat_configs(self) -> ChatOptions:
         """Return the configs used for the page's chat object."""
         if "chat_configs" not in self.state:
-            try:
-                chat_options_file_path = sys.argv[-1]
-                self.state["chat_configs"] = ChatOptions.from_file(chat_options_file_path)
-            except (FileNotFoundError, JSONDecodeError):
-                logger.warning("Could not retrieve cli args. Using default chat options.")
-                self.state["chat_configs"] = ChatOptions()
+            self.state["chat_configs"] = self.parent.state["chat_configs"]
         return self.state["chat_configs"]
 
     @chat_configs.setter
@@ -143,10 +136,10 @@ def chat_configs(self, value: ChatOptions):
 
     @property
     def chat_obj(self) -> Chat:
-        """Return the chat object responsible for the queries in this page."""
+        """Return the chat object responsible for the queries on this page."""
         if "chat_obj" not in self.state:
             self.chat_obj = Chat(
-                self.chat_configs, openai_client=self.parent.openai_client
+                configs=self.chat_configs, openai_client=self.parent.openai_client
             )
         return self.state["chat_obj"]
 
@@ -170,6 +163,9 @@ def chat_history(self) -> list[dict[str, str]]:
 
     def render_chat_history(self):
         """Render the chat history of the page. Do not include system messages."""
+        with st.chat_message("assistant", avatar=self.avatars["assistant"]):
+            st.markdown(self.chat_obj.initial_greeting)
+
         for message in self.chat_history:
             role = message["role"]
             if role == "system":
@@ -200,81 +196,91 @@ def _render_chatbot_page(self):
         <https://docs.streamlit.io/knowledge-base/tutorials/build-conversational-apps>
 
         """
-        st.header(self.title, divider="rainbow")
-
-        if self.chat_history:
-            self.render_chat_history()
-        else:
-            with st.chat_message("assistant", avatar=self.avatars["assistant"]):
-                st.markdown(self.chat_obj.initial_greeting)
-            self.chat_history.append(
-                {
-                    "role": "assistant",
-                    "name": self.chat_obj.assistant_name,
-                    "content": self.chat_obj.initial_greeting,
-                }
-            )
+        title_container = st.empty()
+        title_container.header(self.title, divider="rainbow")
 
-        # Accept user input
         placeholder = (
             f"Send a message to {self.chat_obj.assistant_name} ({self.chat_obj.model})"
         )
-        if prompt := st.chat_input(
-            placeholder=placeholder,
-            on_submit=lambda: self.state.update({"chat_started": True}),
-        ):
-            time_now = datetime.datetime.now().replace(microsecond=0)
-            # Display user message in chat message container
-            with st.chat_message("user", avatar=self.avatars["user"]):
-                st.caption(time_now)
-                st.markdown(prompt)
-            self.chat_history.append(
-                {
-                    "role": "user",
-                    "name": self.chat_obj.username,
-                    "content": prompt,
-                    "timestamp": time_now,
-                }
-            )
 
-            # Display (stream) assistant response in chat message container
-            with st.chat_message(
-                "assistant", avatar=self.avatars["assistant"]
-            ), st.empty():
-                st.markdown("▌")
-                full_response = ""
-                for chunk in self.chat_obj.respond_user_prompt(prompt):
-                    full_response += chunk
-                    st.markdown(full_response + "▌")
-                st.caption(datetime.datetime.now().replace(microsecond=0))
-                st.markdown(full_response)
-
-            self.chat_history.append(
-                {
-                    "role": "assistant",
-                    "name": self.chat_obj.assistant_name,
-                    "content": full_response,
-                }
-            )
+        use_microphone_input = st.session_state.get("toggle_mic_input", False)
+        if use_microphone_input:
+            prompt = self.state.pop("recorded_prompt", None)
+        else:
+            prompt = st.chat_input(placeholder=placeholder)
 
-            # Reset title according to conversation initial contents
-            min_history_len_for_summary = 3
-            if (
-                "page_title" not in self.state
-                and len(self.chat_history) > min_history_len_for_summary
-            ):
-                with st.spinner("Working out conversation topic..."):
-                    prompt = "Summarize the messages in max 4 words.\n"
-                    title = "".join(
-                        self.chat_obj.respond_system_prompt(prompt, add_to_history=False)
-                    )
-                    self.chat_obj.metadata["page_title"] = title
-                    self.chat_obj.metadata["sidebar_title"] = title
-                    self.chat_obj.save_cache()
-
-                    self.title = title
-                    self.sidebar_title = title
-                    st.header(title, divider="rainbow")
+        with st.container(height=600, border=False):
+            self.render_chat_history()
+            # Process user input
+            if prompt:
+                time_now = datetime.datetime.now().replace(microsecond=0)
+                self.state.update({"chat_started": True})
+                # Display user message in chat message container
+                with st.chat_message("user", avatar=self.avatars["user"]):
+                    st.caption(time_now)
+                    st.markdown(prompt)
+                self.chat_history.append(
+                    {
+                        "role": "user",
+                        "name": self.chat_obj.username,
+                        "content": prompt,
+                        "timestamp": time_now,
+                    }
+                )
+
+                # Display (stream) assistant response in chat message container
+                with st.chat_message(
+                    "assistant", avatar=self.avatars["assistant"]
+                ), st.empty():
+                    st.markdown("▌")
+                    full_response = ""
+                    for chunk in self.chat_obj.respond_user_prompt(prompt):
+                        full_response += chunk
+                        st.markdown(full_response + "▌")
+                    st.caption(datetime.datetime.now().replace(microsecond=0))
+                    st.markdown(full_response)
+
+                self.chat_history.append(
+                    {
+                        "role": "assistant",
+                        "name": self.chat_obj.assistant_name,
+                        "content": full_response,
+                    }
+                )
+
+                # Reset title according to conversation initial contents
+                min_history_len_for_summary = 3
+                if (
+                    "page_title" not in self.state
+                    and len(self.chat_history) > min_history_len_for_summary
+                ):
+                    with st.spinner("Working out conversation topic..."):
+                        prompt = "Summarize the previous messages in max 4 words"
+                        title = "".join(self.chat_obj.respond_system_prompt(prompt))
+                        self.chat_obj.metadata["page_title"] = title
+                        self.chat_obj.metadata["sidebar_title"] = title
+                        self.chat_obj.save_cache()
+
+                        self.title = title
+                        self.sidebar_title = title
+                        title_container.header(title, divider="rainbow")
+
+        if use_microphone_input and ("recorded_prompt" not in self.state):
+            _left, center, _right = st.columns([1, 1, 1])
+            with center:
+                audio = audiorecorder(
+                    start_prompt=placeholder.replace("Send", "Record"),
+                    stop_prompt="Stop and send prompt",
+                    pause_prompt="",
+                    key="audiorecorder_widget",
+                )
+
+            min_audio_duration_seconds = 0.1
+            if audio.duration_seconds > min_audio_duration_seconds:
+                self.state["recorded_prompt"] = self.chat_obj.stt(audio)
+                self.state.update({"chat_started": True})
+                del st.session_state["audiorecorder_widget"]
+                st.rerun()
 
     def render(self):
         """Render the app's chatbot or costs page, depending on user choice."""