[docs sprint] update open source quickstarts #15

Merged · 10 commits · Jun 10, 2024
6 changes: 6 additions & 0 deletions Makefile
@@ -9,6 +9,12 @@ transcribe:
 synthesize:
 	poetry run python playground/streaming/synthesizer/synthesize.py

+turn_based_conversation:
+	poetry run python quickstarts/turn_based_conversation.py
+
+streaming_conversation:
+	poetry run python quickstarts/streaming_conversation.py
+
 PYTHON_FILES=.
 lint: PYTHON_FILES=vocode/ quickstarts/ playground/
 lint_diff typecheck_diff: PYTHON_FILES=$(shell git diff --name-only --diff-filter=d main | grep -E '\.py$$')
13 changes: 9 additions & 4 deletions docs/mint.json
@@ -49,7 +49,11 @@
     "navigation": [
       {
         "group": "Getting Started",
-        "pages": ["welcome", "hosted-quickstart", "open-source-quickstart"]
+        "pages": [
+          "welcome",
+          "hosted-quickstart",
+          "open-source-quickstart"
+        ]
       },
       {
         "group": "Vocode 101",
@@ -75,7 +79,6 @@
         "open-source/playground",
         "open-source/turn-based-conversation",
         "open-source/language-support",
-        "open-source/tracing",
         "open-source/agent-factory"
       ]
     },
@@ -109,7 +112,9 @@
       },
       {
         "group": "Usage",
-        "pages": ["api-reference/usage/get-usage"]
+        "pages": [
+          "api-reference/usage/get-usage"
+        ]
       },
       {
         "group": "Actions",
@@ -223,4 +228,4 @@
     "twitter": "https://twitter.com/vocodehq",
     "website": "https://www.vocode.dev/"
   }
-}
+}
25 changes: 15 additions & 10 deletions docs/open-source/agent-factory.mdx
@@ -5,38 +5,43 @@ description: 'How to link a custom agent to your app'

 # Agent Factories

-Agent factories specify which agents are available to your app. In order to connect an agent to your app, you must first define an agent factory. To do so, subclass the [`AgentFactory`](https://github.com/vocodedev/vocode-python/blob/main/vocode/streaming/agent/factory.py) class to specify how agents are created. Here you can import and use your own custom agents.
+Agent factories specify which agents are available to your app. In order to connect an agent to your app, you must first define an agent factory. To do so, subclass the [`AbstractAgentFactory`](https://github.com/vocodedev/vocode-python/blob/main/vocode/streaming/agent/abstract_factory.py) class to specify how agents are created. Here you can import and use your own custom agents.

 ## Example

-First define your `AgentFactory`:
+First, define your `AgentFactory`. In this example, we create a factory for a new type of agent called `MyActionAgent`:

 ```python
-from vocode.streaming.agent.factory import AgentFactory
+import logging
+from typing import Optional
+
+from vocode.streaming.agent.abstract_factory import AbstractAgentFactory
+from vocode.streaming.agent.base_agent import BaseAgent
+from vocode.streaming.models.agent import AgentConfig
+from vocode.streaming.action.my_action_factory import MyActionFactory

-class MyAgentFactory(AgentFactory):
-    def __init__(self, agent_config: AgentConfig, action_factory: MyActionFactory):
-        self.agent_config = agent_config
+class MyAgentFactory(AbstractAgentFactory):
+    def __init__(self, action_factory: MyActionFactory):
         self.action_factory = action_factory

     def create_agent(
         self, agent_config: AgentConfig, logger: Optional[logging.Logger] = None
     ) -> BaseAgent:
         if agent_config.type == "MY_ACTION":
             return MyActionAgent(
-                agent_config=typing.cast(ActionAgentConfig, self.agent_config),
+                agent_config=agent_config,
                 action_factory=self.action_factory
             )
-        raise Exception("Invalid agent config")
+        elif agent_config.type == "other_agent_type":
+            ...
+        else:
+            raise Exception("Invalid agent config")
 ```

Review comments on this hunk:

Owner: bottom AgentFactory instantiation needs to be updated

Collaborator (Author): No, this was intentional (as in, they can add others). Would it be better to have just the one agent type?

Owner: like it no longer can take in an agent_config, instantiation needs to be updated

 Then, in your app, you can connect the agent to the app:

 ```python
 from vocode.streaming.telephony.server.base import TelephonyServer
 from vocode.streaming.agent.my_agent_factory import MyAgentFactory
 from vocode.streaming.action.my_action_factory import MyActionFactory

 telephony_server = TelephonyServer(
-    agent_factory=MyAgentFactory(
-        agent_config=agent_config, action_factory=action_factory),
+    agent_factory=MyAgentFactory(action_factory=MyActionFactory()),
     ...
 )
 ```
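
For illustration, a factory defined this way could also be exercised directly, as in the hypothetical sketch below. `MyActionAgentConfig` is an illustrative stand-in for whatever config class your custom agent defines; only `MyAgentFactory` and `MyActionFactory` come from the example above.

```python
# Hypothetical usage sketch. MyActionAgentConfig is an illustrative stand-in
# for your custom agent's config class; it is not part of vocode.
from vocode.streaming.agent.my_agent_factory import MyAgentFactory
from vocode.streaming.action.my_action_factory import MyActionFactory

factory = MyAgentFactory(action_factory=MyActionFactory())

# create_agent dispatches on agent_config.type, per the factory above
my_agent = factory.create_agent(agent_config=MyActionAgentConfig(...))
```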
7 changes: 3 additions & 4 deletions docs/open-source/language-support.mdx
@@ -22,11 +22,9 @@ synthesizer_config = AzureSynthesizerConfig(
 )
 ```

-See the [full list of supported voices](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support?tabs=tts).
-
 ## Transcription

-The transcriber used in vocode is also configurable. By default, `DeepgramTranscriber` is used which supports [over 25 languages](https://developers.deepgram.com/docs/languages-overview).
+The transcriber used in vocode is also configurable. By default, `DeepgramTranscriber` is used, which supports [over 35 languages](https://developers.deepgram.com/docs/languages-overview).

 To configure a different language, modify the language code passed to `TranscriberConfig` when initializing the config object (`en-US` is the default):

@@ -35,10 +33,11 @@ from vocode.streaming.models.transcriber import DeepgramTranscriberConfig

 transcriber_config = DeepgramTranscriberConfig(
-    language="es" # Spanish
+    language="es",  # Spanish
+    model="nova-2"  # Most languages are supported on the Nova 2 model
 )
 ```

-See the [Deepgram docs](https://developers.deepgram.com/docs/languages-overview) for the list of supported lamguages.
+***Note: the default model for Deepgram is Nova, so you must pass `model="nova-2"` to use the Nova 2 model.***

 Other transcription services like Google Cloud Speech or Assembly AI could also be used by configuring the appropriate `TranscriberConfig`.
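
As a sketch of that last point: swapping providers is a matter of choosing a different `TranscriberConfig` subclass. The class name and field below are assumptions to verify against your installed vocode version, not confirmed API:

```python
# A sketch, not verified against the library: assumes an
# AssemblyAITranscriberConfig exists alongside DeepgramTranscriberConfig
# and accepts a language code the same way.
from vocode.streaming.models.transcriber import AssemblyAITranscriberConfig

transcriber_config = AssemblyAITranscriberConfig(
    language="es",  # assumed field; check the config class for support
)
```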
11 changes: 9 additions & 2 deletions docs/open-source/playground.mdx
@@ -8,6 +8,11 @@ test transcribers, agents, and synthesizers.

 To begin, clone the [repo](https://github.com/vocodedev/vocode-python).

+Install the core packages by running the following:
+```
+poetry install
+```
+
 # Streaming

 ## Transcriber
@@ -32,9 +37,11 @@ make chat

 ## Synthesizer

-1. Update your synthesizer configuration in `playground/streaming/synthesizer/synthesize.py`
+1. Install the synthesizer packages by running `poetry install --extras=synthesizers`

-2. Run the following script to synthesize text to speech and play it to your speaker:
+2. Update your synthesizer configuration in `playground/streaming/synthesizer/synthesize.py`
+
+3. Run the following script to synthesize text to speech and play it to your speaker:

 ```
 make synthesize
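
For step 2 above, the configuration edit might look like the following. This is a sketch under assumptions: that `synthesize.py` already defines a `speaker_output`, and that `AzureSynthesizerConfig.from_output_device` is available as in other vocode examples.

```python
# Sketch of a synthesizer configuration swap in
# playground/streaming/synthesizer/synthesize.py (assumed structure).
from vocode.streaming.models.synthesizer import AzureSynthesizerConfig

# speaker_output is assumed to be created earlier in the script
synthesizer_config = AzureSynthesizerConfig.from_output_device(
    speaker_output,
    voice_name="en-US-SteffanNeural",  # any supported Azure voice
)
```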
95 changes: 0 additions & 95 deletions docs/open-source/tracing.mdx

This file was deleted.

73 changes: 35 additions & 38 deletions docs/open-source/turn-based-conversation.mdx
@@ -9,55 +9,64 @@ A turn-based conversation is a communication system designed for applications wh
 This model differs from streaming conversations that try to mimic natural human discourse. Instead, it fits applications triggered by some kind of user input.
 For example, consider a voice memo application where the user records a message, and the agent generates a complete response.

-A turn-based conversation system is perfect for applications that don't require real-time responses or constant back-and-forths.
-This design reduces complexity and allows for a more controlled conversation flow. Each user input is treated as a discrete event,
+A turn-based conversation system is perfect for applications that don't require interruptions and have a controlled conversation flow. Each user input is treated as a discrete event,
 giving the system time to generate and deliver a full and meaningful response.

 ## Turn-based quickstart

-The code can be found [here](https://github.com/vocodedev/vocode-python/blob/main/quickstarts/turn_based_conversation.py)
+The example below demonstrates a turn-based conversation, using a ChatGPT agent for text generation, WhisperTranscriber for speech-to-text,
+and AzureSynthesizer for text-to-speech. User interactions trigger the beginning and end of the recording, signaling the system when to listen and when to respond. You can run it with:
+```
+make turn_based_conversation
+```

-```python
-import logging
-from dotenv import load_dotenv
-from vocode import getenv
-from vocode.helpers import create_turn_based_microphone_input_and_speaker_output
-from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
-from vocode.turn_based.synthesizer.azure_synthesizer import AzureSynthesizer
-from vocode.turn_based.synthesizer.eleven_labs_synthesizer import ElevenLabsSynthesizer
-from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
-from vocode.turn_based.turn_based_conversation import TurnBasedConversation
+*Remember to replace OPENAI_API_KEY and AZURE_SPEECH_KEY with your actual API keys and set the appropriate Azure region. You can also set these variables in a `.env` file and source it in your terminal.
+You can also customize the voice, system prompt, and initial message as needed. The code can be found [here](https://github.com/vocodedev/vocode-python/blob/main/quickstarts/turn_based_conversation.py).*

-logging.basicConfig()
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
+```python
+class Settings(BaseSettings):
+    """
+    Settings for the turn-based conversation quickstart.
+    These parameters can be configured with environment variables.
+    """
+
+    openai_api_key: str = "ENTER_YOUR_OPENAI_API_KEY_HERE"
+    azure_speech_key: str = "ENTER_YOUR_AZURE_KEY_HERE"
+
+    azure_speech_region: str = "eastus"
+
+    # This means a .env file can be used to overload these settings
+    # ex: "OPENAI_API_KEY=my_key" will set openai_api_key over the default above
+    model_config = SettingsConfigDict(
+        env_file=".env",
+        env_file_encoding="utf-8",
+    )

-load_dotenv()
-
-# See https://api.elevenlabs.io/v1/voices
-ADAM_VOICE_ID = "pNInz6obpgDQGcFmaJgB"
+
+settings = Settings()

 if __name__ == "__main__":
     (
         microphone_input,
         speaker_output,
-    ) = create_turn_based_microphone_input_and_speaker_output(use_default_devices=False)
+    ) = create_turn_based_microphone_input_and_speaker_output(
+        use_default_devices=False,
+    )

     conversation = TurnBasedConversation(
         input_device=microphone_input,
         output_device=speaker_output,
-        transcriber=WhisperTranscriber(api_key=getenv("OPENAI_API_KEY")),
+        transcriber=WhisperTranscriber(api_key=settings.openai_api_key),
         agent=ChatGPTAgent(
             system_prompt="The AI is having a pleasant conversation about life",
             initial_message="Hello!",
-            api_key=getenv("OPENAI_API_KEY"),
+            api_key=settings.openai_api_key,
         ),
         synthesizer=AzureSynthesizer(
-            api_key=getenv("AZURE_SPEECH_KEY"),
-            region=getenv("AZURE_SPEECH_REGION"),
+            api_key=settings.azure_speech_key,
+            region=settings.azure_speech_region,
             voice_name="en-US-SteffanNeural",
         ),
-        logger=logger,
     )
     print("Starting conversation. Press Ctrl+C to exit.")
     while True:
@@ -68,16 +77,4 @@ if __name__ == "__main__":
             conversation.end_speech_and_respond()
     except KeyboardInterrupt:
         break
 ```
-
-This example demonstrates a turn-based conversation, using a ChatGPT agent for text generation, WhisperTranscriber for speech-to-text,
-and AzureSynthesizer for text-to-speech. User interactions trigger the beginning and end of the recording, signaling the system when to listen and when to respond.
-
-Remember to replace OPENAI_API_KEY and AZURE_SPEECH_KEY with your actual API keys and set the appropriate Azure region.
-You can also customize the voice, system prompt, and initial message as needed.
-
-## React turn-based quickstart
-
-🚧 Under construction
-
-If you want to work on a sample react app for this, reach out to us!
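
The quickstart snippet above omits its import block. Based on the module paths removed elsewhere in this diff and the `BaseSettings`/`SettingsConfigDict` usage, it presumably needs something like the sketch below; verify against the linked quickstart file.

```python
# Presumed imports for the quickstart snippet above. The vocode module paths
# are taken from the old version of this file; pydantic_settings is inferred
# from the BaseSettings/SettingsConfigDict usage.
from pydantic_settings import BaseSettings, SettingsConfigDict

from vocode.helpers import create_turn_based_microphone_input_and_speaker_output
from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
from vocode.turn_based.synthesizer.azure_synthesizer import AzureSynthesizer
from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
from vocode.turn_based.turn_based_conversation import TurnBasedConversation
```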