Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge In Recent Fixes #14

Merged
merged 14 commits into from
Jun 7, 2024
610 changes: 315 additions & 295 deletions poetry.lock

Large diffs are not rendered by default.

36 changes: 12 additions & 24 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,46 +11,46 @@ homepage = "https://github.com/vocodedev/vocode-python"
[tool.poetry.dependencies]
python = ">=3.10,<4.0"
aiohttp = "^3.9.5"
anthropic = "^0.28.0"
azure-cognitiveservices-speech = "^1.37.0"
elevenlabs = "^1.2.2"
fastapi = "^0.111.0"
janus = "^1.0.0"
jinja2 = "^3.1.4"
jsonschema = "^4.22.0"
loguru = "^0.7.2"
numpy = "^1.26.4"
openai = "^1.30.5"
opentelemetry-sdk = "^1.24.0"
phonenumbers = "^8.13.37"
pydantic = "^2.7.2"
pydantic-settings = "^2.3.0"
pyht = "^0.0.28"
redis = "^5.0.4"
requests = "^2.32.3"
sentry-sdk = { extras = ["fastapi"], version = "^2.3.1" }
sounddevice = "^0.4.7"
tiktoken = "^0.7.0"
uvicorn = "^0.30.0"
websockets = "^12.0"

# Agents
anthropic = { version = "^0.28.0", optional = true }
openai = { version = "^1.30.5", optional = true }
tiktoken = { version = "0.7.0", optional = true }

# Synthesizers
azure-cognitiveservices-speech = { version = "^1.37.0", optional = true }
elevenlabs = { version = "^1.2.2", optional = true }
google-cloud-texttospeech = { version = "^2.16.3", optional = true }
miniaudio = { version = "^1.59", optional = true }
nltk = { version = "^3.8.1", optional = true }
pvkoala = { version = "^2.0.1", optional = true }
pydub = { version = "^0.25.1", optional = true }
pyht = { version = "^0.0.28", optional = true }

# Transcribers
google-cloud-speech = { version = "^2.26.0", optional = true }

# Telephony
redis = { version = "^5.0.4", optional = true }
twilio = { version = "^9.1.0", optional = true }
vonage = { version = "^3.14.0", optional = true }

# Misc
langchain = { version = "^0.2.1", optional = true }
langchain-community = { version = "^0.2.1", optional = true }
sentry-sdk = { extras = ["fastapi"], version = "^2.3.1", optional = true }


[tool.poetry.group.lint.dependencies]
Expand All @@ -76,39 +76,27 @@ pytest-httpx = "^0.30.0"
pytest-mock = "^3.14.0"

[tool.poetry.extras]
agents = ["anthropic", "openai", "tiktoken"]
synthesizers = [
"azure-cognitiveservices-speech",
"elevenlabs",
"google-cloud-texttospeech",
"miniaudio",
"nltk",
"pvkoala",
"pydub",
"pyht",
]
transcribers = ["google-cloud-speech"]
telephony = ["twilio", "redis", "vonage"]
misc = ["langchain", "langchain-community", "sentry-sdk"]
telephony = ["twilio", "vonage"]
misc = ["langchain", "langchain-community"]
all = [
"anthropic",
"openai",
"tiktoken",
"azure-cognitiveservices-speech",
"elevenlabs",
"google-cloud-texttospeech",
"miniaudio",
"nltk",
"pvkoala",
"pydub",
"pyht",
"google-cloud-speech",
"twilio",
"redis",
"vonage",
"langchain",
"langchain-community",
"sentry-sdk",
]

[tool.mypy]
Expand Down
42 changes: 34 additions & 8 deletions quickstarts/streaming_conversation.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,46 @@
import asyncio
import signal

from dotenv import load_dotenv
from pydantic_settings import BaseSettings, SettingsConfigDict

from vocode.helpers import create_streaming_microphone_input_and_speaker_output
from vocode.logging import configure_pretty_logging
from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent
from vocode.streaming.models.agent import ChatGPTAgentConfig
from vocode.streaming.models.message import BaseMessage
from vocode.streaming.models.synthesizer import AzureSynthesizerConfig
from vocode.streaming.models.transcriber import (
DeepgramTranscriberConfig,
PunctuationEndpointingConfig,
)
from vocode.streaming.streaming_conversation import StreamingConversation
from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer
from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber

load_dotenv()
configure_pretty_logging()

from vocode.helpers import create_streaming_microphone_input_and_speaker_output
from vocode.streaming.models.message import BaseMessage
from vocode.streaming.streaming_conversation import StreamingConversation

configure_pretty_logging()
class Settings(BaseSettings):
    """
    Settings for the streaming conversation quickstart.
    These parameters can be configured with environment variables.
    """

    # Placeholder defaults — replace via environment variables or a .env file
    # before running the quickstart.
    openai_api_key: str = "ENTER_YOUR_OPENAI_API_KEY_HERE"
    azure_speech_key: str = "ENTER_YOUR_AZURE_KEY_HERE"
    deepgram_api_key: str = "ENTER_YOUR_DEEPGRAM_API_KEY_HERE"

    # Azure Speech region used by the synthesizer.
    azure_speech_region: str = "eastus"

    # This means a .env file can be used to overload these settings
    # ex: "OPENAI_API_KEY=my_key" will set openai_api_key over the default above
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
    )


# Module-level singleton read by main() below.
settings = Settings()


async def main():
Expand All @@ -38,15 +58,21 @@ async def main():
DeepgramTranscriberConfig.from_input_device(
microphone_input,
endpointing_config=PunctuationEndpointingConfig(),
)
api_key=settings.deepgram_api_key,
),
),
agent=ChatGPTAgent(
ChatGPTAgentConfig(
openai_api_key=settings.openai_api_key,
initial_message=BaseMessage(text="What up"),
prompt_preamble="""The AI is having a pleasant conversation about life""",
)
),
synthesizer=AzureSynthesizer(AzureSynthesizerConfig.from_output_device(speaker_output)),
synthesizer=AzureSynthesizer(
AzureSynthesizerConfig.from_output_device(speaker_output),
azure_speech_key=settings.azure_speech_key,
azure_speech_region=settings.azure_speech_region,
),
)
await conversation.start()
print("Conversation started, press Ctrl+C to end")
Expand Down
34 changes: 25 additions & 9 deletions quickstarts/turn_based_conversation.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,32 @@
from dotenv import load_dotenv
from pydantic_settings import BaseSettings, SettingsConfigDict

from vocode import getenv
from vocode.helpers import create_turn_based_microphone_input_and_speaker_output
from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
from vocode.turn_based.synthesizer.azure_synthesizer import AzureSynthesizer
from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
from vocode.turn_based.turn_based_conversation import TurnBasedConversation

load_dotenv()

# See https://api.elevenlabs.io/v1/voices
ADAM_VOICE_ID = "pNInz6obpgDQGcFmaJgB"
class Settings(BaseSettings):
    """
    Settings for the turn-based conversation quickstart.
    These parameters can be configured with environment variables.
    """

    # Placeholder defaults — replace via environment variables or a .env file
    # before running the quickstart.
    openai_api_key: str = "ENTER_YOUR_OPENAI_API_KEY_HERE"
    azure_speech_key: str = "ENTER_YOUR_AZURE_KEY_HERE"

    # Azure Speech region used by the synthesizer.
    azure_speech_region: str = "eastus"

    # This means a .env file can be used to overload these settings
    # ex: "OPENAI_API_KEY=my_key" will set openai_api_key over the default above
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
    )


# Module-level singleton read by the __main__ block below.
settings = Settings()

if __name__ == "__main__":
(
Expand All @@ -23,15 +39,15 @@
conversation = TurnBasedConversation(
input_device=microphone_input,
output_device=speaker_output,
transcriber=WhisperTranscriber(api_key=getenv("OPENAI_API_KEY")),
transcriber=WhisperTranscriber(api_key=settings.openai_api_key),
agent=ChatGPTAgent(
system_prompt="The AI is having a pleasant conversation about life",
initial_message="Hello!",
api_key=getenv("OPENAI_API_KEY"),
api_key=settings.openai_api_key,
),
synthesizer=AzureSynthesizer(
api_key=getenv("AZURE_SPEECH_KEY"),
region=getenv("AZURE_SPEECH_REGION"),
api_key=settings.azure_speech_key,
region=settings.azure_speech_region,
voice_name="en-US-SteffanNeural",
),
)
Expand Down
4 changes: 4 additions & 0 deletions vocode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,13 @@
import sentry_sdk
from loguru import logger

from vocode.meta import ensure_punkt_installed

environment = {}
logger.disable("vocode")

ensure_punkt_installed()


class ContextWrapper:
"""Context Variable Wrapper."""
Expand Down
15 changes: 15 additions & 0 deletions vocode/meta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from loguru import logger


def ensure_punkt_installed():
    """Ensure NLTK's 'punkt' tokenizer data is present, downloading it on demand.

    nltk is imported lazily so the dependency is only required when this runs.
    """
    from nltk.data import find

    try:
        find("tokenizers/punkt")
    except LookupError:
        # 'punkt' is not on the local NLTK data path; fetch it once.
        from nltk import download

        logger.info("Downloading 'punkt' tokenizer...")
        download("punkt")
        logger.info("'punkt' tokenizer downloaded successfully.")
81 changes: 66 additions & 15 deletions vocode/streaming/agent/chat_gpt_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

import sentry_sdk
from loguru import logger
from openai import AsyncAzureOpenAI, AsyncOpenAI
from openai import DEFAULT_MAX_RETRIES as OPENAI_DEFAULT_MAX_RETRIES
from openai import AsyncAzureOpenAI, AsyncOpenAI, NotFoundError, RateLimitError

from vocode import sentry_span_tags
from vocode.streaming.action.abstract_factory import AbstractActionFactory
Expand All @@ -27,6 +28,24 @@
ChatGPTAgentConfigType = TypeVar("ChatGPTAgentConfigType", bound=ChatGPTAgentConfig)


def instantiate_openai_client(agent_config: ChatGPTAgentConfig, model_fallback: bool = False):
    """Build the async (Azure) OpenAI client described by *agent_config*.

    When *model_fallback* is True, retries are disabled (max_retries=0) so that
    a failing request surfaces immediately and the fallback model can be used.
    """
    retries = 0 if model_fallback else OPENAI_DEFAULT_MAX_RETRIES
    if agent_config.azure_params:
        azure = agent_config.azure_params
        return AsyncAzureOpenAI(
            azure_endpoint=azure.base_url,
            api_key=azure.api_key,
            api_version=azure.api_version,
            max_retries=retries,
        )
    if agent_config.openai_api_key is not None:
        logger.info("Using OpenAI API key override")
    return AsyncOpenAI(
        api_key=agent_config.openai_api_key or os.environ["OPENAI_API_KEY"],
        base_url="https://api.openai.com/v1",
        max_retries=retries,
    )


class ChatGPTAgent(RespondAgent[ChatGPTAgentConfigType]):
openai_client: Union[AsyncOpenAI, AsyncAzureOpenAI]

Expand All @@ -42,19 +61,9 @@ def __init__(
action_factory=action_factory,
**kwargs,
)
if agent_config.azure_params:
self.openai_client = AsyncAzureOpenAI(
azure_endpoint=agent_config.azure_params.base_url,
api_key=agent_config.azure_params.api_key,
api_version=agent_config.azure_params.api_version,
)
else:
if agent_config.openai_api_key is not None:
logger.info("Using OpenAI API key override")
self.openai_client = AsyncOpenAI(
api_key=agent_config.openai_api_key or os.environ["OPENAI_API_KEY"],
base_url="https://api.openai.com/v1",
)
self.openai_client = instantiate_openai_client(
agent_config, model_fallback=agent_config.llm_fallback is not None
)

if not self.openai_client.api_key:
raise ValueError("OPENAI_API_KEY must be set in environment or passed in")
Expand Down Expand Up @@ -109,9 +118,37 @@ def get_model_name_for_tokenizer(self):
else:
return self.agent_config.azure_params.openai_model_name

async def _create_openai_stream(self, chat_parameters: Dict[str, Any]) -> AsyncGenerator:
def apply_model_fallback(self, chat_parameters: Dict[str, Any]) -> None:
    """Switch this agent to its configured fallback LLM, in place.

    Mutates self.agent_config to point at the fallback model, rebuilds
    self.openai_client from the updated config, and rewrites
    chat_parameters["model"] so the retried request targets the fallback.
    No-op when no fallback is configured.
    """
    if self.agent_config.llm_fallback is None:
        return
    if self.agent_config.llm_fallback.provider == "openai":
        self.agent_config.model_name = self.agent_config.llm_fallback.model_name
        if isinstance(self.openai_client, AsyncAzureOpenAI):
            # Falling back from Azure to plain OpenAI: clear azure_params so
            # instantiate_openai_client builds an AsyncOpenAI client below.
            self.agent_config.azure_params = None
    else:
        if self.agent_config.azure_params:
            self.agent_config.azure_params.deployment_name = (
                self.agent_config.llm_fallback.model_name
            )
        if isinstance(self.openai_client, AsyncOpenAI):
            # TODO: handle OpenAI fallback to Azure
            pass

    # model_fallback=False: the fallback client keeps default retries, since
    # there is no further model to fall back to after this one.
    self.openai_client = instantiate_openai_client(self.agent_config, model_fallback=False)
    chat_parameters["model"] = self.agent_config.llm_fallback.model_name

async def _create_openai_stream_with_fallback(
self, chat_parameters: Dict[str, Any]
) -> AsyncGenerator:
try:
stream = await self.openai_client.chat.completions.create(**chat_parameters)
except (NotFoundError, RateLimitError) as e:
logger.error(
f"{'Model not found' if isinstance(e, NotFoundError) else 'Rate limit error'} for model_name: {chat_parameters.get('model')}. Applying fallback.",
exc_info=True,
)
self.apply_model_fallback(chat_parameters)
stream = await self.openai_client.chat.completions.create(**chat_parameters)
except Exception as e:
logger.error(
f"Error while hitting OpenAI with chat_parameters: {chat_parameters}",
Expand All @@ -120,6 +157,20 @@ async def _create_openai_stream(self, chat_parameters: Dict[str, Any]) -> AsyncG
raise e
return stream

async def _create_openai_stream(self, chat_parameters: Dict[str, Any]) -> AsyncGenerator:
    """Open a chat-completions stream, routing through the fallback path when enabled."""
    fallback_enabled = (
        self.agent_config.llm_fallback is not None and self.openai_client.max_retries == 0
    )
    if fallback_enabled:
        return await self._create_openai_stream_with_fallback(chat_parameters)
    try:
        return await self.openai_client.chat.completions.create(**chat_parameters)
    except Exception as e:
        logger.error(
            f"Error while hitting OpenAI with chat_parameters: {chat_parameters}",
            exc_info=True,
        )
        raise e

def should_backchannel(self, human_input: str) -> bool:
return (
not self.is_first_response()
Expand Down
1 change: 0 additions & 1 deletion vocode/streaming/agent/default_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
AnthropicAgentConfig,
ChatGPTAgentConfig,
EchoAgentConfig,
LlamacppAgentConfig,
RESTfulUserImplementedAgentConfig,
)

Expand Down
Loading
Loading