Skip to content

Commit cfd6226

Browse files
author
Mac Wilkinson
authored
Merge In Recent Fixes (#14)
1 parent 239addb commit cfd6226

24 files changed

+713
-543
lines changed

poetry.lock

+315-295
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

+12-24
Original file line numberDiff line numberDiff line change
@@ -11,46 +11,46 @@ homepage = "https://github.com/vocodedev/vocode-python"
1111
[tool.poetry.dependencies]
1212
python = ">=3.10,<4.0"
1313
aiohttp = "^3.9.5"
14+
anthropic = "^0.28.0"
15+
azure-cognitiveservices-speech = "^1.37.0"
16+
elevenlabs = "^1.2.2"
1417
fastapi = "^0.111.0"
1518
janus = "^1.0.0"
1619
jinja2 = "^3.1.4"
1720
jsonschema = "^4.22.0"
1821
loguru = "^0.7.2"
22+
numpy = "^1.26.4"
23+
openai = "^1.30.5"
1924
opentelemetry-sdk = "^1.24.0"
2025
phonenumbers = "^8.13.37"
2126
pydantic = "^2.7.2"
27+
pydantic-settings = "^2.3.0"
28+
pyht = "^0.0.28"
29+
redis = "^5.0.4"
2230
requests = "^2.32.3"
31+
sentry-sdk = { extras = ["fastapi"], version = "^2.3.1" }
2332
sounddevice = "^0.4.7"
33+
tiktoken = "^0.7.0"
2434
uvicorn = "^0.30.0"
2535
websockets = "^12.0"
2636

27-
# Agents
28-
anthropic = { version = "^0.28.0", optional = true }
29-
openai = { version = "^1.30.5", optional = true }
30-
tiktoken = { version = "0.7.0", optional = true }
31-
3237
# Synthesizers
33-
azure-cognitiveservices-speech = { version = "^1.37.0", optional = true }
34-
elevenlabs = { version = "^1.2.2", optional = true }
3538
google-cloud-texttospeech = { version = "^2.16.3", optional = true }
3639
miniaudio = { version = "^1.59", optional = true }
3740
nltk = { version = "^3.8.1", optional = true }
3841
pvkoala = { version = "^2.0.1", optional = true }
3942
pydub = { version = "^0.25.1", optional = true }
40-
pyht = { version = "^0.0.28", optional = true }
4143

4244
# Transcribers
4345
google-cloud-speech = { version = "^2.26.0", optional = true }
4446

4547
# Telephony
46-
redis = { version = "^5.0.4", optional = true }
4748
twilio = { version = "^9.1.0", optional = true }
4849
vonage = { version = "^3.14.0", optional = true }
4950

5051
# Misc
5152
langchain = { version = "^0.2.1", optional = true }
5253
langchain-community = { version = "^0.2.1", optional = true }
53-
sentry-sdk = { extras = ["fastapi"], version = "^2.3.1", optional = true }
5454

5555

5656
[tool.poetry.group.lint.dependencies]
@@ -76,39 +76,27 @@ pytest-httpx = "^0.30.0"
7676
pytest-mock = "^3.14.0"
7777

7878
[tool.poetry.extras]
79-
agents = ["anthropic", "openai", "tiktoken"]
8079
synthesizers = [
81-
"azure-cognitiveservices-speech",
82-
"elevenlabs",
8380
"google-cloud-texttospeech",
8481
"miniaudio",
8582
"nltk",
8683
"pvkoala",
8784
"pydub",
88-
"pyht",
8985
]
9086
transcribers = ["google-cloud-speech"]
91-
telephony = ["twilio", "redis", "vonage"]
92-
misc = ["langchain", "langchain-community", "sentry-sdk"]
87+
telephony = ["twilio", "vonage"]
88+
misc = ["langchain", "langchain-community"]
9389
all = [
94-
"anthropic",
95-
"openai",
96-
"tiktoken",
97-
"azure-cognitiveservices-speech",
98-
"elevenlabs",
9990
"google-cloud-texttospeech",
10091
"miniaudio",
10192
"nltk",
10293
"pvkoala",
10394
"pydub",
104-
"pyht",
10595
"google-cloud-speech",
10696
"twilio",
107-
"redis",
10897
"vonage",
10998
"langchain",
11099
"langchain-community",
111-
"sentry-sdk",
112100
]
113101

114102
[tool.mypy]

quickstarts/streaming_conversation.py

+34-8
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,46 @@
11
import asyncio
22
import signal
33

4-
from dotenv import load_dotenv
4+
from pydantic_settings import BaseSettings, SettingsConfigDict
55

6+
from vocode.helpers import create_streaming_microphone_input_and_speaker_output
67
from vocode.logging import configure_pretty_logging
78
from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent
89
from vocode.streaming.models.agent import ChatGPTAgentConfig
10+
from vocode.streaming.models.message import BaseMessage
911
from vocode.streaming.models.synthesizer import AzureSynthesizerConfig
1012
from vocode.streaming.models.transcriber import (
1113
DeepgramTranscriberConfig,
1214
PunctuationEndpointingConfig,
1315
)
16+
from vocode.streaming.streaming_conversation import StreamingConversation
1417
from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer
1518
from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber
1619

17-
load_dotenv()
20+
configure_pretty_logging()
1821

19-
from vocode.helpers import create_streaming_microphone_input_and_speaker_output
20-
from vocode.streaming.models.message import BaseMessage
21-
from vocode.streaming.streaming_conversation import StreamingConversation
2222

23-
configure_pretty_logging()
23+
class Settings(BaseSettings):
24+
"""
25+
Settings for the streaming conversation quickstart.
26+
These parameters can be configured with environment variables.
27+
"""
28+
29+
openai_api_key: str = "ENTER_YOUR_OPENAI_API_KEY_HERE"
30+
azure_speech_key: str = "ENTER_YOUR_AZURE_KEY_HERE"
31+
deepgram_api_key: str = "ENTER_YOUR_DEEPGRAM_API_KEY_HERE"
32+
33+
azure_speech_region: str = "eastus"
34+
35+
# This means a .env file can be used to override these settings
36+
# ex: "OPENAI_API_KEY=my_key" will set openai_api_key over the default above
37+
model_config = SettingsConfigDict(
38+
env_file=".env",
39+
env_file_encoding="utf-8",
40+
)
41+
42+
43+
settings = Settings()
2444

2545

2646
async def main():
@@ -38,15 +58,21 @@ async def main():
3858
DeepgramTranscriberConfig.from_input_device(
3959
microphone_input,
4060
endpointing_config=PunctuationEndpointingConfig(),
41-
)
61+
api_key=settings.deepgram_api_key,
62+
),
4263
),
4364
agent=ChatGPTAgent(
4465
ChatGPTAgentConfig(
66+
openai_api_key=settings.openai_api_key,
4567
initial_message=BaseMessage(text="What up"),
4668
prompt_preamble="""The AI is having a pleasant conversation about life""",
4769
)
4870
),
49-
synthesizer=AzureSynthesizer(AzureSynthesizerConfig.from_output_device(speaker_output)),
71+
synthesizer=AzureSynthesizer(
72+
AzureSynthesizerConfig.from_output_device(speaker_output),
73+
azure_speech_key=settings.azure_speech_key,
74+
azure_speech_region=settings.azure_speech_region,
75+
),
5076
)
5177
await conversation.start()
5278
print("Conversation started, press Ctrl+C to end")

quickstarts/turn_based_conversation.py

+25-9
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,32 @@
1-
from dotenv import load_dotenv
1+
from pydantic_settings import BaseSettings, SettingsConfigDict
22

3-
from vocode import getenv
43
from vocode.helpers import create_turn_based_microphone_input_and_speaker_output
54
from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
65
from vocode.turn_based.synthesizer.azure_synthesizer import AzureSynthesizer
76
from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
87
from vocode.turn_based.turn_based_conversation import TurnBasedConversation
98

10-
load_dotenv()
119

12-
# See https://api.elevenlabs.io/v1/voices
13-
ADAM_VOICE_ID = "pNInz6obpgDQGcFmaJgB"
10+
class Settings(BaseSettings):
11+
"""
12+
Settings for the turn-based conversation quickstart.
13+
These parameters can be configured with environment variables.
14+
"""
15+
16+
openai_api_key: str = "ENTER_YOUR_OPENAI_API_KEY_HERE"
17+
azure_speech_key: str = "ENTER_YOUR_AZURE_KEY_HERE"
18+
19+
azure_speech_region: str = "eastus"
20+
21+
# This means a .env file can be used to override these settings
22+
# ex: "OPENAI_API_KEY=my_key" will set openai_api_key over the default above
23+
model_config = SettingsConfigDict(
24+
env_file=".env",
25+
env_file_encoding="utf-8",
26+
)
27+
28+
29+
settings = Settings()
1430

1531
if __name__ == "__main__":
1632
(
@@ -23,15 +39,15 @@
2339
conversation = TurnBasedConversation(
2440
input_device=microphone_input,
2541
output_device=speaker_output,
26-
transcriber=WhisperTranscriber(api_key=getenv("OPENAI_API_KEY")),
42+
transcriber=WhisperTranscriber(api_key=settings.openai_api_key),
2743
agent=ChatGPTAgent(
2844
system_prompt="The AI is having a pleasant conversation about life",
2945
initial_message="Hello!",
30-
api_key=getenv("OPENAI_API_KEY"),
46+
api_key=settings.openai_api_key,
3147
),
3248
synthesizer=AzureSynthesizer(
33-
api_key=getenv("AZURE_SPEECH_KEY"),
34-
region=getenv("AZURE_SPEECH_REGION"),
49+
api_key=settings.azure_speech_key,
50+
region=settings.azure_speech_region,
3551
voice_name="en-US-SteffanNeural",
3652
),
3753
)

vocode/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,13 @@
66
import sentry_sdk
77
from loguru import logger
88

9+
from vocode.meta import ensure_punkt_installed
10+
911
environment = {}
1012
logger.disable("vocode")
1113

14+
ensure_punkt_installed()
15+
1216

1317
class ContextWrapper:
1418
"""Context Variable Wrapper."""

vocode/meta.py

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from loguru import logger
2+
3+
4+
def ensure_punkt_installed():
    """Make sure NLTK's 'punkt' tokenizer data is present, downloading it if missing.

    Does nothing when ``nltk`` itself cannot be imported. Returns ``None`` in
    every case; a failed download is logged as a warning instead of raising.
    """
    try:
        from nltk.data import find
    except ImportError:
        # nltk is an optional dependency, and this function is called while
        # the package is being imported, so a missing nltk must not turn into
        # an import-time crash.
        return

    try:
        find("tokenizers/punkt")
    except LookupError:
        from nltk import download

        # If not installed, download 'punkt'
        logger.info("Downloading 'punkt' tokenizer...")
        # download() reports failure through its return value rather than by
        # raising, so only claim success when it says so.
        if download("punkt"):
            logger.info("'punkt' tokenizer downloaded successfully.")
        else:
            logger.warning("Failed to download 'punkt' tokenizer.")

vocode/streaming/agent/chat_gpt_agent.py

+66-15
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55
import sentry_sdk
66
from loguru import logger
7-
from openai import AsyncAzureOpenAI, AsyncOpenAI
7+
from openai import DEFAULT_MAX_RETRIES as OPENAI_DEFAULT_MAX_RETRIES
8+
from openai import AsyncAzureOpenAI, AsyncOpenAI, NotFoundError, RateLimitError
89

910
from vocode import sentry_span_tags
1011
from vocode.streaming.action.abstract_factory import AbstractActionFactory
@@ -27,6 +28,24 @@
2728
ChatGPTAgentConfigType = TypeVar("ChatGPTAgentConfigType", bound=ChatGPTAgentConfig)
2829

2930

31+
def instantiate_openai_client(agent_config: ChatGPTAgentConfig, model_fallback: bool = False):
    """Build the async OpenAI client described by ``agent_config``.

    When ``agent_config.azure_params`` is set, an ``AsyncAzureOpenAI`` client is
    returned using its endpoint, key and API version. Otherwise an
    ``AsyncOpenAI`` client pointed at ``https://api.openai.com/v1`` is returned,
    using ``agent_config.openai_api_key`` or, if that is empty, the
    ``OPENAI_API_KEY`` environment variable.

    Args:
        agent_config: Agent configuration holding the credentials.
        model_fallback: When true the client is created with ``max_retries=0``;
            otherwise the OpenAI SDK default retry count is used.

    Raises:
        KeyError: No key is configured and ``OPENAI_API_KEY`` is not set.
    """
    # NOTE(review): presumably retries are disabled so a failing request reaches
    # the caller's fallback handling immediately -- confirm against the caller.
    retry_budget = 0 if model_fallback else OPENAI_DEFAULT_MAX_RETRIES

    azure = agent_config.azure_params
    if azure:
        return AsyncAzureOpenAI(
            azure_endpoint=azure.base_url,
            api_key=azure.api_key,
            api_version=azure.api_version,
            max_retries=retry_budget,
        )

    if agent_config.openai_api_key is not None:
        logger.info("Using OpenAI API key override")
    return AsyncOpenAI(
        api_key=agent_config.openai_api_key or os.environ["OPENAI_API_KEY"],
        base_url="https://api.openai.com/v1",
        max_retries=retry_budget,
    )
47+
48+
3049
class ChatGPTAgent(RespondAgent[ChatGPTAgentConfigType]):
3150
openai_client: Union[AsyncOpenAI, AsyncAzureOpenAI]
3251

@@ -42,19 +61,9 @@ def __init__(
4261
action_factory=action_factory,
4362
**kwargs,
4463
)
45-
if agent_config.azure_params:
46-
self.openai_client = AsyncAzureOpenAI(
47-
azure_endpoint=agent_config.azure_params.base_url,
48-
api_key=agent_config.azure_params.api_key,
49-
api_version=agent_config.azure_params.api_version,
50-
)
51-
else:
52-
if agent_config.openai_api_key is not None:
53-
logger.info("Using OpenAI API key override")
54-
self.openai_client = AsyncOpenAI(
55-
api_key=agent_config.openai_api_key or os.environ["OPENAI_API_KEY"],
56-
base_url="https://api.openai.com/v1",
57-
)
64+
self.openai_client = instantiate_openai_client(
65+
agent_config, model_fallback=agent_config.llm_fallback is not None
66+
)
5867

5968
if not self.openai_client.api_key:
6069
raise ValueError("OPENAI_API_KEY must be set in environment or passed in")
@@ -109,9 +118,37 @@ def get_model_name_for_tokenizer(self):
109118
else:
110119
return self.agent_config.azure_params.openai_model_name
111120

112-
async def _create_openai_stream(self, chat_parameters: Dict[str, Any]) -> AsyncGenerator:
121+
def apply_model_fallback(self, chat_parameters: Dict[str, Any]):
    """Switch this agent over to its configured fallback model.

    No-op when ``agent_config.llm_fallback`` is ``None``. Otherwise the agent
    config is mutated to point at the fallback model, ``self.openai_client`` is
    rebuilt from the updated config (with ``model_fallback=False``), and
    ``chat_parameters["model"]`` is overwritten in place with the fallback
    model name.
    """
    fallback = self.agent_config.llm_fallback
    if fallback is None:
        return

    if fallback.provider == "openai":
        self.agent_config.model_name = fallback.model_name
        # Leaving Azure: drop the Azure settings so the rebuilt client is a
        # plain OpenAI one.
        if isinstance(self.openai_client, AsyncAzureOpenAI):
            self.agent_config.azure_params = None
    else:
        azure_params = self.agent_config.azure_params
        if azure_params:
            azure_params.deployment_name = fallback.model_name
        # TODO: handle OpenAI fallback to Azure

    self.openai_client = instantiate_openai_client(self.agent_config, model_fallback=False)
    chat_parameters["model"] = fallback.model_name
140+
async def _create_openai_stream_with_fallback(
141+
self, chat_parameters: Dict[str, Any]
142+
) -> AsyncGenerator:
113143
try:
114144
stream = await self.openai_client.chat.completions.create(**chat_parameters)
145+
except (NotFoundError, RateLimitError) as e:
146+
logger.error(
147+
f"{'Model not found' if isinstance(e, NotFoundError) else 'Rate limit error'} for model_name: {chat_parameters.get('model')}. Applying fallback.",
148+
exc_info=True,
149+
)
150+
self.apply_model_fallback(chat_parameters)
151+
stream = await self.openai_client.chat.completions.create(**chat_parameters)
115152
except Exception as e:
116153
logger.error(
117154
f"Error while hitting OpenAI with chat_parameters: {chat_parameters}",
@@ -120,6 +157,20 @@ async def _create_openai_stream(self, chat_parameters: Dict[str, Any]) -> AsyncG
120157
raise e
121158
return stream
122159

160+
async def _create_openai_stream(self, chat_parameters: Dict[str, Any]) -> AsyncGenerator:
    """Issue the chat-completions request and return its result.

    When a fallback model is configured and the current client was created
    with ``max_retries == 0``, the request is delegated to
    ``_create_openai_stream_with_fallback``. Otherwise the request is sent
    directly; any exception is logged with its traceback and re-raised.

    Args:
        chat_parameters: Keyword arguments forwarded to
            ``openai_client.chat.completions.create``.

    Returns:
        Whatever ``chat.completions.create`` (or the fallback helper) returns.
    """
    if self.agent_config.llm_fallback is not None and self.openai_client.max_retries == 0:
        return await self._create_openai_stream_with_fallback(chat_parameters)

    try:
        return await self.openai_client.chat.completions.create(**chat_parameters)
    except Exception:
        # loguru runs str.format() on the message whenever args/kwargs are
        # supplied, so a pre-rendered dict repr (full of braces) passed with
        # `exc_info=True` would itself raise and hide the real error. Pass the
        # dict as a format argument and let logger.exception attach the
        # traceback instead.
        logger.exception(
            "Error while hitting OpenAI with chat_parameters: {}", chat_parameters
        )
        raise
173+
123174
def should_backchannel(self, human_input: str) -> bool:
124175
return (
125176
not self.is_first_response()

vocode/streaming/agent/default_factory.py

-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
AnthropicAgentConfig,
1010
ChatGPTAgentConfig,
1111
EchoAgentConfig,
12-
LlamacppAgentConfig,
1312
RESTfulUserImplementedAgentConfig,
1413
)
1514

0 commit comments

Comments
 (0)