
Commit 2b27d48

[docs sprint] python quickstart + working with phone calls (#27)
* deprecate SpeakerOutput
* remove play.ht default voice id
* rename open source quickstarts page
* remove building block reference
* update python quickstart
* extra steps to deprecate speakeroutput
* finish telephony docs
* fix some references + language in how-to-use-it
* fix test
Parent: 3a85f3d

15 files changed: +113 −101 lines

apps/telephony_app/speller_agent.py (+4 −11)

```diff
@@ -1,4 +1,3 @@
-import typing
 from typing import Optional, Tuple
 
 from vocode.streaming.agent.abstract_factory import AbstractAgentFactory
@@ -65,16 +64,10 @@ def create_agent(self, agent_config: AgentConfig) -> BaseAgent:
             Exception: If the agent configuration type is not recognized.
         """
         # If the agent configuration type is CHAT_GPT, create a ChatGPTAgent.
-        if agent_config.type == AgentType.CHAT_GPT:
-            return ChatGPTAgent(
-                # Cast the agent configuration to ChatGPTAgentConfig as we are sure about the type here.
-                agent_config=typing.cast(ChatGPTAgentConfig, agent_config)
-            )
+        if isinstance(agent_config, ChatGPTAgentConfig):
+            return ChatGPTAgent(agent_config=agent_config)
         # If the agent configuration type is agent_speller, create a SpellerAgent.
-        elif agent_config.type == "agent_speller":
-            return SpellerAgent(
-                # Cast the agent configuration to SpellerAgentConfig as we are sure about the type here.
-                agent_config=typing.cast(SpellerAgentConfig, agent_config)
-            )
+        elif isinstance(agent_config, SpellerAgentConfig):
+            return SpellerAgent(agent_config=agent_config)
         # If the agent configuration type is not recognized, raise an exception.
         raise Exception("Invalid agent config")
```
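
The switch above from checking `agent_config.type` and then `typing.cast`-ing to plain `isinstance` checks lets a static type checker narrow the config type on its own. A standalone sketch of that narrowing, using toy dataclasses rather than vocode's real config classes:

```python
from dataclasses import dataclass


@dataclass
class AgentConfig:
    type: str = "agent_base"


@dataclass
class ChatGPTAgentConfig(AgentConfig):
    type: str = "agent_chat_gpt"
    prompt_preamble: str = ""


def describe(agent_config: AgentConfig) -> str:
    if isinstance(agent_config, ChatGPTAgentConfig):
        # agent_config is narrowed to ChatGPTAgentConfig here, so accessing
        # .prompt_preamble type-checks without a cast.
        return f"ChatGPT agent: {agent_config.prompt_preamble}"
    raise Exception("Invalid agent config")


print(describe(ChatGPTAgentConfig(prompt_preamble="Have a pleasant conversation")))
```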

docs/mint.json (+3 −17)

```diff
@@ -49,11 +49,7 @@
   "navigation": [
     {
       "group": "Getting Started",
-      "pages": [
-        "welcome",
-        "hosted-quickstart",
-        "open-source-quickstart"
-      ]
+      "pages": ["welcome", "hosted-quickstart", "open-source-quickstarts"]
     },
     {
       "group": "Vocode 101",
@@ -85,14 +81,6 @@
         "open-source/agent-factory"
       ]
     },
-    {
-      "group": "Python",
-      "pages": [
-        "open-source/transcriber-reference",
-        "open-source/agent-reference",
-        "open-source/synthesizer-reference"
-      ]
-    },
     {
       "group": "Numbers",
       "pages": [
@@ -115,9 +103,7 @@
     },
     {
       "group": "Usage",
-      "pages": [
-        "api-reference/usage/get-usage"
-      ]
+      "pages": ["api-reference/usage/get-usage"]
     },
     {
       "group": "Actions",
@@ -231,4 +217,4 @@
     "twitter": "https://twitter.com/vocodehq",
     "website": "https://www.vocode.dev/"
   }
-}
+}
```

docs/open-source-quickstart.mdx → docs/open-source-quickstarts.mdx (+6 −2)

```diff
@@ -1,12 +1,16 @@
 ---
-title: "Open Source Quickstart"
+title: "Open Source Quickstarts"
 description: "How to get Vocode up and running on your own machine"
 ---
 
 ## Start Developing
 
 <CardGroup>
-  <Card title="Python Quick Start" icon="circle-play" href="/open-source/python-quickstart">
+  <Card
+    title="Python Quick Start"
+    icon="circle-play"
+    href="/open-source/python-quickstart"
+  >
     Quickly get up and running with Vocode by following our Python quick start
     guide.
   </Card>
```

docs/open-source/how-to-use-it.mdx (+1 −2)

```diff
@@ -5,8 +5,7 @@ description: "Various ways to utilize Vocode."
 
 ## Understanding Our Open Source Libraries
 
-Vocode's Open Source supports both Python and React, with plans for future support for
-additional languages.
+Vocode's Open Source supports Python, and a client library in React.
 
 ### Getting Started with the Open Source Python Library
 
```
docs/open-source/python-quickstart.mdx (+54 −37)

````diff
@@ -8,14 +8,11 @@ description: "Get up and running using Python"
 Install the [vocode package](https://pypi.org/project/vocode/):
 
 ```bash
-pip install 'vocode[io]'
+pip install vocode
 ```
 
 # Getting started
 
-The `io` extra installs the packages necessary to run our voice conversations locally, but is not needed for other surfaces, e.g. [phone calls](/open-source/telephony).
-You may need to install [portaudio](https://formulae.brew.sh/formula/portaudio) and [ffmpeg](https://formulae.brew.sh/formula/ffmpeg) on your system.
-
 ## Working with system audio
 
 We provide helper methods to hook into your system audio.
@@ -33,74 +30,94 @@ If the default I/O devices are not being set properly, set `use_default_devices`
 Vocode provides a unified interface across various speech transcription, speech synthesis, and AI/NLU providers.
 To use these providers with Vocode, you'll need to grab credentials from these providers and set them in the Vocode environment.
 
-```python
-# these can also be set as environment variables
-vocode.setenv(
-    OPENAI_API_KEY="<your OpenAI key>",
-    DEEPGRAM_API_KEY="<your Deepgram key>",
-    AZURE_SPEECH_KEY="<your Azure key>",
-    AZURE_SPEECH_REGION="<your Azure region>",
-)
-```
+You can either set the following parameters as environment variables (e.g. by specifying them in a `.env` file and using a package like `python-dotenv` to load), or set them manually in the pydantic settings (see below).
 
 For AZURE_SPEECH_REGION you should use the URL format. For example, if you're using the "East US" region, the value should be "eastus". See [Azure Region list](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech?tabs=streaming#prebuilt-neural-voices).
 
 ## `StreamingConversation` example
 
+This can also be found in the [`quickstarts` directory](https://github.com/vocodedev/vocode-core/blob/main/quickstarts/streaming_conversation.py) of the repo.
+
 ```python
 import asyncio
 import signal
 
-import vocode
-from vocode.streaming.streaming_conversation import StreamingConversation
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
 from vocode.helpers import create_streaming_microphone_input_and_speaker_output
-from vocode.streaming.models.transcriber import (
-    DeepgramTranscriberConfig,
-    PunctuationEndpointingConfig,
-)
+from vocode.logging import configure_pretty_logging
 from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent
 from vocode.streaming.models.agent import ChatGPTAgentConfig
 from vocode.streaming.models.message import BaseMessage
 from vocode.streaming.models.synthesizer import AzureSynthesizerConfig
+from vocode.streaming.models.transcriber import (
+    DeepgramTranscriberConfig,
+    PunctuationEndpointingConfig,
+)
+from vocode.streaming.streaming_conversation import StreamingConversation
 from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer
 from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber
 
-# these can also be set as environment variables
-vocode.setenv(
-    OPENAI_API_KEY="<your OpenAI key>",
-    DEEPGRAM_API_KEY="<your Deepgram key>",
-    AZURE_SPEECH_KEY="<your Azure key>",
-    AZURE_SPEECH_REGION="<your Azure region>",
-)
+configure_pretty_logging()
+
+
+class Settings(BaseSettings):
+    """
+    Settings for the streaming conversation quickstart.
+    These parameters can be configured with environment variables.
+    """
+
+    openai_api_key: str = "ENTER_YOUR_OPENAI_API_KEY_HERE"
+    azure_speech_key: str = "ENTER_YOUR_AZURE_KEY_HERE"
+    deepgram_api_key: str = "ENTER_YOUR_DEEPGRAM_API_KEY_HERE"
+
+    azure_speech_region: str = "eastus"
+
+    # This means a .env file can be used to overload these settings
+    # ex: "OPENAI_API_KEY=my_key" will set openai_api_key over the default above
+    model_config = SettingsConfigDict(
+        env_file=".env",
+        env_file_encoding="utf-8",
+    )
+
+
+settings = Settings()
 
 
 async def main():
-    microphone_input, speaker_output = create_streaming_microphone_input_and_speaker_output(
-        use_default_devices=True,
+    (
+        microphone_input,
+        speaker_output,
+    ) = create_streaming_microphone_input_and_speaker_output(
+        use_default_devices=False,
+        use_blocking_speaker_output=True,  # this moves the playback to a separate thread, set to False to use the main thread
     )
 
     conversation = StreamingConversation(
         output_device=speaker_output,
         transcriber=DeepgramTranscriber(
             DeepgramTranscriberConfig.from_input_device(
-                microphone_input, endpointing_config=PunctuationEndpointingConfig()
-            )
+                microphone_input,
+                endpointing_config=PunctuationEndpointingConfig(),
+                api_key=settings.deepgram_api_key,
+            ),
         ),
         agent=ChatGPTAgent(
             ChatGPTAgentConfig(
-                initial_message=BaseMessage(text="Hello!"),
-                prompt_preamble="Have a pleasant conversation about life",
-            ),
+                openai_api_key=settings.openai_api_key,
+                initial_message=BaseMessage(text="What up"),
+                prompt_preamble="""The AI is having a pleasant conversation about life""",
+            )
         ),
         synthesizer=AzureSynthesizer(
-            AzureSynthesizerConfig.from_output_device(speaker_output)
+            AzureSynthesizerConfig.from_output_device(speaker_output),
+            azure_speech_key=settings.azure_speech_key,
+            azure_speech_region=settings.azure_speech_region,
        ),
     )
     await conversation.start()
     print("Conversation started, press Ctrl+C to end")
-    signal.signal(
-        signal.SIGINT, lambda _0, _1: asyncio.create_task(conversation.terminate())
-    )
+    signal.signal(signal.SIGINT, lambda _0, _1: asyncio.create_task(conversation.terminate()))
     while conversation.is_active():
         chunk = await microphone_input.get_audio()
         conversation.receive_audio(chunk)
````
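
The quickstart now reads credentials through `pydantic-settings` instead of `vocode.setenv`. A standalone sketch of how that override works, assuming pydantic-settings v2 is installed; the key value below is a placeholder:

```python
import os

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    openai_api_key: str = "ENTER_YOUR_OPENAI_API_KEY_HERE"

    # Values from a .env file or the process environment override the default above.
    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")


os.environ["OPENAI_API_KEY"] = "sk-placeholder"  # same effect as OPENAI_API_KEY=... in .env
print(Settings().openai_api_key)  # -> "sk-placeholder"
```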

docs/open-source/telephony.mdx (+22 −11)

````diff
@@ -117,8 +117,6 @@ Make sure the server we just set up is already running. Then, in `outbound_call.
 
 Replace the `to_phone` with the number you want to call and the `from_phone` with the number you want to call from. In order to make a call from the `from_phone`, you must have access to it via Twilio (either a number purchased via Twilio or verify the caller ID).
 
-> Note: To ensure legal compliance with robocall regulations in California, the following code snippet from the [Vocode library](https://github.com/vocodedev/vocode-python/blob/main/vocode/streaming/telephony/conversation/outbound_call.py#L83-L96) utilizes Twilio Line Intelligence to check if calls are made to mobile phones: For Canadian phone numbers, the Twilio Lookup API may not return carrier data due to the Canadian Local Number Portability Consortium (CLNPC) requirements. More information on this issue can be found in the [Twilio Support Article](https://support.twilio.com/hc/en-us/articles/360004563433-Twilio-Lookup-API-is-Not-Returning-Carrier-Data-for-Canadian-Phone-Numbers).
-
 Run the script with `poetry run python outbound_call.py`.
 
 ## Configuration
@@ -129,26 +127,39 @@ or `SynthesizerConfig` - the default transcriber is Deepgram and the default syn
 This example sets up an agent that spells every word that is sent to it - any text-in, text-out function can be turned into a voice conversation by subclassing `BaseAgent` and creating an `AgentFactory`.
 
 ```
+import typing
+from typing import Optional, Tuple
+
+from vocode.streaming.agent.abstract_factory import AbstractAgentFactory
+from vocode.streaming.agent.base_agent import BaseAgent, RespondAgent
+from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent
+from vocode.streaming.models.agent import AgentConfig, AgentType, ChatGPTAgentConfig
+
+
 class SpellerAgentConfig(AgentConfig, type="agent_speller"):
     pass
 
 
-class SpellerAgent(BaseAgent):
-    def __init__(self, agent_config: SpellerAgentConfig):
-        super().__init__(agent_config=agent_config)
-
+class SpellerAgent(RespondAgent[SpellerAgentConfig]):
     async def respond(
         self,
-        human_input,
+        human_input: str,
         conversation_id: str,
         is_interrupt: bool = False,
     ) -> Tuple[Optional[str], bool]:
         return "".join(c + " " for c in human_input), False
 
 
-class SpellerAgentFactory(AgentFactory):
+class SpellerAgentFactory(AbstractAgentFactory):
     def create_agent(self, agent_config: AgentConfig) -> BaseAgent:
-        return SpellerAgent(agent_config=agent_config)
+        # If the agent configuration type is CHAT_GPT, create a ChatGPTAgent.
+        if isinstance(agent_config, ChatGPTAgentConfig):
+            return ChatGPTAgent(agent_config=agent_config)
+        # If the agent configuration type is agent_speller, create a SpellerAgent.
+        elif isinstance(agent_config, SpellerAgentConfig):
+            return SpellerAgent(agent_config=agent_config)
+        # If the agent configuration type is not recognized, raise an exception.
+        raise Exception("Invalid agent config")
 ```
 
 An `AgentFactory` instance is passed into the `TelephonyServer` in `telephony_app.py`.
@@ -157,7 +168,7 @@ We provide a small set of agents with already created `AgentConfig`s, including,
 
 ### Accessing call information in your agent
 
-We store the `to` and `from` numbers in the [`ConfigManager`](https://github.com/vocodedev/vocode-python-sdk/blob/b37bf7a1172a917b641d0e70ba14756415e09b0b/apps/telephony_app/main.py#L20) - so
+We store the `to` and `from` numbers in the [`ConfigManager`](https://github.com/vocodedev/vocode-core/blob/53b01dab0b59f71961ee83dbcaf3653a6935c2e3/apps/telephony_app/main.py#L30) - so
 if you'd like to access them in your agent, you can instantiate the manager to hook into the same Redis instance:
 
 ```
@@ -168,7 +179,7 @@ class SpellerAgent(BaseAgent):
 
     async def respond(
         self,
-        human_input,
+        human_input: str,
         conversation_id: str,
         is_interrupt: bool = False,
     ) -> Tuple[Optional[str], bool]:
````
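
Since the docs frame any text-in, text-out function as a candidate for a `RespondAgent`, here is one more illustrative agent in the same shape as `SpellerAgent` above; the `agent_shouter` type string and class names are made up for this sketch and are not part of the commit:

```python
from typing import Optional, Tuple

from vocode.streaming.agent.base_agent import RespondAgent
from vocode.streaming.models.agent import AgentConfig


class ShoutingAgentConfig(AgentConfig, type="agent_shouter"):
    pass


class ShoutingAgent(RespondAgent[ShoutingAgentConfig]):
    async def respond(
        self,
        human_input: str,
        conversation_id: str,
        is_interrupt: bool = False,
    ) -> Tuple[Optional[str], bool]:
        # Upper-case the caller's words; the trailing False mirrors SpellerAgent's
        # second return value above.
        return human_input.upper(), False
```

To route calls to it, an `isinstance(agent_config, ShoutingAgentConfig)` branch would be added to the `SpellerAgentFactory.create_agent` dispatch shown earlier.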

docs/welcome.mdx (+3 −3)

```diff
@@ -26,7 +26,7 @@ operations flows. Get started with the [Hosted Service](/hosted-quickstart).
 ## Open Source Library
 
 Most of what we build is open source and free to use! Leverage all of the features mentioned in the introduction by taking a look
-at our open source repos. Get started with [Open Source](/open-source-quickstart).
+at our open source repos. Get started with [Open Source](/open-source-quickstarts).
 
 ## Quickstarts
 
@@ -35,9 +35,9 @@ at our open source repos. Get started with [Open Source](/open-source-quickstart
     Start using the hosted telephony service.
   </Card>
   <Card
-    title="Open Source Quick Start"
+    title="Open Source Quick Starts"
     icon="circle-play"
-    href="/open-source-quickstart"
+    href="/open-source-quickstarts"
   >
     Run Vocode self hosted.
   </Card>
```

playground/streaming/synthesizer/synthesize.py (+3 −2)

```diff
@@ -3,7 +3,7 @@
 from vocode.streaming.models.message import BaseMessage
 from vocode.streaming.models.synthesizer import AzureSynthesizerConfig
 from vocode.streaming.output_device.base_output_device import BaseOutputDevice
-from vocode.streaming.output_device.speaker_output import SpeakerOutput
+from vocode.streaming.output_device.blocking_speaker_output import BlockingSpeakerOutput
 from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer
 from vocode.streaming.synthesizer.base_synthesizer import BaseSynthesizer
 from vocode.streaming.utils import get_chunk_size_per_second
@@ -58,7 +58,8 @@ async def speak(
     return message_sent, cut_off
 
 async def main():
-    speaker_output = SpeakerOutput.from_default_device()
+    speaker_output = BlockingSpeakerOutput.from_default_device()
+    speaker_output.start()
     synthesizer = AzureSynthesizer(AzureSynthesizerConfig.from_output_device(speaker_output))
     try:
         while True:
```
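
Per the commit message, `SpeakerOutput` is being deprecated in favor of `BlockingSpeakerOutput`. A minimal migration sketch based only on the two changed lines above and the quickstart comment about playback moving to a separate thread:

```python
from vocode.streaming.output_device.blocking_speaker_output import BlockingSpeakerOutput

speaker_output = BlockingSpeakerOutput.from_default_device()
speaker_output.start()  # playback runs on its own thread, so start it before sending audio
```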

quickstarts/streaming_conversation.py (−1)

```diff
@@ -49,7 +49,6 @@ async def main():
         speaker_output,
     ) = create_streaming_microphone_input_and_speaker_output(
         use_default_devices=False,
-        use_blocking_speaker_output=True,  # this moves the playback to a separate thread, set to False to use the main thread
     )
 
     conversation = StreamingConversation(
```

tests/fakedata/conversation.py (+1)

```diff
@@ -26,6 +26,7 @@
 )
 
 DEFAULT_SYNTHESIZER_CONFIG = PlayHtSynthesizerConfig(
+    voice_id="test_voice_id",
     sampling_rate=DEFAULT_SAMPLING_RATE,
     audio_encoding=AudioEncoding.MULAW,
 )
```
