Skip to content

Commit

Permalink
added out-of-band responses, updated response returned, and custom VA…
Browse files Browse the repository at this point in the history
…D setting
  • Loading branch information
tinalenguyen committed Jan 11, 2025
1 parent 9ae16b0 commit c1d811e
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ class ServerVad(TypedDict):
threshold: NotRequired[float]
prefix_padding_ms: NotRequired[int]
silence_duration_ms: NotRequired[int]
create_response: NotRequired[bool]


class FunctionTool(TypedDict):
Expand Down Expand Up @@ -307,6 +308,8 @@ class ResponseCreateData(TypedDict, total=False):
tools: list[FunctionTool]
tool_choice: ToolChoice
temperature: float
conversation: Literal["auto", "none"]
metadata: NotRequired[map | None]
max_output_tokens: int | Literal["inf"]

class ResponseCreate(TypedDict):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ class RealtimeResponse:
"""timestamp when the response was created"""
_first_token_timestamp: float | None = None
"""timestamp when the first token was received"""
metadata: map | None = None
"""developer-provided string key-value pairs"""


@dataclass
Expand Down Expand Up @@ -140,6 +142,7 @@ class ServerVadOptions:
threshold: float
prefix_padding_ms: int
silence_duration_ms: int
create_response: bool


@dataclass
Expand Down Expand Up @@ -191,6 +194,7 @@ class _ContentPtr(TypedDict):
threshold=0.5,
prefix_padding_ms=300,
silence_duration_ms=500,
create_response=True,
)

DEFAULT_INPUT_AUDIO_TRANSCRIPTION = InputTranscriptionOptions(model="whisper-1")
Expand Down Expand Up @@ -717,6 +721,10 @@ def create(
on_duplicate: Literal[
"cancel_existing", "cancel_new", "keep_both"
] = "keep_both",
instructions: str | None = None,
modalities: list[api_proto.Modality] | None = None,
conversation: Literal["auto", "none"] = "auto",
metadata: map | None = None,
) -> asyncio.Future[bool]:
"""Creates a new response.
Expand All @@ -725,6 +733,12 @@ def create(
- "cancel_existing": Cancel the existing response before creating new one
- "cancel_new": Skip creating new response if one is in progress
- "keep_both": Wait for the existing response to be done and then create a new one
instructions: explicit prompt used for out-of-band events
modalities: set of modalities that the model can respond in
conversation: specifies whether the response is out-of-band
- "auto": Contents of the response will be added to the default conversation
- "none": Creates an out-of-band response which will not add items to default conversation
metadata: set of key-value pairs that can be used for storing additional information
Returns:
Future that resolves when the response create request is queued
Expand Down Expand Up @@ -758,7 +772,17 @@ def create(
or self._sess._pending_responses[active_resp_id].done_fut.done()
):
# no active response in progress, create a new one
self._sess._queue_msg({"type": "response.create"})
self._sess._queue_msg(
{
"type": "response.create",
"response": {
"instructions": instructions,
"modalities": modalities,
"conversation": conversation,
"metadata": metadata,
},
}
)
_fut = asyncio.Future[bool]()
_fut.set_result(True)
return _fut
Expand Down Expand Up @@ -795,7 +819,17 @@ async def wait_and_create() -> bool:
)
new_create_fut = asyncio.Future[None]()
self._sess._response_create_fut = new_create_fut
self._sess._queue_msg({"type": "response.create"})
self._sess._queue_msg(
{
"type": "response.create",
"response": {
"instructions": instructions,
"modalities": modalities,
"conversation": conversation,
"metadata": metadata,
},
}
)
return True

return asyncio.create_task(wait_and_create())
Expand Down Expand Up @@ -928,6 +962,7 @@ def session_update(
"threshold": self._opts.turn_detection.threshold,
"prefix_padding_ms": self._opts.turn_detection.prefix_padding_ms,
"silence_duration_ms": self._opts.turn_detection.silence_duration_ms,
"create_response": self._opts.turn_detection.create_response,
}
input_audio_transcription_opts: api_proto.InputAudioTranscription | None = None
if self._opts.input_audio_transcription is not None:
Expand Down Expand Up @@ -1228,6 +1263,7 @@ def _handle_session_updated(
threshold=session["turn_detection"]["threshold"],
prefix_padding_ms=session["turn_detection"]["prefix_padding_ms"],
silence_duration_ms=session["turn_detection"]["silence_duration_ms"],
create_response=True,
)
if session["input_audio_transcription"] is None:
input_audio_transcription = None
Expand Down Expand Up @@ -1407,11 +1443,13 @@ def _handle_response_created(
response = response_created["response"]
done_fut = self._loop.create_future()
status_details = response.get("status_details")
metadata = response.get("metadata")
new_response = RealtimeResponse(
id=response["id"],
status=response["status"],
status_details=status_details,
output=[],
metadata=metadata,
usage=response.get("usage"),
done_fut=done_fut,
_created_timestamp=time.time(),
Expand Down Expand Up @@ -1586,6 +1624,8 @@ def _handle_response_done(self, response_done: api_proto.ServerEvent.ResponseDon

response.status = response_data["status"]
response.status_details = response_data.get("status_details")
response.metadata = response_data.get("metadata")
response.output = cast(list[RealtimeOutput], response_data.get("output"))
response.usage = response_data.get("usage")

metrics_error = None
Expand Down Expand Up @@ -1714,3 +1754,4 @@ async def _run_fnc_task(self, fnc_call_info: llm.FunctionCallInfo, item_id: str)

def logging_extra(self) -> dict:
    """Return a dict carrying this object's session id.

    Returns:
        A new single-entry dict mapping ``"session_id"`` to the current
        value of ``self._session_id``.
    """
    # NOTE(review): presumably passed as ``extra=`` to logger calls —
    # confirm against callers.
    extra = {"session_id": self._session_id}
    return extra

0 comments on commit c1d811e

Please sign in to comment.