Skip to content

Commit

Permalink
feat: Implement WebSocket support (#156)
Browse files Browse the repository at this point in the history
- Add WebSocket support for audio speech, transcriptions, and chat interactions
- Introduce asynchronous WebSocket clients for real-time communication
- Expand event handling for various WebSocket events
- Enhance error handling and logging for WebSocket connections
  • Loading branch information
chyroc authored Jan 10, 2025
1 parent a3042be commit da63a6d
Show file tree
Hide file tree
Showing 22 changed files with 2,560 additions and 63 deletions.
4 changes: 0 additions & 4 deletions .coderabbit.yaml

This file was deleted.

1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ dist/
.env_private
scripts/
.cache/
output.wav
88 changes: 88 additions & 0 deletions cozepy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,52 @@
from .request import AsyncHTTPClient, SyncHTTPClient
from .templates import TemplateDuplicateResp, TemplateEntityType
from .version import VERSION
from .websockets.audio.speech import (
AsyncWebsocketsAudioSpeechClient,
AsyncWebsocketsAudioSpeechEventHandler,
InputTextBufferAppendEvent,
InputTextBufferCompletedEvent,
InputTextBufferCompleteEvent,
SpeechAudioCompletedEvent,
SpeechAudioUpdateEvent,
SpeechUpdateEvent,
WebsocketsAudioSpeechClient,
WebsocketsAudioSpeechEventHandler,
)
from .websockets.audio.transcriptions import (
AsyncWebsocketsAudioTranscriptionsClient,
AsyncWebsocketsAudioTranscriptionsEventHandler,
InputAudioBufferAppendEvent,
InputAudioBufferCompletedEvent,
InputAudioBufferCompleteEvent,
TranscriptionsMessageCompletedEvent,
TranscriptionsMessageUpdateEvent,
TranscriptionsUpdateEvent,
WebsocketsAudioTranscriptionsClient,
WebsocketsAudioTranscriptionsEventHandler,
)
from .websockets.chat import (
AsyncWebsocketsChatClient,
AsyncWebsocketsChatEventHandler,
ChatUpdateEvent,
ConversationAudioDeltaEvent,
ConversationChatCompletedEvent,
ConversationChatCreatedEvent,
ConversationChatRequiresActionEvent,
ConversationChatSubmitToolOutputsEvent,
ConversationMessageDeltaEvent,
WebsocketsChatClient,
WebsocketsChatEventHandler,
)
from .websockets.ws import (
InputAudio,
OpusConfig,
OutputAudio,
PCMConfig,
WebsocketsErrorEvent,
WebsocketsEvent,
WebsocketsEventType,
)
from .workflows.runs import (
WorkflowEvent,
WorkflowEventError,
Expand Down Expand Up @@ -180,6 +226,48 @@
"DocumentSourceInfo",
"DocumentUpdateRule",
"DocumentBase",
# websockets.audio.speech
"InputTextBufferAppendEvent",
"InputTextBufferCompleteEvent",
"SpeechUpdateEvent",
"InputTextBufferCompletedEvent",
"SpeechAudioUpdateEvent",
"SpeechAudioCompletedEvent",
"WebsocketsAudioSpeechEventHandler",
"WebsocketsAudioSpeechClient",
"AsyncWebsocketsAudioSpeechEventHandler",
"AsyncWebsocketsAudioSpeechClient",
# websockets.audio.transcriptions
"InputAudioBufferAppendEvent",
"InputAudioBufferCompleteEvent",
"TranscriptionsUpdateEvent",
"InputAudioBufferCompletedEvent",
"TranscriptionsMessageUpdateEvent",
"TranscriptionsMessageCompletedEvent",
"WebsocketsAudioTranscriptionsEventHandler",
"WebsocketsAudioTranscriptionsClient",
"AsyncWebsocketsAudioTranscriptionsEventHandler",
"AsyncWebsocketsAudioTranscriptionsClient",
# websockets.chat
"ChatUpdateEvent",
"ConversationChatSubmitToolOutputsEvent",
"ConversationChatCreatedEvent",
"ConversationMessageDeltaEvent",
"ConversationChatRequiresActionEvent",
"ConversationAudioDeltaEvent",
"ConversationChatCompletedEvent",
"WebsocketsChatEventHandler",
"WebsocketsChatClient",
"AsyncWebsocketsChatEventHandler",
"AsyncWebsocketsChatClient",
# websockets
"WebsocketsEventType",
"WebsocketsEvent",
"WebsocketsErrorEvent",
"InputAudio",
"OpusConfig",
"PCMConfig",
"OutputAudio",
# workflows.runs
"WorkflowRunResult",
"WorkflowEventType",
Expand Down
8 changes: 3 additions & 5 deletions cozepy/audio/voices/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,11 +210,7 @@ async def clone(
return await self._requester.arequest("post", url, False, Voice, headers=headers, body=body, files=files)

async def list(
self,
*,
filter_system_voice: bool = False,
page_num: int = 1,
page_size: int = 100,
self, *, filter_system_voice: bool = False, page_num: int = 1, page_size: int = 100, **kwargs
) -> AsyncNumberPaged[Voice]:
"""
Get available voices, including system voices + user cloned voices
Expand All @@ -227,6 +223,7 @@ async def list(
:return: list of Voice
"""
url = f"{self._base_url}/v1/audio/voices"
headers: Optional[dict] = kwargs.get("headers")

def request_maker(i_page_num: int, i_page_size: int) -> HTTPRequest:
return self._requester.make_request(
Expand All @@ -237,6 +234,7 @@ def request_maker(i_page_num: int, i_page_size: int) -> HTTPRequest:
"page_num": i_page_num,
"page_size": i_page_size,
},
headers=headers,
cast=_PrivateListVoiceData,
is_async=False,
stream=False,
Expand Down
24 changes: 16 additions & 8 deletions cozepy/chat/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import base64
import json
import time
from enum import Enum
Expand All @@ -24,6 +25,8 @@ class MessageRole(str, Enum):


class MessageType(str, Enum):
UNKNOWN = ""

# User input content.
# 用户输入内容。
QUESTION = "question"
Expand Down Expand Up @@ -52,8 +55,6 @@ class MessageType(str, Enum):
# 多 answer 场景下,服务端会返回一个 verbose 包,对应的 content 为 JSON 格式,content.msg_type =generate_answer_finish 代表全部 answer 回复完成。不支持在请求中作为入参。
VERBOSE = "verbose"

UNKNOWN = ""


class MessageContentType(str, Enum):
# Text.
Expand Down Expand Up @@ -187,12 +188,19 @@ def build_assistant_answer(content: str, meta_data: Optional[Dict[str, str]] = N
meta_data=meta_data,
)

def get_audio(self) -> Optional[bytes]:
if self.content_type == MessageContentType.AUDIO:
return base64.b64decode(self.content)
return None


class ChatStatus(str, Enum):
"""
The running status of the session
"""

UNKNOWN = ""

# The session has been created.
CREATED = "created"

Expand All @@ -214,9 +222,9 @@ class ChatStatus(str, Enum):

class ChatError(CozeModel):
# The error code. An integer type. 0 indicates success, other values indicate failure.
code: int
code: int = 0
# The error message. A string type.
msg: str
msg: str = ""


class ChatRequiredActionType(str, Enum):
Expand Down Expand Up @@ -266,13 +274,13 @@ class ChatRequiredAction(CozeModel):
class ChatUsage(CozeModel):
# The total number of Tokens consumed in this chat, including the consumption for both the input
# and output parts.
token_count: int
token_count: int = 0

# The total number of Tokens consumed for the output part.
output_count: int
output_count: int = 0

# The total number of Tokens consumed for the input part.
input_count: int
input_count: int = 0


class Chat(CozeModel):
Expand Down Expand Up @@ -301,7 +309,7 @@ class Chat(CozeModel):
# completed: The Bot has finished processing, and the session has ended.
# failed: The session has failed.
# requires_action: The session is interrupted and requires further processing.
status: ChatStatus
status: ChatStatus = ChatStatus.UNKNOWN

# Details of the information needed for execution.
required_action: Optional[ChatRequiredAction] = None
Expand Down
10 changes: 10 additions & 0 deletions cozepy/coze.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from .files import AsyncFilesClient, FilesClient
from .knowledge import AsyncKnowledgeClient, KnowledgeClient # deprecated
from .templates import AsyncTemplatesClient, TemplatesClient
from .websockets import AsyncWebsocketsClient
from .workflows import AsyncWorkflowsClient, WorkflowsClient
from .workspaces import AsyncWorkspacesClient, WorkspacesClient

Expand Down Expand Up @@ -151,6 +152,7 @@ def __init__(
self._workspaces: Optional[AsyncWorkspacesClient] = None
self._audio: Optional[AsyncAudioClient] = None
self._templates: Optional[AsyncTemplatesClient] = None
self._websockets: Optional[AsyncWebsocketsClient] = None

@property
def bots(self) -> "AsyncBotsClient":
Expand Down Expand Up @@ -237,3 +239,11 @@ def templates(self) -> "AsyncTemplatesClient":

self._templates = AsyncTemplatesClient(self._base_url, self._auth, self._requester)
return self._templates

@property
def websockets(self) -> "AsyncWebsocketsClient":
if not self._websockets:
from .websockets import AsyncWebsocketsClient

self._websockets = AsyncWebsocketsClient(self._base_url, self._auth, self._requester)
return self._websockets
14 changes: 13 additions & 1 deletion cozepy/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,18 @@ def remove_url_trailing_slash(base_url: str) -> str:
return base_url


def http_base_url_to_ws(base_url: str) -> str:
if not base_url:
raise ValueError("base_url cannot be empty")
if not base_url.startswith("https://"):
raise ValueError("base_url must start with 'https://'")
base_url = base_url.replace("https://", "wss://")

if "api-" in base_url:
return base_url.replace("api-", "ws-")
return base_url.replace("api.", "ws.")


def remove_none_values(d: dict) -> dict:
return {k: v for k, v in d.items() if v is not None}

Expand All @@ -55,7 +67,7 @@ def write_pcm_to_wav_file(
Save PCM binary data to WAV file
:param pcm_data: PCM binary data (24kHz, 16-bit, 1 channel, little-endian)
:param output_filename: Output WAV filename
:param filepath: Output WAV filename
"""

with wave.open(filepath, "wb") as wav_file:
Expand Down
52 changes: 52 additions & 0 deletions cozepy/websockets/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from cozepy import Auth
from cozepy.request import Requester
from cozepy.util import http_base_url_to_ws, remove_url_trailing_slash

from .audio import AsyncWebsocketsAudioClient, WebsocketsAudioClient
from .chat import AsyncWebsocketsChatBuildClient, WebsocketsChatBuildClient


class WebsocketsClient(object):
def __init__(self, base_url: str, auth: Auth, requester: Requester):
self._base_url = http_base_url_to_ws(remove_url_trailing_slash(base_url))
self._auth = auth
self._requester = requester

@property
def audio(self) -> WebsocketsAudioClient:
return WebsocketsAudioClient(
base_url=self._base_url,
auth=self._auth,
requester=self._requester,
)

@property
def chat(self) -> WebsocketsChatBuildClient:
return WebsocketsChatBuildClient(
base_url=self._base_url,
auth=self._auth,
requester=self._requester,
)


class AsyncWebsocketsClient(object):
def __init__(self, base_url: str, auth: Auth, requester: Requester):
self._base_url = http_base_url_to_ws(remove_url_trailing_slash(base_url))
self._auth = auth
self._requester = requester

@property
def audio(self) -> AsyncWebsocketsAudioClient:
return AsyncWebsocketsAudioClient(
base_url=self._base_url,
auth=self._auth,
requester=self._requester,
)

@property
def chat(self) -> AsyncWebsocketsChatBuildClient:
return AsyncWebsocketsChatBuildClient(
base_url=self._base_url,
auth=self._auth,
requester=self._requester,
)
51 changes: 51 additions & 0 deletions cozepy/websockets/audio/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from cozepy.auth import Auth
from cozepy.request import Requester

from .speech import AsyncWebsocketsAudioSpeechBuildClient, WebsocketsAudioSpeechBuildClient
from .transcriptions import AsyncWebsocketsAudioTranscriptionsBuildClient, WebsocketsAudioTranscriptionsBuildClient


class WebsocketsAudioClient(object):
def __init__(self, base_url: str, auth: Auth, requester: Requester):
self._base_url = base_url
self._auth = auth
self._requester = requester

@property
def transcriptions(self) -> "WebsocketsAudioTranscriptionsBuildClient":
return WebsocketsAudioTranscriptionsBuildClient(
base_url=self._base_url,
auth=self._auth,
requester=self._requester,
)

@property
def speech(self) -> "WebsocketsAudioSpeechBuildClient":
return WebsocketsAudioSpeechBuildClient(
base_url=self._base_url,
auth=self._auth,
requester=self._requester,
)


class AsyncWebsocketsAudioClient(object):
def __init__(self, base_url: str, auth: Auth, requester: Requester):
self._base_url = base_url
self._auth = auth
self._requester = requester

@property
def transcriptions(self) -> "AsyncWebsocketsAudioTranscriptionsBuildClient":
return AsyncWebsocketsAudioTranscriptionsBuildClient(
base_url=self._base_url,
auth=self._auth,
requester=self._requester,
)

@property
def speech(self) -> "AsyncWebsocketsAudioSpeechBuildClient":
return AsyncWebsocketsAudioSpeechBuildClient(
base_url=self._base_url,
auth=self._auth,
requester=self._requester,
)
Loading

0 comments on commit da63a6d

Please sign in to comment.