Skip to content

Commit

Permalink
Merge pull request #15 from pipecat-ai/changes-to-support-gemini-live…
Browse files Browse the repository at this point in the history
…-websocket-transport

Changes to support upcoming GeminiLiveWebSocket transport
  • Loading branch information
kompfner authored Jan 3, 2025
2 parents 387a8c7 + 75a09bd commit d2d9946
Show file tree
Hide file tree
Showing 10 changed files with 88 additions and 52 deletions.
25 changes: 18 additions & 7 deletions Sources/RTVIClientIOS/RTVIClient.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ open class RTVIClient {

private let options: RTVIClientOptions
private var transport: Transport
private var baseUrl: String
private var baseUrl: String? // see RTVIClientParams for explanation of optionality

private let messageDispatcher: MessageDispatcher
private var helpers: [String: RegisteredHelper] = [:]
Expand Down Expand Up @@ -123,10 +123,13 @@ open class RTVIClient {
let headers = options.customHeaders ?? options.params.headers
let requestData = RTVIClient.appendRtviClientVersion(options.customBodyParams ?? options.params.requestData)

let httpMessageDispatcher = HTTPMessageDispatcher.init(baseUrl: self.baseUrl, endpoints: self.options.params.endpoints, headers: headers, requestData: requestData)
var httpMessageDispatcher: HTTPMessageDispatcher? = nil
if let baseUrl {
httpMessageDispatcher = HTTPMessageDispatcher.init(baseUrl: baseUrl, endpoints: self.options.params.endpoints, headers: headers, requestData: requestData)
}
self.messageDispatcher = MessageDispatcher.init(transport: transport, httpMessageDispatcher: httpMessageDispatcher)

httpMessageDispatcher.onMessage = self.onMessage
httpMessageDispatcher?.onMessage = self.onMessage
self.transport.onMessage = self.onMessage
}

Expand All @@ -152,12 +155,20 @@ open class RTVIClient {
self.devicesInitialized = true
}

private func connectUrl() -> String {
return self.baseUrl + self.options.params.endpoints.connect
/// Builds the URL string for the connect endpoint.
///
/// - Returns: The base URL with the configured connect endpoint appended,
///   or `nil` when no base URL has been configured.
private func connectUrl() -> String? {
    guard let baseUrl else {
        return nil
    }
    let connectEndpoint = self.options.params.endpoints.connect
    return baseUrl + connectEndpoint
}

private func fetchAuthBundle() async throws -> AuthBundle {
guard let url = URL(string: self.connectUrl()) else {
private func fetchAuthBundle() async throws -> AuthBundle? {
guard let connectUrl = self.connectUrl() else {
// Assume we're using a transport that doesn't communicate with an RTVI server.
return nil
}

guard let url = URL(string: connectUrl) else {
throw InvalidAuthBundleError()
}

Expand Down
5 changes: 3 additions & 2 deletions Sources/RTVIClientIOS/RTVIClientParams.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
public struct RTVIClientParams: Codable {

/// The base URL for the RTVI POST request.
public let baseUrl: String
/// Not needed when using certain transports that don't communicate with an RTVI server.
public let baseUrl: String?

/// Custom HTTP headers to be sent with the POST request to baseUrl.
public let headers: [[String: String]]
Expand All @@ -17,7 +18,7 @@ public struct RTVIClientParams: Codable {
public let config: [ServiceConfig]

public init(
baseUrl: String,
baseUrl: String? = nil,
headers: [[String: String]] = [],
endpoints: RTVIURLEndpoints = RTVIURLEndpoints(),
requestData: Value? = nil,
Expand Down
46 changes: 23 additions & 23 deletions Sources/RTVIClientIOS/transport/RTVIMessageInbound.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,72 +12,72 @@ public struct RTVIMessageInbound: Codable {
/// Messages from the server to the client.
public enum MessageType {
/// Bot is connected and ready to receive messages
static let BOT_READY = "bot-ready"
public static let BOT_READY = "bot-ready"

/// Received an error response from the server
static let ERROR_RESPONSE = "error-response"
public static let ERROR_RESPONSE = "error-response"

/// Received an error from the server
static let ERROR = "error"
public static let ERROR = "error"

/// STT transcript (both local and remote), flagged as partial, final, or sentence
static let TRANSCRIPT = "transcript"
public static let TRANSCRIPT = "transcript"

/// Get or update config response
static let CONFIG_RESPONSE = "config"
public static let CONFIG_RESPONSE = "config"

/// Configuration options available on the bot
static let DESCRIBE_CONFIG_RESPONSE = "config-available"
public static let DESCRIBE_CONFIG_RESPONSE = "config-available"

/// Actions available on the bot
static let DESCRIBE_ACTION_RESPONSE = "actions-available"
public static let DESCRIBE_ACTION_RESPONSE = "actions-available"

static let ACTION_RESPONSE = "action-response"
public static let ACTION_RESPONSE = "action-response"

/// STT transcript from the user
static let USER_TRANSCRIPTION = "user-transcription"
public static let USER_TRANSCRIPTION = "user-transcription"

/// STT transcript from the bot
static let BOT_TRANSCRIPTION = "bot-transcription"
public static let BOT_TRANSCRIPTION = "bot-transcription"

/// User started speaking
static let USER_STARTED_SPEAKING = "user-started-speaking"
public static let USER_STARTED_SPEAKING = "user-started-speaking"

/// User stopped speaking
static let USER_STOPPED_SPEAKING = "user-stopped-speaking"
public static let USER_STOPPED_SPEAKING = "user-stopped-speaking"

/// Bot started speaking
static let BOT_STARTED_SPEAKING = "bot-started-speaking"
public static let BOT_STARTED_SPEAKING = "bot-started-speaking"

/// Bot stopped speaking
static let BOT_STOPPED_SPEAKING = "bot-stopped-speaking"
public static let BOT_STOPPED_SPEAKING = "bot-stopped-speaking"

/// Pipecat metrics
static let PIPECAT_METRICS = "pipecat-metrics"
public static let PIPECAT_METRICS = "pipecat-metrics"

/// LLM transcript from the bot
static let BOT_LLM_TEXT = "bot-llm-text"
public static let BOT_LLM_TEXT = "bot-llm-text"
/// LLM transcript from the bot has started
static let BOT_LLM_STARTED = "bot-llm-started"
public static let BOT_LLM_STARTED = "bot-llm-started"
/// LLM transcript from the bot has stopped
static let BOT_LLM_STOPPED = "bot-llm-stopped"
public static let BOT_LLM_STOPPED = "bot-llm-stopped"

/// TTS transcript from the bot
static let BOT_TTS_TEXT = "bot-tts-text"
public static let BOT_TTS_TEXT = "bot-tts-text"
/// TTS transcript from the bot has started
static let BOT_TTS_STARTED = "bot-tts-started"
public static let BOT_TTS_STARTED = "bot-tts-started"
/// TTS transcript from the bot has stopped
static let BOT_TTS_STTOPED = "bot-tts-stopped"
public static let BOT_TTS_STTOPED = "bot-tts-stopped"

/// Text has been stored
static let STORAGE_ITEM_STORED = "storage-item-stored"
public static let STORAGE_ITEM_STORED = "storage-item-stored"
}

/// Convenience initializer that fills in the remaining fields itself.
///
/// The id is the first 8 characters of a freshly generated UUID string,
/// the label is "rtvi-ai", and no metrics are attached.
init(type: String?, data: String?) {
    let generatedId = String(UUID().uuidString.prefix(8))
    self.init(
        type: type,
        data: data,
        id: generatedId,
        label: "rtvi-ai",
        metrics: nil
    )
}

init(type: String?, data: String?, id: String?, label: String? = "rtvi-ai", metrics: PipecatMetrics? = nil) {
public init(type: String?, data: String?, id: String?, label: String? = "rtvi-ai", metrics: PipecatMetrics? = nil) {
self.id = id
self.label = label
self.type = type
Expand Down
29 changes: 20 additions & 9 deletions Sources/RTVIClientIOS/transport/RTVIMessageOutbound.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,18 @@ import Foundation
/// An RTVI control message sent to the Transport.
public struct RTVIMessageOutbound: Encodable {

let id: String
let label: String
let type: String
let data: Value?
public let id: String
public let label: String
public let type: String
public let data: Value?

/// Messages from the client to the server.
public enum MessageType {
static let UPDATE_CONFIG = "update-config"
static let GET_CONFIG = "get-config"
static let DESCRIBE_CONFIG = "describe-config"
static let ACTION = "action"
static let DESCRIBE_ACTIONS = "describe-actions"
public static let UPDATE_CONFIG = "update-config"
public static let GET_CONFIG = "get-config"
public static let DESCRIBE_CONFIG = "describe-config"
public static let ACTION = "action"
public static let DESCRIBE_ACTIONS = "describe-actions"
public static let CLIENT_READY = "client-ready"
}

Expand Down Expand Up @@ -59,6 +59,17 @@ public struct RTVIMessageOutbound: Encodable {
)
}

/// Decodes this message's `data` as an `ActionRequest`, if the message is an action request.
///
/// Useful for transports that intercept action requests and handle them in their own way.
///
/// - Returns: The decoded `ActionRequest`, or `nil` when the message type is not
///   `MessageType.ACTION` or when `data` cannot be encoded to JSON and decoded
///   back as an `ActionRequest`.
public func decodeActionData() -> ActionRequest? {
    guard type == RTVIMessageOutbound.MessageType.ACTION else {
        return nil
    }
    // Round-trip through JSON to convert the generic payload into a typed request.
    // Failures are deliberately reported as nil rather than thrown.
    guard let payload = try? JSONEncoder().encode(data) else {
        return nil
    }
    return try? JSONDecoder().decode(ActionRequest.self, from: payload)
}
}


Expand Down
2 changes: 1 addition & 1 deletion Sources/RTVIClientIOS/transport/Transport.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ public protocol Transport {

func initDevices() async throws
func release()
func connect(authBundle: AuthBundle) async throws
func connect(authBundle: AuthBundle?) async throws
func disconnect() async throws
func getAllMics() -> [MediaDeviceInfo]
func getAllCams() -> [MediaDeviceInfo]
Expand Down
4 changes: 2 additions & 2 deletions Sources/RTVIClientIOS/types/Option.swift
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import Foundation

public struct Option: Codable {
let name: String
let value: Value
public let name: String
public let value: Value

public init(name: String, value: Value) {
self.name = name
Expand Down
4 changes: 2 additions & 2 deletions Sources/RTVIClientIOS/types/ServiceConfig.swift
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import Foundation

public struct ServiceConfig: Codable {
let service: String
let options: [Option]
public let service: String
public let options: [Option]

public init(service: String, options: [Option]) {
self.service = service
Expand Down
6 changes: 3 additions & 3 deletions Sources/RTVIClientIOS/types/voiceMessages/ActionRequest.swift
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ public typealias Argument = Option

public struct ActionRequest: Codable {

let service: String
let action: String
let arguments: [Argument]?
public let service: String
public let action: String
public let arguments: [Argument]?

public init(service: String, action: String, arguments: [Option]?=nil) {
self.service = service
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,8 @@ public struct BotReadyData: Codable {
public let version: String
public let config: [ServiceConfig]

/// Creates a `BotReadyData` value from the given version string and service configurations.
///
/// - Parameters:
///   - version: Stored as-is in `version`.
///   - config: Stored as-is in `config`.
public init(version: String, config: [ServiceConfig]) {
self.version = version
self.config = config
}
}
15 changes: 12 additions & 3 deletions Sources/RTVIClientIOS/utils/MessageDispatcher.swift
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
import Foundation

/// Errors raised by `MessageDispatcher` when a message cannot be sent.
enum MessageDispatcherError: Error {
/// HTTP messages not supported when using certain transports that don't communicate with an RTVI server.
/// Thrown when the transport is not connected and no HTTP message dispatcher is available.
case httpMessagesNotSupported
}

/// Helper class for sending messages to the server and awaiting the response.
class MessageDispatcher {

private let transport: Transport
private let httpMessageDispatcher: HTTPMessageDispatcher
private let httpMessageDispatcher: HTTPMessageDispatcher?

/// How long to wait before resolving the message.
private var gcTime: TimeInterval
@MainActor
private var queue: [QueuedVoiceMessage] = []
private var gcTimer: Timer?

init(transport: Transport, httpMessageDispatcher: HTTPMessageDispatcher) {
init(transport: Transport, httpMessageDispatcher: HTTPMessageDispatcher?) {
self.gcTime = 10.0 // 10 seconds
self.transport = transport
self.httpMessageDispatcher = httpMessageDispatcher
Expand All @@ -35,7 +40,11 @@ class MessageDispatcher {
if self.transport.isConnected() {
try self.transport.sendMessage(message: message)
} else {
try self.httpMessageDispatcher.sendMessage(message: message)
if let httpMessageDispatcher {
try httpMessageDispatcher.sendMessage(message: message)
} else {
throw MessageDispatcherError.httpMessagesNotSupported
}
}
} catch {
Logger.shared.error("Failed to send app message \(error)")
Expand Down

0 comments on commit d2d9946

Please sign in to comment.