Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes to support upcoming GeminiLiveWebSocket transport #15

Merged
merged 1 commit into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 18 additions & 7 deletions Sources/RTVIClientIOS/RTVIClient.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ open class RTVIClient {

private let options: RTVIClientOptions
private var transport: Transport
private var baseUrl: String
private var baseUrl: String? // see RTVIClientParams for explanation of optionality
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I’m not mistaken, we have already removed this option from our JavaScript SDK.

Since you are already making changes, would it be worthwhile to remove it here as well now? Or should we include its removal as part of this task, along with a couple of other small refactors needed in our iOS SDK?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah I see. Looks like in the JS SDK baseUrl now lives nested under params only and not at the "top level", is that right?

I wasn't necessarily looking to make any breaking changes with this PR; maybe we could save this cleanup for the next batch of work?


private let messageDispatcher: MessageDispatcher
private var helpers: [String: RegisteredHelper] = [:]
Expand Down Expand Up @@ -123,10 +123,13 @@ open class RTVIClient {
let headers = options.customHeaders ?? options.params.headers
let requestData = RTVIClient.appendRtviClientVersion(options.customBodyParams ?? options.params.requestData)

let httpMessageDispatcher = HTTPMessageDispatcher.init(baseUrl: self.baseUrl, endpoints: self.options.params.endpoints, headers: headers, requestData: requestData)
var httpMessageDispatcher: HTTPMessageDispatcher? = nil
if let baseUrl {
httpMessageDispatcher = HTTPMessageDispatcher.init(baseUrl: baseUrl, endpoints: self.options.params.endpoints, headers: headers, requestData: requestData)
}
self.messageDispatcher = MessageDispatcher.init(transport: transport, httpMessageDispatcher: httpMessageDispatcher)

httpMessageDispatcher.onMessage = self.onMessage
httpMessageDispatcher?.onMessage = self.onMessage
self.transport.onMessage = self.onMessage
}

Expand All @@ -152,12 +155,20 @@ open class RTVIClient {
self.devicesInitialized = true
}

private func connectUrl() -> String {
return self.baseUrl + self.options.params.endpoints.connect
/// Builds the URL of the connect endpoint.
///
/// - Returns: `baseUrl` with the configured `connect` endpoint appended, or `nil` when no
///   base URL is set.
private func connectUrl() -> String? {
    // Without a base URL there is nothing to append the endpoint to.
    guard let baseUrl else {
        return nil
    }
    return baseUrl + self.options.params.endpoints.connect
}

private func fetchAuthBundle() async throws -> AuthBundle {
guard let url = URL(string: self.connectUrl()) else {
private func fetchAuthBundle() async throws -> AuthBundle? {
guard let connectUrl = self.connectUrl() else {
// Assume we're using a transport that doesn't communicate with an RTVI server.
return nil
}

guard let url = URL(string: connectUrl) else {
throw InvalidAuthBundleError()
}

Expand Down
5 changes: 3 additions & 2 deletions Sources/RTVIClientIOS/RTVIClientParams.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
public struct RTVIClientParams: Codable {

/// The base URL for the RTVI POST request.
public let baseUrl: String
/// Not needed when using certain transports that don't communicate with an RTVI server.
public let baseUrl: String?

/// Custom HTTP headers to be sent with the POST request to baseUrl.
public let headers: [[String: String]]
Expand All @@ -17,7 +18,7 @@ public struct RTVIClientParams: Codable {
public let config: [ServiceConfig]

public init(
baseUrl: String,
baseUrl: String? = nil,
headers: [[String: String]] = [],
endpoints: RTVIURLEndpoints = RTVIURLEndpoints(),
requestData: Value? = nil,
Expand Down
46 changes: 23 additions & 23 deletions Sources/RTVIClientIOS/transport/RTVIMessageInbound.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,72 +12,72 @@ public struct RTVIMessageInbound: Codable {
/// Messages from the server to the client.
public enum MessageType {
/// Bot is connected and ready to receive messages
static let BOT_READY = "bot-ready"
public static let BOT_READY = "bot-ready"

/// Received an error response from the server
static let ERROR_RESPONSE = "error-response"
public static let ERROR_RESPONSE = "error-response"

/// Received an error from the server
static let ERROR = "error"
public static let ERROR = "error"

/// STT transcript (both local and remote) flagged with partial, final or sentence
static let TRANSCRIPT = "transcript"
public static let TRANSCRIPT = "transcript"

/// Get or update config response
static let CONFIG_RESPONSE = "config"
public static let CONFIG_RESPONSE = "config"

/// Configuration options available on the bot
static let DESCRIBE_CONFIG_RESPONSE = "config-available"
public static let DESCRIBE_CONFIG_RESPONSE = "config-available"

/// Actions available on the bot
static let DESCRIBE_ACTION_RESPONSE = "actions-available"
public static let DESCRIBE_ACTION_RESPONSE = "actions-available"

static let ACTION_RESPONSE = "action-response"
public static let ACTION_RESPONSE = "action-response"

/// STT transcript from the user
static let USER_TRANSCRIPTION = "user-transcription"
public static let USER_TRANSCRIPTION = "user-transcription"

/// STT transcript from the bot
static let BOT_TRANSCRIPTION = "bot-transcription"
public static let BOT_TRANSCRIPTION = "bot-transcription"

/// User started speaking
static let USER_STARTED_SPEAKING = "user-started-speaking"
public static let USER_STARTED_SPEAKING = "user-started-speaking"

/// User stopped speaking
static let USER_STOPPED_SPEAKING = "user-stopped-speaking"
public static let USER_STOPPED_SPEAKING = "user-stopped-speaking"

/// Bot started speaking
static let BOT_STARTED_SPEAKING = "bot-started-speaking"
public static let BOT_STARTED_SPEAKING = "bot-started-speaking"

/// Bot stopped speaking
static let BOT_STOPPED_SPEAKING = "bot-stopped-speaking"
public static let BOT_STOPPED_SPEAKING = "bot-stopped-speaking"

/// Pipecat metrics
static let PIPECAT_METRICS = "pipecat-metrics"
public static let PIPECAT_METRICS = "pipecat-metrics"

/// LLM transcript from the bot
static let BOT_LLM_TEXT = "bot-llm-text"
public static let BOT_LLM_TEXT = "bot-llm-text"
/// LLM transcript from the bot has started
static let BOT_LLM_STARTED = "bot-llm-started"
public static let BOT_LLM_STARTED = "bot-llm-started"
/// LLM transcript from the bot has stopped
static let BOT_LLM_STOPPED = "bot-llm-stopped"
public static let BOT_LLM_STOPPED = "bot-llm-stopped"

/// TTS transcript from the bot
static let BOT_TTS_TEXT = "bot-tts-text"
public static let BOT_TTS_TEXT = "bot-tts-text"
/// TTS transcript from the bot has started
static let BOT_TTS_STARTED = "bot-tts-started"
public static let BOT_TTS_STARTED = "bot-tts-started"
/// TTS transcript from the bot has stopped
static let BOT_TTS_STTOPED = "bot-tts-stopped"
public static let BOT_TTS_STTOPED = "bot-tts-stopped"

/// Text has been stored
static let STORAGE_ITEM_STORED = "storage-item-stored"
public static let STORAGE_ITEM_STORED = "storage-item-stored"
}

init(type: String?, data: String?) {
self.init(type: type, data: data, id: String(UUID().uuidString.prefix(8)), label: "rtvi-ai", metrics: nil)
}

init(type: String?, data: String?, id: String?, label: String? = "rtvi-ai", metrics: PipecatMetrics? = nil) {
public init(type: String?, data: String?, id: String?, label: String? = "rtvi-ai", metrics: PipecatMetrics? = nil) {
self.id = id
self.label = label
self.type = type
Expand Down
29 changes: 20 additions & 9 deletions Sources/RTVIClientIOS/transport/RTVIMessageOutbound.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,18 @@ import Foundation
/// An RTVI control message sent to the Transport.
public struct RTVIMessageOutbound: Encodable {

let id: String
let label: String
let type: String
let data: Value?
public let id: String
public let label: String
public let type: String
public let data: Value?

/// Messages from the client to the server.
public enum MessageType {
static let UPDATE_CONFIG = "update-config"
static let GET_CONFIG = "get-config"
static let DESCRIBE_CONFIG = "describe-config"
static let ACTION = "action"
static let DESCRIBE_ACTIONS = "describe-actions"
public static let UPDATE_CONFIG = "update-config"
public static let GET_CONFIG = "get-config"
public static let DESCRIBE_CONFIG = "describe-config"
public static let ACTION = "action"
public static let DESCRIBE_ACTIONS = "describe-actions"
public static let CLIENT_READY = "client-ready"
}

Expand Down Expand Up @@ -59,6 +59,17 @@ public struct RTVIMessageOutbound: Encodable {
)
}

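/// Decodes the action data, if this outbound message represents an action request.
/// This is useful for implementing transports that can intercept and handle action requests in their own way.
/// Returns `nil` for any other message type, or when the data cannot be decoded as an `ActionRequest`.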
public func decodeActionData() -> ActionRequest? {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where are we using it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if type == RTVIMessageOutbound.MessageType.ACTION {
do {
let encodedData = try JSONEncoder().encode(data)
return try JSONDecoder().decode(ActionRequest.self, from: encodedData)
} catch {}
}
return nil
}
}


Expand Down
2 changes: 1 addition & 1 deletion Sources/RTVIClientIOS/transport/Transport.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ public protocol Transport {

func initDevices() async throws
func release()
func connect(authBundle: AuthBundle) async throws
func connect(authBundle: AuthBundle?) async throws
func disconnect() async throws
func getAllMics() -> [MediaDeviceInfo]
func getAllCams() -> [MediaDeviceInfo]
Expand Down
4 changes: 2 additions & 2 deletions Sources/RTVIClientIOS/types/Option.swift
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import Foundation

public struct Option: Codable {
let name: String
let value: Value
public let name: String
public let value: Value

public init(name: String, value: Value) {
self.name = name
Expand Down
4 changes: 2 additions & 2 deletions Sources/RTVIClientIOS/types/ServiceConfig.swift
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import Foundation

public struct ServiceConfig: Codable {
let service: String
let options: [Option]
public let service: String
public let options: [Option]

public init(service: String, options: [Option]) {
self.service = service
Expand Down
6 changes: 3 additions & 3 deletions Sources/RTVIClientIOS/types/voiceMessages/ActionRequest.swift
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ public typealias Argument = Option

public struct ActionRequest: Codable {

let service: String
let action: String
let arguments: [Argument]?
public let service: String
public let action: String
public let arguments: [Argument]?

public init(service: String, action: String, arguments: [Option]?=nil) {
self.service = service
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,8 @@ public struct BotReadyData: Codable {
public let version: String
public let config: [ServiceConfig]

/// Creates a `BotReadyData` value from its two stored fields.
///
/// - Parameters:
///   - version: Value stored in `version`.
///   - config: Service configurations stored in `config`.
public init(version: String, config: [ServiceConfig]) {
self.version = version
self.config = config
}
}
15 changes: 12 additions & 3 deletions Sources/RTVIClientIOS/utils/MessageDispatcher.swift
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
import Foundation

/// Errors thrown by `MessageDispatcher` when a message cannot be sent.
enum MessageDispatcherError: Error {
/// HTTP messages are not supported when using certain transports that don't communicate with an RTVI server.
/// Thrown when the transport is not connected and no HTTP message dispatcher is available.
case httpMessagesNotSupported
}

/// Helper class for sending messages to the server and awaiting the response.
class MessageDispatcher {

private let transport: Transport
private let httpMessageDispatcher: HTTPMessageDispatcher
private let httpMessageDispatcher: HTTPMessageDispatcher?

/// How long to wait before resolving the message.
private var gcTime: TimeInterval
@MainActor
private var queue: [QueuedVoiceMessage] = []
private var gcTimer: Timer?

init(transport: Transport, httpMessageDispatcher: HTTPMessageDispatcher) {
init(transport: Transport, httpMessageDispatcher: HTTPMessageDispatcher?) {
self.gcTime = 10.0 // 10 seconds
self.transport = transport
self.httpMessageDispatcher = httpMessageDispatcher
Expand All @@ -35,7 +40,11 @@ class MessageDispatcher {
if self.transport.isConnected() {
try self.transport.sendMessage(message: message)
} else {
try self.httpMessageDispatcher.sendMessage(message: message)
if let httpMessageDispatcher {
try httpMessageDispatcher.sendMessage(message: message)
} else {
throw MessageDispatcherError.httpMessagesNotSupported
}
}
} catch {
Logger.shared.error("Failed to send app message \(error)")
Expand Down