From ca3fdff70ddea836fb08271f5670d6b51aefcedf Mon Sep 17 00:00:00 2001 From: Purav Manot Date: Wed, 20 Mar 2024 02:52:22 +0530 Subject: [PATCH 1/2] Fix model name --- Package.resolved | 77 +++++++++++++++++++ .../Intramodular/Models/OpenAI.Model.swift | 2 +- 2 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 Package.resolved diff --git a/Package.resolved b/Package.resolved new file mode 100644 index 00000000..95d7eefc --- /dev/null +++ b/Package.resolved @@ -0,0 +1,77 @@ +{ + "pins" : [ + { + "identity" : "corepersistence", + "kind" : "remoteSourceControl", + "location" : "https://github.com/vmanot/CorePersistence.git", + "state" : { + "branch" : "main", + "revision" : "e02d171abea447353b1a514266188f121e72b0d0" + } + }, + { + "identity" : "merge", + "kind" : "remoteSourceControl", + "location" : "https://github.com/vmanot/Merge.git", + "state" : { + "branch" : "master", + "revision" : "dfe52aa401a238cd6b9e783756cc645f3e349748" + } + }, + { + "identity" : "networkkit", + "kind" : "remoteSourceControl", + "location" : "https://github.com/vmanot/NetworkKit.git", + "state" : { + "branch" : "master", + "revision" : "01cd6091ddf244840e143dfa2fc1f176621786f2" + } + }, + { + "identity" : "swallow", + "kind" : "remoteSourceControl", + "location" : "https://github.com/vmanot/Swallow.git", + "state" : { + "branch" : "master", + "revision" : "95f1a039ae927a10ee3fa95f802b16a0d05f5351" + } + }, + { + "identity" : "swift-collections", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-collections", + "state" : { + "revision" : "94cf62b3ba8d4bed62680a282d4c25f9c63c2efb", + "version" : "1.1.0" + } + }, + { + "identity" : "swift-syntax", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-syntax.git", + "state" : { + "revision" : "fa8f95c2d536d6620cc2f504ebe8a6167c9fc2dd", + "version" : "510.0.1" + } + }, + { + "identity" : "swiftapi", + "kind" : "remoteSourceControl", + "location" : "https://github.com/vmanot/SwiftAPI.git", + "state" : { + "branch" : "master", + "revision" : "195ce0c8e6c177ba9cfded33c2c0ced21d8d5949" + } + }, + { + "identity" : "swiftuix", + "kind" : "remoteSourceControl", + "location" : "https://github.com/SwiftUIX/SwiftUIX.git", + "state" : { + "branch" : "master", + "revision" : "95e399ac149577d6996a7c62c90282d06373a3b7" + } + } + ], + "version" : 2 +} diff --git a/Sources/OpenAI/Intramodular/Models/OpenAI.Model.swift b/Sources/OpenAI/Intramodular/Models/OpenAI.Model.swift index 6525e039..a10a90b3 100644 --- a/Sources/OpenAI/Intramodular/Models/OpenAI.Model.swift +++ b/Sources/OpenAI/Intramodular/Models/OpenAI.Model.swift @@ -199,7 +199,7 @@ extension OpenAI.Model { case gpt_4_32k_0314 = "gpt-4-32k-0314" case gpt_4_32k_0613 = "gpt-4-32k-0613" - case gpt_4_turbo_preview = "gpt_4_turbo_preview" + case gpt_4_turbo_preview = "gpt-4-turbo-preview" public var name: String { switch self { From 3866553c0eaf89dca6bd4f3275ba79a378d6dec6 Mon Sep 17 00:00:00 2001 From: Purav Manot Date: Wed, 20 Mar 2024 04:13:56 +0530 Subject: [PATCH 2/2] Add TTS support --- ...penAI.APISpecification.RequestBodies.swift | 72 +++++++++++ .../API/OpenAI.APISpecification.swift | 20 +++- .../Intramodular/Models/OpenAI.Model.swift | 112 ++++++++---------- .../Intramodular/Models/OpenAI.Object.swift | 3 + .../Intramodular/Models/OpenAI.Speech.swift | 31 +++++ .../Intramodular/OpenAI.APIClient.swift | 35 ++++++ 6 files changed, 209 insertions(+), 64 deletions(-) create mode 100644 Sources/OpenAI/Intramodular/Models/OpenAI.Speech.swift diff --git 
a/Sources/OpenAI/Intramodular/API/OpenAI.APISpecification.RequestBodies.swift b/Sources/OpenAI/Intramodular/API/OpenAI.APISpecification.RequestBodies.swift index 1fc62650..9820e344 100644 --- a/Sources/OpenAI/Intramodular/API/OpenAI.APISpecification.RequestBodies.swift +++ b/Sources/OpenAI/Intramodular/API/OpenAI.APISpecification.RequestBodies.swift @@ -358,6 +358,78 @@ extension OpenAI.APISpecification.RequestBodies { } } +extension OpenAI.APISpecification.RequestBodies { + public struct CreateSpeech: Codable { + + /// Encapsulates the voices available for audio generation. + /// + /// To get acquainted with each of the voices and listen to the samples, visit: + /// [OpenAI Text-to-Speech – Voice Options](https://platform.openai.com/docs/guides/text-to-speech/voice-options) + public enum Voice: String, Codable, CaseIterable { + case alloy + case echo + case fable + case onyx + case nova + case shimmer + } + + public enum ResponseFormat: String, Codable, CaseIterable { + case mp3 + case opus + case aac + case flac + } + + /// The text to generate audio for. The maximum length is 4096 characters. + public let input: String + /// One of the available TTS models: tts-1 or tts-1-hd + public let model: OpenAI.Model + /// The voice to use when generating the audio. Supported voices are alloy, echo, fable, onyx, nova, and shimmer. Previews of the voices are available in the Text to speech guide. + /// https://platform.openai.com/docs/guides/text-to-speech/voice-options + public let voice: Voice + /// The format to return audio in. Supported formats are mp3, opus, aac, and flac. + /// Defaults to mp3 + public let responseFormat: ResponseFormat? + /// The speed of the generated audio. Select a value from **0.25** to **4.0**. **1.0** is the default. + /// Defaults to 1 + public let speed: String? + + public enum CodingKeys: String, CodingKey { + case model + case input + case voice + case responseFormat = "response_format" + case speed + } + + public init(model: OpenAI.Model, input: String, voice: Voice, responseFormat: ResponseFormat = .mp3, speed: Double?) { + self.model = model + self.speed = CreateSpeech.normalizedSpeechSpeed(for: speed) + self.input = input + self.voice = voice + self.responseFormat = responseFormat + } + + enum Speed: Double { + case normal = 1.0 + case max = 4.0 + case min = 0.25 + } + + static func normalizedSpeechSpeed(for inputSpeed: Double?) -> String { + guard let inputSpeed else { return "\(Self.Speed.normal.rawValue)" } + let isSpeedOutOfBounds = inputSpeed < Self.Speed.min.rawValue || inputSpeed > Self.Speed.max.rawValue + guard !isSpeedOutOfBounds else { + return inputSpeed < Self.Speed.min.rawValue ? 
"\(Self.Speed.min.rawValue)" : "\(Self.Speed.max.rawValue)" + } + return "\(inputSpeed)" + } + } +} + + + // MARK: - Auxiliary extension OpenAI.APISpecification.RequestBodies.CreateChatCompletion { diff --git a/Sources/OpenAI/Intramodular/API/OpenAI.APISpecification.swift b/Sources/OpenAI/Intramodular/API/OpenAI.APISpecification.swift index d9290a52..994ee585 100644 --- a/Sources/OpenAI/Intramodular/API/OpenAI.APISpecification.swift +++ b/Sources/OpenAI/Intramodular/API/OpenAI.APISpecification.swift @@ -73,6 +73,13 @@ extension OpenAI { @Body(json: .input, keyEncodingStrategy: .convertToSnakeCase) public var createChatCompletions = Endpoint() + // MARK: Speech + + @POST + @Path("/v1/audio/speech") + @Body(json: .input, keyEncodingStrategy: .convertToSnakeCase) + public var createSpeech = Endpoint() + // MARK: Threads @Header(["OpenAI-Beta": "assistants=v1"]) @@ -218,10 +225,15 @@ extension OpenAI.APISpecification { throw apiError } - return try response.decode( - Output.self, - keyDecodingStrategy: .convertFromSnakeCase - ) + switch Output.self { + case Data.self: + return try cast(response.data, to: Output.self) + default: + return try response.decode( + Output.self, + keyDecodingStrategy: .convertFromSnakeCase + ) + } } } } diff --git a/Sources/OpenAI/Intramodular/Models/OpenAI.Model.swift b/Sources/OpenAI/Intramodular/Models/OpenAI.Model.swift index a10a90b3..d665b0fb 100644 --- a/Sources/OpenAI/Intramodular/Models/OpenAI.Model.swift +++ b/Sources/OpenAI/Intramodular/Models/OpenAI.Model.swift @@ -30,6 +30,7 @@ extension OpenAI { case instructGPT(InstructGPT) case embedding(Embedding) case chat(Chat) + case speech(Speech) /// Deprecated by OpenAI. case feature(Feature) @@ -58,6 +59,8 @@ extension OpenAI { return value case .chat(let value): return value + case .speech(let value): + return value case .unknown: assertionFailure(.unimplemented) @@ -203,76 +206,63 @@ extension OpenAI.Model { public var name: String { switch self { - case .gpt_3_5_turbo: - return "ChatGPT 3.5" - case .gpt_3_5_turbo_16k: - return "ChatGPT 3.5" - case .gpt_4: - return "ChatGPT 4" - case .gpt_4_32k: - return "ChatGPT 4" - case .gpt_4_1106_preview: - return "GPT-4 Turbo" - case .gpt_4_0125_preview: - return "GPT-4 Turbo" - case .gpt_4_vision_preview: - return "GPT-4V" - case .gpt_3_5_turbo_0301: - return "GPT-3.5" - case .gpt_3_5_turbo_0613: - return "GPT-3.5" - case .gpt_3_5_turbo_0125: - return "GPT-3.5" - case .gpt_3_5_turbo_16k_0613: - return "GPT-3.5" - case .gpt_4_0314: - return "GPT-4" - case .gpt_4_0613: - return "GPT-4" - case .gpt_4_32k_0314: - return "GPT-4" - case .gpt_4_32k_0613: - return "GPT-4" - case .gpt_4_turbo_preview: - return "GPT-4 Turbo (Preview)" + case .gpt_3_5_turbo: "ChatGPT 3.5" + case .gpt_3_5_turbo_16k: "ChatGPT 3.5" + case .gpt_4: "ChatGPT 4" + case .gpt_4_32k: "ChatGPT 4" + case .gpt_4_1106_preview: "GPT-4 Turbo" + case .gpt_4_0125_preview: "GPT-4 Turbo" + case .gpt_4_vision_preview: "GPT-4V" + case .gpt_3_5_turbo_0301: "GPT-3.5" + case .gpt_3_5_turbo_0613: "GPT-3.5" + case .gpt_3_5_turbo_0125: "GPT-3.5" + case .gpt_3_5_turbo_16k_0613: "GPT-3.5" + case .gpt_4_0314: "GPT-4" + case .gpt_4_0613: "GPT-4" + case .gpt_4_32k_0314: "GPT-4" + case .gpt_4_32k_0613: "GPT-4" + case .gpt_4_turbo_preview: "GPT-4 Turbo (Preview)" } } public var contextSize: Int { let _4k = 4096 let _8k = 8192 - let _16k = 16384 - let _32k = 16384 + let _16k = 16385 + let _32k = 32768 + let _128k = 128000 // let _128k = 131072 + return switch self { + case .gpt_3_5_turbo, .gpt_3_5_turbo_0125, .gpt_3_5_turbo_16k: 
_16k + case .gpt_4: _8k + case .gpt_4_32k: _32k + case .gpt_3_5_turbo_0301, .gpt_3_5_turbo_0613: _4k + case .gpt_3_5_turbo_16k_0613: _16k + case .gpt_4_0314: _8k + case .gpt_4_0613: _8k + case .gpt_4_32k_0314: _32k + case .gpt_4_32k_0613: _32k + case .gpt_4_1106_preview, .gpt_4_0125_preview: _128k + case .gpt_4_vision_preview: _128k + case .gpt_4_turbo_preview: _128k + } + } + } +} + +extension OpenAI.Model { + public enum Speech: String, Named, OpenAI._ModelType, CaseIterable { + case tts_1 = "tts-1" + case tts_1_hd = "tts-1-hd" + + public var contextSize: Int { return 4096 } + + public var name: String { switch self { - case .gpt_3_5_turbo: - return _4k - case .gpt_3_5_turbo_16k: - return _16k - case .gpt_4: - return _8k - case .gpt_4_32k: - return _32k - case .gpt_3_5_turbo_0301, .gpt_3_5_turbo_0613, .gpt_3_5_turbo_0125: - return _4k - case .gpt_3_5_turbo_16k_0613: - return _16k - case .gpt_4_0314: - return _8k - case .gpt_4_0613: - return _8k - case .gpt_4_32k_0314: - return _32k - case .gpt_4_32k_0613: - return _32k - case .gpt_4_1106_preview, .gpt_4_0125_preview: - return 4096 // FIXME: !!! - case .gpt_4_vision_preview: - return 4096 // FIXME: !!! - case .gpt_4_turbo_preview: - return 4096 // FIXME: !!! + case .tts_1: "Text-to-speech" + case .tts_1_hd: "Text-to-speech HD" } } } @@ -325,6 +315,8 @@ extension OpenAI.Model: RawRepresentable { return model.rawValue case .chat(let model): return model.rawValue + case .speech(let model): + return model.rawValue case .unknown(let rawValue): return rawValue } diff --git a/Sources/OpenAI/Intramodular/Models/OpenAI.Object.swift b/Sources/OpenAI/Intramodular/Models/OpenAI.Object.swift index 4ff31911..e407c252 100644 --- a/Sources/OpenAI/Intramodular/Models/OpenAI.Object.swift +++ b/Sources/OpenAI/Intramodular/Models/OpenAI.Object.swift @@ -12,6 +12,7 @@ extension OpenAI { case textCompletion = "text_completion" case chatCompletion = "chat.completion" case chatCompletionChunk = "chat.completion.chunk" + case speech = "speech" case thread = "thread" case message = "thread.message" case assistant = "assistant" @@ -30,6 +31,8 @@ extension OpenAI { return OpenAI.ChatCompletion.self case .chatCompletionChunk: return OpenAI.ChatCompletionChunk.self + case .speech: + return OpenAI.Speech.self case .thread: return OpenAI.Thread.self case .message: diff --git a/Sources/OpenAI/Intramodular/Models/OpenAI.Speech.swift b/Sources/OpenAI/Intramodular/Models/OpenAI.Speech.swift new file mode 100644 index 00000000..e32dd551 --- /dev/null +++ b/Sources/OpenAI/Intramodular/Models/OpenAI.Speech.swift @@ -0,0 +1,31 @@ +// +// OpenAI.Speech.swift +// graph +// +// Created by Purav Manot on 10/03/24. 
+// + +import Foundation + +extension OpenAI { + public final class Speech: OpenAI.Object { + public let data: Data + + public required init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + + self.data = try container.decode(forKey: .data) + + try super.init(from: decoder) + } + + public init(data: Data) { + self.data = data + super.init(type: .speech) + } + + enum CodingKeys: CodingKey { + case data + } + } +} diff --git a/Sources/OpenAI/Intramodular/OpenAI.APIClient.swift b/Sources/OpenAI/Intramodular/OpenAI.APIClient.swift index 820acb77..ce0701cf 100644 --- a/Sources/OpenAI/Intramodular/OpenAI.APIClient.swift +++ b/Sources/OpenAI/Intramodular/OpenAI.APIClient.swift @@ -135,6 +135,41 @@ extension OpenAI.APIClient { } } +extension OpenAI.APIClient { + public func createSpeech( + model: OpenAI.Model, + text: String, + voice: OpenAI.APISpecification.RequestBodies.CreateSpeech.Voice = .alloy, + speed: Double? + ) async throws -> OpenAI.Speech { + let requestBody = OpenAI.APISpecification.RequestBodies.CreateSpeech( + model: model, + input: text, + voice: voice, + speed: speed + ) + let data = try await run(\.createSpeech, with: requestBody) + return OpenAI.Speech(data: data) + } + + public func createSpeech( + model: OpenAI.Model.Speech, + text: String, + voice: OpenAI.APISpecification.RequestBodies.CreateSpeech.Voice = .alloy, + speed: Double? + ) async throws -> OpenAI.Speech { + let requestBody = OpenAI.APISpecification.RequestBodies.CreateSpeech( + model: OpenAI.Model.speech(model), + input: text, + voice: voice, + speed: speed + ) + let data = try await run(\.createSpeech, with: requestBody) + return OpenAI.Speech(data: data) + } + +} + extension OpenAI.APIClient { @discardableResult public func createRun(