diff --git a/Demo/DemoChat/Sources/ChatStore.swift b/Demo/DemoChat/Sources/ChatStore.swift index 51ee6b11..0462e87a 100644 --- a/Demo/DemoChat/Sources/ChatStore.swift +++ b/Demo/DemoChat/Sources/ChatStore.swift @@ -85,7 +85,7 @@ public final class ChatStore: ObservableObject { return } - let weatherFunction = ChatFunctionDeclaration( + let weatherFunction = ChatQuery.ChatCompletionToolParam(function: .init( name: "getWeatherData", description: "Get the current weather in a given location", parameters: .init( @@ -95,38 +95,38 @@ public final class ChatStore: ObservableObject { ], required: ["location"] ) - ) + )) let functions = [weatherFunction] let chatsStream: AsyncThrowingStream = openAIClient.chatsStream( query: ChatQuery( - model: model, messages: conversation.messages.map { message in - Chat(role: message.role, content: message.content) - }, - functions: functions + ChatQuery.ChatCompletionMessageParam(role: message.role, content: message.content)! + }, model: model, + tools: functions ) ) - var functionCallName = "" - var functionCallArguments = "" + var functionCalls = [(name: String, argument: String?)]() for try await partialChatResult in chatsStream { for choice in partialChatResult.choices { let existingMessages = conversations[conversationIndex].messages // Function calls are also streamed, so we need to accumulate. - if let functionCallDelta = choice.delta.functionCall { - if let nameDelta = functionCallDelta.name { - functionCallName += nameDelta - } - if let argumentsDelta = functionCallDelta.arguments { - functionCallArguments += argumentsDelta + choice.delta.toolCalls?.forEach { toolCallDelta in + if let functionCallDelta = toolCallDelta.function { + if let nameDelta = functionCallDelta.name { + functionCalls.append((nameDelta, functionCallDelta.arguments)) + } } } var messageText = choice.delta.content ?? 
"" if let finishReason = choice.finishReason, - finishReason == "function_call" { - messageText += "Function call: name=\(functionCallName) arguments=\(functionCallArguments)" + finishReason == .toolCalls + { + functionCalls.forEach { (name: String, argument: String?) in + messageText += "Function call: name=\(name) arguments=\(argument ?? "")\n" + } } let message = Message( id: partialChatResult.id, diff --git a/Demo/DemoChat/Sources/Extensions/View.swift b/Demo/DemoChat/Sources/Extensions/View.swift new file mode 100644 index 00000000..4f311176 --- /dev/null +++ b/Demo/DemoChat/Sources/Extensions/View.swift @@ -0,0 +1,65 @@ +// +// View.swift +// +// +// Created by James J Kalafus on 2024-02-03. +// + +import SwiftUI + +extension View { + + @inlinable public func navigationTitle(_ titleKey: LocalizedStringKey, selectedModel: Binding) -> some View { + self + .navigationTitle(titleKey) + .safeAreaInset(edge: .top) { + HStack { + Text( + "Model: \(selectedModel.wrappedValue)" + ) + .font(.caption) + .foregroundColor(.secondary) + Spacer() + } + .padding(.horizontal, 16) + .padding(.vertical, 8) + } + } + + @inlinable public func modelSelect(selectedModel: Binding, models: [String], showsModelSelectionSheet: Binding, help: String) -> some View { + self + .toolbar { + ToolbarItem(placement: .navigationBarTrailing) { + Button(action: { + showsModelSelectionSheet.wrappedValue.toggle() + }) { + Image(systemName: "cpu") + } + } + } + .confirmationDialog( + "Select model", + isPresented: showsModelSelectionSheet, + titleVisibility: .visible, + actions: { + ForEach(models, id: \.self) { (model: String) in + Button { + selectedModel.wrappedValue = model + } label: { + Text(model) + } + } + + Button("Cancel", role: .cancel) { + showsModelSelectionSheet.wrappedValue = false + } + }, + message: { + Text( + "View \(help) for details" + ) + .font(.caption) + } + ) + } +} diff --git a/Demo/DemoChat/Sources/ImageStore.swift b/Demo/DemoChat/Sources/ImageStore.swift index 
48ca2967..5f479d2a 100644 --- a/Demo/DemoChat/Sources/ImageStore.swift +++ b/Demo/DemoChat/Sources/ImageStore.swift @@ -11,7 +11,7 @@ import OpenAI public final class ImageStore: ObservableObject { public var openAIClient: OpenAIProtocol - @Published var images: [ImagesResult.URLResult] = [] + @Published var images: [ImagesResult.Image] = [] public init( openAIClient: OpenAIProtocol diff --git a/Demo/DemoChat/Sources/MiscStore.swift b/Demo/DemoChat/Sources/MiscStore.swift index 8665974b..4693e423 100644 --- a/Demo/DemoChat/Sources/MiscStore.swift +++ b/Demo/DemoChat/Sources/MiscStore.swift @@ -51,7 +51,7 @@ public final class MiscStore: ObservableObject { do { let response = try await openAIClient.moderations( query: ModerationsQuery( - input: message.content, + input: .init(message.content), model: .textModerationLatest ) ) diff --git a/Demo/DemoChat/Sources/Models/Message.swift b/Demo/DemoChat/Sources/Models/Message.swift index afea9099..a8429da0 100644 --- a/Demo/DemoChat/Sources/Models/Message.swift +++ b/Demo/DemoChat/Sources/Models/Message.swift @@ -10,7 +10,7 @@ import OpenAI struct Message { var id: String - var role: Chat.Role + var role: ChatQuery.ChatCompletionMessageParam.Role var content: String var createdAt: Date } diff --git a/Demo/DemoChat/Sources/SpeechStore.swift b/Demo/DemoChat/Sources/SpeechStore.swift index 516d5cb8..dfd7ef9e 100644 --- a/Demo/DemoChat/Sources/SpeechStore.swift +++ b/Demo/DemoChat/Sources/SpeechStore.swift @@ -30,15 +30,16 @@ public final class SpeechStore: ObservableObject { @MainActor func createSpeech(_ query: AudioSpeechQuery) async { - guard let input = query.input, !input.isEmpty else { return } + let input = query.input + guard !input.isEmpty else { return } do { let response = try await openAIClient.audioCreateSpeech(query: query) - guard let data = response.audioData else { return } + let data = response.audio let player = try? 
AVAudioPlayer(data: data) let audioObject = AudioObject(prompt: input, audioPlayer: player, originResponse: response, - format: query.responseFormat.rawValue) + format: query.responseFormat?.rawValue ?? AudioSpeechQuery.AudioSpeechResponseFormat.mp3.rawValue) audioObjects.append(audioObject) } catch { print(error.localizedDescription) diff --git a/Demo/DemoChat/Sources/UI/DetailView.swift b/Demo/DemoChat/Sources/UI/DetailView.swift index 9e2a07e9..7aa44479 100644 --- a/Demo/DemoChat/Sources/UI/DetailView.swift +++ b/Demo/DemoChat/Sources/UI/DetailView.swift @@ -19,7 +19,7 @@ struct DetailView: View { @State private var showsModelSelectionSheet = false @State private var selectedChatModel: Model = .gpt4_0613 - private let availableChatModels: [Model] = [.gpt3_5Turbo0613, .gpt4_0613] + private static let availableChatModels: [Model] = [.gpt3_5Turbo, .gpt4] let conversation: Conversation let error: Error? @@ -65,52 +65,8 @@ struct DetailView: View { inputBar(scrollViewProxy: scrollViewProxy) } - .navigationTitle("Chat") - .safeAreaInset(edge: .top) { - HStack { - Text( - "Model: \(selectedChatModel)" - ) - .font(.caption) - .foregroundColor(.secondary) - Spacer() - } - .padding(.horizontal, 16) - .padding(.vertical, 8) - } - .toolbar { - ToolbarItem(placement: .navigationBarTrailing) { - Button(action: { - showsModelSelectionSheet.toggle() - }) { - Image(systemName: "cpu") - } - } - } - .confirmationDialog( - "Select model", - isPresented: $showsModelSelectionSheet, - titleVisibility: .visible, - actions: { - ForEach(availableChatModels, id: \.self) { model in - Button { - selectedChatModel = model - } label: { - Text(model) - } - } - - Button("Cancel", role: .cancel) { - showsModelSelectionSheet = false - } - }, - message: { - Text( - "View https://platform.openai.com/docs/models/overview for details" - ) - .font(.caption) - } - ) + .navigationTitle("Chat", selectedModel: $selectedChatModel) + .modelSelect(selectedModel: $selectedChatModel, models: 
Self.availableChatModels, showsModelSelectionSheet: $showsModelSelectionSheet, help: "https://platform.openai.com/docs/models/overview") } } } @@ -243,7 +199,7 @@ struct ChatBubble: View { .foregroundColor(userForegroundColor) .background(userBackgroundColor) .clipShape(RoundedRectangle(cornerRadius: 16, style: .continuous)) - case .function: + case .tool: Text(message.content) .font(.footnote.monospaced()) .padding(.horizontal, 16) @@ -267,7 +223,7 @@ struct DetailView_Previews: PreviewProvider { Message(id: "1", role: .assistant, content: "Hello, how can I help you today?", createdAt: Date(timeIntervalSinceReferenceDate: 0)), Message(id: "2", role: .user, content: "I need help with my subscription.", createdAt: Date(timeIntervalSinceReferenceDate: 100)), Message(id: "3", role: .assistant, content: "Sure, what seems to be the problem with your subscription?", createdAt: Date(timeIntervalSinceReferenceDate: 200)), - Message(id: "4", role: .function, content: + Message(id: "4", role: .tool, content: """ get_current_weather({ "location": "Glasgow, Scotland", diff --git a/Demo/DemoChat/Sources/UI/Images/ImageCreationView.swift b/Demo/DemoChat/Sources/UI/Images/ImageCreationView.swift index 98ea5216..11ba9ac3 100644 --- a/Demo/DemoChat/Sources/UI/Images/ImageCreationView.swift +++ b/Demo/DemoChat/Sources/UI/Images/ImageCreationView.swift @@ -13,9 +13,9 @@ public struct ImageCreationView: View { @State private var prompt: String = "" @State private var n: Int = 1 - @State private var size: String - - private var sizes = ["256x256", "512x512", "1024x1024"] + @State private var size = ImagesQuery.Size.allCases.first! 
+ + private var sizes = ImagesQuery.Size.allCases public init(store: ImageStore) { self.store = store @@ -37,7 +37,7 @@ public struct ImageCreationView: View { HStack { Picker("Size", selection: $size) { ForEach(sizes, id: \.self) { - Text($0) + Text($0.rawValue) } } } @@ -56,7 +56,7 @@ public struct ImageCreationView: View { } if !$store.images.isEmpty { Section("Images") { - ForEach($store.images, id: \.self) { image in + ForEach($store.images, id: \.url) { image in let urlString = image.wrappedValue.url ?? "" if let imageURL = URL(string: urlString), UIApplication.shared.canOpenURL(imageURL) { LinkPreview(previewURL: imageURL) diff --git a/Demo/DemoChat/Sources/UI/Misc/ListModelsView.swift b/Demo/DemoChat/Sources/UI/Misc/ListModelsView.swift index b7396698..9997b6a9 100644 --- a/Demo/DemoChat/Sources/UI/Misc/ListModelsView.swift +++ b/Demo/DemoChat/Sources/UI/Misc/ListModelsView.swift @@ -12,7 +12,7 @@ public struct ListModelsView: View { public var body: some View { NavigationStack { - List($store.availableModels) { row in + List($store.availableModels.wrappedValue, id: \.id) { row in Text(row.id) } .listStyle(.insetGrouped) diff --git a/Demo/DemoChat/Sources/UI/TextToSpeechView.swift b/Demo/DemoChat/Sources/UI/TextToSpeechView.swift index 459a4423..7da3e365 100644 --- a/Demo/DemoChat/Sources/UI/TextToSpeechView.swift +++ b/Demo/DemoChat/Sources/UI/TextToSpeechView.swift @@ -15,9 +15,13 @@ public struct TextToSpeechView: View { @State private var prompt: String = "" @State private var voice: AudioSpeechQuery.AudioSpeechVoice = .alloy - @State private var speed: Double = 1 + @State private var speed: Double = AudioSpeechQuery.Speed.normal.rawValue @State private var responseFormat: AudioSpeechQuery.AudioSpeechResponseFormat = .mp3 - + @State private var showsModelSelectionSheet = false + @State private var selectedSpeechModel: String = Model.tts_1 + + private static let availableSpeechModels: [String] = [Model.tts_1, Model.tts_1_hd] + public init(store: 
SpeechStore) { self.store = store } @@ -56,7 +60,7 @@ public struct TextToSpeechView: View { HStack { Text("Speed: ") Spacer() - Stepper(value: $speed, in: 0.25...4, step: 0.25) { + Stepper(value: $speed, in: AudioSpeechQuery.Speed.min.rawValue...AudioSpeechQuery.Speed.max.rawValue, step: 0.25) { HStack { Spacer() Text("**\(String(format: "%.2f", speed))**") @@ -79,7 +83,7 @@ public struct TextToSpeechView: View { Section { HStack { Button("Create Speech") { - let query = AudioSpeechQuery(model: .tts_1, + let query = AudioSpeechQuery(model: selectedSpeechModel, input: prompt, voice: voice, responseFormat: responseFormat, @@ -93,10 +97,11 @@ public struct TextToSpeechView: View { .disabled(prompt.replacingOccurrences(of: " ", with: "").isEmpty) Spacer() } + .modelSelect(selectedModel: $selectedSpeechModel, models: Self.availableSpeechModels, showsModelSelectionSheet: $showsModelSelectionSheet, help: "https://platform.openai.com/docs/models/tts") } if !$store.audioObjects.wrappedValue.isEmpty { Section("Click to play, swipe to save:") { - ForEach(store.audioObjects) { object in + ForEach(store.audioObjects, id: \.id) { object in HStack { Text(object.prompt.capitalized) Spacer() @@ -117,7 +122,7 @@ public struct TextToSpeechView: View { } .swipeActions(edge: .trailing, allowsFullSwipe: false) { Button { - presentUserDirectoryDocumentPicker(for: object.originResponse.audioData, filename: "GeneratedAudio.\(object.format)") + presentUserDirectoryDocumentPicker(for: object.originResponse.audio, filename: "GeneratedAudio.\(object.format)") } label: { Image(systemName: "square.and.arrow.down") } @@ -129,7 +134,7 @@ public struct TextToSpeechView: View { } .listStyle(.insetGrouped) .scrollDismissesKeyboard(.interactively) - .navigationTitle("Create Speech") + .navigationTitle("Create Speech", selectedModel: $selectedSpeechModel) } } diff --git a/Sources/OpenAI/OpenAI.swift b/Sources/OpenAI/OpenAI.swift index 1e8d27eb..e91b33f5 100644 --- a/Sources/OpenAI/OpenAI.swift +++ 
b/Sources/OpenAI/OpenAI.swift @@ -35,7 +35,7 @@ final public class OpenAI: OpenAIProtocol { } private let session: URLSessionProtocol - private var streamingSessions: [NSObject] = [] + private var streamingSessions = ArrayWithThreadSafety() public let configuration: Configuration @@ -61,7 +61,7 @@ final public class OpenAI: OpenAIProtocol { } public func completionsStream(query: CompletionsQuery, onResult: @escaping (Result) -> Void, completion: ((Error?) -> Void)?) { - performSteamingRequest(request: JSONRequest(body: query.makeStreamable(), url: buildURL(path: .completions)), onResult: onResult, completion: completion) + performStreamingRequest(request: JSONRequest(body: query.makeStreamable(), url: buildURL(path: .completions)), onResult: onResult, completion: completion) } public func images(query: ImagesQuery, completion: @escaping (Result) -> Void) { @@ -85,7 +85,7 @@ final public class OpenAI: OpenAIProtocol { } public func chatsStream(query: ChatQuery, onResult: @escaping (Result) -> Void, completion: ((Error?) -> Void)?) { - performSteamingRequest(request: JSONRequest(body: query.makeStreamable(), url: buildURL(path: .chats)), onResult: onResult, completion: completion) + performStreamingRequest(request: JSONRequest(body: query.makeStreamable(), url: buildURL(path: .chats)), onResult: onResult, completion: completion) } public func edits(query: EditsQuery, completion: @escaping (Result) -> Void) { @@ -145,7 +145,7 @@ extension OpenAI { } } - func performSteamingRequest(request: any URLRequestBuildable, onResult: @escaping (Result) -> Void, completion: ((Error?) -> Void)?) { + func performStreamingRequest(request: any URLRequestBuildable, onResult: @escaping (Result) -> Void, completion: ((Error?) -> Void)?) 
{ do { let request = try request.build(token: configuration.token, organizationIdentifier: configuration.organizationIdentifier, @@ -182,7 +182,7 @@ extension OpenAI { return completion(.failure(OpenAIError.emptyData)) } - completion(.success(AudioSpeechResult(audioData: data))) + completion(.success(AudioSpeechResult(audio: data))) } task.resume() } catch { diff --git a/Sources/OpenAI/Public/Models/AudioSpeechQuery.swift b/Sources/OpenAI/Public/Models/AudioSpeechQuery.swift index 36db44a5..e8d4382e 100644 --- a/Sources/OpenAI/Public/Models/AudioSpeechQuery.swift +++ b/Sources/OpenAI/Public/Models/AudioSpeechQuery.swift @@ -7,8 +7,9 @@ import Foundation +/// Generates audio from the input text. /// Learn more: [OpenAI Speech – Documentation](https://platform.openai.com/docs/api-reference/audio/createSpeech) -public struct AudioSpeechQuery: Codable, Equatable { +public struct AudioSpeechQuery: Codable { /// Encapsulates the voices available for audio generation. /// @@ -36,15 +37,19 @@ public struct AudioSpeechQuery: Codable, Equatable { case aac case flac } + + /// The text to generate audio for. The maximum length is 4096 characters. + public let input: String /// One of the available TTS models: tts-1 or tts-1-hd public let model: Model - /// The text to generate audio for. The maximum length is 4096 characters. - public let input: String? - /// The voice to use when generating the audio. Supported voices are alloy, echo, fable, onyx, nova, and shimmer. + /// The voice to use when generating the audio. Supported voices are alloy, echo, fable, onyx, nova, and shimmer. Previews of the voices are available in the Text to speech guide. + /// https://platform.openai.com/docs/guides/text-to-speech/voice-options public let voice: AudioSpeechVoice /// The format to audio in. Supported formats are mp3, opus, aac, and flac. - public let responseFormat: AudioSpeechResponseFormat - /// The speed of the generated audio. Enter a value between **0.25** and **4.0**. 
Default: **1.0** + /// Defaults to mp3 + public let responseFormat: AudioSpeechResponseFormat? + /// The speed of the generated audio. Select a value from **0.25** to **4.0**. **1.0** is the default. + /// Defaults to 1 public let speed: String? public enum CodingKeys: String, CodingKey { @@ -54,13 +59,7 @@ public struct AudioSpeechQuery: Codable, Equatable { case responseFormat = "response_format" case speed } - - private enum Constants { - static let normalSpeed = 1.0 - static let maxSpeed = 4.0 - static let minSpeed = 0.25 - } - + public init(model: Model, input: String, voice: AudioSpeechVoice, responseFormat: AudioSpeechResponseFormat = .mp3, speed: Double?) { self.model = AudioSpeechQuery.validateSpeechModel(model) self.speed = AudioSpeechQuery.normalizeSpeechSpeed(speed) @@ -80,13 +79,22 @@ private extension AudioSpeechQuery { } return inputModel } - +} + +public extension AudioSpeechQuery { + + enum Speed: Double { + case normal = 1.0 + case max = 4.0 + case min = 0.25 + } + static func normalizeSpeechSpeed(_ inputSpeed: Double?) -> String { - guard let inputSpeed else { return "\(Constants.normalSpeed)" } - let isSpeedOutOfBounds = inputSpeed >= Constants.maxSpeed && inputSpeed <= Constants.minSpeed + guard let inputSpeed else { return "\(Self.Speed.normal.rawValue)" } + let isSpeedOutOfBounds = inputSpeed <= Self.Speed.min.rawValue || Self.Speed.max.rawValue <= inputSpeed guard !isSpeedOutOfBounds else { print("[AudioSpeech] Speed value must be between 0.25 and 4.0. Setting value to closest valid.") - return inputSpeed < Constants.minSpeed ? "\(Constants.minSpeed)" : "\(Constants.maxSpeed)" + return inputSpeed < Self.Speed.min.rawValue ? 
"\(Self.Speed.min.rawValue)" : "\(Self.Speed.max.rawValue)" } return "\(inputSpeed)" } diff --git a/Sources/OpenAI/Public/Models/AudioSpeechResult.swift b/Sources/OpenAI/Public/Models/AudioSpeechResult.swift index 4d8e62fb..18bae3e5 100644 --- a/Sources/OpenAI/Public/Models/AudioSpeechResult.swift +++ b/Sources/OpenAI/Public/Models/AudioSpeechResult.swift @@ -7,8 +7,10 @@ import Foundation -public struct AudioSpeechResult { - +/// The audio file content. +/// Learn more: [OpenAI Speech – Documentation](https://platform.openai.com/docs/api-reference/audio/createSpeech) +public struct AudioSpeechResult: Codable, Equatable { + /// Audio data for one of the following formats :`mp3`, `opus`, `aac`, `flac` - public let audioData: Data? + public let audio: Data } diff --git a/Sources/OpenAI/Public/Models/AudioTranscriptionQuery.swift b/Sources/OpenAI/Public/Models/AudioTranscriptionQuery.swift index fd61166f..1acdd724 100644 --- a/Sources/OpenAI/Public/Models/AudioTranscriptionQuery.swift +++ b/Sources/OpenAI/Public/Models/AudioTranscriptionQuery.swift @@ -7,7 +7,9 @@ import Foundation -public enum AudioResponseFormat: String, Codable, Equatable { +public struct AudioTranscriptionQuery: Codable { + +public enum ResponseFormat: String, Codable, Equatable, CaseIterable { case json case text case verboseJson = "verbose_json" @@ -15,34 +17,77 @@ public enum AudioResponseFormat: String, Codable, Equatable { case vtt } -public struct AudioTranscriptionQuery: Codable, Equatable { - public typealias ResponseFormat = AudioResponseFormat - + /// The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. public let file: Data - public let fileName: String + public let fileType: Self.FileType + /// ID of the model to use. Only whisper-1 is currently available. public let model: Model + /// The format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt. 
+ /// Defaults to json public let responseFormat: Self.ResponseFormat? - + /// An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language. public let prompt: String? + /// The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit. + /// Defaults to 0 public let temperature: Double? + /// The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency. + /// https://platform.openai.com/docs/guides/speech-to-text/prompting public let language: String? - - public init(file: Data, fileName: String, model: Model, prompt: String? = nil, temperature: Double? = nil, language: String? = nil, responseFormat: Self.ResponseFormat? = nil) { + + public init(file: Data, fileType: Self.FileType, model: Model, prompt: String? = nil, temperature: Double? = nil, language: String? = nil, responseFormat: Self.ResponseFormat? = nil) { self.file = file - self.fileName = fileName + self.fileType = fileType self.model = model self.prompt = prompt self.temperature = temperature self.language = language self.responseFormat = responseFormat } + + public enum FileType: String, Codable, Equatable, CaseIterable { + case flac + case mp3, mpga + case mp4, m4a + case mpeg + case ogg + case wav + case webm + + var fileName: String { get { + var fileName = "speech." 
+ switch self { + case .mpga: + fileName += Self.mp3.rawValue + case .m4a: + fileName += Self.mp4.rawValue + default: + fileName += self.rawValue + } + + return fileName + }} + + var contentType: String { get { + var contentType = "audio/" + switch self { + case .mpga: + contentType += Self.mp3.rawValue + case .m4a: + contentType += Self.mp4.rawValue + default: + contentType += self.rawValue + } + + return contentType + }} + } } extension AudioTranscriptionQuery: MultipartFormDataBodyEncodable { func encode(boundary: String) -> Data { let bodyBuilder = MultipartFormDataBodyBuilder(boundary: boundary, entries: [ - .file(paramName: "file", fileName: fileName, fileData: file, contentType: "audio/mpeg"), + .file(paramName: "file", fileName: fileType.fileName, fileData: file, contentType: fileType.contentType), .string(paramName: "model", value: model), .string(paramName: "prompt", value: prompt), .string(paramName: "temperature", value: temperature), diff --git a/Sources/OpenAI/Public/Models/AudioTranscriptionResult.swift b/Sources/OpenAI/Public/Models/AudioTranscriptionResult.swift index 0a334639..b1c96f56 100644 --- a/Sources/OpenAI/Public/Models/AudioTranscriptionResult.swift +++ b/Sources/OpenAI/Public/Models/AudioTranscriptionResult.swift @@ -8,6 +8,7 @@ import Foundation public struct AudioTranscriptionResult: Codable, Equatable { - + + /// The transcribed text. public let text: String } diff --git a/Sources/OpenAI/Public/Models/AudioTranslationQuery.swift b/Sources/OpenAI/Public/Models/AudioTranslationQuery.swift index 27d2d285..0e3b3864 100644 --- a/Sources/OpenAI/Public/Models/AudioTranslationQuery.swift +++ b/Sources/OpenAI/Public/Models/AudioTranslationQuery.swift @@ -7,20 +7,29 @@ import Foundation -public struct AudioTranslationQuery: Codable, Equatable { - public typealias ResponseFormat = AudioResponseFormat - +/// Translates audio into English. 
+public struct AudioTranslationQuery: Codable { + public typealias FileType = AudioTranscriptionQuery.FileType + public typealias ResponseFormat = AudioTranscriptionQuery.ResponseFormat + + /// The audio file object (not file name) translate, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. public let file: Data - public let fileName: String + public let fileType: Self.FileType + /// ID of the model to use. Only whisper-1 is currently available. public let model: Model - + /// The format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt. + /// Defaults to json public let responseFormat: Self.ResponseFormat? + /// An optional text to guide the model's style or continue a previous audio segment. The prompt should be in English. + /// https://platform.openai.com/docs/guides/speech-to-text/prompting public let prompt: String? + /// The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit. + /// Defaults to 0 public let temperature: Double? - public init(file: Data, fileName: String, model: Model, prompt: String? = nil, temperature: Double? = nil, responseFormat: Self.ResponseFormat? = nil) { + public init(file: Data, fileType: Self.FileType, model: Model, prompt: String? = nil, temperature: Double? = nil, responseFormat: Self.ResponseFormat? 
= nil) { self.file = file - self.fileName = fileName + self.fileType = fileType self.model = model self.prompt = prompt self.temperature = temperature @@ -32,7 +41,7 @@ extension AudioTranslationQuery: MultipartFormDataBodyEncodable { func encode(boundary: String) -> Data { let bodyBuilder = MultipartFormDataBodyBuilder(boundary: boundary, entries: [ - .file(paramName: "file", fileName: fileName, fileData: file, contentType: "audio/mpeg"), + .file(paramName: "file", fileName: fileType.fileName, fileData: file, contentType: fileType.contentType), .string(paramName: "model", value: model), .string(paramName: "prompt", value: prompt), .string(paramName: "response_format", value: responseFormat), diff --git a/Sources/OpenAI/Public/Models/AudioTranslationResult.swift b/Sources/OpenAI/Public/Models/AudioTranslationResult.swift index 4567f7b6..339fcfe8 100644 --- a/Sources/OpenAI/Public/Models/AudioTranslationResult.swift +++ b/Sources/OpenAI/Public/Models/AudioTranslationResult.swift @@ -8,6 +8,7 @@ import Foundation public struct AudioTranslationResult: Codable, Equatable { - + + /// The translated text. 
public let text: String } diff --git a/Sources/OpenAI/Public/Models/ChatQuery.swift b/Sources/OpenAI/Public/Models/ChatQuery.swift index 58be8f16..f2f1a98c 100644 --- a/Sources/OpenAI/Public/Models/ChatQuery.swift +++ b/Sources/OpenAI/Public/Models/ChatQuery.swift @@ -7,264 +7,577 @@ import Foundation -// See more https://platform.openai.com/docs/guides/text-generation/json-mode -public struct ResponseFormat: Codable, Equatable { - public static let jsonObject = ResponseFormat(type: .jsonObject) - public static let text = ResponseFormat(type: .text) - - public let type: Self.ResponseFormatType - - public enum ResponseFormatType: String, Codable, Equatable { - case jsonObject = "json_object" - case text - } -} +/// Creates a model response for the given chat conversation +/// https://platform.openai.com/docs/guides/text-generation +public struct ChatQuery: Equatable, Codable, Streamable { -public struct Chat: Codable, Equatable { - public let role: Role - /// The contents of the message. `content` is required for all messages except assistant messages with function calls. - public let content: String? - /// The name of the author of this message. `name` is required if role is `function`, and it should be the name of the function whose response is in the `content`. May contain a-z, A-Z, 0-9, and underscores, with a maximum length of 64 characters. - public let name: String? - public let functionCall: ChatFunctionCall? - - public enum Role: String, Codable, Equatable { - case system - case assistant - case user - case function - } - - enum CodingKeys: String, CodingKey { - case role - case content - case name - case functionCall = "function_call" - } - - public init(role: Role, content: String? = nil, name: String? = nil, functionCall: ChatFunctionCall? 
= nil) { - self.role = role - self.content = content - self.name = name - self.functionCall = functionCall + /// A list of messages comprising the conversation so far + public let messages: [Self.ChatCompletionMessageParam] + /// ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API. + /// https://platform.openai.com/docs/models/model-endpoint-compatibility + public let model: Model + /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + /// Defaults to 0 + /// https://platform.openai.com/docs/guides/text-generation/parameter-details + public let frequencyPenalty: Double? + /// Modify the likelihood of specified tokens appearing in the completion. + /// Accepts a JSON object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. + /// Defaults to null + public let logitBias: [String:Int]? + /// Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message. This option is currently not available on the gpt-4-vision-preview model. + /// Defaults to false + public let logprobs: Bool? + /// The maximum number of tokens to generate in the completion. + /// The total length of input tokens and generated tokens is limited by the model's context length. + /// https://platform.openai.com/tokenizer + public let maxTokens: Int? + /// How many chat completion choices to generate for each input message. 
Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs. + /// Defaults to 1 + public let n: Int? + /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + /// https://platform.openai.com/docs/guides/text-generation/parameter-details + public let presencePenalty: Double? + /// An object specifying the format that the model must output. Compatible with gpt-4-1106-preview and gpt-3.5-turbo-1106. + /// Setting to { "type": "json_object" } enables JSON mode, which guarantees the message the model generates is valid JSON. + /// Important: when using JSON mode, you must also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if finish_reason="length", which indicates the generation exceeded max_tokens or the conversation exceeded the max context length. + public let responseFormat: Self.ResponseFormat? + /// This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend. + public let seed: Int? // BETA + /// Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. + /// Defaults to null + public let stop: Stop? + /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. 
+ /// We generally recommend altering this or top_p but not both. + /// Defaults to 1 + public let temperature: Double? + /// Controls which (if any) function is called by the model. none means the model will not call a function and instead generates a message. auto means the model can pick between generating a message or calling a function. Specifying a particular function via {"type": "function", "function": {"name": "my_function"}} forces the model to call that function. + /// none is the default when no functions are present. auto is the default if functions are present + public let toolChoice: Self.ChatCompletionFunctionCallOptionParam? + /// A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. + public let tools: [Self.ChatCompletionToolParam]? + /// An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used. + public let topLogprobs: Int? + /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. + /// We generally recommend altering this or temperature but not both. + /// Defaults to 1 + public let topP: Double? + /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + /// https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids + public let user: String? + /// If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a data: [DONE] message. 
+ /// https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format + public var stream: Bool + + public init( + messages: [Self.ChatCompletionMessageParam], + model: Model, + frequencyPenalty: Double? = nil, + logitBias: [String : Int]? = nil, + logprobs: Bool? = nil, + maxTokens: Int? = nil, + n: Int? = nil, + presencePenalty: Double? = nil, + responseFormat: Self.ResponseFormat? = nil, + seed: Int? = nil, + stop: Self.Stop? = nil, + temperature: Double? = nil, + toolChoice: Self.ChatCompletionFunctionCallOptionParam? = nil, + tools: [Self.ChatCompletionToolParam]? = nil, + topLogprobs: Int? = nil, + topP: Double? = nil, + user: String? = nil, + stream: Bool = false + ) { + self.messages = messages + self.model = model + self.frequencyPenalty = frequencyPenalty + self.logitBias = logitBias + self.logprobs = logprobs + self.maxTokens = maxTokens + self.n = n + self.presencePenalty = presencePenalty + self.responseFormat = responseFormat + self.seed = seed + self.stop = stop + self.temperature = temperature + self.toolChoice = toolChoice + self.tools = tools + self.topLogprobs = topLogprobs + self.topP = topP + self.user = user + self.stream = stream } - public func encode(to encoder: Encoder) throws { - var container = encoder.container(keyedBy: CodingKeys.self) - try container.encode(role, forKey: .role) + public enum ChatCompletionMessageParam: Codable, Equatable { + + case system(Self.ChatCompletionSystemMessageParam) + case user(Self.ChatCompletionUserMessageParam) + case assistant(Self.ChatCompletionAssistantMessageParam) + case tool(Self.ChatCompletionToolMessageParam) + + public var content: Self.ChatCompletionUserMessageParam.Content? 
{ get { // TODO: String type except for .user + switch self { + case .system(let systemMessage): + return Self.ChatCompletionUserMessageParam.Content.string(systemMessage.content) + case .user(let userMessage): + return userMessage.content // TODO: Content type + case .assistant(let assistantMessage): + if let content = assistantMessage.content { + return Self.ChatCompletionUserMessageParam.Content.string(content) + } + return nil + case .tool(let toolMessage): + return Self.ChatCompletionUserMessageParam.Content.string(toolMessage.content) + } + }} + + public var role: Role { get { + switch self { + case .system(let systemMessage): + return systemMessage.role + case .user(let userMessage): + return userMessage.role + case .assistant(let assistantMessage): + return assistantMessage.role + case .tool(let toolMessage): + return toolMessage.role + } + }} - if let name = name { - try container.encode(name, forKey: .name) + public var name: String? { get { + switch self { + case .system(let systemMessage): + return systemMessage.name + case .user(let userMessage): + return userMessage.name + case .assistant(let assistantMessage): + return assistantMessage.name + default: + return nil + } + }} + + public var toolCallId: String? { get { + switch self { + case .tool(let toolMessage): + return toolMessage.toolCallId + default: + return nil + } + }} + + public var toolCalls: [Self.ChatCompletionAssistantMessageParam.ChatCompletionMessageToolCallParam]? { get { + switch self { + case .assistant(let assistantMessage): + return assistantMessage.toolCalls + default: + return nil + } + }} + + public init?( + role: Role, + content: String? = nil, + imageUrl: URL? = nil, + name: String? = nil, + toolCalls: [Self.ChatCompletionAssistantMessageParam.ChatCompletionMessageToolCallParam]? = nil, + toolCallId: String? 
= nil + ) { + switch role { + case .system: + if let content { + self = .system(.init(content: content, name: name)) + } else { + return nil + } + case .user: + if let content { + self = .user(.init(content: .init(string: content), name: name)) + } else if let imageUrl { + self = .user(.init(content: .init(chatCompletionContentPartImageParam: .init(imageUrl: .init(url: imageUrl.absoluteString, detail: .auto))), name: name)) + } else { + return nil + } + case .assistant: + self = .assistant(.init(content: content, name: name, toolCalls: toolCalls)) + case .tool: + if let content, let toolCallId { + self = .tool(.init(content: content, toolCallId: toolCallId)) + } else { + return nil + } + } } - if let functionCall = functionCall { - try container.encode(functionCall, forKey: .functionCall) + private init?( + content: String, + role: Role, + name: String? = nil + ) { + if role == .system { + self = .system(.init(content: content, name: name)) + } else { + return nil + } } - // Should add 'nil' to 'content' property for function calling response - // See https://openai.com/blog/function-calling-and-other-api-updates - if content != nil || (role == .assistant && functionCall != nil) { - try container.encode(content, forKey: .content) + private init?( + content: Self.ChatCompletionUserMessageParam.Content, + role: Role, + name: String? = nil + ) { + if role == .user { + self = .user(.init(content: content, name: name)) + } else { + return nil + } } - } -} -public struct ChatFunctionCall: Codable, Equatable { - /// The name of the function to call. - public let name: String? - /// The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. - public let arguments: String? + private init?( + role: Role, + content: String? = nil, + name: String? 
= nil, + toolCalls: [Self.ChatCompletionAssistantMessageParam.ChatCompletionMessageToolCallParam]? = nil + ) { + if role == .assistant { + self = .assistant(.init(content: content, name: name, toolCalls: toolCalls)) + } else { + return nil + } + } - public init(name: String?, arguments: String?) { - self.name = name - self.arguments = arguments - } -} + private init?( + content: String, + role: Role, + toolCallId: String + ) { + if role == .tool { + self = .tool(.init(content: content, toolCallId: toolCallId)) + } else { + return nil + } + } + public func encode(to encoder: Encoder) throws { + var container = encoder.singleValueContainer() + switch self { + case .system(let a0): + try container.encode(a0) + case .user(let a0): + try container.encode(a0) + case .assistant(let a0): + try container.encode(a0) + case .tool(let a0): + try container.encode(a0) + } + } -/// See the [guide](/docs/guides/gpt/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format. -public struct JSONSchema: Codable, Equatable { - public let type: JSONType - public let properties: [String: Property]? - public let required: [String]? - public let pattern: String? - public let const: String? - public let enumValues: [String]? - public let multipleOf: Int? - public let minimum: Int? - public let maximum: Int? - - private enum CodingKeys: String, CodingKey { - case type, properties, required, pattern, const - case enumValues = "enum" - case multipleOf, minimum, maximum - } - - public struct Property: Codable, Equatable { - public let type: JSONType - public let description: String? - public let format: String? - public let items: Items? - public let required: [String]? - public let pattern: String? - public let const: String? - public let enumValues: [String]? - public let multipleOf: Int? - public let minimum: Double? - public let maximum: Double? - public let minItems: Int? 
- public let maxItems: Int? - public let uniqueItems: Bool? - - private enum CodingKeys: String, CodingKey { - case type, description, format, items, required, pattern, const - case enumValues = "enum" - case multipleOf, minimum, maximum - case minItems, maxItems, uniqueItems + enum CodingKeys: CodingKey { + case system + case user + case assistant + case tool } - - public init(type: JSONType, description: String? = nil, format: String? = nil, items: Items? = nil, required: [String]? = nil, pattern: String? = nil, const: String? = nil, enumValues: [String]? = nil, multipleOf: Int? = nil, minimum: Double? = nil, maximum: Double? = nil, minItems: Int? = nil, maxItems: Int? = nil, uniqueItems: Bool? = nil) { - self.type = type - self.description = description - self.format = format - self.items = items - self.required = required - self.pattern = pattern - self.const = const - self.enumValues = enumValues - self.multipleOf = multipleOf - self.minimum = minimum - self.maximum = maximum - self.minItems = minItems - self.maxItems = maxItems - self.uniqueItems = uniqueItems + + public struct ChatCompletionSystemMessageParam: Codable, Equatable { + public typealias Role = ChatQuery.ChatCompletionMessageParam.Role + + /// The contents of the system message. + public let content: String + /// The role of the messages author, in this case system. + public let role: Self.Role = .system + /// An optional name for the participant. Provides the model information to differentiate between participants of the same role. + public let name: String? + + public init( + content: String, + name: String? 
= nil + ) { + self.content = content + self.name = name + } + + enum CodingKeys: CodingKey { + case content + case role + case name + } } - } - public enum JSONType: String, Codable { - case integer = "integer" - case string = "string" - case boolean = "boolean" - case array = "array" - case object = "object" - case number = "number" - case `null` = "null" - } + public struct ChatCompletionUserMessageParam: Codable, Equatable { + public typealias Role = ChatQuery.ChatCompletionMessageParam.Role + + /// The contents of the user message. + public let content: Content + /// The role of the messages author, in this case user. + public let role: Self.Role = .user + /// An optional name for the participant. Provides the model information to differentiate between participants of the same role. + public let name: String? + + public init( + content: Content, + name: String? = nil + ) { + self.content = content + self.name = name + } + + enum CodingKeys: CodingKey { + case content + case role + case name + } + + public enum Content: Codable, Equatable { + case string(String) + case chatCompletionContentPartTextParam(ChatCompletionContentPartTextParam) + case chatCompletionContentPartImageParam(ChatCompletionContentPartImageParam) + + public var string: String? { get { + switch self { + case .string(let string): + return string + default: + return nil + } + }} + + public var text: String? { get { + switch self { + case .chatCompletionContentPartTextParam(let text): + return text.text + default: + return nil + } + }} + + public var imageUrl: Self.ChatCompletionContentPartImageParam.ImageURL? 
{ get { + switch self { + case .chatCompletionContentPartImageParam(let image): + return image.imageUrl + default: + return nil + } + }} + + public init(string: String) { + self = .string(string) + } + + public init(chatCompletionContentPartTextParam: ChatCompletionContentPartTextParam) { + self = .chatCompletionContentPartTextParam(chatCompletionContentPartTextParam) + } - public struct Items: Codable, Equatable { - public let type: JSONType - public let properties: [String: Property]? - public let pattern: String? - public let const: String? - public let enumValues: [String]? - public let multipleOf: Int? - public let minimum: Double? - public let maximum: Double? - public let minItems: Int? - public let maxItems: Int? - public let uniqueItems: Bool? - - private enum CodingKeys: String, CodingKey { - case type, properties, pattern, const - case enumValues = "enum" - case multipleOf, minimum, maximum, minItems, maxItems, uniqueItems + public init(chatCompletionContentPartImageParam: ChatCompletionContentPartImageParam) { + self = .chatCompletionContentPartImageParam(chatCompletionContentPartImageParam) + } + + public func encode(to encoder: Encoder) throws { + var container = encoder.singleValueContainer() + switch self { + case .string(let a0): + try container.encode(a0) + case .chatCompletionContentPartTextParam(let a0): + try container.encode(a0) + case .chatCompletionContentPartImageParam(let a0): + try container.encode(a0) + } + } + + enum CodingKeys: CodingKey { + case string + case chatCompletionContentPartTextParam + case chatCompletionContentPartImageParam + } + + public struct ChatCompletionContentPartTextParam: Codable, Equatable { + /// The text content. + public let text: String + /// The type of the content part. 
+ public let type: String + + public init(text: String) { + self.text = text + self.type = "text" + } + } + + public struct ChatCompletionContentPartImageParam: Codable, Equatable { + public let imageUrl: ImageURL + /// The type of the content part. + public let type: String + + public init(imageUrl: ImageURL) { + self.imageUrl = imageUrl + self.type = "image_url" + } + + public struct ImageURL: Codable, Equatable { + /// Either a URL of the image or the base64 encoded image data. + public let url: String + /// Specifies the detail level of the image. Learn more in the + /// Vision guide https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding + public let detail: Detail + + public init(url: String, detail: Detail) { + self.url = url + self.detail = detail + } + + public enum Detail: String, Codable, Equatable, CaseIterable { + case auto + case low + case high + } + } + + public enum CodingKeys: String, CodingKey { + case imageUrl = "image_url" + case type + } + } + } } - - public init(type: JSONType, properties: [String : Property]? = nil, pattern: String? = nil, const: String? = nil, enumValues: [String]? = nil, multipleOf: Int? = nil, minimum: Double? = nil, maximum: Double? = nil, minItems: Int? = nil, maxItems: Int? = nil, uniqueItems: Bool?
= nil) { - self.type = type - self.properties = properties - self.pattern = pattern - self.const = const - self.enumValues = enumValues - self.multipleOf = multipleOf - self.minimum = minimum - self.maximum = maximum - self.minItems = minItems - self.maxItems = maxItems - self.uniqueItems = uniqueItems + + internal struct ChatCompletionMessageParam: Codable, Equatable { + typealias Role = ChatQuery.ChatCompletionMessageParam.Role + + let role: Self.Role + + enum CodingKeys: CodingKey { + case role + } + } + + public struct ChatCompletionAssistantMessageParam: Codable, Equatable { + public typealias Role = ChatQuery.ChatCompletionMessageParam.Role + + //// The role of the messages author, in this case assistant. + public let role: Self.Role = .assistant + /// The contents of the assistant message. Required unless tool_calls is specified. + public let content: String? + /// The name of the author of this message. `name` is required if role is `function`, and it should be the name of the function whose response is in the `content`. May contain a-z, A-Z, 0-9, and underscores, with a maximum length of 64 characters. + public let name: String? + /// The tool calls generated by the model, such as function calls. + public let toolCalls: [Self.ChatCompletionMessageToolCallParam]? + + public init( + content: String? = nil, + name: String? = nil, + toolCalls: [Self.ChatCompletionMessageToolCallParam]? = nil + ) { + self.content = content + self.name = name + self.toolCalls = toolCalls + } + + public enum CodingKeys: String, CodingKey { + case name + case role + case content + case toolCalls = "tool_calls" + } + + public struct ChatCompletionMessageToolCallParam: Codable, Equatable { + public typealias ToolsType = ChatQuery.ChatCompletionToolParam.ToolsType + + /// The ID of the tool call. + public let id: String + /// The function that the model called. + public let function: Self.FunctionCall + /// The type of the tool. Currently, only `function` is supported. 
+ public let type: Self.ToolsType + + public init( + id: String, + function: Self.FunctionCall + ) { + self.id = id + self.function = function + self.type = .function + } + + public struct FunctionCall: Codable, Equatable { + /// The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. + public let arguments: String + /// The name of the function to call. + public let name: String + } + } + } + + public struct ChatCompletionToolMessageParam: Codable, Equatable { + public typealias Role = ChatQuery.ChatCompletionMessageParam.Role + + /// The contents of the tool message. + public let content: String + /// The role of the messages author, in this case tool. + public let role: Self.Role = .tool + /// Tool call that this message is responding to. + public let toolCallId: String + + public init( + content: String, + toolCallId: String + ) { + self.content = content + self.toolCallId = toolCallId + } + + public enum CodingKeys: String, CodingKey { + case content + case role + case toolCallId = "tool_call_id" + } + } + + public enum Role: String, Codable, Equatable, CaseIterable { + case system + case user + case assistant + case tool } } - - public init(type: JSONType, properties: [String : Property]? = nil, required: [String]? = nil, pattern: String? = nil, const: String? = nil, enumValues: [String]? = nil, multipleOf: Int? = nil, minimum: Int? = nil, maximum: Int? = nil) { - self.type = type - self.properties = properties - self.required = required - self.pattern = pattern - self.const = const - self.enumValues = enumValues - self.multipleOf = multipleOf - self.minimum = minimum - self.maximum = maximum - } -} -public struct ChatFunctionDeclaration: Codable, Equatable { - /// The name of the function to be called. 
Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. - public let name: String - - /// The description of what the function does. - public let description: String - - /// The parameters the functions accepts, described as a JSON Schema object. - public let parameters: JSONSchema - - public init(name: String, description: String, parameters: JSONSchema) { - self.name = name - self.description = description - self.parameters = parameters + public enum Stop: Codable, Equatable { + case string(String) + case stringList([String]) + + public func encode(to encoder: Encoder) throws { + var container = encoder.singleValueContainer() + switch self { + case .string(let a0): + try container.encode(a0) + case .stringList(let a0): + try container.encode(a0) + } + } + + public init(string: String) { + self = .string(string) + } + + public init(stringList: [String]) { + self = .stringList(stringList) + } } -} -public struct ChatQueryFunctionCall: Codable, Equatable { - /// The name of the function to call. - public let name: String? - /// The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. - public let arguments: String? -} + // See more https://platform.openai.com/docs/guides/text-generation/json-mode + public enum ResponseFormat: String, Codable, Equatable { + case jsonObject = "json_object" + case text -public struct ChatQuery: Equatable, Codable, Streamable { - /// ID of the model to use. Currently, only gpt-3.5-turbo and gpt-3.5-turbo-0301 are supported. - public let model: Model - /// An object specifying the format that the model must output. - public let responseFormat: ResponseFormat? 
- /// The messages to generate chat completions for - public let messages: [Chat] - /// A list of functions the model may generate JSON inputs for. - public let functions: [ChatFunctionDeclaration]? - /// Controls how the model responds to function calls. "none" means the model does not call a function, and responds to the end-user. "auto" means the model can pick between and end-user or calling a function. Specifying a particular function via `{"name": "my_function"}` forces the model to call that function. "none" is the default when no functions are present. "auto" is the default if functions are present. - public let functionCall: FunctionCall? - /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and We generally recommend altering this or top_p but not both. - public let temperature: Double? - /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. - public let topP: Double? - /// How many chat completion choices to generate for each input message. - public let n: Int? - /// Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. - public let stop: [String]? - /// The maximum number of tokens to generate in the completion. - public let maxTokens: Int? - /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - public let presencePenalty: Double? - /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - public let frequencyPenalty: Double? 
- /// Modify the likelihood of specified tokens appearing in the completion. - public let logitBias: [String:Int]? - /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - public let user: String? - - var stream: Bool = false + public func encode(to encoder: Encoder) throws { + var container = encoder.singleValueContainer() + try container.encode(["type": self.rawValue]) + } + } - public enum FunctionCall: Codable, Equatable { + public enum ChatCompletionFunctionCallOptionParam: Codable, Equatable { case none case auto case function(String) - - enum CodingKeys: String, CodingKey { - case none = "none" - case auto = "auto" - case function = "name" - } - + public func encode(to encoder: Encoder) throws { switch self { case .none: @@ -274,45 +587,232 @@ public struct ChatQuery: Equatable, Codable, Streamable { var container = encoder.singleValueContainer() try container.encode(CodingKeys.auto.rawValue) case .function(let name): - var container = encoder.container(keyedBy: CodingKeys.self) - try container.encode(name, forKey: .function) + var container = encoder.container(keyedBy: Self.ChatCompletionFunctionCallNameParam.CodingKeys.self) + try container.encode("function", forKey: .type) + try container.encode(["name": name], forKey: .function) + } + } + + public init(function: String) { + self = .function(function) + } + + enum CodingKeys: String, CodingKey { + case none = "none" + case auto = "auto" + case function = "name" + } + + private enum ChatCompletionFunctionCallNameParam: Codable, Equatable { + case type + case function + + enum CodingKeys: CodingKey { + case type + case function } } } - - enum CodingKeys: String, CodingKey { - case model + + public struct ChatCompletionToolParam: Codable, Equatable { + + public let function: Self.FunctionDefinition + public let type: Self.ToolsType + + public init( + function: Self.FunctionDefinition + ) { + self.function = function + self.type = .function + } + + public 
struct FunctionDefinition: Codable, Equatable { + /// The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. + public let name: String + + /// The description of what the function does. + public let description: String? + + /// The parameters the functions accepts, described as a JSON Schema object. + /// https://platform.openai.com/docs/guides/text-generation/function-calling + /// https://json-schema.org/understanding-json-schema/ + /// **Python library defines only [String: Object] dictionary. + public let parameters: Self.FunctionParameters? + + public init( + name: String, + description: String? = nil, + parameters: Self.FunctionParameters? = nil + ) { + self.name = name + self.description = description + self.parameters = parameters + } + + /// See the [guide](/docs/guides/gpt/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format. + public struct FunctionParameters: Codable, Equatable { + + public let type: Self.JSONType + public let properties: [String: Property]? + public let required: [String]? + public let pattern: String? + public let const: String? + public let `enum`: [String]? + public let multipleOf: Int? + public let minimum: Int? + public let maximum: Int? + + public init( + type: Self.JSONType, + properties: [String : Property]? = nil, + required: [String]? = nil, + pattern: String? = nil, + const: String? = nil, + enum: [String]? = nil, + multipleOf: Int? = nil, + minimum: Int? = nil, + maximum: Int? 
= nil + ) { + self.type = type + self.properties = properties + self.required = required + self.pattern = pattern + self.const = const + self.`enum` = `enum` + self.multipleOf = multipleOf + self.minimum = minimum + self.maximum = maximum + } + + public struct Property: Codable, Equatable { + public typealias JSONType = ChatQuery.ChatCompletionToolParam.FunctionDefinition.FunctionParameters.JSONType + + public let type: Self.JSONType + public let description: String? + public let format: String? + public let items: Self.Items? + public let required: [String]? + public let pattern: String? + public let const: String? + public let `enum`: [String]? + public let multipleOf: Int? + public let minimum: Double? + public let maximum: Double? + public let minItems: Int? + public let maxItems: Int? + public let uniqueItems: Bool? + + public init( + type: Self.JSONType, + description: String? = nil, + format: String? = nil, + items: Self.Items? = nil, + required: [String]? = nil, + pattern: String? = nil, + const: String? = nil, + enum: [String]? = nil, + multipleOf: Int? = nil, + minimum: Double? = nil, + maximum: Double? = nil, + minItems: Int? = nil, + maxItems: Int? = nil, + uniqueItems: Bool? = nil + ) { + self.type = type + self.description = description + self.format = format + self.items = items + self.required = required + self.pattern = pattern + self.const = const + self.`enum` = `enum` + self.multipleOf = multipleOf + self.minimum = minimum + self.maximum = maximum + self.minItems = minItems + self.maxItems = maxItems + self.uniqueItems = uniqueItems + } + + public struct Items: Codable, Equatable { + public typealias JSONType = ChatQuery.ChatCompletionToolParam.FunctionDefinition.FunctionParameters.JSONType + + public let type: Self.JSONType + public let properties: [String: Property]? + public let pattern: String? + public let const: String? + public let `enum`: [String]? + public let multipleOf: Int? + public let minimum: Double? + public let maximum: Double? 
+ public let minItems: Int? + public let maxItems: Int? + public let uniqueItems: Bool? + + public init( + type: Self.JSONType, + properties: [String : Property]? = nil, + pattern: String? = nil, + const: String? = nil, + `enum`: [String]? = nil, + multipleOf: Int? = nil, + minimum: Double? = nil, + maximum: Double? = nil, + minItems: Int? = nil, + maxItems: Int? = nil, + uniqueItems: Bool? = nil + ) { + self.type = type + self.properties = properties + self.pattern = pattern + self.const = const + self.`enum` = `enum` + self.multipleOf = multipleOf + self.minimum = minimum + self.maximum = maximum + self.minItems = minItems + self.maxItems = maxItems + self.uniqueItems = uniqueItems + } + } + } + + + public enum JSONType: String, Codable { + case integer + case string + case boolean + case array + case object + case number + case null + } + } + } + + public enum ToolsType: String, Codable, Equatable { + case function + } + } + + public enum CodingKeys: String, CodingKey { case messages - case functions - case functionCall = "function_call" - case temperature - case topP = "top_p" - case n - case stream - case stop - case maxTokens = "max_tokens" - case presencePenalty = "presence_penalty" + case model case frequencyPenalty = "frequency_penalty" case logitBias = "logit_bias" - case user + case logprobs + case maxTokens = "max_tokens" + case n + case presencePenalty = "presence_penalty" case responseFormat = "response_format" - } - - public init(model: Model, messages: [Chat], responseFormat: ResponseFormat? = nil, functions: [ChatFunctionDeclaration]? = nil, functionCall: FunctionCall? = nil, temperature: Double? = nil, topP: Double? = nil, n: Int? = nil, stop: [String]? = nil, maxTokens: Int? = nil, presencePenalty: Double? = nil, frequencyPenalty: Double? = nil, logitBias: [String : Int]? = nil, user: String? 
= nil, stream: Bool = false) { - self.model = model - self.messages = messages - self.functions = functions - self.functionCall = functionCall - self.temperature = temperature - self.topP = topP - self.n = n - self.responseFormat = responseFormat - self.stop = stop - self.maxTokens = maxTokens - self.presencePenalty = presencePenalty - self.frequencyPenalty = frequencyPenalty - self.logitBias = logitBias - self.user = user - self.stream = stream + case seed + case stop + case temperature + case toolChoice = "tool_choice" + case tools + case topLogprobs = "top_logprobs" + case topP = "top_p" + case user + case stream } } diff --git a/Sources/OpenAI/Public/Models/ChatResult.swift b/Sources/OpenAI/Public/Models/ChatResult.swift index f1a80a0c..5e42c37c 100644 --- a/Sources/OpenAI/Public/Models/ChatResult.swift +++ b/Sources/OpenAI/Public/Models/ChatResult.swift @@ -8,56 +8,152 @@ import Foundation public struct ChatResult: Codable, Equatable { - + public struct Choice: Codable, Equatable { - + public typealias ChatCompletionMessage = ChatQuery.ChatCompletionMessageParam + + /// The index of the choice in the list of choices. public let index: Int - /// Exists only if it is a complete message. - public let message: Chat - /// Exists only if it is a complete message. + /// Log probability information for the choice. + public let logprobs: Self.ChoiceLogprobs? + /// A chat completion message generated by the model. + public let message: Self.ChatCompletionMessage + /// The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool, or function_call (deprecated) if the model called a function. public let finishReason: String? 
- - enum CodingKeys: String, CodingKey { + + public struct ChoiceLogprobs: Codable, Equatable { + + public let content: [Self.ChatCompletionTokenLogprob]? + + public struct ChatCompletionTokenLogprob: Codable, Equatable { + + /// The token. + public let token: String + /// A list of integers representing the UTF-8 bytes representation of the token. + /// Useful in instances where characters are represented by multiple tokens and + /// their byte representations must be combined to generate the correct text + /// representation. Can be `null` if there is no bytes representation for the token. + public let bytes: [Int]? + /// The log probability of this token. + public let logprob: Double + /// List of the most likely tokens and their log probability, at this token position. + /// In rare cases, there may be fewer than the number of requested `top_logprobs` returned. + public let topLogprobs: [TopLogprob] + + public struct TopLogprob: Codable, Equatable { + + /// The token. + public let token: String + /// A list of integers representing the UTF-8 bytes representation of the token. + /// Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be `null` if there is no bytes representation for the token. + public let bytes: [Int]? + /// The log probability of this token. 
+ public let logprob: Double + } + + public enum CodingKeys: String, CodingKey { + case token + case bytes + case logprob + case topLogprobs = "top_logprobs" + } + } + } + + public enum CodingKeys: String, CodingKey { case index + case logprobs case message case finishReason = "finish_reason" } + + public enum FinishReason: String, Codable, Equatable { + case stop + case length + case toolCalls = "tool_calls" + case contentFilter = "content_filter" + case functionCall = "function_call" + } } - - public struct Usage: Codable, Equatable { - public let promptTokens: Int + + public struct CompletionUsage: Codable, Equatable { + + /// Number of tokens in the generated completion. public let completionTokens: Int + /// Number of tokens in the prompt. + public let promptTokens: Int + /// Total number of tokens used in the request (prompt + completion). public let totalTokens: Int - + enum CodingKeys: String, CodingKey { - case promptTokens = "prompt_tokens" case completionTokens = "completion_tokens" + case promptTokens = "prompt_tokens" case totalTokens = "total_tokens" } } - + + /// A unique identifier for the chat completion. public let id: String + /// The object type, which is always chat.completion. public let object: String + /// The Unix timestamp (in seconds) of when the chat completion was created. public let created: TimeInterval - public let model: Model + /// The model used for the chat completion. + public let model: String + /// A list of chat completion choices. Can be more than one if n is greater than 1. public let choices: [Choice] - public let usage: Usage? - - enum CodingKeys: String, CodingKey { + /// Usage statistics for the completion request. + public let usage: Self.CompletionUsage? + /// This fingerprint represents the backend configuration that the model runs with. + /// Can be used in conjunction with the seed request parameter to understand when backend changes have been made that might impact determinism. 
+ public let systemFingerprint: String? + + public enum CodingKeys: String, CodingKey { case id case object case created case model case choices case usage + case systemFingerprint = "system_fingerprint" } - - init(id: String, object: String, created: TimeInterval, model: Model, choices: [Choice], usage: Usage) { - self.id = id - self.object = object - self.created = created - self.model = model - self.choices = choices - self.usage = usage +} + +extension ChatQuery.ChatCompletionMessageParam { + + public init(from decoder: Decoder) throws { + let messageContainer = try decoder.container(keyedBy: Self.ChatCompletionMessageParam.CodingKeys.self) + switch try messageContainer.decode(Role.self, forKey: .role) { + case .system: + self = try .system(.init(from: decoder)) + case .user: + self = try .user(.init(from: decoder)) + case .assistant: + self = try .assistant(.init(from: decoder)) + case .tool: + self = try .tool(.init(from: decoder)) + } + } +} + +extension ChatQuery.ChatCompletionMessageParam.ChatCompletionUserMessageParam.Content { + public init(from decoder: Decoder) throws { + let container = try decoder.singleValueContainer() + do { + let string = try container.decode(String.self) + self = .string(string) + return + } catch {} + do { + let text = try container.decode(ChatCompletionContentPartTextParam.self) + self = .chatCompletionContentPartTextParam(text) + return + } catch {} + do { + let image = try container.decode(ChatCompletionContentPartImageParam.self) + self = .chatCompletionContentPartImageParam(image) + return + } catch {} + throw DecodingError.typeMismatch(Self.self, .init(codingPath: [Self.CodingKeys.string, CodingKeys.chatCompletionContentPartTextParam, CodingKeys.chatCompletionContentPartImageParam], debugDescription: "Content: expected String, ChatCompletionContentPartTextParam, ChatCompletionContentPartImageParam")) } } diff --git a/Sources/OpenAI/Public/Models/ChatStreamResult.swift b/Sources/OpenAI/Public/Models/ChatStreamResult.swift 
index 4d69713c..3457b089 100644 --- a/Sources/OpenAI/Public/Models/ChatStreamResult.swift +++ b/Sources/OpenAI/Public/Models/ChatStreamResult.swift @@ -10,51 +10,133 @@ import Foundation public struct ChatStreamResult: Codable, Equatable { public struct Choice: Codable, Equatable { - public struct Delta: Codable, Equatable { + public typealias FinishReason = ChatResult.Choice.FinishReason + + public struct ChoiceDelta: Codable, Equatable { + public typealias Role = ChatQuery.ChatCompletionMessageParam.Role + + /// The contents of the chunk message. public let content: String? - public let role: Chat.Role? - /// The name of the author of this message. `name` is required if role is `function`, and it should be the name of the function whose response is in the `content`. May contain a-z, A-Z, 0-9, and underscores, with a maximum length of 64 characters. - public let name: String? - public let functionCall: ChatFunctionCall? + /// The role of the author of this message. + public let role: Self.Role? + public let toolCalls: [Self.ChoiceDeltaToolCall]? - enum CodingKeys: String, CodingKey { - case role + public struct ChoiceDeltaToolCall: Codable, Equatable { + + public let index: Int + /// The ID of the tool call. + public let id: String? + /// The function that the model called. + public let function: Self.ChoiceDeltaToolCallFunction? + /// The type of the tool. Currently, only function is supported. + public let type: String? + + public init( + index: Int, + id: String? = nil, + function: Self.ChoiceDeltaToolCallFunction? = nil + ) { + self.index = index + self.id = id + self.function = function + self.type = "function" + } + + public struct ChoiceDeltaToolCallFunction: Codable, Equatable { + + /// The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. 
+ public let arguments: String? + /// The name of the function to call. + public let name: String? + + public init( + arguments: String? = nil, + name: String? = nil + ) { + self.arguments = arguments + self.name = name + } + } + } + + public enum CodingKeys: String, CodingKey { case content - case name - case functionCall = "function_call" + case role + case toolCalls = "tool_calls" } } - + + /// The index of the choice in the list of choices. public let index: Int - public let delta: Delta - public let finishReason: String? - - enum CodingKeys: String, CodingKey { + /// A chat completion delta generated by streamed model responses. + public let delta: Self.ChoiceDelta + /// The reason the model stopped generating tokens. + /// This will be `stop` if the model hit a natural stop point or a provided stop sequence, `length` if the maximum number of tokens specified in the request was reached, `content_filter` if content was omitted due to a flag from our content filters, `tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called a function. + public let finishReason: FinishReason? + /// Log probability information for the choice. + public let logprobs: Self.ChoiceLogprobs? + + public struct ChoiceLogprobs: Codable, Equatable { + /// A list of message content tokens with log probability information. + public let content: [Self.ChatCompletionTokenLogprob]? + + public struct ChatCompletionTokenLogprob: Codable, Equatable { + /// The token. + public let token: String + /// A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token. + public let bytes: [Int]? + /// The log probability of this token. 
+ public let logprob: Double + /// List of the most likely tokens and their log probability, at this token position. In rare cases, there may be fewer than the number of requested top_logprobs returned. + public let topLogprobs: [Self.TopLogprob]? + + public struct TopLogprob: Codable, Equatable { + /// The token. + public let token: String + /// A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token. + public let bytes: [Int]? + /// The log probability of this token. + public let logprob: Double + } + + public enum CodingKeys: String, CodingKey { + case token + case bytes + case logprob + case topLogprobs = "top_logprobs" + } + } + } + + public enum CodingKeys: String, CodingKey { case index case delta case finishReason = "finish_reason" + case logprobs } } - + + /// A unique identifier for the chat completion. Each chunk has the same ID. public let id: String + /// The object type, which is always `chat.completion.chunk`. public let object: String + /// The Unix timestamp (in seconds) of when the chat completion was created. + /// Each chunk has the same timestamp. public let created: TimeInterval - public let model: Model + /// The model to generate the completion. + public let model: String + /// A list of chat completion choices. + /// Can be more than one if `n` is greater than 1. public let choices: [Choice] - - enum CodingKeys: String, CodingKey { + /// This fingerprint represents the backend configuration that the model runs with. Can be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism. + public let systemFingerprint: String? 
+ + public enum CodingKeys: String, CodingKey { case id case object case created case model case choices - } - - init(id: String, object: String, created: TimeInterval, model: Model, choices: [Choice]) { - self.id = id - self.object = object - self.created = created - self.model = model - self.choices = choices + case systemFingerprint = "system_fingerprint" } } diff --git a/Sources/OpenAI/Public/Models/EmbeddingsQuery.swift b/Sources/OpenAI/Public/Models/EmbeddingsQuery.swift index af7acad6..847f49ee 100644 --- a/Sources/OpenAI/Public/Models/EmbeddingsQuery.swift +++ b/Sources/OpenAI/Public/Models/EmbeddingsQuery.swift @@ -8,13 +8,78 @@ import Foundation public struct EmbeddingsQuery: Codable { - /// ID of the model to use. + + /// Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for text-embedding-ada-002), cannot be an empty string, and any array must be 2048 dimensions or less. + public let input: Self.Input + /// ID of the model to use. You can use the List models API to see all of your available models, or see our Model overview for descriptions of them. + /// https://platform.openai.com/docs/api-reference/models/list + /// https://platform.openai.com/docs/models/overview public let model: Model - /// Input text to get embeddings for. - public let input: String + /// The format to return the embeddings in. Can be either float or base64. + /// https://pypi.org/project/pybase64/ + public let encodingFormat: Self.EncodingFormat? + /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + /// https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids + public let user: String? 
- public init(model: Model, input: String) { - self.model = model + public init( + input: Self.Input, + model: Model, + encodingFormat: Self.EncodingFormat? = nil, + user: String? = nil + ) { self.input = input + self.model = model + self.encodingFormat = encodingFormat + self.user = user + } + + public enum Input: Codable, Equatable { + case string(String) + case stringList([String]) + case intList([Int]) + case intMatrix([[Int]]) + + public func encode(to encoder: Encoder) throws { + var container = encoder.singleValueContainer() + switch self { + case .string(let a0): + try container.encode(a0) + case .stringList(let a0): + try container.encode(a0) + case .intList(let a0): + try container.encode(a0) + case .intMatrix(let a0): + try container.encode(a0) + } + } + + public init(string: String) { + self = .string(string) + } + + public init(stringList: [String]) { + self = .stringList(stringList) + } + + public init(intList: [Int]) { + self = .intList(intList) + } + + public init(intMatrix: [[Int]]) { + self = .intMatrix(intMatrix) + } + } + + public enum EncodingFormat: String, Codable { + case float + case base64 + } + + public enum CodingKeys: String, CodingKey { + case input + case model + case encodingFormat = "encoding_format" + case user } } diff --git a/Sources/OpenAI/Public/Models/EmbeddingsResult.swift b/Sources/OpenAI/Public/Models/EmbeddingsResult.swift index f763b39d..ce989018 100644 --- a/Sources/OpenAI/Public/Models/EmbeddingsResult.swift +++ b/Sources/OpenAI/Public/Models/EmbeddingsResult.swift @@ -10,8 +10,12 @@ import Foundation public struct EmbeddingsResult: Codable, Equatable { public struct Embedding: Codable, Equatable { + /// The object type, which is always "embedding". public let object: String + /// The embedding vector, which is a list of floats. The length of vector depends on the model as listed in the embedding guide. 
+ /// https://platform.openai.com/docs/guides/embeddings public let embedding: [Double] + /// The index of the embedding in the list of embeddings. public let index: Int } @@ -26,6 +30,8 @@ public struct EmbeddingsResult: Codable, Equatable { } public let data: [Embedding] - public let model: Model + public let model: String public let usage: Usage + /// The object type, which is always "list". + public let object: String } diff --git a/Sources/OpenAI/Public/Models/ImageEditsQuery.swift b/Sources/OpenAI/Public/Models/ImageEditsQuery.swift index a208c8d7..968e9864 100644 --- a/Sources/OpenAI/Public/Models/ImageEditsQuery.swift +++ b/Sources/OpenAI/Public/Models/ImageEditsQuery.swift @@ -8,35 +8,69 @@ import Foundation public struct ImageEditsQuery: Codable { + public typealias ResponseFormat = ImagesQuery.ResponseFormat + public typealias Size = ImagesQuery.Size + /// The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask is not provided, image must have transparency, which will be used as the mask. public let image: Data - public let fileName: String /// An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where image should be edited. Must be a valid PNG file, less than 4MB, and have the same dimensions as image. public let mask: Data? - public let maskFileName: String? /// A text description of the desired image(s). The maximum length is 1000 characters. public let prompt: String + /// The model to use for image generation. + /// Defaults to dall-e-2 + public let model: Model? /// The number of images to generate. Must be between 1 and 10. public let n: Int? + /// The format in which the generated images are returned. Must be one of url or b64_json. + /// Defaults to url + public let responseFormat: Self.ResponseFormat? /// The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024. - public let size: String? + public let size: Size? 
+ /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + /// https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids + public let user: String? - public init(image: Data, fileName: String, mask: Data? = nil, maskFileName: String? = nil, prompt: String, n: Int? = nil, size: String? = nil) { + public init( + image: Data, + prompt: String, + mask: Data? = nil, + model: Model? = nil, + n: Int? = nil, + responseFormat: Self.ResponseFormat? = nil, + size: Self.Size? = nil, + user: String? = nil + ) { self.image = image - self.fileName = fileName self.mask = mask - self.maskFileName = maskFileName self.prompt = prompt + self.model = model self.n = n + self.responseFormat = responseFormat self.size = size + self.user = user + } + + public enum CodingKeys: String, CodingKey { + case image + case mask + case prompt + case model + case n + case responseFormat = "response_format" + case size + case user } } extension ImageEditsQuery: MultipartFormDataBodyEncodable { func encode(boundary: String) -> Data { let bodyBuilder = MultipartFormDataBodyBuilder(boundary: boundary, entries: [ - .file(paramName: "image", fileName: fileName, fileData: image, contentType: "image/png"), - .file(paramName: "mask", fileName: maskFileName, fileData: mask, contentType: "image/png"), + .file(paramName: "image", fileName: "image.png", fileData: image, contentType: "image/png"), + .file(paramName: "mask", fileName: "mask.png", fileData: mask, contentType: "image/png"), + .string(paramName: "model", value: model), + .string(paramName: "response_format", value: responseFormat), + .string(paramName: "user", value: user), .string(paramName: "prompt", value: prompt), .string(paramName: "n", value: n), .string(paramName: "size", value: size) diff --git a/Sources/OpenAI/Public/Models/ImageVariationsQuery.swift b/Sources/OpenAI/Public/Models/ImageVariationsQuery.swift index 35276a06..3bb90bf6 100644 --- 
a/Sources/OpenAI/Public/Models/ImageVariationsQuery.swift +++ b/Sources/OpenAI/Public/Models/ImageVariationsQuery.swift @@ -8,26 +8,59 @@ import Foundation public struct ImageVariationsQuery: Codable { + public typealias ResponseFormat = ImagesQuery.ResponseFormat + /// The image to edit. Must be a valid PNG file, less than 4MB, and square. public let image: Data - public let fileName: String + /// The model to use for image generation. Only dall-e-2 is supported at this time. + /// Defaults to dall-e-2 + public let model: Model? /// The number of images to generate. Must be between 1 and 10. + /// Defaults to 1 public let n: Int? + /// The format in which the generated images are returned. Must be one of url or b64_json. + /// Defaults to url + public let responseFormat: Self.ResponseFormat? /// The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024. + /// Defaults to 1024x1024 public let size: String? + /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + /// https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids + public let user: String? - public init(image: Data, fileName: String, n: Int? = nil, size: String? = nil) { + public init( + image: Data, + model: Model? = nil, + n: Int? = nil, + responseFormat: Self.ResponseFormat? = nil, + size: String? = nil, + user: String? 
= nil + ) { self.image = image - self.fileName = fileName + self.model = model self.n = n + self.responseFormat = responseFormat self.size = size + self.user = user + } + + public enum CodingKeys: String, CodingKey { + case image + case model + case n + case responseFormat = "response_format" + case size + case user + } } extension ImageVariationsQuery: MultipartFormDataBodyEncodable { func encode(boundary: String) -> Data { let bodyBuilder = MultipartFormDataBodyBuilder(boundary: boundary, entries: [ - .file(paramName: "image", fileName: fileName, fileData: image, contentType: "image/png"), + .file(paramName: "image", fileName: "image.png", fileData: image, contentType: "image/png"), + .string(paramName: "model", value: model), + .string(paramName: "response_format", value: responseFormat), + .string(paramName: "user", value: user), .string(paramName: "n", value: n), .string(paramName: "size", value: size) ]) diff --git a/Sources/OpenAI/Public/Models/ImagesQuery.swift b/Sources/OpenAI/Public/Models/ImagesQuery.swift index 6f9bd788..fc479f16 100644 --- a/Sources/OpenAI/Public/Models/ImagesQuery.swift +++ b/Sources/OpenAI/Public/Models/ImagesQuery.swift @@ -7,44 +7,59 @@ import Foundation +/// Given a prompt and/or an input image, the model will generate a new image. +/// https://platform.openai.com/docs/guides/images +public struct ImagesQuery: Codable { -public enum ImageResponseFormat: String, Codable, Equatable { +public enum ResponseFormat: String, Codable, Equatable { case url case b64_json } -public struct ImagesQuery: Codable { - public typealias ResponseFormat = ImageResponseFormat - - /// A text description of the desired image(s). The maximum length is 1000 characters. + /// A text description of the desired image(s). The maximum length is 1000 characters for dall-e-2 and 4000 characters for dall-e-3. public let prompt: String - - /// ID of the model to use. + /// The model to use for image generation. + /// Defaults to dall-e-2 public let model: Model? 
- /// The format in which the generated images are returned + /// The format in which the generated images are returned. Must be one of url or b64_json. + /// Defaults to url public let responseFormat: Self.ResponseFormat? - /// The number of images to generate. Must be between 1 and 10. + /// The number of images to generate. Must be between 1 and 10. For dall-e-3, only n=1 is supported. + /// Defaults to 1 public let n: Int? - /// The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024. - public let size: String? + /// The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024 for dall-e-2. Must be one of 1024x1024, 1792x1024, or 1024x1792 for dall-e-3 models. + /// Defaults to 1024x1024 + public let size: Self.Size? /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + /// https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids public let user: String? /// The style of the generated images. Must be one of vivid or natural. Vivid causes the model to lean towards generating hyper-real and dramatic images. Natural causes the model to produce more natural, less hyper-real looking images. This param is only supported for dall-e-3. - public let style: String? + /// Defaults to vivid + public let style: Self.Style? /// The quality of the image that will be generated. hd creates images with finer details and greater consistency across the image. This param is only supported for dall-e-3. - public let quality: String? - - public init(prompt: String, model: Model?=nil, responseFormat: Self.ResponseFormat?=nil, n: Int?, size: String?, style: String?=nil, user:String?=nil, quality:String?=nil) { - self.style = style + /// Defaults to standard + public let quality: Self.Quality? + + public init( + prompt: String, + model: Model? = nil, + n: Int? = nil, + quality:Self.Quality? = nil, + responseFormat: Self.ResponseFormat? = nil, + size: Size? 
= nil, + style: Self.Style? = nil, + user:String? = nil + ) { self.prompt = prompt self.n = n self.size = size self.model = model self.responseFormat = responseFormat + self.style = style self.user = user self.quality = quality } - + public enum CodingKeys: String, CodingKey { case model case prompt @@ -55,4 +70,22 @@ public struct ImagesQuery: Codable { case responseFormat = "response_format" case quality } + + public enum Style: String, Codable, CaseIterable { + case natural + case vivid + } + + public enum Quality: String, Codable, CaseIterable { + case standard + case hd + } + + public enum Size: String, Codable, CaseIterable { + case _256 = "256x256" + case _512 = "512x512" + case _1024 = "1024x1024" +// case _1792_1024 = "1792x1024" // for dall-e-3 models +// case _1024_1792 = "1024x1792" // for dall-e-3 models + } } diff --git a/Sources/OpenAI/Public/Models/ImagesResult.swift b/Sources/OpenAI/Public/Models/ImagesResult.swift index f6817482..6b0678d2 100644 --- a/Sources/OpenAI/Public/Models/ImagesResult.swift +++ b/Sources/OpenAI/Public/Models/ImagesResult.swift @@ -7,15 +7,26 @@ import Foundation +/// Returns a list of image objects. public struct ImagesResult: Codable, Equatable { - - public struct URLResult: Codable, Equatable { + + public let created: TimeInterval + public let data: [Self.Image] + + /// Represents the url or the content of an image generated by the OpenAI API. + public struct Image: Codable, Equatable { + + /// The base64-encoded JSON of the generated image, if response_format is b64_json + public let b64Json: String? + /// The prompt that was used to generate the image, if there was any revision to the prompt. + public let revisedPrompt: String? + /// The URL of the generated image, if response_format is url (default). public let url: String? - public let b64_json: String? 
+ + public enum CodingKeys: String, CodingKey { + case b64Json = "b64_json" + case revisedPrompt = "revised_prompt" + case url + } } - - public let created: TimeInterval - public let data: [URLResult] } - -extension ImagesResult.URLResult: Hashable { } diff --git a/Sources/OpenAI/Public/Models/Models/ModelQuery.swift b/Sources/OpenAI/Public/Models/Models/ModelQuery.swift index 9134bcd4..6adb38f4 100644 --- a/Sources/OpenAI/Public/Models/Models/ModelQuery.swift +++ b/Sources/OpenAI/Public/Models/Models/ModelQuery.swift @@ -7,6 +7,7 @@ import Foundation +/// Retrieves a model instance, providing basic information about the model such as the owner and permissioning. public struct ModelQuery: Codable, Equatable { /// The ID of the model to use for this request. public let model: Model diff --git a/Sources/OpenAI/Public/Models/Models/ModelResult.swift b/Sources/OpenAI/Public/Models/Models/ModelResult.swift index 13a0fe17..1fcc7cfa 100644 --- a/Sources/OpenAI/Public/Models/Models/ModelResult.swift +++ b/Sources/OpenAI/Public/Models/Models/ModelResult.swift @@ -1,23 +1,28 @@ // -// ModelResult.swift -// +// Model.swift +// // // Created by Aled Samuel on 08/04/2023. // import Foundation +/// The model object matching the specified ID. public struct ModelResult: Codable, Equatable { - public let id: Model + /// The model identifier, which can be referenced in the API endpoints. + public let id: String + /// The Unix timestamp (in seconds) when the model was created. + public let created: TimeInterval + /// The object type, which is always "model". public let object: String + /// The organization that owns the model. 
public let ownedBy: String - - enum CodingKeys: String, CodingKey { + + public enum CodingKeys: String, CodingKey { case id + case created case object case ownedBy = "owned_by" } } - -extension ModelResult: Identifiable {} diff --git a/Sources/OpenAI/Public/Models/Models/ModelsResult.swift b/Sources/OpenAI/Public/Models/Models/ModelsResult.swift index ea74fd21..5a0e9bf5 100644 --- a/Sources/OpenAI/Public/Models/Models/ModelsResult.swift +++ b/Sources/OpenAI/Public/Models/Models/ModelsResult.swift @@ -1,14 +1,17 @@ // // ModelsResult.swift -// +// // // Created by Aled Samuel on 08/04/2023. // import Foundation +/// A list of model objects. public struct ModelsResult: Codable, Equatable { - + + /// A list of model objects. public let data: [ModelResult] + /// The object type, which is always `list` public let object: String } diff --git a/Sources/OpenAI/Public/Protocols/OpenAIProtocol+Combine.swift b/Sources/OpenAI/Public/Protocols/OpenAIProtocol+Combine.swift index da8b7dfb..c5142044 100644 --- a/Sources/OpenAI/Public/Protocols/OpenAIProtocol+Combine.swift +++ b/Sources/OpenAI/Public/Protocols/OpenAIProtocol+Combine.swift @@ -113,6 +113,13 @@ public extension OpenAIProtocol { .eraseToAnyPublisher() } + func audioCreateSpeech(query: AudioSpeechQuery) -> AnyPublisher { + Future { + audioCreateSpeech(query: query, completion: $0) + } + .eraseToAnyPublisher() + } + func audioTranscriptions(query: AudioTranscriptionQuery) -> AnyPublisher { Future { audioTranscriptions(query: query, completion: $0) diff --git a/Sources/OpenAI/Public/Protocols/OpenAIProtocol.swift b/Sources/OpenAI/Public/Protocols/OpenAIProtocol.swift index caf97090..8c65b190 100644 --- a/Sources/OpenAI/Public/Protocols/OpenAIProtocol.swift +++ b/Sources/OpenAI/Public/Protocols/OpenAIProtocol.swift @@ -218,7 +218,7 @@ public protocol OpenAIProtocol { Example: ``` - let query = AudioSpeechQuery(model: .tts_1, input: "Hello, world!", voice: .alloy, response_format: .mp3, speed: 1.0) + let query = 
AudioSpeechQuery(model: .tts_1, input: "Hello, world!", voice: .alloy, responseFormat: .mp3, speed: 1.0) openAI.audioCreateSpeech(query: query) { result in // Handle response here } diff --git a/Sources/OpenAI/Public/Utilities/ArrayWithThreadSafety.swift b/Sources/OpenAI/Public/Utilities/ArrayWithThreadSafety.swift new file mode 100644 index 00000000..a0c4db64 --- /dev/null +++ b/Sources/OpenAI/Public/Utilities/ArrayWithThreadSafety.swift @@ -0,0 +1,26 @@ +// +// ArrayWithThreadSafety.swift +// +// +// Created by James J Kalafus on 2024-02-01. +// + +import Foundation + +internal class ArrayWithThreadSafety { + private var array = [Element]() + private let queue = DispatchQueue(label: "us.kalaf.OpenAI.threadSafeArray", attributes: .concurrent) + + @inlinable public func append(_ element: Element) { + queue.async(flags: .barrier) { + self.array.append(element) + } + } + + @inlinable public func removeAll(where shouldBeRemoved: @escaping (Element) throws -> Bool) rethrows { + try queue.sync(flags: .barrier) { + try self.array.removeAll(where: shouldBeRemoved) + } + } +} + diff --git a/Tests/OpenAITests/OpenAITests.swift b/Tests/OpenAITests/OpenAITests.swift index 8a552b78..7285dd16 100644 --- a/Tests/OpenAITests/OpenAITests.swift +++ b/Tests/OpenAITests/OpenAITests.swift @@ -44,9 +44,9 @@ class OpenAITests: XCTestCase { } func testImages() async throws { - let query = ImagesQuery(prompt: "White cat with heterochromia sitting on the kitchen table", model: .dall_e_2, n: 1, size: "1024x1024") + let query = ImagesQuery(prompt: "White cat with heterochromia sitting on the kitchen table", model: .dall_e_2, n: 1, size: ._1024) let imagesResult = ImagesResult(created: 100, data: [ - .init(url: "http://foo.bar", b64_json: nil) + .init(b64Json: nil, revisedPrompt: nil, url: "http://foo.bar") ]) try self.stub(result: imagesResult) let result = try await openAI.images(query: query) @@ -54,7 +54,7 @@ class OpenAITests: XCTestCase { } func testImagesError() async throws { - let 
query = ImagesQuery(prompt: "White cat with heterochromia sitting on the kitchen table", n: 1, size: "1024x1024") + let query = ImagesQuery(prompt: "White cat with heterochromia sitting on the kitchen table", n: 1, size: ._1024) let inError = APIError(message: "foo", type: "bar", param: "baz", code: "100") self.stub(error: inError) @@ -63,9 +63,9 @@ class OpenAITests: XCTestCase { } func testImageEdit() async throws { - let query = ImageEditsQuery(image: Data(), fileName: "whitecat.png", prompt: "White cat with heterochromia sitting on the kitchen table with a bowl of food", n: 1, size: "1024x1024") + let query = ImageEditsQuery(image: Data(), prompt: "White cat with heterochromia sitting on the kitchen table with a bowl of food", mask: Data(), n: 1, size: ._1024) let imagesResult = ImagesResult(created: 100, data: [ - .init(url: "http://foo.bar", b64_json: nil) + .init(b64Json: nil, revisedPrompt: nil, url: "http://foo.bar") ]) try self.stub(result: imagesResult) let result = try await openAI.imageEdits(query: query) @@ -73,7 +73,7 @@ class OpenAITests: XCTestCase { } func testImageEditError() async throws { - let query = ImageEditsQuery(image: Data(), fileName: "whitecat.png", prompt: "White cat with heterochromia sitting on the kitchen table with a bowl of food", n: 1, size: "1024x1024") + let query = ImageEditsQuery(image: Data(), prompt: "White cat with heterochromia sitting on the kitchen table with a bowl of food", mask: Data(), n: 1, size: ._1024) let inError = APIError(message: "foo", type: "bar", param: "baz", code: "100") self.stub(error: inError) @@ -82,9 +82,9 @@ class OpenAITests: XCTestCase { } func testImageVariation() async throws { - let query = ImageVariationsQuery(image: Data(), fileName: "whitecat.png", n: 1, size: "1024x1024") + let query = ImageVariationsQuery(image: Data(), n: 1, size: "1024x1024") let imagesResult = ImagesResult(created: 100, data: [ - .init(url: "http://foo.bar", b64_json: nil) + .init(b64Json: nil, revisedPrompt: nil, 
url: "http://foo.bar") ]) try self.stub(result: imagesResult) let result = try await openAI.imageVariations(query: query) @@ -92,7 +92,7 @@ class OpenAITests: XCTestCase { } func testImageVariationError() async throws { - let query = ImageVariationsQuery(image: Data(), fileName: "whitecat.png", n: 1, size: "1024x1024") + let query = ImageVariationsQuery(image: Data(), n: 1, size: "1024x1024") let inError = APIError(message: "foo", type: "bar", param: "baz", code: "100") self.stub(error: inError) @@ -101,15 +101,15 @@ class OpenAITests: XCTestCase { } func testChats() async throws { - let query = ChatQuery(model: .gpt4, messages: [ - .init(role: .system, content: "You are Librarian-GPT. You know everything about the books."), - .init(role: .user, content: "Who wrote Harry Potter?") - ]) - let chatResult = ChatResult(id: "id-12312", object: "foo", created: 100, model: .gpt3_5Turbo, choices: [ - .init(index: 0, message: .init(role: .system, content: "bar"), finishReason: "baz"), - .init(index: 0, message: .init(role: .user, content: "bar1"), finishReason: "baz1"), - .init(index: 0, message: .init(role: .assistant, content: "bar2"), finishReason: "baz2") - ], usage: .init(promptTokens: 100, completionTokens: 200, totalTokens: 300)) + let query = ChatQuery(messages: [ + .system(.init(content: "You are Librarian-GPT. 
You know everything about the books.")), + .user(.init(content: .string("Who wrote Harry Potter?"))) + ], model: .gpt3_5Turbo) + let chatResult = ChatResult(id: "id-12312", object: "foo", created: 100, model: .gpt3_5Turbo, choices: [ + .init(index: 0, logprobs: nil, message: .system(.init(content: "bar")), finishReason: "baz"), + .init(index: 0, logprobs: nil, message: .user(.init(content: .string("bar1"))), finishReason: "baz1"), + .init(index: 0, logprobs: nil, message: .assistant(.init(content: "bar2")), finishReason: "baz2") + ], usage: .init(completionTokens: 200, promptTokens: 100, totalTokens: 300), systemFingerprint: nil) try self.stub(result: chatResult) let result = try await openAI.chats(query: query) @@ -117,21 +117,21 @@ class OpenAITests: XCTestCase { } func testChatsFunction() async throws { - let query = ChatQuery(model: .gpt3_5Turbo_0125, messages: [ - .init(role: .system, content: "You are Weather-GPT. You know everything about the weather."), - .init(role: .user, content: "What's the weather like in Boston?"), - ], functions: [ - .init(name: "get_current_weather", description: "Get the current weather in a given location", parameters: .init(type: .object, properties: [ + let query = ChatQuery(messages: [ + .system(.init(content: "You are Weather-GPT. You know everything about the weather.")), + .user(.init(content: .string("What's the weather like in Boston?"))), + ], model: .gpt3_5Turbo, toolChoice: .auto, tools: [ + .init(function: .init(name: "get_current_weather", description: "Get the current weather in a given location", parameters: .init(type: .object, properties: [ "location": .init(type: .string, description: "The city and state, e.g. 
San Francisco, CA"), - "unit": .init(type: .string, enumValues: ["celsius", "fahrenheit"]) - ], required: ["location"])) - ], functionCall: .auto) - + "unit": .init(type: .string, enum: ["celsius", "fahrenheit"]) + ], required: ["location"]))) + ]) + let chatResult = ChatResult(id: "id-12312", object: "foo", created: 100, model: .gpt3_5Turbo, choices: [ - .init(index: 0, message: .init(role: .system, content: "bar"), finishReason: "baz"), - .init(index: 0, message: .init(role: .user, content: "bar1"), finishReason: "baz1"), - .init(index: 0, message: .init(role: .assistant, content: "bar2"), finishReason: "baz2") - ], usage: .init(promptTokens: 100, completionTokens: 200, totalTokens: 300)) + .init(index: 0, logprobs: nil, message: .system(.init(content: "bar")), finishReason: "baz"), + .init(index: 0, logprobs: nil, message: .user(.init(content: .string("bar1"))), finishReason: "baz1"), + .init(index: 0, logprobs: nil, message: .assistant(.init(content: "bar2")), finishReason: "baz2") + ], usage: .init(completionTokens: 200, promptTokens: 100, totalTokens: 300), systemFingerprint: nil) try self.stub(result: chatResult) let result = try await openAI.chats(query: query) @@ -139,10 +139,10 @@ class OpenAITests: XCTestCase { } func testChatsError() async throws { - let query = ChatQuery(model: .gpt4, messages: [ - .init(role: .system, content: "You are Librarian-GPT. You know everything about the books."), - .init(role: .user, content: "Who wrote Harry Potter?") - ]) + let query = ChatQuery(messages: [ + .system(.init(content: "You are Librarian-GPT. 
You know everything about the books.")), + .user(.init(content: .string("Who wrote Harry Potter?"))) + ], model: .gpt3_5Turbo) let inError = APIError(message: "foo", type: "bar", param: "baz", code: "100") self.stub(error: inError) @@ -171,12 +171,18 @@ class OpenAITests: XCTestCase { } func testEmbeddings() async throws { - let query = EmbeddingsQuery(model: .textSearchBabbageDoc, input: "The food was delicious and the waiter...") - let embeddingsResult = EmbeddingsResult(data: [ - .init(object: "id-sdasd", embedding: [0.1, 0.2, 0.3, 0.4], index: 0), - .init(object: "id-sdasd1", embedding: [0.4, 0.1, 0.7, 0.1], index: 1), - .init(object: "id-sdasd2", embedding: [0.8, 0.1, 0.2, 0.8], index: 2) - ], model: .textSearchBabbageDoc, usage: .init(promptTokens: 10, totalTokens: 10)) + let query = EmbeddingsQuery( + input: .string("The food was delicious and the waiter..."), + model: .textEmbeddingAda) + let embeddingsResult = EmbeddingsResult( + data: [ + .init(object: "id-sdasd", embedding: [0.1, 0.2, 0.3, 0.4], index: 0), + .init(object: "id-sdasd1", embedding: [0.4, 0.1, 0.7, 0.1], index: 1), + .init(object: "id-sdasd2", embedding: [0.8, 0.1, 0.2, 0.8], index: 2) + ], + model: .textEmbeddingAda, + usage: .init(promptTokens: 10, totalTokens: 10), + object: "embeddings") try self.stub(result: embeddingsResult) let result = try await openAI.embeddings(query: query) @@ -184,7 +190,7 @@ class OpenAITests: XCTestCase { } func testEmbeddingsError() async throws { - let query = EmbeddingsQuery(model: .textSearchBabbageDoc, input: "The food was delicious and the waiter...") + let query = EmbeddingsQuery(input: .string("The food was delicious and the waiter..."), model: .textEmbeddingAda) let inError = APIError(message: "foo", type: "bar", param: "baz", code: "100") self.stub(error: inError) @@ -193,14 +199,14 @@ class OpenAITests: XCTestCase { } func testQueryString() throws { - let pathParameter = APIPath.gpt4 + let pathParameter = APIPath.chats let result = 
APIPath.models.withPath(pathParameter) XCTAssertEqual(result, APIPath.models + "/" + pathParameter) } func testRetrieveModel() async throws { - let query = ModelQuery(model: .gpt4) - let modelResult = ModelResult(id: .gpt4, object: "model", ownedBy: "organization-owner") + let query = ModelQuery(model: .gpt3_5Turbo) + let modelResult = ModelResult(id: .gpt3_5Turbo, created: 999, object: "model", ownedBy: "openai") try self.stub(result: modelResult) let result = try await openAI.model(query: query) @@ -208,7 +214,7 @@ class OpenAITests: XCTestCase { } func testRetrieveModelError() async throws { - let query = ModelQuery(model: .gpt4) + let query = ModelQuery(model: .gpt3_5Turbo) let inError = APIError(message: "foo", type: "bar", param: "baz", code: "100") self.stub(error: inError) @@ -218,9 +224,9 @@ class OpenAITests: XCTestCase { func testListModels() async throws { let listModelsResult = ModelsResult(data: [ - .init(id: "model-id-0", object: "model", ownedBy: "organization-owner"), - .init(id: "model-id-1", object: "model", ownedBy: "organization-owner"), - .init(id: "model-id-2", object: "model", ownedBy: "openai") + .init(id: "model-id-0", created: 7777, object: "model", ownedBy: "organization-owner"), + .init(id: "model-id-1", created: 7777, object: "model", ownedBy: "organization-owner"), + .init(id: "model-id-2", created: 7777, object: "model", ownedBy: "openai") ], object: "list") try self.stub(result: listModelsResult) @@ -258,6 +264,30 @@ class OpenAITests: XCTestCase { XCTAssertEqual(inError, apiError) } + func testAudioSpeechDoesNotNormalize() async throws { + let query = AudioSpeechQuery(model: .tts_1, input: "Hello, world!", voice: .alloy, responseFormat: .mp3, speed: 2.0) + + XCTAssertEqual(query.speed, "\(2.0)") + } + + func testAudioSpeechNormalizeNil() async throws { + let query = AudioSpeechQuery(model: .tts_1, input: "Hello, world!", voice: .alloy, responseFormat: .mp3, speed: nil) + + XCTAssertEqual(query.speed, "\(1.0)") + } + + func 
testAudioSpeechNormalizeLow() async throws { + let query = AudioSpeechQuery(model: .tts_1, input: "Hello, world!", voice: .alloy, responseFormat: .mp3, speed: 0.0) + + XCTAssertEqual(query.speed, "\(0.25)") + } + + func testAudioSpeechNormalizeHigh() async throws { + let query = AudioSpeechQuery(model: .tts_1, input: "Hello, world!", voice: .alloy, responseFormat: .mp3, speed: 10.0) + + XCTAssertEqual(query.speed, "\(4.0)") + } + func testAudioSpeechError() async throws { let query = AudioSpeechQuery(model: .tts_1, input: "Hello, world!", voice: .alloy, responseFormat: .mp3, speed: 1.0) let inError = APIError(message: "foo", type: "bar", param: "baz", code: "100") @@ -269,7 +299,7 @@ class OpenAITests: XCTestCase { func testAudioTranscriptions() async throws { let data = Data() - let query = AudioTranscriptionQuery(file: data, fileName: "audio.m4a", model: .whisper_1) + let query = AudioTranscriptionQuery(file: data, fileType: .m4a, model: .whisper_1) let transcriptionResult = AudioTranscriptionResult(text: "Hello, world!") try self.stub(result: transcriptionResult) @@ -279,7 +309,7 @@ class OpenAITests: XCTestCase { func testAudioTranscriptionsError() async throws { let data = Data() - let query = AudioTranscriptionQuery(file: data, fileName: "audio.m4a", model: .whisper_1) + let query = AudioTranscriptionQuery(file: data, fileType: .m4a, model: .whisper_1) let inError = APIError(message: "foo", type: "bar", param: "baz", code: "100") self.stub(error: inError) @@ -289,7 +319,7 @@ class OpenAITests: XCTestCase { func testAudioTranslations() async throws { let data = Data() - let query = AudioTranslationQuery(file: data, fileName: "audio.m4a", model: .whisper_1) + let query = AudioTranslationQuery(file: data, fileType: .m4a, model: .whisper_1) let transcriptionResult = AudioTranslationResult(text: "Hello, world!") try self.stub(result: transcriptionResult) @@ -299,7 +329,7 @@ class OpenAITests: XCTestCase { func testAudioTranslationsError() async throws { let data = 
Data() - let query = AudioTranslationQuery(file: data, fileName: "audio.m4a", model: .whisper_1) + let query = AudioTranslationQuery(file: data, fileType: .m4a, model: .whisper_1) let inError = APIError(message: "foo", type: "bar", param: "baz", code: "100") self.stub(error: inError) @@ -323,8 +353,8 @@ class OpenAITests: XCTestCase { func testJSONRequestCreation() throws { let configuration = OpenAI.Configuration(token: "foo", organizationIdentifier: "bar", timeoutInterval: 14) - let completionQuery = CompletionsQuery(model: .whisper_1, prompt: "how are you?") - let jsonRequest = JSONRequest(body: completionQuery, url: URL(string: "http://google.com")!) + let completionQuery = ChatQuery(messages: [.user(.init(content: .string("how are you?")))], model: .gpt3_5Turbo_16k) + let jsonRequest = JSONRequest(body: completionQuery, url: URL(string: "http://google.com")!) let urlRequest = try jsonRequest.build(token: configuration.token, organizationIdentifier: configuration.organizationIdentifier, timeoutInterval: configuration.timeoutInterval) XCTAssertEqual(urlRequest.value(forHTTPHeaderField: "Authorization"), "Bearer \(configuration.token)") @@ -335,8 +365,8 @@ class OpenAITests: XCTestCase { func testMultipartRequestCreation() throws { let configuration = OpenAI.Configuration(token: "foo", organizationIdentifier: "bar", timeoutInterval: 14) - let completionQuery = AudioTranslationQuery(file: Data(), fileName: "foo", model: .whisper_1) - let jsonRequest = MultipartFormDataRequest(body: completionQuery, url: URL(string: "http://google.com")!) + let completionQuery = AudioTranslationQuery(file: Data(), fileType: .mp3, model: .whisper_1) + let jsonRequest = MultipartFormDataRequest(body: completionQuery, url: URL(string: "http://google.com")!) 
let urlRequest = try jsonRequest.build(token: configuration.token, organizationIdentifier: configuration.organizationIdentifier, timeoutInterval: configuration.timeoutInterval) XCTAssertEqual(urlRequest.value(forHTTPHeaderField: "Authorization"), "Bearer \(configuration.token)") @@ -347,15 +377,15 @@ class OpenAITests: XCTestCase { func testDefaultHostURLBuilt() { let configuration = OpenAI.Configuration(token: "foo", organizationIdentifier: "bar", timeoutInterval: 14) let openAI = OpenAI(configuration: configuration, session: self.urlSession) - let completionsURL = openAI.buildURL(path: .completions) - XCTAssertEqual(completionsURL, URL(string: "https://api.openai.com/v1/completions")) + let chatsURL = openAI.buildURL(path: .chats) + XCTAssertEqual(chatsURL, URL(string: "https://api.openai.com/v1/chat/completions")) } func testCustomURLBuilt() { let configuration = OpenAI.Configuration(token: "foo", organizationIdentifier: "bar", host: "my.host.com", timeoutInterval: 14) let openAI = OpenAI(configuration: configuration, session: self.urlSession) - let completionsURL = openAI.buildURL(path: .completions) - XCTAssertEqual(completionsURL, URL(string: "https://my.host.com/v1/completions")) + let chatsURL = openAI.buildURL(path: .chats) + XCTAssertEqual(chatsURL, URL(string: "https://my.host.com/v1/chat/completions")) } } diff --git a/Tests/OpenAITests/OpenAITestsCombine.swift b/Tests/OpenAITests/OpenAITestsCombine.swift index e2b58458..b7918b44 100644 --- a/Tests/OpenAITests/OpenAITestsCombine.swift +++ b/Tests/OpenAITests/OpenAITestsCombine.swift @@ -37,15 +37,15 @@ final class OpenAITestsCombine: XCTestCase { } func testChats() throws { - let query = ChatQuery(model: .gpt4, messages: [ - .init(role: .system, content: "You are Librarian-GPT. 
You know everything about the books."), - .init(role: .user, content: "Who wrote Harry Potter?") - ]) - let chatResult = ChatResult(id: "id-12312", object: "foo", created: 100, model: .gpt3_5Turbo, choices: [ - .init(index: 0, message: .init(role: .system, content: "bar"), finishReason: "baz"), - .init(index: 0, message: .init(role: .user, content: "bar1"), finishReason: "baz1"), - .init(index: 0, message: .init(role: .assistant, content: "bar2"), finishReason: "baz2") - ], usage: .init(promptTokens: 100, completionTokens: 200, totalTokens: 300)) + let query = ChatQuery(messages: [ + .system(.init(content: "You are Librarian-GPT. You know everything about the books.")), + .user(.init(content: .string("Who wrote Harry Potter?"))) + ], model: .gpt3_5Turbo) + let chatResult = ChatResult(id: "id-12312", object: "foo", created: 100, model: .gpt3_5Turbo, choices: [ + .init(index: 0, logprobs: nil, message: .system(.init(content: "bar")), finishReason: "baz"), + .init(index: 0, logprobs: nil, message: .user(.init(content: .string("bar1"))), finishReason: "baz1"), + .init(index: 0, logprobs: nil, message: .assistant(.init(content: "bar2")), finishReason: "baz2") + ], usage: .init(completionTokens: 200, promptTokens: 100, totalTokens: 300), systemFingerprint: nil) try self.stub(result: chatResult) let result = try awaitPublisher(openAI.chats(query: query)) XCTAssertEqual(result, chatResult) @@ -62,12 +62,12 @@ final class OpenAITestsCombine: XCTestCase { } func testEmbeddings() throws { - let query = EmbeddingsQuery(model: .textSearchBabbageDoc, input: "The food was delicious and the waiter...") + let query = EmbeddingsQuery(input: .string("The food was delicious and the waiter..."), model: .textEmbeddingAda) let embeddingsResult = EmbeddingsResult(data: [ .init(object: "id-sdasd", embedding: [0.1, 0.2, 0.3, 0.4], index: 0), .init(object: "id-sdasd1", embedding: [0.4, 0.1, 0.7, 0.1], index: 1), .init(object: "id-sdasd2", embedding: [0.8, 0.1, 0.2, 0.8], index: 2) - ], 
model: .textSearchBabbageDoc, usage: .init(promptTokens: 10, totalTokens: 10)) + ], model: .textSearchBabbageDoc, usage: .init(promptTokens: 10, totalTokens: 10), object: "list") try self.stub(result: embeddingsResult) let result = try awaitPublisher(openAI.embeddings(query: query)) @@ -75,8 +75,8 @@ final class OpenAITestsCombine: XCTestCase { } func testRetrieveModel() throws { - let query = ModelQuery(model: .gpt4) - let modelResult = ModelResult(id: .gpt4, object: "model", ownedBy: "organization-owner") + let query = ModelQuery(model: .gpt3_5Turbo_0125) + let modelResult = ModelResult(id: .gpt3_5Turbo_0125, created: 200000000, object: "model", ownedBy: "organization-owner") try self.stub(result: modelResult) let result = try awaitPublisher(openAI.model(query: query)) @@ -106,7 +106,7 @@ final class OpenAITestsCombine: XCTestCase { func testAudioTranscriptions() throws { let data = Data() - let query = AudioTranscriptionQuery(file: data, fileName: "audio.m4a", model: .whisper_1) + let query = AudioTranscriptionQuery(file: data, fileType: .m4a, model: .whisper_1) let transcriptionResult = AudioTranscriptionResult(text: "Hello, world!") try self.stub(result: transcriptionResult) @@ -116,7 +116,7 @@ final class OpenAITestsCombine: XCTestCase { func testAudioTranslations() throws { let data = Data() - let query = AudioTranslationQuery(file: data, fileName: "audio.m4a", model: .whisper_1) + let query = AudioTranslationQuery(file: data, fileType: .m4a, model: .whisper_1) let transcriptionResult = AudioTranslationResult(text: "Hello, world!") try self.stub(result: transcriptionResult) diff --git a/Tests/OpenAITests/OpenAITestsDecoder.swift b/Tests/OpenAITests/OpenAITestsDecoder.swift index 253a4486..d9672c04 100644 --- a/Tests/OpenAITests/OpenAITestsDecoder.swift +++ b/Tests/OpenAITests/OpenAITestsDecoder.swift @@ -75,9 +75,9 @@ class OpenAITestsDecoder: XCTestCase { """ let expectedValue = ImagesResult(created: 1589478378, data: [ - .init(url: "https://foo.bar", 
b64_json: nil), - .init(url: "https://bar.foo", b64_json: nil), - .init(url: nil, b64_json: "test") + .init(b64Json: nil, revisedPrompt: nil, url: "https://foo.bar"), + .init(b64Json: nil, revisedPrompt: nil, url: "https://bar.foo"), + .init(b64Json: "test", revisedPrompt: nil, url: nil) ]) try decode(data, expectedValue) } @@ -106,8 +106,8 @@ class OpenAITestsDecoder: XCTestCase { """ let expectedValue = ChatResult(id: "chatcmpl-123", object: "chat.completion", created: 1677652288, model: .gpt4, choices: [ - .init(index: 0, message: Chat(role: .assistant, content: "Hello, world!"), finishReason: "stop") - ], usage: .init(promptTokens: 9, completionTokens: 12, totalTokens: 21)) + .init(index: 0, logprobs: nil, message: .assistant(.init(content: "Hello, world!")), finishReason: "stop") + ], usage: .init(completionTokens: 12, promptTokens: 9, totalTokens: 21), systemFingerprint: nil) try decode(data, expectedValue) } @@ -115,10 +115,10 @@ class OpenAITestsDecoder: XCTestCase { let imageQuery = ImagesQuery( prompt: "test", model: .dall_e_2, - responseFormat: .b64_json, n: 1, - size: "10", - style: "vivid", + responseFormat: .b64_json, + size: ._512, + style: .vivid, user: "user" ) @@ -127,7 +127,7 @@ class OpenAITestsDecoder: XCTestCase { "model": "dall-e-2", "prompt": "test", "n": 1, - "size": "10", + "size": "512x512", "style": "vivid", "user": "user", "response_format": "b64_json" @@ -143,25 +143,24 @@ class OpenAITestsDecoder: XCTestCase { func testChatQueryWithFunctionCall() async throws { let chatQuery = ChatQuery( - model: .gpt3_5Turbo, messages: [ - Chat(role: .user, content: "What's the weather like in Boston?") + .user(.init(content: .string("What's the weather like in Boston?"))) ], - responseFormat: .init(type: .jsonObject), - functions: [ - ChatFunctionDeclaration( + model: .gpt3_5Turbo, + responseFormat: ChatQuery.ResponseFormat.jsonObject, + tools: [ + .init(function: .init( name: "get_current_weather", description: "Get the current weather in a given 
location", - parameters: - JSONSchema( + parameters: .init( type: .object, properties: [ "location": .init(type: .string, description: "The city and state, e.g. San Francisco, CA"), - "unit": .init(type: .string, enumValues: ["celsius", "fahrenheit"]) + "unit": .init(type: .string, enum: ["celsius", "fahrenheit"]) ], required: ["location"] ) - ) + )) ] ) let expectedValue = """ @@ -173,21 +172,24 @@ class OpenAITestsDecoder: XCTestCase { "response_format": { "type": "json_object" }, - "functions": [ + "tools": [ { - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA" + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. 
San Francisco, CA" + }, + "unit": { "type": "string", "enum": ["celsius", "fahrenheit"] } }, - "unit": { "type": "string", "enum": ["celsius", "fahrenheit"] } - }, - "required": ["location"] - } + "required": ["location"] + } + }, + "type": "function" } ], "stream": false @@ -213,12 +215,19 @@ class OpenAITestsDecoder: XCTestCase { "index": 0, "message": { "role": "assistant", - "content": null, - "function_call": { - "name": "get_current_weather" - } + "tool_calls": [ + { + "type": "function", + "id": "chatcmpl-1234", + "function": { + "name": "get_current_weather", + "arguments": "" + } + } + ] }, - "finish_reason": "function_call" + "finish_reason": "tool_calls", + "logprobs": null } ], "usage": { @@ -235,12 +244,12 @@ class OpenAITestsDecoder: XCTestCase { created: 1677652288, model: .gpt3_5Turbo, choices: [ - .init(index: 0, message: - Chat(role: .assistant, - functionCall: ChatFunctionCall(name: "get_current_weather", arguments: nil)), - finishReason: "function_call") + .init(index: 0, + logprobs: nil, message: + .assistant(.init(toolCalls: [.init(id: "chatcmpl-1234", function: .init(arguments: "", name: "get_current_weather"))])), finishReason: "tool_calls") ], - usage: .init(promptTokens: 82, completionTokens: 18, totalTokens: 100)) + usage: .init(completionTokens: 18, promptTokens: 82, totalTokens: 100), + systemFingerprint: nil) try decode(data, expectedValue) } @@ -294,7 +303,7 @@ class OpenAITestsDecoder: XCTestCase { let expectedValue = EmbeddingsResult(data: [ .init(object: "embedding", embedding: [0.0023064255, -0.009327292, -0.0028842222], index: 0) - ], model: .textEmbeddingAda, usage: .init(promptTokens: 8, totalTokens: 8)) + ], model: .textEmbeddingAda, usage: .init(promptTokens: 8, totalTokens: 8), object: "list") try decode(data, expectedValue) } @@ -304,16 +313,19 @@ class OpenAITestsDecoder: XCTestCase { "data": [ { "id": "gpt-3.5-turbo", + "created": 222, "object": "model", "owned_by": "organization-owner" }, { - "id": "gpt-4", + "id": 
"dall-e-2", + "created": 111, "object": "model", "owned_by": "organization-owner" }, { - "id": "text-davinci-001", + "id": "whisper-1", + "created": 333, "object": "model", "owned_by": "openai" } @@ -323,9 +335,9 @@ class OpenAITestsDecoder: XCTestCase { """ let expectedValue = ModelsResult(data: [ - .init(id: .gpt3_5Turbo, object: "model", ownedBy: "organization-owner"), - .init(id: .gpt4, object: "model", ownedBy: "organization-owner"), - .init(id: .textDavinci_001, object: "model", ownedBy: "openai") + .init(id: .gpt3_5Turbo, created: 222, object: "model", ownedBy: "organization-owner"), + .init(id: .dall_e_2, created: 111, object: "model", ownedBy: "organization-owner"), + .init(id: .whisper_1, created: 333, object: "model", ownedBy: "openai") ], object: "list") try decode(data, expectedValue) } @@ -333,13 +345,14 @@ class OpenAITestsDecoder: XCTestCase { func testModelType() async throws { let data = """ { - "id": "text-davinci-003", + "id": "whisper-1", + "created": 555, "object": "model", "owned_by": "openai" } """ - let expectedValue = ModelResult(id: .textDavinci_003, object: "model", ownedBy: "openai") + let expectedValue = ModelResult(id: .whisper_1, created: 555, object: "model", ownedBy: "openai") try decode(data, expectedValue) } @@ -393,7 +406,7 @@ class OpenAITestsDecoder: XCTestCase { try decode(data, expectedValue) } - func testAudioTranslations() async throws { + func testAudioAudioTranslationResults() async throws { let data = """ { "text": "Hello, world!"