Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Vertex AI] Add image generation support using Imagen #14236

Draft
wants to merge 13 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/vertexai.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ on:
schedule:
# Run every day at 11pm (PST) - cron uses UTC times
- cron: '0 7 * * *'
workflow_dispatch:

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
Expand Down Expand Up @@ -102,6 +103,7 @@ jobs:
needs: spm-package-resolved
env:
TEST_RUNNER_FIRAAppCheckDebugToken: ${{ secrets.VERTEXAI_INTEGRATION_FAC_DEBUG_TOKEN }}
TEST_RUNNER_VTXIntegrationImagen: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
FIREBASECI_USE_LATEST_GOOGLEAPPMEASUREMENT: 1
secrets_passphrase: ${{ secrets.GHASecretsGPGPassphrase1 }}
steps:
Expand Down
15 changes: 14 additions & 1 deletion FirebaseVertexAI/Sources/GenerationConfig.swift
Original file line number Diff line number Diff line change
Expand Up @@ -162,4 +162,17 @@ public struct GenerationConfig {
// MARK: - Codable Conformances

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension GenerationConfig: Encodable {}
extension GenerationConfig: Encodable {
enum CodingKeys: String, CodingKey {
case temperature
case topP
case topK
case candidateCount
case maxOutputTokens
case presencePenalty
case frequencyPenalty
case stopSequences
case responseMIMEType = "responseMimeType"
case responseSchema
}
}
3 changes: 3 additions & 0 deletions FirebaseVertexAI/Sources/GenerativeAIRequest.swift
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,6 @@ public struct RequestOptions {
self.timeout = timeout
}
}

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension RequestOptions: Equatable {}
1 change: 0 additions & 1 deletion FirebaseVertexAI/Sources/GenerativeAIService.swift
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,6 @@ struct GenerativeAIService {
}

let encoder = JSONEncoder()
encoder.keyEncodingStrategy = .convertToSnakeCase
urlRequest.httpBody = try encoder.encode(request)
urlRequest.timeoutInterval = request.options.timeout

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
struct ImageGenerationInstance {
let prompt: String
}

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension ImageGenerationInstance: Equatable {}

// MARK: - Codable Conformance

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension ImageGenerationInstance: Encodable {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import Foundation

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
struct ImageGenerationOutputOptions {
let mimeType: String
let compressionQuality: Int?
}

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension ImageGenerationOutputOptions: Equatable {}

// MARK: - Codable Conformance

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension ImageGenerationOutputOptions: Encodable {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
struct ImageGenerationParameters {
let sampleCount: Int?
let storageURI: String?
let negativePrompt: String?
let aspectRatio: String?
let safetyFilterLevel: String?
let personGeneration: String?
let outputOptions: ImageGenerationOutputOptions?
let addWatermark: Bool?
let includeResponsibleAIFilterReason: Bool?
}

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension ImageGenerationParameters: Equatable {}

// MARK: - Codable Conformance

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension ImageGenerationParameters: Encodable {
enum CodingKeys: String, CodingKey {
case sampleCount
case storageURI = "storageUri"
case negativePrompt
case aspectRatio
case safetyFilterLevel = "safetySetting"
case personGeneration
case outputOptions
case addWatermark
case includeResponsibleAIFilterReason = "includeRaiReason"
}

func encode(to encoder: any Encoder) throws {
var container = encoder.container(keyedBy: CodingKeys.self)
try container.encodeIfPresent(sampleCount, forKey: .sampleCount)
try container.encodeIfPresent(storageURI, forKey: .storageURI)
try container.encodeIfPresent(negativePrompt, forKey: .negativePrompt)
try container.encodeIfPresent(aspectRatio, forKey: .aspectRatio)
try container.encodeIfPresent(safetyFilterLevel, forKey: .safetyFilterLevel)
try container.encodeIfPresent(personGeneration, forKey: .personGeneration)
try container.encodeIfPresent(outputOptions, forKey: .outputOptions)
try container.encodeIfPresent(addWatermark, forKey: .addWatermark)
try container.encodeIfPresent(
includeResponsibleAIFilterReason,
forKey: .includeResponsibleAIFilterReason
)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import Foundation

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
struct ImagenGenerationRequest<ImageType: ImagenImageRepresentable> {
let model: String
let options: RequestOptions
let instances: [ImageGenerationInstance]
let parameters: ImageGenerationParameters

init(model: String, options: RequestOptions, instances: [ImageGenerationInstance],
parameters: ImageGenerationParameters) {
self.model = model
self.options = options
self.instances = instances
self.parameters = parameters
}
}

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension ImagenGenerationRequest: GenerativeAIRequest where ImageType: Decodable {
typealias Response = ImagenGenerationResponse<ImageType>

var url: URL {
return URL(string: "\(Constants.baseURL)/\(options.apiVersion)/\(model):predict")!
}
}

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension ImagenGenerationRequest: Encodable {
enum CodingKeys: CodingKey {
case instances
case parameters
}

func encode(to encoder: any Encoder) throws {
var container = encoder.container(keyedBy: CodingKeys.self)
try container.encode(instances, forKey: .instances)
try container.encode(parameters, forKey: .parameters)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
struct RAIFilteredReason {
let raiFilteredReason: String
}

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension RAIFilteredReason: Decodable {
enum CodingKeys: CodingKey {
case raiFilteredReason
}
}
5 changes: 5 additions & 0 deletions FirebaseVertexAI/Sources/Types/Internal/InternalPart.swift
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ struct FileData: Codable, Equatable, Sendable {
self.fileURI = fileURI
self.mimeType = mimeType
}

enum CodingKeys: String, CodingKey {
case fileURI = "fileUri"
case mimeType
}
}

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import Foundation

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
public struct ImagenAspectRatio {
public static let square1x1 = ImagenAspectRatio(kind: .square1x1)

public static let portrait9x16 = ImagenAspectRatio(kind: .portrait9x16)

public static let landscape16x9 = ImagenAspectRatio(kind: .landscape16x9)

public static let portrait3x4 = ImagenAspectRatio(kind: .portrait3x4)

public static let landscape4x3 = ImagenAspectRatio(kind: .landscape4x3)

let rawValue: String
}

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension ImagenAspectRatio: ProtoEnum {
enum Kind: String {
case square1x1 = "1:1"
case portrait9x16 = "9:16"
case landscape16x9 = "16:9"
case portrait3x4 = "3:4"
case landscape4x3 = "4:3"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import Foundation

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
public struct ImagenFileDataImage {
public let mimeType: String
public let gcsURI: String

init(mimeType: String, gcsURI: String) {
self.mimeType = mimeType
self.gcsURI = gcsURI
}
}

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension ImagenFileDataImage: ImagenImageRepresentable {
public var _imagenImage: _ImagenImage {
_ImagenImage(mimeType: mimeType, bytesBase64Encoded: nil, gcsURI: gcsURI)
}
}

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension ImagenFileDataImage: Equatable {}

// MARK: - Codable Conformances

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension ImagenFileDataImage: Decodable {
enum CodingKeys: String, CodingKey {
case mimeType
case gcsURI = "gcsUri"
}

public init(from decoder: any Decoder) throws {
let container = try decoder.container(keyedBy: CodingKeys.self)
let mimeType = try container.decode(String.self, forKey: .mimeType)
let gcsURI = try container.decode(String.self, forKey: .gcsURI)
self.init(mimeType: mimeType, gcsURI: gcsURI)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
public struct ImagenGenerationConfig {
public var numberOfImages: Int?
public var negativePrompt: String?
public var aspectRatio: ImagenAspectRatio?

public init(numberOfImages: Int? = nil, negativePrompt: String? = nil,
aspectRatio: ImagenAspectRatio? = nil) {
self.numberOfImages = numberOfImages
self.negativePrompt = negativePrompt
self.aspectRatio = aspectRatio
}
}
Loading
Loading