diff --git a/fern/apis/fine-tuning/openapi/openapi.json b/fern/apis/fine-tuning/openapi/openapi.json
index e5c2431..9d45ee5 100644
--- a/fern/apis/fine-tuning/openapi/openapi.json
+++ b/fern/apis/fine-tuning/openapi/openapi.json
@@ -2,13 +2,11 @@
   "components": {
     "schemas": {
       "BaseEngine": {
-        "description": "Base Engine names supported by asset library.",
+        "description": "BaseEngine",
         "enum": [
           "text/llama-2-7b",
           "text/llama-2-13b",
           "text/llama-2-70b",
-          "text/llama-3-8b",
-          "text/llama-3-70b",
           "text/codellama-7b",
           "text/codellama-13b",
           "text/codellama-34b",
@@ -59,8 +57,7 @@
         "discriminator": {
           "mapping": {
             "lora_tune": "#/components/schemas/LoraTune-Input",
-            "text_to_speech_latent_tune": "#/components/schemas/TextToSpeechLatentTune",
-            "text_tune": "#/components/schemas/TextTune"
+            "text_to_speech_latent_tune": "#/components/schemas/TextToSpeechLatentTune"
           },
           "propertyName": "tune_type"
         },
@@ -70,9 +67,6 @@
           },
           {
             "$ref": "#/components/schemas/TextToSpeechLatentTune"
-          },
-          {
-            "$ref": "#/components/schemas/TextTune"
           }
         ],
         "title": "Details"
@@ -88,10 +82,8 @@
         "type": "object"
       },
       "HTTPValidationError": {
-        "description": "HTTP validation error in API.",
         "properties": {
           "detail": {
-            "description": "Detail of the http validation error.",
             "items": {
               "$ref": "#/components/schemas/ValidationError"
             },
@@ -106,7 +98,7 @@
         "description": "The list tunes response.",
         "properties": {
           "data": {
-            "description": "List of tunes on the account.",
+            "description": "List of tunes.",
             "items": {
               "$ref": "#/components/schemas/Tune"
             },
@@ -129,7 +121,7 @@
         "type": "object"
       },
       "LoraTune-Input": {
-        "description": "A LoRA tune model that includes tune configurations.",
+        "description": "A LoRA Tune.",
         "properties": {
           "base_checkpoint": {
             "allOf": [
@@ -188,10 +180,8 @@
           "tune_type": {
             "const": "lora_tune",
             "default": "lora_tune",
-            "description": "Determines the tune type for a tune request.",
-            "enum": ["lora_tune"],
-            "title": "Tune Type",
-            "type": "string"
+            "description": "Tune type.",
+            "title": "Tune Type"
           }
         },
         "required": ["base_checkpoint", "steps", "files", "trigger_words"],
@@ -199,7 +189,7 @@
         "type": "object"
       },
       "LoraTune-Output": {
-        "description": "A LoRA tune model that includes tune configurations.",
+        "description": "A LoRA Tune.",
         "properties": {
           "base_checkpoint": {
             "allOf": [
@@ -258,10 +248,8 @@
           "tune_type": {
             "const": "lora_tune",
             "default": "lora_tune",
-            "description": "Determines the tune type for a tune request.",
-            "enum": ["lora_tune"],
-            "title": "Tune Type",
-            "type": "string"
+            "description": "Tune type.",
+            "title": "Tune Type"
           }
         },
         "required": ["base_checkpoint", "steps", "files", "trigger_words"],
@@ -356,57 +344,16 @@
           "tune_type": {
             "const": "text_to_speech_latent_tune",
             "default": "text_to_speech_latent_tune",
-            "description": "Determines the tune type for a tune request.",
-            "enum": ["text_to_speech_latent_tune"],
-            "title": "Tune Type",
-            "type": "string"
+            "description": "Tune type.",
+            "title": "Tune Type"
           }
         },
         "required": ["asset_ids"],
         "title": "TextToSpeechLatentTune",
         "type": "object"
       },
-      "TextTune": {
-        "description": "A text generation tune.",
-        "properties": {
-          "checkpoint": {
-            "description": "The checkpoint name or ID used as base model for tuning.",
-            "title": "Checkpoint",
-            "type": "string"
-          },
-          "epochs": {
-            "default": 4,
-            "description": "The number of epochs used for this Text tune.",
-            "title": "Epochs",
-            "type": "integer"
-          },
-          "learning_rate": {
-            "default": 0.0001,
-            "description": "The learning rate used for this Text tune.",
-            "title": "Learning Rate",
-            "type": "number"
-          },
-          "lora_rank": {
-            "default": 16,
-            "description": "The LoRA rank used for this Text tune.",
-            "title": "Lora Rank",
-            "type": "integer"
-          },
-          "tune_type": {
-            "const": "text_tune",
-            "default": "text_tune",
-            "description": "Determines the tune type for a tune request.",
-            "enum": ["text_tune"],
-            "title": "Tune Type",
-            "type": "string"
-          }
-        },
-        "required": ["checkpoint"],
-        "title": "TextTune",
-        "type": "object"
-      },
       "Tune": {
-        "description": "Tune base class. This class is used to store tune information.",
+        "description": "A tune base class.",
         "properties": {
           "created_at": {
             "description": "The time this tune task was created.",
@@ -433,12 +380,11 @@
             "type": "string"
           },
           "details": {
-            "description": "Tune details. This field is used to store a specific tune type configurations.",
+            "description": "Tune details.",
             "discriminator": {
               "mapping": {
                 "lora_tune": "#/components/schemas/LoraTune-Output",
-                "text_to_speech_latent_tune": "#/components/schemas/TextToSpeechLatentTune",
-                "text_tune": "#/components/schemas/TextTune"
+                "text_to_speech_latent_tune": "#/components/schemas/TextToSpeechLatentTune"
               },
               "propertyName": "tune_type"
             },
@@ -448,9 +394,6 @@
               },
               {
                 "$ref": "#/components/schemas/TextToSpeechLatentTune"
-              },
-              {
-                "$ref": "#/components/schemas/TextTune"
               }
             ],
             "title": "Details"
@@ -621,7 +564,6 @@
         "description": "The type of tune requested.",
         "enum": [
           "lora_tune",
-          "text_tune",
           "text_to_speech_latent_tune",
           "text_to_speech_rvc_tune",
           "text_to_speech_tortoise_tune"
@@ -630,10 +572,8 @@
         "type": "string"
      },
       "ValidationError": {
-        "description": "Validation error in API.",
         "properties": {
           "loc": {
-            "description": "Error location in ValidationError.",
             "items": {
               "anyOf": [
                 {
@@ -648,12 +588,10 @@
             "type": "array"
           },
           "msg": {
-            "description": "Error message in ValidationError.",
             "title": "Message",
             "type": "string"
           },
           "type": {
-            "description": "Error type in ValidationError.",
             "title": "Error Type",
             "type": "string"
           }
@@ -665,12 +603,32 @@
     }
   },
   "info": {
-    "description": "Fine-Tuning Service API.",
-    "title": "fine-tuning-api",
+    "title": "fine-tuning-api-server",
    "version": "0.1.0"
   },
   "openapi": "3.1.0",
   "paths": {
+    "/health": {
+      "get": {
+        "description": "Healthcheck server endpoint.",
+        "operationId": "health_check_health_get",
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "title": "Response Health Check Health Get",
+                  "type": "object"
+                }
+              }
+            },
+            "description": "Successful Response"
+          }
+        },
+        "summary": "Health Check",
+        "tags": ["tune"]
+      }
+    },
     "/v1/tune": {
       "post": {
         "description": "Spawn a tune.",
@@ -717,12 +675,12 @@
         "operationId": "delete_tune_v1_tune__tune_id__delete",
         "parameters": [
           {
-            "description": "The ID of the tune to delete.",
+            "description": "The ID of the tune.",
             "in": "path",
             "name": "tune_id",
             "required": true,
             "schema": {
-              "description": "The ID of the tune to delete.",
+              "description": "The ID of the tune.",
               "title": "Tune Id",
               "type": "string"
             }
@@ -733,7 +691,8 @@
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/Tune"
+                  "title": "Response Delete Tune V1 Tune Tune Id Delete",
+                  "type": "object"
                 }
               }
             },
@@ -754,16 +713,16 @@
         "tags": ["tune"]
       },
       "get": {
-        "description": "Retrieve a specific tune.",
-        "operationId": "retrieve_tune_v1_tune__tune_id__get",
+        "description": "Get the specific tune.",
+        "operationId": "get_tune_v1_tune__tune_id__get",
         "parameters": [
           {
-            "description": "The ID of the tune to retrieve.",
+            "description": "The ID of the tune.",
             "in": "path",
             "name": "tune_id",
             "required": true,
             "schema": {
-              "description": "The ID of the tune to retrieve.",
+              "description": "The ID of the tune.",
              "title": "Tune Id",
              "type": "string"
            }
@@ -791,7 +750,7 @@
             "description": "Validation Error"
           }
         },
-        "summary": "Retrieve Tune",
+        "summary": "Get Tune",
         "tags": ["tune"]
       }
     },
@@ -851,6 +810,7 @@
           "schema": {
             "default": 0,
             "description": "Offset into the results.",
+            "gte": 0,
             "title": "Offset",
             "type": "integer"
           }
@@ -863,6 +823,8 @@
           "schema": {
             "default": 100,
             "description": "The max number of results to be shown (limit 100).",
+            "gte": 1,
+            "lte": 100,
             "title": "Limit",
             "type": "integer"
           }
@@ -944,7 +906,7 @@
         }
       },
       {
-        "description": "The engine type for listing tunes.",
+        "description": "The engine type.",
         "in": "query",
         "name": "engine",
         "required": false,
@@ -957,7 +919,7 @@
             "type": "null"
            }
          ],
-          "description": "The engine type for listing tunes.",
+          "description": "The engine type.",
          "title": "Engine"
        }
      }
@@ -988,10 +950,5 @@
         "tags": ["tune"]
       }
     }
-  },
-  "servers": [
-    {
-      "url": "https://api.octoai.cloud"
-    }
-  ]
+  }
 }
diff --git a/fern/docs.yml b/fern/docs.yml
index dfebd25..fd31fd8 100644
--- a/fern/docs.yml
+++ b/fern/docs.yml
@@ -87,11 +87,11 @@ navigation:
   - section: Media Gen REST APIs
     contents:
       - section: Generate
-        contents:
+        contents: 
          - page: FLUX.1 [Dev]
-            path: docs/media-gen-solution/rest-apis/generate/flux-dev.mdx
+            path: docs/media-gen-solution/rest-apis/generate/flux-dev.mdx 
          - page: FLUX.1 [Schnell]
-            path: docs/media-gen-solution/rest-apis/generate/flux-schnell.mdx
+            path: docs/media-gen-solution/rest-apis/generate/flux-schnell.mdx 
          - page: Stable Diffusion 3
            path: docs/media-gen-solution/rest-apis/generate/sd3.mdx
          - page: Stable Diffusion XL
@@ -99,23 +99,25 @@
          - page: Stable Diffusion XL Lightning
            path: docs/media-gen-solution/rest-apis/generate/sdxl-lightning.mdx
          - page: Juggernaut XII
-            path: docs/media-gen-solution/rest-apis/generate/juggernautXII.mdx
+            path: docs/media-gen-solution/rest-apis/generate/juggernautXII.mdx 
          - page: Juggernaut XI
-            path: docs/media-gen-solution/rest-apis/generate/juggernautXI.mdx
+            path: docs/media-gen-solution/rest-apis/generate/juggernautXI.mdx 
          - page: Juggernaut v9
-            path: docs/media-gen-solution/rest-apis/generate/juggernaut-v9.mdx
+            path: docs/media-gen-solution/rest-apis/generate/juggernaut-v9.mdx 
          - page: SDXL Controlnet
            path: docs/media-gen-solution/rest-apis/generate/controlnet-sdxl.mdx
+          - page: SD1.5 Controlnet
+            path: docs/media-gen-solution/rest-apis/generate/controlnet-sd15.mdx
          - page: Stable Video Diffusion
            path: docs/media-gen-solution/rest-apis/generate/svd.mdx
      - section: Edit
-        contents:
+        contents: 
          - page: Fix faces and hands
-            path: docs/media-gen-solution/rest-apis/edit/adetailer.mdx
+            path: docs/media-gen-solution/rest-apis/edit/adetailer.mdx 
-          - page: Remove background
-            path: docs/media-gen-solution/rest-apis/edit/background-removal.mdx
+          - page: Remove background 
+            path: docs/media-gen-solution/rest-apis/edit/background-removal.mdx 
          - page: Upscale
-            path: docs/media-gen-solution/rest-apis/edit/upscaling.mdx
+            path: docs/media-gen-solution/rest-apis/edit/upscaling.mdx 
      # - page: Image Gen
      #   path: docs/media-gen-solution/rest-apis/image-gen-api.mdx
      # - page: Video Gen
diff --git a/fern/docs/api-reference/media-gen/controlnet-sd15.mdx b/fern/docs/api-reference/media-gen/controlnet-sd15.mdx
deleted file mode 100644
index 01ba67d..0000000
--- a/fern/docs/api-reference/media-gen/controlnet-sd15.mdx
+++ /dev/null
@@ -1,5 +0,0 @@
----
-title: Generate ControlNet SD1.5
-slug: api-reference/media-gen/controlnet-sd15
----
-
diff --git a/fern/docs/api-reference/media-gen/sd15.mdx b/fern/docs/api-reference/media-gen/sd15.mdx
deleted file mode 100644
index bbc0d03..0000000
--- a/fern/docs/api-reference/media-gen/sd15.mdx
+++ /dev/null
@@ -1,5 +0,0 @@
----
-title: Generate SD1.5
-slug: api-reference/media-gen/sd15
----
-
diff --git a/fern/docs/getting-started/pricing-and-billing.mdx b/fern/docs/getting-started/pricing-and-billing.mdx
index 4b38726..5c463c3 100644
--- a/fern/docs/getting-started/pricing-and-billing.mdx
+++ b/fern/docs/getting-started/pricing-and-billing.mdx
@@ -55,8 +55,6 @@ Pricing for default image features and configurations are below:
 | SDXL Lightning base | 4 | 1024x1024 | DDIM (and any not listed below as premium) | $0.001/image |
 | SDXL Lightning Custom Asset (Fine-tuned) | 4 | 1024x1024 | DDIM (and any not listed below as premium) | $0.005/image |
 | SDXL Fine-tuning | 500 | N/A | N/A | $0.25/tune |
-| SD 1.5 with Base or Custom Asset (Fine-tuned) | 30 | 512x512 | DDIM (and any not listed below as premium) | $0.0015/image |
-| SD1.5 Fine-tuning | 500 | N/A | N/A | $0.1/tune |
 | Asset library (storage) | N/A | N/A | N/A | $0.05/GB stored per month, after the first 50GB |
 | Upscaling | N/A | N/A | N/A | $0.004/request |
 | Background Removal | N/A | N/A | N/A | $0.002/request |
@@ -86,9 +84,6 @@ Here are a few examples to illustrate how this works to assist you in applying t
 | SDXL with Custom Asset (Fine-tuned) | 60 | 1024x1024 | DDIM (default) | $.016 |
 | SDXL with Custom Asset (Fine-tuned) | 60 | 1024x1024 | DPM_2 (premium) | $.032 |
 | SDXL Fine-tuning | 1000 | N/A | N/A | $.5 |
-| SD 1.5 | 40 | 512x512 | DDIM (default) | $.002 |
-| SD1.5 | 60 | 1024x1024 | DDIM (default) | $.003 |
-| SD1.5 | 40 | 1024x1024 | DPM_2 (premium) | $.009 |
 
 ## Text Gen Solution
 
diff --git a/fern/docs/media-gen-solution/fine-tuning-stable-diffusion/typescript-sdk-finetuning.mdx b/fern/docs/media-gen-solution/fine-tuning-stable-diffusion/typescript-sdk-finetuning.mdx
index 53236b0..2cab960 100644
--- a/fern/docs/media-gen-solution/fine-tuning-stable-diffusion/typescript-sdk-finetuning.mdx
+++ b/fern/docs/media-gen-solution/fine-tuning-stable-diffusion/typescript-sdk-finetuning.mdx
@@ -74,10 +74,10 @@ After this completes, all assets will hopefully be in the ready state, or you should check on errors.
 
 #### 2) Get a checkpoint asset to use for tuning our LoRA
 
-Next, you'll need a checkpoint to use to tune your asset. In this example, we will just use the default checkpoint using Stable Diffusion 1.5, but you can also use other public OctoAI checkpoints or create your own using the Asset Library.
+Next, you'll need a checkpoint to use to tune your asset. In this example, we will just use the default checkpoint using Stable Diffusion XL, but you can also use other public OctoAI checkpoints or create your own using the Asset Library.
 
 ```TypeScript TypeScript
-const checkpoint = await octoai.assetLibrary.get("octoai:default-sd15");
+const checkpoint = await octoai.assetLibrary.get("octoai:default-sdxl");
 ```
 
 #### 3) Creating a tune
diff --git a/fern/docs/media-gen-solution/getting-started.mdx b/fern/docs/media-gen-solution/getting-started.mdx
index 89d8bf5..08b2aab 100644
--- a/fern/docs/media-gen-solution/getting-started.mdx
+++ b/fern/docs/media-gen-solution/getting-started.mdx
@@ -15,7 +15,7 @@ The OctoAI Media Gen Solution empowers users with unparalleled access to cutting
 1. **Fastest Inference Speed:** OctoAI boasts the fastest inference speed in the market, ensuring swift generation of media content. Our latency-optimized Stable Video Diffusion (SVD) endpoint achieves an impressive average latency of ~30 seconds for default parameters to generate 3-4 second-long videos, and the Stable Diffusion XL (SDXL) endpoint achieves an average latency of ~3.1 seconds for default parameters. The cost-optimized SDXL maintains an average latency of under 7 seconds.
 2. **Extensive Range of Features:** The OctoAI Media Gen solution offers a comprehensive suite of capabilities, supporting a diverse array of models including SVD, SDXL, and SD 1.5. These models cater to a wide range of use cases, spanning from text-to-image, image-to-image, and image-to-video functionalities, to advanced features like upscaling, image editing with controlnets, inpainting, outpainting, background removal, and photo merge. Additionally, advanced functionalities such as Adetailer and Background replacement are accessible through private preview, allowing users to finely customize their media generation processes according to their unique requirements.
-3. **Advanced Customization Options:** Users can customize their media generation process by adjusting various parameters such as image dimensions, samplers, number of diffusion steps, and prompt weighting. Additionally, the OctoAI Media Gen Solution allows you to mix and match different Stable Diffusion assets, including checkpoints, Low Rank Adaptations (LoRAs), and textual inversions. It offers the flexibility to fine-tune Stable Diffusion with your own custom tuning image datasets to tailor AI-generated images for your business needs. Fine-tuning is supported for Stable Diffusion 1.5 (SD 1.5) and SDXL. Our proprietary Asset Orchestrator technology enables efficient caching and loading of assets, ensuring optimized performance even with highly customized configurations.
+3. **Advanced Customization Options:** Users can customize their media generation process by adjusting various parameters such as image dimensions, samplers, number of diffusion steps, and prompt weighting. Additionally, the OctoAI Media Gen Solution allows you to mix and match different Stable Diffusion assets, including checkpoints, Low Rank Adaptations (LoRAs), and textual inversions. It offers the flexibility to fine-tune Stable Diffusion with your own custom tuning image datasets to tailor AI-generated images for your business needs. Our proprietary Asset Orchestrator technology enables efficient caching and loading of assets, ensuring optimized performance even with highly customized configurations.
 4. **Comprehensive Toolkit:** The OctoAI Media Gen Solution provides a comprehensive toolkit for interacting with our models, including Stable Diffusion API endpoints, a user-friendly web UI, and Python/TypeScript SDKs. This allows seamless integration into existing workflows and facilitates easy experimentation with model parameters.
 
 By combining state-of-the-art technology with unparalleled flexibility, the OctoAI Media Gen Solution empowers users to unlock new possibilities in media generation, revolutionizing content creation across industries.
 
diff --git a/fern/docs/media-gen-solution/image-utilities/outpainting.mdx b/fern/docs/media-gen-solution/image-utilities/outpainting.mdx
index 5bcc447..99bfc3c 100644
--- a/fern/docs/media-gen-solution/image-utilities/outpainting.mdx
+++ b/fern/docs/media-gen-solution/image-utilities/outpainting.mdx
@@ -16,26 +16,13 @@ Outpainting is the process of using an image generation model like Stable Diffus
   </Card>
 </CardGroup>
 
-Outpainting works both with SDXL and SD1.5.
+Outpainting works with SDXL.
 At a high level, outpainting works like this:
 * Choose an existing image you’d like to outpaint.
 * Create a source image that places your original image within a larger canvas.
 * Create a black and white mask image.
 * Use init_image (source image), mask_image (your mask image), a text prompt and outpainting parameter as inputs to Image Gen API to generate a new image.
 
-In the following example we will leverage the SD1.5 engine so we’ll start with a 512x512 image.
-
-<CardGroup cols={2}>
-  <Card title="Init Image (512X512)">
-    
-  </Card>
-</CardGroup>
-
-You can extend the image in any direction, but for this example we’ll extend the width from 512 → 768. Supported resolutions for SD1.5 are:
-
-`(W, H): (768, 576), (1024, 576), (640, 512), (384, 704), (640, 768), (640, 640), (1024, 768), (1536, 1024), (768, 1024), (576, 448), (1024, 1024), (896, 896), (704, 1216), (512, 512), (448, 576), (832, 512), (512, 704), (576, 768), (1216, 704), (512, 768), (512, 832), (1024, 1536), (576, 1024), (704, 384), (768, 512)
-`
-
 Support resolutions for SDXL are:
 
 `(W, H): {(1536, 640), (768, 1344), (832, 1216), (1344, 768), (1152, 896), (640, 1536), (1216, 832), (896, 1152), (1024, 1024)}`
diff --git a/fern/docs/media-gen-solution/rest-apis/generate/controlnet-sd15.mdx b/fern/docs/media-gen-solution/rest-apis/generate/controlnet-sd15.mdx
deleted file mode 100644
index 53b37af..0000000
--- a/fern/docs/media-gen-solution/rest-apis/generate/controlnet-sd15.mdx
+++ /dev/null
@@ -1,250 +0,0 @@
----
-title: ControlNet Stable Diffusion XL API
-slug: media-gen-solution/rest-apis/generate/controlnet-sd15
----
-
-Generate an image using a ControlNet Stable Diffusion 1.5 (SD1.5) model.
-
-OctoAI's SD1.5 Controlnet API supports both text-to-image, image-to-image use cases, and works with custom assests like LoRAs, checkpoints, VAES, and textual inversions.
-We offer the following public OctoAI SD1.5 ControlNet checkpoints in the OctoAI Asset Library:
- ```
- octoai:canny_sd15
- octoai:depth_sd15
- octoai:inpaint_sd15
- octoai:ip2p_sd15
- octoai:lineart_sd15
- octoai:openpose_sd15
- octoai:scribble_sd15
- octoai:tile_sd15
- ```
-
-In addition to using the default ControlNet checkpoints, you can also upload your own private ControlNet checkpoints to the OctoAI Asset Library. These custom checkpoints can then be utilized during generation by specifying the controlnet parameter. When using custom ControlNet checkpoints, please ensure you provide your own ControlNet mask using the `controlnet_image` parameter.
-
-You need to create an [OctoAI Authentication Token](getting-started/how-to-create-octoai-access-token) to access this API.
-
-### **How to use**
-
-Invoke `https://image.octoai.run/generate/controlnet-sd15` endpoint with a `POST` request.
-
-The headers of the request must include an Authentication Token in the authorization field. The accept header should be set to `application/json` to receive the image encoded as base64 in a JSON response.
-
-**Generating with a prompt**: Commonly referred to as **text-to-image**, this mode generates an image from text alone.
- - `prompt` - text to generate the image from
- - `controlnet`- Required if using a controlnet engine. Argument takes in the value of ControlNet to be used during image generation.
- - `controlnet_image`: Required if using a controlnet engine. Controlnet image encoded in b64 string for guiding image generation.
-
-**Generating with a prompt and an image**: Commonly referred to as **image-to-image**, this mode also generates an image from text but uses an existing image as the starting point. The required parameters are:
-
- - `prompt` - text to generate the image from
- - `init_image` - the image to use as the starting point for the generation. Argument takes an image encoded as a string in base64 format.
- - `strength` - controls how much influence the image parameter has on the output image
- - `controlnet`- Required if using a controlnet engine. Argument takes in the value of ControlNet to be used during image generation.
- - `controlnet_image`: Required if using a controlnet engine. Controlnet image encoded in b64 string for guiding image generation.
-
-**Generating with a prompt and a custom asset**: This mode generates an image from text but uses either a custom checkpoint, LoRA, textual inversion, or VAE. Note that using a custom asset increases generation time.
-
- - `prompt` - text to generate the image from
- - `controlnet`- Required if using a controlnet engine. Argument takes in the value of ControlNet to be used during image generation.
- - `controlnet_image`- Required if using a controlnet engine. Controlnet image encoded in b64 string for guiding image generation.
- - `checkpoint` - Here you can specify a checkpoint either from the OctoAI asset library or your private asset library.
- - `loras` - Here you can specify LoRAs, in name-weight pairs, either from the OctoAI asset library or your private asset library.
- - `textual_inversions` - Here you can specify textual inversions and their corresponding trigger words.
- - `vae` - Here you can specify variational autoencoders.
-
-
-<Note>
- For more details about all parameters, please see the request schema below.
-</Note>
-
-
-### **Output**
-
-The resolution of the generated image will be 512x512 megapixel.
-
-### **Pricing**
-
- - SD1.5 controlnet: ***$0.003 *** per image
-
-
-
-Check [Pricing Page](https://octo.ai/docs/getting-started/pricing-and-billing) for more details.
-
-### **Request Details**
-
- **Headers**:
-
- `Authorization` (Required): Your `OCTOAI_TOKEN`
- `Content-Type` (Required): Set to `application/json`
-
- **Parameters**:
-
- - `prompt` (string [ upto 77 tokens], Required): A string of text describing the image to generate. You can use prompt weighting, e.g. `(A tall (beautiful:1.5) woman:1.0) (some other prompt with weight:0.8)` . The weight will be the product of all brackets a token is a member of. The brackets, colons and weights do not count towards the number of tokens.
- - `negative_prompt` (string, Optional): Text describing image traits to avoid during generation.
- - `sampler` (string, Optional): A string specifying which scheduler to use when generating an image. Defaults to `DDIM`. Regular samplers include `DDIM`,`DDPM`,`DPM_PLUS_PLUS_2M_KARRAS`,`DPM_SINGLE`,`DPM_SOLVER_MULTISTEP`,`K_EULER`, `K_EULER_ANCESTRAL`,`PNDM`,`UNI_PC`. Premium samplers (2x price) include `DPM_2`, `DPM_2_ANCESTRAL`,`DPM_PLUS_PLUS_SDE_KARRAS`, `HEUN` and `KLMS`.
- - `cfg_scale` (double, Optional): Floating-point number represeting how closely to adhere to prompt description. Must be a positive number no greater than 50.0. Defaults to 12.
- - `image_encoding` (enum, Optional): Define which encoding process should be applied before returning the generated image(s). Allowed values: `jpeg` `png`
- - `num_images` (integer, Optional): Integer representing how many output images to generate with a single prompt/configuration. Defaults to 1. Allowed values: 1-16.
- - `seed` (union, Optional): Integer number or list of integers representing the seeds of random generators. Fixing random seed is useful when attempting to generate a specific image. Must be greater than 0 and less than 2^32.
- - `steps` (integer, Optional Defaults to 30): Integer representing how many steps of diffusion to run. Must be greater than 0 and less than or equal to 200.
- - `init_image` (string, Optional): The image (encoded in b64 string) to use as the starting point for the generation. This parameter is for Image-to-Image generation and Inpainting.
- <Note> Use .jpg format to ensure best latency </Note>
- - `strength` (double,Optional): Floating-point number indicating how much creative the Image to Image generation mode should be. Must be greater than 0 and less than or equal to 1.0. Defaults to 0.8. This parameter is for Image-to-Image generation.
- - `height` (integer, Optional): Integer representing the height of image to generate. Default to 1024.
- - `width` (integer, Optional): Integer representing the width of image to generate. Default to 1024.
-
- **Supported Output Resolutions (Width x Height)** are as follows:
- SD1.5:
- ```
- (512, 512),(640, 512),(768, 512),(512, 704),
- (512, 768),(576, 768),(640, 768),(576, 1024),
- (1024, 576)
- ```
- - `use_refiner` (Boolean, Optional): A boolean `true` or `false` determines whether to use the refiner or not
- - `high_noise_frac` (double, Optional): A floating point or integer determining how much noise should be applied using the base model vs. the refiner. A value of `0.8` will apply the base model at 80% and Refiner at 20%. Defaults to `0.8` when not set.
-
- **ControlNet parameters**
- - `controlnet`(string, Required if using a controlnet engine): Argument takes in the value of ControlNet to be used during image generation.
- - `controlnet_image`(string, Required if using a controlnet engine): Controlnet image encoded in b64 string for guiding image generation.
- - `controlnet_conditioning_scale` (double, Optional): Only applicable if using Controlnets. Argument determines how strong the effect of the controlnet will be. Defaults to `1`
- - `controlnet_early_stop` (integer,Optional):Only applicable if using Controlnets. If provided, indicates fraction of steps at which to stop applying controlnet. This can be used to sometimes generate better outputs.
- - `controlnet_preprocess` (boolean,Optional):Only applicable if using Controlnets. Argument takes in a boolean value to determine whether or not to apply automatic ControlNet preprocessing. For the privileged set of controlnet checkpoints listed above, we default to helping you autogenerate the corresponding controlnet map/mask that will be fed into the controlnet, but you can override the default by additionally specifying a `controlnet_preprocess: false` parameter.
-
- **Custom Assets**
- - `checkpoint` (string, Optional): Here you can specify a checkpoint either from the OctoAI asset library or your private asset library. Note that using a custom asset increases generation time.
- - `loras`(string, Optional): Here you can specify LoRAs, in name-weight pairs, either from the OctoAI asset library or your private asset library. Note that using a custom asset increases generation time.
- - `textual_inversions` (string, Optional): Here you can specify textual inversions and their corresponding trigger words. Note that using a custom asset increases generation time.
- - `vae` (string, Optional): Here you can specify variational autoencoders. Note that using a custom asset increases generation time.
-
-
-### **Request Examples**
-
-<CodeGroup>
-```bash cURL
-curl -H 'Content-Type: application/json' -H "Authorization: Bearer $OCTOAI_TOKEN" -X POST "https://image.octoai.run/generate/controlnet-sd15" \
-    -d '{
-        "controlnet_image": base64_image,
-        "controlnet": "octoai:canny_sd15",
-        "controlnet_preprocess": false,
-        "prompt": (
-            "A photo of a cute tiger astronaut in space"
-        ),
-        "negative_prompt": "low quality, bad quality, sketches, unnatural",
-        "steps": 20,
-        "num_images": 1,
-        "seed": 768072361,
-        "height": 512,
-        "width": 512
-    }' > response.json
-```
-
-```Python Python
-import base64
-import io
-import os
-import time
-
-import PIL.Image
-import requests
-
-import cv2 as cv2
-
-import matplotlib.pyplot as plt # Import Matplotlib
-
-def _process_test(endpoint_url):
-    image_path = "cat.jpeg"
-    img = cv2.imread(image_path)
-    img = cv2.resize(img, (1024, 1024)) # Resize to a resolution supported by OctoAI SD1.5
-
-    edges = cv2.Canny(img,100,200) # 100 and 200 are thresholds for determining canny edges
-
-    height, width = edges.shape
-
-    # Convert Canny edge map to PIL Image
-    edges_image = PIL.Image.fromarray(edges)
-
-    # Create a BytesIO buffer to hold the image data
-    image_buffer = io.BytesIO()
-    edges_image.save(image_buffer, format='JPEG')
-    image_bytes = image_buffer.getvalue()
-    encoded_image = base64.b64encode(image_bytes).decode('utf-8')
-
-    model_request = {
-        "controlnet_image": encoded_image,
-        "controlnet": "octoai:canny_sd15",
-        "controlnet_preprocess": false,
-        "prompt": (
-            "A photo of a cute tiger astronaut in space"
-        ),
-        "negative_prompt": "low quality, bad quality, sketches, unnatural",
-        "steps": 20,
-        "num_images": 1,
-        "seed": 768072361,
-        "height": 512,
-        "width": 512
-    }
-
-    prod_token = os.environ.get("OCTOAI_TOKEN") # noqa
-
-    reply = requests.post(
-        f"{endpoint_url}",
-        headers={
-            "Content-Type": "application/json",
-            "Authorization": f"Bearer {prod_token}",
-        },
-        json=model_request,
-    )
-
-    if reply.status_code != 200:
-        print(reply.text)
-        exit(-1)
-
-    img_list = reply.json()["images"]
-    print(img_list)
-
-    for i, idict in enumerate(img_list):
-        ibytes = idict['image_b64']
-        img_bytes = base64.b64decode(ibytes)
-        img = PIL.Image.open(io.BytesIO(img_bytes))
-        img.load()
-        img.save(f"result_image{i}.jpg")
-
-if __name__ == "__main__":
-    endpoint = "https://image.octoai.run/generate/controlnet-sd15"
-
-    # Change this line to call either a10 or a100
-    _process_test(endpoint)
-
-```
-
-```typescript TypeScript
-import fs from "node:fs";
-import { OctoAIClient } from "@octoai/sdk";
-
-const octoai = new OctoAIClient({
-    apiKey: process.env.OCTOAI_TOKEN,
-});
-
-const { images } = await octoai.imageGen.generateControlnetSd15({
-    controlnet_image": base64_image,
-    controlnet: "octoai:canny_sd15",
-    controlnet_preprocess": false,
-    prompt: "A photo of a cute tiger astronaut in space",
-    negative_prompt: "low quality, bad quality, sketches, unnatural",
-    steps: 20,
-    num_images: 1,
-    height: 512,
-    width: 512
-});
-
-images.forEach((output, i) => {
-    if (output.imageB64) {
-        const buffer = Buffer.from(output.imageB64, "base64");
-        fs.writeFileSync(`result${i}.jpg`, buffer);
-    }
-});
-```
-
-</CodeGroup>
-
-
diff --git a/fern/docs/media-gen-solution/rest-apis/generate/image-gen-api.mdx b/fern/docs/media-gen-solution/rest-apis/generate/image-gen-api.mdx
index 65d79fa..1058ef0 100644
--- a/fern/docs/media-gen-solution/rest-apis/generate/image-gen-api.mdx
+++ b/fern/docs/media-gen-solution/rest-apis/generate/image-gen-api.mdx
@@ -8,9 +8,7 @@ All of our image generation models are accessible via REST API. Below you can se
 Our URL for image generations is at `https://image.octoai.run/generate/{engine_id}`, where engine_id is one of the following:
 
 1. `sdxl`: Stable DiffusionXL v1.0
-2. `sd`: Stable Diffusion v1.5
 3. `controlnet-sdxl`: ControlNet SDXL
-4. `controlnet-sd15`: ControlNet SD1.5
 
 This includes text-to-image, image-to-image, controlnets, photo merge, inpainting and outpainting.
 
@@ -137,14 +135,6 @@ For SDXL:
 (1216, 832),(1152, 896)
 ```
 
-For SD1.5
-
-```
-(512, 512),(640, 512),(768, 512),(512, 704),
-(512, 768),(576, 768),(640, 768),(576, 1024),
-(1024, 576)
-```
-
 <Note>
 
 `init_image` and `mask_image` will be resized to the specified resolution before applying img2img or inpainting.
diff --git a/fern/docs/media-gen-solution/rest-apis/generate/sd15.mdx b/fern/docs/media-gen-solution/rest-apis/generate/sd15.mdx
deleted file mode 100644
index f755eed..0000000
--- a/fern/docs/media-gen-solution/rest-apis/generate/sd15.mdx
+++ /dev/null
@@ -1,195 +0,0 @@
----
-title: Stable Diffusion 1.5
-slug: media-gen-solution/rest-apis/generate/sd15
----
-
-Generate an image using a Stable Diffusion 1.5 (SD1.5) model.
-
-OctoAI's SD1.5 API supports both text-to-image, image-to-image use cases and, adding custom assests like LoRAs, checkpoints, VAES, textual inversions.
-It also support ControlNets. For more details, refer [SD15-ControlNets](https://octo.ai/docs/media-gen-solution/rest-apis/generate/images/controlnet-sd15).
-
-You need to create an [OctoAI Authentication Token](getting-started/how-to-create-octoai-access-token) to access this API.
-
-### **How to use**
-
-Invoke `https://image.octoai.run/generate/sd` endpoint with a `POST` request.
-
-The headers of the request must include an Authentication Token in the authorization field. The accept header should be set to `application/json` to receive the image encoded as base64 in a JSON response.
-
-**Generating with a prompt**: Commonly referred to as **text-to-image**, this mode generates an image from text alone. While the only required
-parameter is the `prompt`, it also supports a `width` and `height` parameter which can be used to control the aspect ratio of the generated image.
-
-**Generating with a prompt and an image**: Commonly referred to as **image-to-image**, this mode also generates an image from text but uses an existing image as the starting point. The required parameters are:
-
- - `prompt` - text to generate the image from
- - `init_image` - the image to use as the starting point for the generation. Argument takes an image encoded as a string in base64 format.
- - `strength` - controls how much influence the image parameter has on the output image
-
-**Generating with a prompt and a custom asset**: This mode generates an image from text but uses either a custom checkpoint, LoRA, textual inversion, or VAE. Note that using a custom asset increases generation time.
-
- - `prompt` - text to generate the image from
- - `checkpoint` - Here you can specify a checkpoint either from the OctoAI asset library or your private asset library.
- - `loras` - Here you can specify LoRAs, in name-weight pairs, either from the OctoAI asset library or your private asset library.
- - `textual_inversions` - Here you can specify textual inversions and their corresponding trigger words.
- - `vae` - Here you can specify variational autoencoders.
-
-**Inpainting with a prompt**: Inpainting replaces or edits specific areas of an image. This makes it a useful tool for image restoration like removing defects and artifacts, or even replacing an image area with something entirely new. Inpainting relies on a mask to determine which regions of an image to fill in. The required parameters are:
-
- - `prompt` - text to generate the image from
- - `init_image` - the image to use as the starting point for the generation. Argument takes an image encoded as a string in base64 format.
- - `mask_image` - area of the picture to inpaint. Argument takes an image encoded as a string in base64 format.
-
-**Outpainting with a prompt**: Outpainting is the process of using an image generation model like Stable Diffusion to extend beyond the canvas of an existing image. Outpainting is very similar to inpainting, but instead of generating a region within an existing image, the model generates a region outside of it. The required parameters are:
-
- - `prompt` - text to generate the image from
- - `init_image` - the existing image you’d like to outpaint. You need to create a source image that places your original image within a larger canvas. Argument takes an image encoded as a string in base64 format.
- - `mask_image` - a black and white mask representing the extended area. Argument takes an image encoded as a string in base64 format.
- - `outpainting` - Argument takes a boolean value to determine Whether the request requires outpainting or not. If so, special preprocessing is applied for better results. Defaults to `false`. This needs to be set to `true`, if you wish to use outpainting.
-
-<Note>
- For more details about all parameters, please see the request schema below.
-</Note>
-
-
-### **Output**
-
-The default resolution of the generated image is 512x512.
-
-### **Pricing**
-
- - SD1.5 Base: ***$0.0015*** per image, SD1.5, 512x512, 30 steps; billed per image
- - SD1.5 custom asset: ***$0.0015*** per image, 512x512, 30 steps; billed per image
- - Fine tuning job SD1.5: ***$0.1*** per tune, using the 500 step default
- - Inpainting: Same price as corresponding SD1.5 image
- - Outpainting: Same price as corresponding SD1.5 image
-
-
-Check [Pricing Page](https://octo.ai/docs/getting-started/pricing-and-billing) for more details.
-
-### **Request Details**
-
- **Headers**:
-
- `Authorization` (Required): Your `OCTOAI_TOKEN`
- `Content-Type` (Required): Set to `application/json`
-
- **Parameters**:
-
- - `prompt` (string [ upto 77 tokens], Required): A string of text describing the image to generate. You can use prompt weighting, e.g. `(A tall (beautiful:1.5) woman:1.0) (some other prompt with weight:0.8)` . The weight will be the product of all brackets a token is a member of. The brackets, colons and weights do not count towards the number of tokens.
- - `negative_prompt` (string, Optional): Text describing image traits to avoid during generation.
- - `sampler` (string, Optional): A string specifying which scheduler to use when generating an image. Defaults to `DDIM`. Regular samplers include `DDIM`,`DDPM`,`DPM_PLUS_PLUS_2M_KARRAS`,`DPM_SINGLE`,`DPM_SOLVER_MULTISTEP`,`K_EULER`, `K_EULER_ANCESTRAL`,`PNDM`,`UNI_PC`. Premium samplers (2x price) include `DPM_2`, `DPM_2_ANCESTRAL`,`DPM_PLUS_PLUS_SDE_KARRAS`, `HEUN` and `KLMS`.
- - `cfg_scale` (double, Optional): Floating-point number represeting how closely to adhere to prompt description. Must be a positive number no greater than 50.0. Defaults to 12.
- - `image_encoding` (enum, Optional): Define which encoding process should be applied before returning the generated image(s). Allowed values: `jpeg` `png`
- - `num_images` (integer, Optional): Integer representing how many output images to generate with a single prompt/configuration. Defaults to 1. Allowed values: 1-16.
- - `seed` (union, Optional): Integer number or list of integers representing the seeds of random generators. Fixing random seed is useful when attempting to generate a specific image. Must be greater than 0 and less than 2^32.
- - `steps` (integer, Optional Defaults to 30): Integer representing how many steps of diffusion to run. Must be greater than 0 and less than or equal to 200.
- - `init_image` (string, Optional): The image (encoded in b64 string) to use as the starting point for the generation. This parameter is for Image-to-Image generation and Inpainting.
- <Note> Use .jpg format to ensure best latency </Note>
- - `strength` (double,Optional): Floating-point number indicating how much creative the Image to Image generation mode should be. Must be greater than 0 and less than or equal to 1.0. Defaults to 0.8. This parameter is for Image-to-Image generation.
- - `height` (integer, Optional): Integer representing the height of image to generate. Default to 512.
- - `width` (integer, Optional): Integer representing the width of image to generate. Default to 512.
-
- **Supported Output Resolutions (Width x Height)** are as follows:
- SD1.5
-
- ```
- (512, 512),(640, 512),(768, 512),(512, 704),
- (512, 768),(576, 768),(640, 768),(576, 1024),
- (1024, 576)
- ```
-
- **Custom Assets**
- - `checkpoint` (string, Optional): Here you can specify a checkpoint either from the OctoAI asset library or your private asset library. Note that using a custom asset increases generation time.
- - `loras`(string, Optional): Here you can specify LoRAs, in name-weight pairs, either from the OctoAI asset library or your private asset library. Note that using a custom asset increases generation time.
- - `textual_inversions` (string, Optional): Here you can specify textual inversions and their corresponding trigger words. Note that using a custom asset increases generation time.
- - `vae` (string, Optional): Here you can specify variational autoencoders. Note that using a custom asset increases generation time.
-
- **Inpainting and Outpating Parameters**
- - `mask_image` (string, Optional): Only applicable for inpainting use cases i.e. to specify which area of the picture to paint onto. Argument takes an image encoded as a string in base64 format.
- <Note>Use .jpg format to ensure best latency</Note>
- - `outpainting` (boolean, Optional): Only applicable for outpainting use cases. Argument takes a boolean value to determine Whether the request requires outpainting or not. If so, special preprocessing is applied for better results. Defaults to `false`
-
-
-### **Request Examples**
-
-<CodeGroup>
-```bash cURL
-curl -X POST "https://image.octoai.run/generate/sd" \
-    -H "Content-Type: application/json" \
-    -H "Authorization: Bearer $OCTOAI_TOKEN" \
-    --data-raw '{
-        "prompt": "A still frame from a commercial of a DeLorean parked in bustling times square, rainy night shot with droplets of water falling",
-        "negative_prompt": "Blurry, low-res, poor quality",
-        "checkpoint": "octoai:crystal-clear",
-        "loras": {
-            "octoai:add-detail": 0.5
-        },
-        "width": 1344,
-        "height": 768,
-        "num_images": 4,
-        "sampler": "DPM_PLUS_PLUS_2M_KARRAS",
-        "steps": 30,
-        "cfg_scale": 7.5
-    }' | jq -r ".images[0].image_b64" | base64 -d > result.jpg
-```
-
-```Python Python
-from octoai.client import OctoAI
-
-client = OctoAI(
-    api_key="YOUR_API_KEY",
-)
-client.image_gen.generate_sd(
-    cfg_scale=7.5,
-    checkpoint="octoai:crystal-clear",
-    height=768,
-    loras={
-        "octoai:add-detail": 0.5
-    },
-    negative_prompt="Blurry, low-res, poor quality",
-    num_images=4,
-    prompt="A still frame from a commercial of a DeLorean parked in bustling times square, rainy night shot with droplets of water falling",
-    steps=30,
-    width=1344
-)
-```
-
-```typescript TypeScript
-import fs from "node:fs";
-import { OctoAIClient } from "@octoai/sdk";
-
-const octoai = new OctoAIClient({
-    apiKey: process.env.OCTOAI_TOKEN,
-});
-
-const { images } = await client.imageGen.generateSd(
-    {
-        {
-            cfgScale: 7.5,
-            checkpoint: "octoai:crystal-clear",
-            height: 768,
-            loras: {
-                "octoai:add-detail": 0.5
-            },
-            negativePrompt: "Blurry, low-res, poor quality",
-            numImages: 4,
-            prompt: "A still frame from a commercial of a DeLorean parked in bustling times square, rainy night shot with droplets of water falling",
-            steps: 30,
-            width: 1344
-        }
-    }
-);
-
-
-images.forEach((output, i) => {
-    if (output.imageB64) {
-        const buffer = Buffer.from(output.imageB64, "base64");
-        fs.writeFileSync(`result${i}.jpg`, buffer);
-    }
-});
-```
-
-</CodeGroup>
-
-
diff --git a/fern/docs/media-gen-solution/uploading-a-custom-asset-to-the-octoai-asset-library.mdx b/fern/docs/media-gen-solution/uploading-a-custom-asset-to-the-octoai-asset-library.mdx
index d4ea015..95cd923 100644
--- a/fern/docs/media-gen-solution/uploading-a-custom-asset-to-the-octoai-asset-library.mdx
+++ b/fern/docs/media-gen-solution/uploading-a-custom-asset-to-the-octoai-asset-library.mdx
@@ -37,7 +37,7 @@ Use "octoai asset [command] --help" for more information about a command.
 
 1. Run `octoai login` to cache your token and authenticate to your account.
 2. We can now use the `octoai asset create` subcommand to upload assets (you can run `octoai asset create --help` to learn more on the options).
-   1. `--engine` denotes whether this is an asset for SDXL or SD1.5
+   1. `--engine` denotes whether this is an asset for SDXL or SD3
    2. `--upload-from-file` flag denotes the path of the file on your local machine that you’re trying to upload.
    3. `--upload-from-url` flag is an alternative to `upload-from-file` allowing you to upload an asset from a public URL; OctoAI will fetch and upload the file.
    4. `—type` can be lora, checkpoint, or textual_inversion (VAEs coming soon)
@@ -53,7 +53,7 @@ As an example, you could use this command to upload a checkpoint from your local
 octoai asset create \
     --upload-from-file LCM_Dreamshaper_v7.safetensors \
     --name Dreamshaper_v7 \
-    --engine image/stable-diffusion-v1-5 \
+    --engine image/stable-diffusion-xl-v1-0 \
     --format safetensors \
     --data-type fp16 \
     --type checkpoint \
@@ -66,7 +66,7 @@ You can alternatively upload the file via public URL using `upload-from-url`:
 octoai asset create \
     --upload-from-url https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7/resolve/main/LCM_Dreamshaper_v7_4k.safetensors?download=true \
     --name Dreamshaper_v7 \
-    --engine image/stable-diffusion-v1-5 \
+    --engine image/stable-diffusion-xl-v1-0 \
     --format safetensors \
     --data-type fp16 \
     --type checkpoint \
diff --git a/fern/docs/python-sdk/asset-orchestrator-client.mdx b/fern/docs/python-sdk/asset-orchestrator-client.mdx
index f4af0bd..6a4a841 100644
--- a/fern/docs/python-sdk/asset-orchestrator-client.mdx
+++ b/fern/docs/python-sdk/asset-orchestrator-client.mdx
@@ -55,7 +55,7 @@ if __name__ == "__main__":
     # There is also TextualInversionData, VAEData, and CheckpointData.
     lora_data = Data_Lora(
         data_type="fp16",
-        engine="image/stable-diffusion-v1-5",
+        engine="image/stable-diffusion-xl-v1-0",
         file_format="safetensors",
     )
 
@@ -63,7 +63,7 @@
         file="origami-paper.safetensors",
         data=lora_data,
         name=asset_name,
-        description="origami-paper stable diffusion 1.5",
+        description="origami-paper sdxl",
     )
 
     image_gen_resp = image_gen.generate_sd(
diff --git a/fern/docs/python-sdk/image-generator-client.mdx b/fern/docs/python-sdk/image-generator-client.mdx
index 5593f29..ed2f957 100644
--- a/fern/docs/python-sdk/image-generator-client.mdx
+++ b/fern/docs/python-sdk/image-generator-client.mdx
@@ -3,7 +3,7 @@ title: Image Generator Python client
 slug: python-sdk/image-generator-client
 ---
 
-The `ImageGenClient` class specializes in supporting image generation in your application, and guiding what options are available to modify your outputs. It will return a list of all images using the `ImageGeneration` type. It allows you to use both Stable Diffusion 1.5 and Stable Diffusion XL for text to image and image to image use cases, and set parameters and prompts either with weighted prompts with the `prompt` field as was common with Stable Diffusion 1.5 or human-readable descriptions using `prompt_2` with Stable Diffusion XL 1.0.
+The `ImageGenClient` class specializes in supporting image generation in your application, and guiding what options are available to modify your outputs. It will return a list of all images using the `ImageGeneration` type. It allows you to use Stable Diffusion XL for text to image and image to image use cases, and set parameters and prompts either with weighted prompts with the `prompt` field or human-readable descriptions using `prompt_2`.
 
 This guide will walk you through a text to image example, and then we will use the resulting image to demonstrate the image to image use case.
 
diff --git a/fern/docs/typescript-sdk/asset-library.mdx b/fern/docs/typescript-sdk/asset-library.mdx
index dcadb5a..fb1b260 100644
--- a/fern/docs/typescript-sdk/asset-library.mdx
+++ b/fern/docs/typescript-sdk/asset-library.mdx
@@ -29,7 +29,7 @@ console.log(JSON.stringify(assetListResponse.data, undefined, 2));
 const assetListStableDiffusion = await octoai.assetLibrary.list({
   isPublic: true,
   owner: "octoai",
-  engine: "image/stable-diffusion-v1-5",
+  engine: "image/stable-diffusion-xl-v1-0",
 });
 
 console.log(JSON.stringify(assetListStableDiffusion, undefined, 2));
@@ -61,7 +61,7 @@
   data: {
     assetType: "lora",
     dataType: "fp16",
-    engine: "image/stable-diffusion-v1-5",
+    engine: "image/stable-diffusion-xl-v1-0",
     fileFormat: "safetensors",
     triggerWords: ["origami", "paper"],
   },