Skip to content

Commit

Permalink
LoRA in Text2ImagePipeline (openvinotoolkit#911)
Browse files Browse the repository at this point in the history
Co-authored-by: Ilya Lavrenov <[email protected]>
  • Loading branch information
slyalin and ilya-lavrenov authored Oct 7, 2024
1 parent b11f0d9 commit 41f1e7b
Show file tree
Hide file tree
Showing 28 changed files with 318 additions and 111 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/lcm_dreamshaper_cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ jobs:
- name: Run app
run: |
source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
./build/samples/cpp/stable_diffusion/stable_diffusion ./models/lcm_dreamshaper_v7/FP16 "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"
./build/samples/cpp/text2image/stable_diffusion ./models/lcm_dreamshaper_v7/FP16 "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"
lcm_dreamshaper_v7_cpp-windows:
runs-on: windows-latest
Expand Down Expand Up @@ -118,7 +118,7 @@ jobs:
- name: Run app
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
./build/samples/cpp/stable_diffusion/Release/lcm_dreamshaper.exe ./models/lcm_dreamshaper_v7/FP16 "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"
./build/samples/cpp/text2image/Release/lcm_dreamshaper.exe ./models/lcm_dreamshaper_v7/FP16 "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"
Overall_Status:
name: ci/gha_overall_status_lcm
Expand Down
15 changes: 12 additions & 3 deletions .github/workflows/stable_diffusion_1_5_cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,19 @@ jobs:
source openvino_sd_cpp/bin/activate
optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --weight-format fp16 --task stable-diffusion models/dreamlike-art-dreamlike-anime-1.0/FP16
- name: Run app
- name: Run main app
run: |
source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
./build/samples/cpp/stable_diffusion/stable_diffusion ./models/dreamlike-art-dreamlike-anime-1.0/FP16 "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"
./build/samples/cpp/text2image/stable_diffusion ./models/dreamlike-art-dreamlike-anime-1.0/FP16 "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"
- name: Download LoRA adapter
run: |
wget -O ./models/soulcard.safetensors https://civitai.com/api/download/models/72591
- name: Run LoRA app
run: |
source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
./build/samples/cpp/text2image/lora_stable_diffusion ./models/dreamlike-art-dreamlike-anime-1.0/FP16 "curly-haired unicorn in the forest, anime, line" ./models/soulcard.safetensors 0.7
stable_diffusion_1_5_cpp-windows:
runs-on: windows-latest
Expand Down Expand Up @@ -118,7 +127,7 @@ jobs:
- name: Run app
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
./build/samples/cpp/stable_diffusion/Release/stable_diffusion.exe ./models/dreamlike-art-dreamlike-anime-1.0/FP16 "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"
./build/samples/cpp/text2image/Release/stable_diffusion.exe ./models/dreamlike-art-dreamlike-anime-1.0/FP16 "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"
Overall_Status:
name: ci/gha_overall_status_stable_diffusion
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ It includes the following pipelines:
6. [multinomial_causal_lm](./samples/cpp/multinomial_causal_lm/README.md)
7. [prompt_lookup_decoding_lm](./samples/cpp/prompt_lookup_decoding_lm/README.md)
8. [speculative_decoding_lm](./samples/cpp/speculative_decoding_lm/README.md)
3. [Stable Diffuison and Latent Consistency Model (with LoRA) C++ image generation pipeline](./samples/cpp/stable_diffusion/README.md)
3. [Stable Diffusion and Latent Consistency Model (with LoRA) C++ image generation pipeline](./samples/cpp/text2image/README.md)

### Requirements

Expand Down
6 changes: 3 additions & 3 deletions samples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ add_subdirectory(cpp/prompt_lookup_decoding_lm)
add_subdirectory(cpp/speculative_decoding_lm)
add_subdirectory(cpp/benchmark_genai)
add_subdirectory(cpp/whisper_speech_recognition)
add_subdirectory(cpp/stable_diffusion)
add_subdirectory(cpp/text2image)

install(FILES requirements.txt DESTINATION samples
COMPONENT cpp_samples_genai)
Expand All @@ -26,7 +26,7 @@ install(DIRECTORY
# Don't install prompt_lookup_decoding_lm and speculative_decoding_lm because they don't use openvino_genai library and aren't verified yet.
# Don't install continuous_batching_accuracy and continuous_batching_benchmark because they depend on json.
cpp/whisper_speech_recognition
cpp/stable_diffusion
cpp/text2image
cpp/lora_greedy_causal_lm
DESTINATION samples/cpp COMPONENT cpp_samples_genai)

Expand All @@ -36,6 +36,6 @@ install(DIRECTORY
python/greedy_causal_lm
python/multinomial_causal_lm
python/whisper_speech_recognition
# python/stable_diffusion
# python/text2image
DESTINATION samples/python COMPONENT cpp_samples_genai
USE_SOURCE_PERMISSIONS)
48 changes: 0 additions & 48 deletions samples/cpp/stable_diffusion/README.md

This file was deleted.

File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ find_package(OpenVINOGenAI REQUIRED
NO_CMAKE_FIND_ROOT_PATH
)

# create executable
# create main sample executable

add_executable(stable_diffusion
${CMAKE_CURRENT_SOURCE_DIR}/main.cpp
Expand All @@ -26,3 +26,22 @@ install(TARGETS stable_diffusion
RUNTIME DESTINATION samples_bin/
COMPONENT samples_bin
EXCLUDE_FROM_ALL)

# create LoRA sample executable
# Built from lora.cpp plus the imwrite.cpp helper, both taken from this directory.

add_executable(lora_stable_diffusion
${CMAKE_CURRENT_SOURCE_DIR}/lora.cpp
${CMAKE_CURRENT_SOURCE_DIR}/imwrite.cpp)

# Make headers located next to the sources (such as the imwrite.hpp included by lora.cpp) visible
target_include_directories(lora_stable_diffusion PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
# The sample only needs the OpenVINO GenAI library target
target_link_libraries(lora_stable_diffusion PRIVATE openvino::genai)

set_target_properties(lora_stable_diffusion PROPERTIES
COMPILE_PDB_NAME lora_stable_diffusion
# Ensure out of box LC_RPATH on macOS with SIP
INSTALL_RPATH_USE_LINK_PATH ON)

# Install the binary into samples_bin/ as part of the samples_bin component;
# EXCLUDE_FROM_ALL leaves it out of a full install, so it is installed only when that component is requested
install(TARGETS lora_stable_diffusion
RUNTIME DESTINATION samples_bin/
COMPONENT samples_bin
EXCLUDE_FROM_ALL)
76 changes: 76 additions & 0 deletions samples/cpp/text2image/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Text to Image C++ Generation Pipeline

Examples in this folder showcase inference of text to image models like Stable Diffusion 1.5, 2.1, LCM. The application intentionally has few configuration options, to encourage the reader to explore and modify the source code, for example to change the inference device to GPU. The sample features `ov::genai::Text2ImagePipeline` and uses a text prompt as the input source.

There are two sample files:
- [`main.cpp`](./main.cpp) demonstrates basic usage of the text to image pipeline
- [`lora.cpp`](./lora.cpp) shows how to apply LoRA adapters to the pipeline

Users can change the sample code and play with the following generation parameters:

- Change width or height of generated image
- Generate multiple images per prompt
- Adjust the number of inference steps
- Play with [guidance scale](https://huggingface.co/spaces/stabilityai/stable-diffusion/discussions/9) (read [more details](https://arxiv.org/abs/2207.12598))
- (SD 1.x, 2.x only) Add negative prompt when guidance scale > 1
- Apply multiple different LoRA adapters and mix them with different blending coefficients

## Download and convert the models and tokenizers

The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version.

It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported.

```sh
pip install --upgrade-strategy eager -r ../../requirements.txt
optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --weight-format fp16 dreamlike_anime_1_0_ov/FP16
```

## Run

`stable_diffusion ./dreamlike_anime_1_0_ov/FP16 'cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting'`

### Examples

Prompt: `cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting`

![](./512x512.bmp)

## Supported models

Models can be downloaded from [HuggingFace](https://huggingface.co/models). This sample can run models including, but not limited to, the following:

- [botp/stable-diffusion-v1-5](https://huggingface.co/botp/stable-diffusion-v1-5)
- [stabilityai/stable-diffusion-2](https://huggingface.co/stabilityai/stable-diffusion-2)
- [stabilityai/stable-diffusion-2-1](https://huggingface.co/stabilityai/stable-diffusion-2-1)
- [dreamlike-art/dreamlike-anime-1.0](https://huggingface.co/dreamlike-art/dreamlike-anime-1.0)
- [SimianLuo/LCM_Dreamshaper_v7](https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7)

## Run with optional LoRA adapters

LoRA adapters can be connected to the pipeline to modify generated images so that they have a certain style, level of detail or quality. Adapters are supported in Safetensors format and can be downloaded from public sources like [Civitai](https://civitai.com) or [HuggingFace](https://huggingface.co/models), or trained by the user. Only adapters compatible with the base model should be used. A weighted blend of multiple adapters can be applied by specifying multiple adapter files with corresponding alpha parameters on the command line. Check the `lora.cpp` source code to learn how to enable adapters and specify them in each `generate` call.

Here is an example of how to run the sample with a single adapter. First, download the adapter file from the https://civitai.com/models/67927/soulcard page manually and save it as `soulcard.safetensors`, or download it from the command line:

`wget -O soulcard.safetensors https://civitai.com/api/download/models/72591`

Then run `lora_stable_diffusion` executable:

`./lora_stable_diffusion dreamlike_anime_1_0_ov/FP16 'curly-haired unicorn in the forest, anime, line' soulcard.safetensors 0.7`

The sample generates two images with and without adapters applied using the same prompt:
- `lora.bmp` with adapters applied
- `baseline.bmp` without adapters applied

Check the difference:

With adapter | Without adapter
:---:|:---:
![](./lora.bmp) | ![](./baseline.bmp)


## Note

- Images generated with HuggingFace / Optimum Intel are not the same as those generated by this C++ sample:

C++ random generation with MT19937 produces results that differ from `numpy.random.randn()` and `diffusers.utils.randn_tensor`. So, it is expected that the Python and C++ versions generate different images, because the latent images are initialized differently. Users can implement their own random generator derived from `ov::genai::Generator` and pass it to the `Text2ImagePipeline::generate` method.
Binary file added samples/cpp/text2image/baseline.bmp
Binary file not shown.
File renamed without changes.
File renamed without changes.
Binary file added samples/cpp/text2image/lora.bmp
Binary file not shown.
53 changes: 53 additions & 0 deletions samples/cpp/text2image/lora.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright (C) 2023-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include <cstdlib>

#include "openvino/genai/text2image/pipeline.hpp"

#include "imwrite.hpp"

int32_t main(int32_t argc, char* argv[]) try {
OPENVINO_ASSERT(argc >= 3 && (argc - 3) % 2 == 0, "Usage: ", argv[0], " <MODEL_DIR> '<PROMPT>' [<LORA_SAFETENSORS> <ALPHA> ...]]");

const std::string models_path = argv[1], prompt = argv[2];
const std::string device = "CPU"; // GPU, NPU can be used as well

ov::genai::AdapterConfig adapter_config;
// Multiple LoRA adapters applied simultaniously are supported, parse them all and corresponding alphas from cmd parameters:
for(size_t i = 0; i < (argc - 3)/2; ++i) {
ov::genai::Adapter adapter(argv[3 + 2*i]);
float alpha = std::atof(argv[3 + 2*i + 1]);
adapter_config.add(adapter, alpha);
}

// LoRA adapters passed to the constructor will be activated by default in next generates
ov::genai::Text2ImagePipeline pipe(models_path, device, ov::genai::adapters(adapter_config));

std::cout << "Generating image with LoRA adapters applied, resulting image will be in lora.bmp\n";
ov::Tensor image = pipe.generate(prompt,
ov::genai::random_generator(std::make_shared<ov::genai::CppStdGenerator>(42)),
ov::genai::width(512),
ov::genai::height(896),
ov::genai::num_inference_steps(20));
imwrite("lora.bmp", image, true);

std::cout << "Generating image without LoRA adapters applied, resulting image will be in baseline.bmp\n";
image = pipe.generate(prompt,
ov::genai::adapters(), // passing adapters in generate overrides adapters set in the constructor; adapters() means no adapters
ov::genai::random_generator(std::make_shared<ov::genai::CppStdGenerator>(42)),
ov::genai::width(512),
ov::genai::height(896),
ov::genai::num_inference_steps(20));
imwrite("baseline.bmp", image, true);

return EXIT_SUCCESS;
} catch (const std::exception& error) {
try {
std::cerr << error.what() << '\n';
} catch (const std::ios_base::failure&) {}
return EXIT_FAILURE;
} catch (...) {
try {
std::cerr << "Non-exception object thrown\n";
} catch (const std::ios_base::failure&) {}
return EXIT_FAILURE;
}
File renamed without changes.
2 changes: 0 additions & 2 deletions src/cpp/include/openvino/genai/generation_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,6 @@ static constexpr ov::Property<float> presence_penalty{"presence_penalty"};
static constexpr ov::Property<float> frequency_penalty{"frequency_penalty"};
static constexpr ov::Property<size_t> rng_seed{"rng_seed"};

static constexpr AdaptersProperty adapters;

// Predefined Configs
OPENVINO_GENAI_EXPORTS GenerationConfig beam_search();
OPENVINO_GENAI_EXPORTS GenerationConfig greedy();
Expand Down
14 changes: 8 additions & 6 deletions src/cpp/include/openvino/genai/lora_adapter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,9 @@ struct OPENVINO_GENAI_EXPORTS AdapterConfig {

class AdaptersProperty : public ov::Property<AdapterConfig> {
public:
constexpr AdaptersProperty() : ov::Property<AdapterConfig>("adapters") {}
inline constexpr static const char* name () { return "adapters"; }

constexpr AdaptersProperty() : ov::Property<AdapterConfig>(name()) {}

inline std::pair<std::string, ov::Any> operator()(const AdapterConfig& config) const {
return ov::Property<AdapterConfig>::operator()(config);
Expand Down Expand Up @@ -154,6 +156,9 @@ class AdaptersProperty : public ov::Property<AdapterConfig> {
};


// Ready-to-use property object for passing LoRA adapters as a named property: calling it with an
// AdapterConfig produces a (name, value) pair keyed by AdaptersProperty::name(), i.e. "adapters".
static constexpr AdaptersProperty adapters;


class OPENVINO_GENAI_EXPORTS AdapterController {

std::shared_ptr<AdapterControllerImpl> m_pimpl;
Expand All @@ -165,15 +170,12 @@ class OPENVINO_GENAI_EXPORTS AdapterController {

AdapterController(std::shared_ptr<ov::Model> model, const AdapterConfig& config, const std::string& prefix, std::string device = "");

// Call it every time when adapter config is changed; if adapter is configured as a static one, this call is not required
void apply(ov::InferRequest& request, const AdapterConfig& config);
// Apply adapters configured in the current config set last time, or set and use new config given as optional `config` argument
void apply(ov::InferRequest& request, const std::optional<AdapterConfig>& config = std::nullopt);

// the next call of apply will set all adapter tensors regardless of config change, use this method if full state.reset is called for the controlled model
void force_full_apply(bool full_apply = true);

// Apply the same config that was used last time (in initialization or in previous call to apply).
void apply(ov::InferRequest& request);

operator bool() const {
return bool(m_pimpl);
}
Expand Down
4 changes: 4 additions & 0 deletions src/cpp/include/openvino/genai/text2image/clip_text_model.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include "openvino/genai/visibility.hpp"
#include "openvino/genai/tokenizer.hpp"
#include "openvino/genai/lora_adapter.hpp"

#include "openvino/core/any.hpp"
#include "openvino/runtime/tensor.hpp"
Expand Down Expand Up @@ -53,10 +54,13 @@ class OPENVINO_GENAI_EXPORTS CLIPTextModel {
return compile(device, ov::AnyMap{std::forward<Properties>(properties)...});
}

void set_adapters(const AdapterConfig& adapters);

ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance);

private:
Config m_config;
AdapterController m_adapter_controller;
ov::InferRequest m_request;
std::shared_ptr<ov::Model> m_model;

Expand Down
10 changes: 10 additions & 0 deletions src/cpp/include/openvino/genai/text2image/pipeline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include "openvino/genai/visibility.hpp"

#include "openvino/genai/lora_adapter.hpp"
#include "openvino/genai/text2image/clip_text_model.hpp"
#include "openvino/genai/text2image/unet2d_condition_model.hpp"
#include "openvino/genai/text2image/autoencoder_kl.hpp"
Expand Down Expand Up @@ -81,6 +82,8 @@ class OPENVINO_GENAI_EXPORTS Text2ImagePipeline {
int64_t width = -1;
size_t num_inference_steps = 50;

AdapterConfig adapters;

void update_generation_config(const ov::AnyMap& config_map);

// checks whether the config is valid
Expand All @@ -96,6 +99,13 @@ class OPENVINO_GENAI_EXPORTS Text2ImagePipeline {

Text2ImagePipeline(const std::string& root_dir, const std::string& device, const ov::AnyMap& properties = {});

// Convenience constructor that accepts properties as a variadic pack (for example
// ov::genai::adapters(config)) instead of a ready-made ov::AnyMap: the pack is collected into an
// ov::AnyMap and forwarded to the (root_dir, device, properties) constructor.
// Enabled only when ov::util::StringAny<Properties...>::value is true.
template <typename... Properties,
typename std::enable_if<ov::util::StringAny<Properties...>::value, bool>::type = true>
Text2ImagePipeline(const std::string& root_dir,
const std::string& device,
Properties&&... properties)
: Text2ImagePipeline(root_dir, device, ov::AnyMap{std::forward<Properties>(properties)...}) { }

// creates either LCM or SD pipeline from building blocks
static Text2ImagePipeline stable_diffusion(
const std::shared_ptr<Scheduler>& scheduler_type,
Expand Down
Loading

0 comments on commit 41f1e7b

Please sign in to comment.