add performance statistics for image generation #1405

Draft
wants to merge 4 commits into master
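
In short, the image generation pipelines' `generate()` now returns an `ImageResults` object (image tensor plus `PerfMetrics`) instead of a bare `ov::Tensor`. A minimal caller-side sketch of the change, assuming the header path and accessor names used in the updated samples below; this is not a verified build:

```cpp
#include <cstdlib>
#include <iostream>
#include <string>

#include "openvino/genai/image_generation/text2image_pipeline.hpp"  // assumed header path

int main(int argc, char* argv[]) {
    const std::string models_path = argv[1], prompt = argv[2];

    ov::genai::Text2ImagePipeline pipe(models_path, "CPU");

    // Before this PR: ov::Tensor image = pipe.generate(prompt, ...);
    // After this PR: the image tensor and timing statistics come back together.
    auto results = pipe.generate(prompt,
                                 ov::genai::width(512),
                                 ov::genai::height(512),
                                 ov::genai::num_inference_steps(20));

    std::cout << "generate duration ms: "
              << results.perf_metrics.get_generate_duration().mean << std::endl;
    std::cout << "inference duration ms: "
              << results.perf_metrics.get_inference_duration().mean << std::endl;
    return EXIT_SUCCESS;
}
```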
@@ -98,13 +98,16 @@ int32_t main(int32_t argc, char* argv[]) try {
for (int imagei = 0; imagei < number_of_images_to_generate; imagei++) {
std::cout << "Generating image " << imagei << std::endl;

ov::Tensor image = pipe.generate(prompt,
auto image_results = pipe.generate(prompt,
ov::genai::width(width),
ov::genai::height(height),
ov::genai::guidance_scale(guidance_scale),
ov::genai::num_inference_steps(number_of_inference_steps_per_image));

imwrite("image_" + std::to_string(imagei) + ".bmp", image, true);
imwrite("image_" + std::to_string(imagei) + ".bmp", image_results.image, true);

std::cout << "pipeline generate duration ms:" << image_results.perf_metrics.get_generate_duration().mean << std::endl;
std::cout << "pipeline inference duration ms:" << image_results.perf_metrics.get_inference_duration().mean << std::endl;
}

return EXIT_SUCCESS;
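Since each iteration now reports both a generate mean and an inference mean, their difference is the time spent outside model inference (scheduling, latent post-processing, and so on). A small helper sketch, assuming both accessors return milliseconds as the sample's "duration ms" labels suggest:

```cpp
#include <iostream>

// Sketch: summarize how much of the generate() wall time went into model
// inference. Assumes both values are mean durations in milliseconds, as the
// sample's "duration ms" log labels suggest.
void print_inference_share(double generate_ms, double inference_ms) {
    const double overhead_ms = generate_ms - inference_ms;  // scheduling, tensor prep, etc.
    const double share = generate_ms > 0.0 ? inference_ms / generate_ms * 100.0 : 0.0;
    std::cout << "inference share: " << share << "% of generate, "
              << "non-inference overhead: " << overhead_ms << " ms" << std::endl;
}

// Hypothetical use inside the per-image loop above:
// print_inference_share(image_results.perf_metrics.get_generate_duration().mean,
//                       image_results.perf_metrics.get_inference_duration().mean);
```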
6 changes: 4 additions & 2 deletions samples/cpp/image_generation/image2image.cpp
@@ -15,12 +15,14 @@ int32_t main(int32_t argc, char* argv[]) try {
ov::Tensor image = utils::load_image(image_path);

ov::genai::Image2ImagePipeline pipe(models_path, device);
ov::Tensor generated_image = pipe.generate(prompt, image,
auto image_results = pipe.generate(prompt, image,
// controls how initial image is noised after being converted to latent space. `1` means initial image is fully noised
ov::genai::strength(0.8f));

// writes `num_images_per_prompt` images by pattern name
imwrite("image_%d.bmp", generated_image, true);
imwrite("image_%d.bmp", image_results.image, true);
std::cout << "pipeline generate duration ms:" << image_results.perf_metrics.get_generate_duration().mean << std::endl;
std::cout << "pipeline inference duration ms:" << image_results.perf_metrics.get_inference_duration().mean << std::endl;

return EXIT_SUCCESS;
} catch (const std::exception& error) {
7 changes: 5 additions & 2 deletions samples/cpp/image_generation/inpainting.cpp
@@ -16,10 +16,13 @@ int32_t main(int32_t argc, char* argv[]) try {
ov::Tensor mask_image = utils::load_image(mask_image_path);

ov::genai::InpaintingPipeline pipe(models_path, device);
ov::Tensor generated_image = pipe.generate(prompt, image, mask_image);
auto image_results = pipe.generate(prompt, image, mask_image);

// writes `num_images_per_prompt` images by pattern name
imwrite("image_%d.bmp", generated_image, true);
imwrite("image_%d.bmp", image_results.image, true);

std::cout << "pipeline generate duration ms:" << image_results.perf_metrics.get_generate_duration().mean << std::endl;
std::cout << "pipeline inference duration ms:" << image_results.perf_metrics.get_inference_duration().mean << std::endl;

return EXIT_SUCCESS;
} catch (const std::exception& error) {
12 changes: 8 additions & 4 deletions samples/cpp/image_generation/lora_text2image.cpp
@@ -23,21 +23,25 @@ int32_t main(int32_t argc, char* argv[]) try {
ov::genai::Text2ImagePipeline pipe(models_path, device, ov::genai::adapters(adapter_config));

std::cout << "Generating image with LoRA adapters applied, resulting image will be in lora.bmp\n";
ov::Tensor image = pipe.generate(prompt,
auto image_results = pipe.generate(prompt,
ov::genai::width(512),
ov::genai::height(896),
ov::genai::num_inference_steps(20),
ov::genai::rng_seed(42));
imwrite("lora.bmp", image, true);
imwrite("lora.bmp", image_results.image, true);
std::cout << "pipeline generate duration ms:" << image_results.perf_metrics.get_generate_duration().mean << std::endl;
std::cout << "pipeline inference duration ms:" << image_results.perf_metrics.get_inference_duration().mean << std::endl;

std::cout << "Generating image without LoRA adapters applied, resulting image will be in baseline.bmp\n";
image = pipe.generate(prompt,
image_results = pipe.generate(prompt,
ov::genai::adapters(), // passing adapters in generate overrides adapters set in the constructor; adapters() means no adapters
ov::genai::width(512),
ov::genai::height(896),
ov::genai::num_inference_steps(20),
ov::genai::rng_seed(42));
imwrite("baseline.bmp", image, true);
imwrite("baseline.bmp", image_results.image, true);
std::cout << "pipeline generate duration ms:" << image_results.perf_metrics.get_generate_duration().mean << std::endl;
std::cout << "pipeline inference duration ms:" << image_results.perf_metrics.get_inference_duration().mean << std::endl;

return EXIT_SUCCESS;
} catch (const std::exception& error) {
7 changes: 5 additions & 2 deletions samples/cpp/image_generation/text2image.cpp
@@ -12,14 +12,17 @@ int32_t main(int32_t argc, char* argv[]) try {
const std::string device = "CPU"; // GPU can be used as well

ov::genai::Text2ImagePipeline pipe(models_path, device);
ov::Tensor image = pipe.generate(prompt,
auto image_results = pipe.generate(prompt,
ov::genai::width(512),
ov::genai::height(512),
ov::genai::num_inference_steps(20),
ov::genai::num_images_per_prompt(1));

// writes `num_images_per_prompt` images by pattern name
imwrite("image_%d.bmp", image, true);
imwrite("image_%d.bmp", image_results.image, true);

std::cout << "pipeline generate duration ms:" << image_results.perf_metrics.get_generate_duration().mean << std::endl;
std::cout << "pipeline inference duration ms:" << image_results.perf_metrics.get_inference_duration().mean << std::endl;

return EXIT_SUCCESS;
} catch (const std::exception& error) {
@@ -127,7 +127,7 @@ class OPENVINO_GENAI_EXPORTS AutoencoderKL {
return compile(device, ov::AnyMap{std::forward<Properties>(properties)...});
}

ov::Tensor decode(ov::Tensor latent);
ov::Tensor decode(ov::Tensor latent, RawPerfMetrics &raw_metrics);

ov::Tensor encode(ov::Tensor image, std::shared_ptr<Generator> generator);

@@ -9,6 +9,7 @@
#include "openvino/genai/visibility.hpp"
#include "openvino/genai/tokenizer.hpp"
#include "openvino/genai/lora_adapter.hpp"
#include "openvino/genai/perf_metrics.hpp"

#include "openvino/core/any.hpp"
#include "openvino/runtime/tensor.hpp"
@@ -84,7 +85,7 @@ class OPENVINO_GENAI_EXPORTS CLIPTextModel {

void set_adapters(const std::optional<AdapterConfig>& adapters);

ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance);
ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, RawPerfMetrics& raw_metrics);

ov::Tensor get_output_tensor(const size_t idx);

@@ -9,6 +9,7 @@
#include "openvino/genai/visibility.hpp"
#include "openvino/genai/tokenizer.hpp"
#include "openvino/genai/lora_adapter.hpp"
#include "openvino/genai/perf_metrics.hpp"

#include "openvino/core/any.hpp"
#include "openvino/runtime/tensor.hpp"
@@ -84,7 +85,7 @@ class OPENVINO_GENAI_EXPORTS CLIPTextModelWithProjection {

void set_adapters(const std::optional<AdapterConfig>& adapters);

ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance);
ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, RawPerfMetrics& raw_metrics);

ov::Tensor get_output_tensor(const size_t idx);

@@ -12,6 +12,7 @@
#include "openvino/runtime/tensor.hpp"

#include "openvino/genai/visibility.hpp"
#include "openvino/genai/perf_metrics.hpp"

namespace ov {
namespace genai {
@@ -75,7 +76,7 @@ class OPENVINO_GENAI_EXPORTS FluxTransformer2DModel {

void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states);

ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep);
ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep, RawPerfMetrics& raw_metrics);

private:
Config m_config;
@@ -12,6 +12,7 @@

#include "openvino/genai/lora_adapter.hpp"
#include "openvino/genai/visibility.hpp"
#include "openvino/genai/perf_metrics.hpp"

namespace ov {
namespace genai {
@@ -251,5 +252,11 @@ static constexpr ov::Property<std::function<bool(size_t, size_t, ov::Tensor&)>>
OPENVINO_GENAI_EXPORTS
std::pair<std::string, ov::Any> generation_config(const ImageGenerationConfig& generation_config);

class ImageResults {
public:
ov::Tensor image;
PerfMetrics perf_metrics;
};

} // namespace genai
} // namespace ov
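
A consumer-side sketch of the new ImageResults type declared above, aggregating timings across several prompts; the pipeline header path and model path are assumptions, not part of this PR:

```cpp
#include <iostream>
#include <string>
#include <vector>

#include "openvino/genai/image_generation/text2image_pipeline.hpp"  // assumed header path

int main() {
    const std::vector<std::string> prompts = {"a red fox in the snow", "a lighthouse at dusk"};
    ov::genai::Text2ImagePipeline pipe("./models/stable-diffusion-v1-5", "CPU");  // hypothetical path

    double total_generate_ms = 0.0, total_inference_ms = 0.0;
    for (const auto& prompt : prompts) {
        ov::genai::ImageResults res = pipe.generate(prompt, ov::genai::num_inference_steps(20));
        total_generate_ms  += res.perf_metrics.get_generate_duration().mean;
        total_inference_ms += res.perf_metrics.get_inference_duration().mean;
    }

    std::cout << "total generate ms: " << total_generate_ms
              << ", total inference ms: " << total_inference_ms << std::endl;
    return 0;
}
```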
@@ -72,13 +72,13 @@ class OPENVINO_GENAI_EXPORTS Image2ImagePipeline {
* @param positive_prompt Prompt to generate image(s) from
* @param initial_image RGB/BGR image of [1, height, width, 3] shape used to initialize latent image
* @param properties Image generation parameters specified as properties. Values in 'properties' override default value for generation parameters.
* @returns A tensor which has dimensions [num_images_per_prompt, height, width, 3]
* @returns An ImageResults object containing an image tensor with dimensions [num_images_per_prompt, height, width, 3] and performance metrics
* @note Output image size is the same as initial image size, but rounded down to be divisible by VAE scale factor (usually, 8)
*/
ov::Tensor generate(const std::string& positive_prompt, ov::Tensor initial_image, const ov::AnyMap& properties = {});
ImageResults generate(const std::string& positive_prompt, ov::Tensor initial_image, const ov::AnyMap& properties = {});

template <typename... Properties>
ov::util::EnableIfAllStringAny<ov::Tensor, Properties...> generate(
ov::util::EnableIfAllStringAny<ImageResults, Properties...> generate(
const std::string& positive_prompt,
ov::Tensor initial_image,
Properties&&... properties) {
@@ -95,12 +95,12 @@ class OPENVINO_GENAI_EXPORTS InpaintingPipeline {
* @param initial_image RGB/BGR image of [1, height, width, 3] shape used to initialize latent image
* @param mask_image RGB/BGR or GRAY/BINARY image of [1, height, width, 3 or 1] shape used as a mask
* @param properties Image generation parameters specified as properties. Values in 'properties' override default value for generation parameters.
* @returns A tensor which has dimensions [num_images_per_prompt, height, width, 3]
* @returns An ImageResults object containing an image tensor with dimensions [num_images_per_prompt, height, width, 3] and performance metrics
*/
ov::Tensor generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties = {});
ImageResults generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties = {});

template <typename... Properties>
ov::util::EnableIfAllStringAny<ov::Tensor, Properties...> generate(
ov::util::EnableIfAllStringAny<ImageResults, Properties...> generate(
const std::string& positive_prompt,
ov::Tensor initial_image,
ov::Tensor mask,
@@ -13,6 +13,7 @@
#include "openvino/runtime/tensor.hpp"

#include "openvino/genai/visibility.hpp"
#include "openvino/genai/perf_metrics.hpp"

namespace ov {
namespace genai {
@@ -77,7 +78,7 @@ class OPENVINO_GENAI_EXPORTS SD3Transformer2DModel {

void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states);

ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep);
ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep, RawPerfMetrics& raw_metrics);

private:
Config m_config;
@@ -9,6 +9,7 @@
#include "openvino/genai/visibility.hpp"
#include "openvino/genai/tokenizer.hpp"
#include "openvino/genai/lora_adapter.hpp"
#include "openvino/genai/perf_metrics.hpp"

#include "openvino/core/any.hpp"
#include "openvino/runtime/tensor.hpp"
@@ -68,7 +69,8 @@ class OPENVINO_GENAI_EXPORTS T5EncoderModel {
ov::Tensor infer(const std::string& pos_prompt,
const std::string& neg_prompt,
bool do_classifier_free_guidance,
int max_sequence_length);
int max_sequence_length,
RawPerfMetrics& raw_metrics);

ov::Tensor get_output_tensor(const size_t idx);

@@ -203,12 +203,12 @@ class OPENVINO_GENAI_EXPORTS Text2ImagePipeline {
* Generates image(s) based on prompt and other image generation parameters
* @param positive_prompt Prompt to generate image(s) from
* @param properties Image generation parameters specified as properties. Values in 'properties' override default value for generation parameters.
* @returns A tensor which has dimensions [num_images_per_prompt, height, width, 3]
* @returns An ImageResults object containing an image tensor with dimensions [num_images_per_prompt, height, width, 3] and performance metrics
*/
ov::Tensor generate(const std::string& positive_prompt, const ov::AnyMap& properties = {});
ImageResults generate(const std::string& positive_prompt, const ov::AnyMap& properties = {});

template <typename... Properties>
ov::util::EnableIfAllStringAny<ov::Tensor, Properties...> generate(
ov::util::EnableIfAllStringAny<ImageResults, Properties...> generate(
const std::string& positive_prompt,
Properties&&... properties) {
return generate(positive_prompt, ov::AnyMap{std::forward<Properties>(properties)...});
@@ -16,6 +16,7 @@

#include "openvino/genai/visibility.hpp"
#include "openvino/genai/lora_adapter.hpp"
#include "openvino/genai/perf_metrics.hpp"

namespace ov {
namespace genai {
@@ -89,7 +90,7 @@ class OPENVINO_GENAI_EXPORTS UNet2DConditionModel {

void set_adapters(const std::optional<AdapterConfig>& adapters);

ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep);
ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, RawPerfMetrics& raw_metrics);

bool do_classifier_free_guidance(float guidance_scale) const {
return guidance_scale > 1.0f && m_config.time_cond_proj_dim < 0;
6 changes: 3 additions & 3 deletions src/cpp/src/image_generation/diffusion_pipeline.hpp
@@ -82,13 +82,13 @@ class DiffusionPipeline {

virtual std::tuple<ov::Tensor, ov::Tensor, ov::Tensor, ov::Tensor> prepare_latents(ov::Tensor initial_image, const ImageGenerationConfig& generation_config) const = 0;

virtual void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) = 0;
virtual void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config, RawPerfMetrics& raw_metrics) = 0;

virtual void set_lora_adapters(std::optional<AdapterConfig> adapters) = 0;

virtual ov::Tensor generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties) = 0;
virtual ImageResults generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties) = 0;

virtual ov::Tensor decode(const ov::Tensor latent) = 0;
virtual ov::Tensor decode(const ov::Tensor latent, RawPerfMetrics& raw_metrics) = 0;

virtual ~DiffusionPipeline() = default;

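The RawPerfMetrics& parameter added to compute_hidden_states(), generate() and decode() is what lets each sub-model report its own inference time. A sketch of the accumulation pattern this enables, assuming a single-element m_inference_durations running total as initialized in FluxPipeline::generate below; the member and tensor names in the commented infer() are hypothetical, not the PR's actual implementation:

```cpp
#include <chrono>

#include "openvino/genai/perf_metrics.hpp"
#include "openvino/runtime/infer_request.hpp"

// Time one inference request and fold the elapsed microseconds into the
// RawPerfMetrics that generate() passes down through the pipeline.
inline void timed_infer(ov::InferRequest& request, ov::genai::RawPerfMetrics& raw_metrics) {
    const auto start = std::chrono::steady_clock::now();
    request.infer();
    const auto infer_us = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - start);
    raw_metrics.m_inference_durations[0] += ov::genai::MicroSeconds(infer_us);
}

// A model's infer() overload could then look roughly like this:
// ov::Tensor UNet2DConditionModel::infer(ov::Tensor sample, ov::Tensor timestep, RawPerfMetrics& raw_metrics) {
//     m_request.set_tensor("sample", sample);
//     m_request.set_tensor("timestep", timestep);
//     timed_infer(m_request, raw_metrics);
//     return m_request.get_output_tensor();
// }
```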
31 changes: 21 additions & 10 deletions src/cpp/src/image_generation/flux_pipeline.hpp
@@ -253,13 +253,13 @@ class FluxPipeline : public DiffusionPipeline {
m_transformer->compile(device, properties);
}

void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) override {
void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config, RawPerfMetrics& raw_metrics) override {
// encode_prompt
std::string prompt_2_str = generation_config.prompt_2 != std::nullopt ? *generation_config.prompt_2 : positive_prompt;

m_clip_text_encoder->infer(positive_prompt, {}, false);
m_clip_text_encoder->infer(positive_prompt, {}, false, raw_metrics);
ov::Tensor pooled_prompt_embeds = m_clip_text_encoder->get_output_tensor(1);
ov::Tensor prompt_embeds = m_t5_text_encoder->infer(prompt_2_str, "", false, generation_config.max_sequence_length);
ov::Tensor prompt_embeds = m_t5_text_encoder->infer(prompt_2_str, "", false, generation_config.max_sequence_length, raw_metrics);

pooled_prompt_embeds = numpy_utils::repeat(pooled_prompt_embeds, generation_config.num_images_per_prompt);
prompt_embeds = numpy_utils::repeat(prompt_embeds, generation_config.num_images_per_prompt);
@@ -315,10 +315,15 @@
OPENVINO_THROW("LORA adapters are not implemented for FLUX pipeline yet");
}

ov::Tensor generate(const std::string& positive_prompt,
ImageResults generate(const std::string& positive_prompt,
ov::Tensor initial_image,
ov::Tensor mask_image,
const ov::AnyMap& properties) override {
ImageResults image_results;
RawPerfMetrics &raw_metrics = image_results.perf_metrics.raw_metrics;
raw_metrics.generate_durations.clear();
raw_metrics.m_inference_durations = {{ MicroSeconds(0.0f) }};
const auto gen_start = std::chrono::steady_clock::now();
m_custom_generation_config = m_generation_config;
m_custom_generation_config.update_generation_config(properties);

@@ -339,7 +344,7 @@

check_inputs(m_custom_generation_config, initial_image);

compute_hidden_states(positive_prompt, m_custom_generation_config);
compute_hidden_states(positive_prompt, m_custom_generation_config, raw_metrics);

ov::Tensor latents, processed_image, image_latent, noise;
std::tie(latents, processed_image, image_latent, noise) = prepare_latents(initial_image, m_custom_generation_config);
@@ -360,26 +365,32 @@
for (size_t inference_step = 0; inference_step < timesteps.size(); ++inference_step) {
timestep_data[0] = timesteps[inference_step] / 1000;

ov::Tensor noise_pred_tensor = m_transformer->infer(latents, timestep);
ov::Tensor noise_pred_tensor = m_transformer->infer(latents, timestep, raw_metrics);

auto scheduler_step_result = m_scheduler->step(noise_pred_tensor, latents, inference_step, m_custom_generation_config.generator);
latents = scheduler_step_result["latent"];

if (callback && callback(inference_step, timesteps.size(), latents)) {
return ov::Tensor(ov::element::u8, {});
image_results.image = ov::Tensor(ov::element::u8, {});
const auto gen_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start);
raw_metrics.generate_durations.emplace_back(gen_ms);
return image_results;
}
}

latents = unpack_latents(latents, m_custom_generation_config.height, m_custom_generation_config.width, vae_scale_factor);
return m_vae->decode(latents);
image_results.image = m_vae->decode(latents, raw_metrics);
const auto gen_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start);
raw_metrics.generate_durations.emplace_back(gen_ms);
return image_results;
}

ov::Tensor decode(const ov::Tensor latent) override {
ov::Tensor decode(const ov::Tensor latent, RawPerfMetrics& raw_metrics) override {
ov::Tensor unpacked_latent = unpack_latents(latent,
m_custom_generation_config.height,
m_custom_generation_config.width,
m_vae->get_vae_scale_factor());
return m_vae->decode(unpacked_latent);
return m_vae->decode(unpacked_latent, raw_metrics);
}

private:
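One behavioural detail worth noting from FluxPipeline::generate above: cancelling generation from the step callback now returns an ImageResults with a placeholder (empty-shape) image tensor, while the elapsed generate time is still recorded. A caller-side sketch, continuing from the text2image sketch earlier (pipe, prompt, imwrite as in the samples) and assuming the truncated step-callback property above is named ov::genai::callback:

```cpp
auto results = pipe.generate(prompt,
    ov::genai::num_inference_steps(30),
    ov::genai::callback([](size_t step, size_t num_steps, ov::Tensor& /*latent*/) {
        return step >= 5;  // returning true cancels generation early
    }));

if (results.image.get_shape().empty()) {
    // Cancelled path: placeholder u8 tensor, but the generate duration was recorded.
    std::cout << "cancelled after "
              << results.perf_metrics.get_generate_duration().mean << " ms" << std::endl;
} else {
    imwrite("image_%d.bmp", results.image, true);
}
```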