
Commit 29ea577

[llm_bench] update optimum bench hook for transformer-based imagegen (#1525)

Co-authored-by: Ilya Lavrenov <[email protected]>
eaidova and ilya-lavrenov authored Jan 11, 2025
1 parent b3096c3 commit 29ea577
Showing 4 changed files with 19 additions and 11 deletions.
7 changes: 5 additions & 2 deletions tools/llm_bench/llm_bench_utils/hook_forward.py
@@ -9,6 +9,7 @@ def __init__(self):
self.text_encoder_step_count = 0
self.unet_step_count = 0
self.vae_decoder_step_count = 0
+ self.main_model_name = "unet"

def get_text_encoder_latency(self):
return (self.text_encoder_time / self.text_encoder_step_count) * 1000 if self.text_encoder_step_count > 0 else 0
@@ -56,7 +57,9 @@ def my_text_encoder(inputs, share_inputs=True, **kwargs):
pipe.text_encoder.request = my_text_encoder

def new_unet(self, pipe):
- old_unet = pipe.unet.request
+ main_model = pipe.unet if pipe.unet is not None else pipe.transformer
+ self.main_model_name = "unet" if pipe.unet is not None else "transformer"
+ old_unet = main_model.request

def my_unet(inputs, share_inputs=True, **kwargs):
t1 = time.time()
@@ -66,7 +69,7 @@ def my_unet(inputs, share_inputs=True, **kwargs):
self.unet_time_list.append(unet_time)
self.unet_step_count += 1
return r
- pipe.unet.request = my_unet
+ main_model.request = my_unet

def new_vae_decoder(self, pipe):
old_vae_decoder = pipe.vae_decoder.request
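To make the mechanism concrete, here is a minimal standalone sketch of the wrapping pattern this hunk generalizes (the class name ImageGenHook and the pared-down form are illustrative assumptions; the real hook in hook_forward.py keeps more counters). The benchmark swaps the denoiser's compiled-model request callable for a closure that times each call, and after this commit it selects pipe.unet or pipe.transformer depending on which attribute the pipeline actually has (a UNet for SD 1.x/2.x/XL, a diffusion transformer for DiT pipelines such as SD3 or FLUX):

import time

class ImageGenHook:  # hypothetical stand-in for the real hook class
    def __init__(self):
        self.unet_time_list = []
        self.unet_step_count = 0
        self.main_model_name = "unet"

    def new_unet(self, pipe):
        # Use whichever denoiser backbone the pipeline exposes.
        main_model = pipe.unet if pipe.unet is not None else pipe.transformer
        self.main_model_name = "unet" if pipe.unet is not None else "transformer"
        old_unet = main_model.request  # keep the original compiled-model callable

        def my_unet(inputs, share_inputs=True, **kwargs):
            t1 = time.time()
            r = old_unet(inputs, share_inputs=share_inputs, **kwargs)
            self.unet_time_list.append(time.time() - t1)  # seconds per denoising step
            self.unet_step_count += 1
            return r

        main_model.request = my_unet  # every denoiser inference now goes through the timer
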
8 changes: 4 additions & 4 deletions tools/llm_bench/llm_bench_utils/metrics_print.py
@@ -97,17 +97,17 @@ def print_stable_diffusion_infer_latency(iter_str, iter_data, stable_diffusion,
iter_data['first_token_infer_latency'] = iter_data['first_token_latency']
iter_data['other_tokens_infer_avg_latency'] = iter_data['other_tokens_avg_latency']
prefix = f'[{iter_str}][P{prompt_idx}]'
log.info(f"{prefix} First step of unet latency: {iter_data['first_token_latency']:.2f} ms/step, "
f"other steps of unet latency: {iter_data['other_tokens_avg_latency']:.2f} ms/step",)
log.info(f"{prefix} First step of {stable_diffusion.main_model_name} latency: {iter_data['first_token_latency']:.2f} ms/step, "
f"other steps of {stable_diffusion.main_model_name} latency: {iter_data['other_tokens_avg_latency']:.2f} ms/step",)
has_text_encoder_time = stable_diffusion.get_text_encoder_step_count() != -1
log_str = (
f"{prefix} Text encoder latency: {stable_diffusion.get_text_encoder_latency():.2f}" if has_text_encoder_time else f"{prefix} Text encoder latency: N/A "
f"unet latency: {stable_diffusion.get_unet_latency():.2f} ms/step, "
f"{stable_diffusion.main_model_name} latency: {stable_diffusion.get_unet_latency():.2f} ms/step, "
f"vae decoder latency: {stable_diffusion.get_vae_decoder_latency():.2f} ms/step, ")
if has_text_encoder_time:
log_str += f"text encoder step count: {stable_diffusion.get_text_encoder_step_count()}, "
log_str += (
f"unet step count: {stable_diffusion.get_unet_step_count()}, "
f"{stable_diffusion.main_model_name} step count: {stable_diffusion.get_unet_step_count()}, "
f"vae decoder step count: {stable_diffusion.get_vae_decoder_step_count()}")
log.info(log_str)

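The effect of the relabeling is purely cosmetic but keeps the logs accurate for DiT pipelines. An illustration with made-up values (both the label and the latencies below are hypothetical):

main_model_name = "transformer"  # previously hard-coded as "unet"
first_step, other_steps = 412.30, 280.10  # hypothetical ms/step values
print(f"First step of {main_model_name} latency: {first_step:.2f} ms/step, "
      f"other steps of {main_model_name} latency: {other_steps:.2f} ms/step")
# First step of transformer latency: 412.30 ms/step, other steps of transformer latency: 280.10 ms/step
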
10 changes: 7 additions & 3 deletions tools/llm_bench/llm_bench_utils/ov_utils.py
@@ -363,10 +363,11 @@ def create_genai_image_gen_model(model_path, device, ov_config, **kwargs):
import openvino_genai

class PerfCollector:
- def __init__(self) -> types.NoneType:
+ def __init__(self, main_model_name="unet") -> types.NoneType:
self.iteration_time = []
self.start_time = time.perf_counter()
self.duration = -1
+ self.main_model_name = main_model_name

def __call__(self, step, num_steps, latents):
self.iteration_time.append(time.perf_counter() - self.start_time)
@@ -405,8 +406,6 @@ def get_unet_step_count(self):
def get_vae_decoder_step_count(self):
return 1

- callback = PerfCollector()
-
adapter_config = get_lora_config(kwargs.get("lora", None), kwargs.get("lora_alphas", []))
if adapter_config:
ov_config['adapters'] = adapter_config
@@ -416,6 +415,11 @@ def get_vae_decoder_step_count(self):
data = json.load(f)

model_class_name = data.get("_class_name", "")
main_model_name = "unet" if "unet" in data else "transformer"
callback = PerfCollector(main_model_name)

orig_tokenizer = AutoTokenizer.from_pretrained(model_path, subfolder="tokenizer")
callback.orig_tokenizer = orig_tokenizer

start = time.perf_counter()

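The detection itself is simple: a Diffusers-style export lists its pipeline components as top-level keys in its index JSON, so the presence of a "unet" key distinguishes UNet pipelines from transformer ones. A self-contained sketch of that logic, assuming the file being read is the export's model_index.json (the helper name is invented for illustration):

import json
from pathlib import Path

def detect_main_model_name(model_path):  # hypothetical helper
    with open(Path(model_path) / "model_index.json") as f:
        data = json.load(f)
    # DiT pipelines (e.g. SD3, FLUX) declare a "transformer" component instead of a "unet".
    return "unet" if "unet" in data else "transformer"
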
5 changes: 3 additions & 2 deletions tools/llm_bench/task/image_generation.py
@@ -123,6 +123,7 @@ def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list,
def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data_list, proc_id, mem_consumption, callback=None):
set_seed(args['seed'])
input_text = image_param['prompt']
+ input_token_size = callback.orig_tokenizer(input_text, return_tensors="pt").input_ids.numel()
input_args = collects_input_args(image_param, args['model_type'], args['model_name'], args["num_steps"], args.get("height"), args.get("width"), callback)
out_str = f"Input params: Batch_size={args['batch_size']}, " \
f"steps={input_args['num_inference_steps']}, width={input_args['width']}, height={input_args['height']}"
@@ -157,6 +158,7 @@ def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data
generation_time = end - start
iter_data = gen_output_data.gen_iterate_data(
iter_idx=num,
+ in_size=input_token_size * args['batch_size'],
infer_count=input_args["num_inference_steps"],
gen_time=generation_time,
res_md5=result_md5_list,
@@ -230,8 +232,7 @@ def run_image_generation_benchmark(model_path, framework, device, args, num_iter
prefix = '[warm-up]' if num == 0 else '[{}]'.format(num)
log.info(f"{prefix}[P{p_idx}] start: {iter_timestamp[num][p_idx]['start']}, end: {iter_timestamp[num][p_idx]['end']}")

- if not use_genai:
-     metrics_print.print_average(iter_data_list, prompt_idx_list, args['batch_size'], False)
+ metrics_print.print_average(iter_data_list, prompt_idx_list, args['batch_size'], False)
return iter_data_list, pretrain_time, iter_timestamp


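With the tokenizer attached to the callback in ov_utils.py, the genai path can now report input token counts the same way the optimum path does. A sketch of the accounting, with a made-up model directory, prompt, and batch size:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("/path/to/exported/model", subfolder="tokenizer")  # hypothetical path
input_token_size = tokenizer("a photo of an astronaut", return_tensors="pt").input_ids.numel()
batch_size = 2  # hypothetical
in_size = input_token_size * batch_size  # the value recorded in iter_data
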
