Commit ca625dc: wa access to tensor (openvinotoolkit#516)

Authored by eaidova on Jun 14, 2024
1 parent: 649ff27

Showing 2 changed files with 9 additions and 7 deletions.

llm_bench/python/benchmark.py (6 additions, 5 deletions)

@@ -209,13 +209,13 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
     if args["output_dir"] is not None and num == 0:
         for bs_index, in_text in enumerate(input_text_list):
             utils.output_file.output_input_text(in_text, args, model_precision, prompt_index, bs_index, proc_id)
+    pt_inputs = tokenizer(input_text_list, return_tensors="pt")
+    input_token_size = pt_inputs.input_ids.shape[1]
+    pipe_tokenizer = model.get_tokenizer()
     tok_encode_start = time.perf_counter()
-    input_data = tokenizer.encode(input_text_list)
+    input_data = pipe_tokenizer.encode(input_text_list)
     tok_encode_end = time.perf_counter()
     tok_encode_time = (tok_encode_end - tok_encode_start) * 1000
-    # Remove `token_type_ids` from inputs
-    input_tokens = input_data.input_ids.data
-    input_token_size = input_tokens[0].size
     if args['batch_size'] > 1:
         out_str = '[warm-up]' if num == 0 else '[{}]'.format(num)
         out_str += " Batch_size={}, ".format(args['batch_size'])
@@ -241,7 +241,7 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
 
     generation_time = end - start
     tok_decode_start = time.perf_counter()
-    generated_text = tokenizer.decode(generated_tokens)
+    generated_text = pipe_tokenizer.decode(generated_tokens)
     tok_decode_end = time.perf_counter()
     tok_decode_time = (tok_decode_end - tok_decode_start) * 1000
     # Only text_gen need to minus length of input_data, because generated_text may include input_text
@@ -296,6 +296,7 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
     utils.metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0])
     streamer.reset()
 
+
 def run_text_generation_benchmark(model_path, framework, device, args, num_iters):
     model, tokenizer, pretrain_time, bench_hook, use_genai = FW_UTILS[framework].create_text_gen_model(model_path, device, **args)
     model_precision = utils.model_utils.get_model_precision(model_path.parts)
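
Not part of the commit, but to make the intent concrete: after this change the Hugging Face tokenizer supplies the input token count, working around direct reads of the openvino Tensor returned by the genai tokenizer, while the timed encode/decode path uses the pipeline's own tokenizer. A minimal sketch of that pattern, assuming a hypothetical model_dir holding a model already exported for llm_bench:

    import time

    import openvino_genai
    from transformers import AutoTokenizer

    model_dir = "path/to/exported/model"  # hypothetical export directory
    prompts = ["What is OpenVINO?"]

    hf_tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
    pipe = openvino_genai.LLMPipeline(model_dir, "CPU")
    pipe_tokenizer = pipe.get_tokenizer()

    # Input size comes from the HF tokenizer's PyTorch tensor shape,
    # not from input_data.input_ids.data as before this commit.
    input_token_size = hf_tokenizer(prompts, return_tensors="pt").input_ids.shape[1]

    # The timed round-trip still goes through the genai tokenizer.
    tok_encode_start = time.perf_counter()
    input_data = pipe_tokenizer.encode(prompts)
    tok_encode_time = (time.perf_counter() - tok_encode_start) * 1000
    print(f"input tokens: {input_token_size}, encode: {tok_encode_time:.2f} ms")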

llm_bench/python/utils/ov_utils.py (3 additions, 2 deletions)

@@ -181,6 +181,7 @@ def create_text_gen_model(model_path, device, **kwargs):
 def create_genai_text_gen_model(model_path, device, ov_config, **kwargs):
     import openvino_tokenizers  # noqa: F401
     import openvino_genai
+    from transformers import AutoTokenizer
 
     class TokenStreamer(openvino_genai.StreamerBase):
         def __init__(self, tokenizer):
@@ -213,13 +214,13 @@ def get_time_list(self):
     if not (model_path / "openvino_tokenizer.xml").exists() or not (model_path / "openvino_detokenizer.xml").exists():
         convert_ov_tokenizer(model_path)
 
+    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
     start = time.perf_counter()
 
     llm_pipe = openvino_genai.LLMPipeline(str(model_path), device.upper(), ov_config)
     end = time.perf_counter()
     log.info(f'Pipeline initialization time: {end - start:.2f}s')
-    tokenizer = llm_pipe.get_tokenizer()
-    streamer = TokenStreamer(tokenizer)
+    streamer = TokenStreamer(llm_pipe.get_tokenizer())
 
     return llm_pipe, tokenizer, end - start, streamer, True
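
For reference, the TokenStreamer class is only partially visible in this diff (its name, the __init__ signature, and a get_time_list method). Below is a sketch of how such a streamer plausibly looks, assuming the standard openvino_genai.StreamerBase interface in which put receives a token id and returns False to continue generation; the method bodies are assumptions, not the file's actual code:

    import time

    import openvino_genai


    class TokenStreamer(openvino_genai.StreamerBase):
        def __init__(self, tokenizer):
            openvino_genai.StreamerBase.__init__(self)
            self.tokenizer = tokenizer
            self.token_generation_time = []  # one perf_counter stamp per token (assumed)

        def put(self, token_id):
            # Called by the pipeline for each generated token id.
            self.token_generation_time.append(time.perf_counter())
            return False  # False means: do not stop generation

        def get_time_list(self):
            return self.token_generation_time

        def reset(self):
            # benchmark.py calls streamer.reset() between iterations.
            self.token_generation_time = []

        def end(self):
            pass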
