From c45547bc8b27e8af19b32e2ab71386969f435f7f Mon Sep 17 00:00:00 2001
From: Alexei-V-Ivanov-AMD <156011006+Alexei-V-Ivanov-AMD@users.noreply.github.com>
Date: Tue, 19 Mar 2024 16:07:32 -0500
Subject: [PATCH] Update description of measure_ppl_MC_small.py

Added invocation examples to the description.

---
 benchmarks/measure_ppl_MC_small.py | 32 ++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/benchmarks/measure_ppl_MC_small.py b/benchmarks/measure_ppl_MC_small.py
index 2b344058d056b..f1301776d6b27 100755
--- a/benchmarks/measure_ppl_MC_small.py
+++ b/benchmarks/measure_ppl_MC_small.py
@@ -1,13 +1,29 @@
 #!/usr/bin/env python3
-#
-# This is a quick hack that produces PPL measurement by
-# iteratively dumping the logprob vector for the single next symbol
-# that is to be generated over the preloaded context.
-# It is actually an *inefficient* procedure because for the
-# N-token string it takes N*(preload + generation) time instead of
-# preload + N*generation
-#
+"""
+This is a quick hack that produces a PPL measurement by
+iteratively dumping the logprob vector for the single next symbol
+that is to be generated over the preloaded context.
+
+It is an *inefficient* procedure because for an
+N-token string it takes N*(preload + generation) time instead of
+preload + N*generation.
+
+Quick correctness validation tips:
+
+Running the llama-2-7b model
+( ./vllm/benchmarks/measure_ppl_MC_small.py --model=/data/models/llama-2-7b-chat-hf --data=./vllm/tests/prompts/wiki.test.raw --context-size=2048 --batch-size=1 -tp=1 )
+should result in PPL~6.447469639345
+
+Running the llama-2-13b model
+( ./vllm/benchmarks/measure_ppl_MC_small.py --model=/data/models/llama-2-13b-chat-hf --data=./vllm/tests/prompts/wiki.test.raw --context-size=2048 --batch-size=1 -tp=1 )
+should result in PPL~5.675290252052
+
+Running the llama-2-70b model
+( ./vllm/benchmarks/measure_ppl_MC_small.py --model=/data/models/llama-2-70b-chat-hf --data=./vllm/tests/prompts/wiki.test.raw --context-size=2048 --batch-size=1 -tp=1 )
+should result in PPL~4.2067624908705
+
+"""
 import numpy as np
 from transformers import LlamaForCausalLM, LlamaTokenizer
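
For reference, below is a minimal standalone sketch of the measurement the docstring describes, written against the same transformers imports the file already uses. The model path, data file, and 2048-token context size are taken from the invocation examples above and are assumptions, not part of the script itself; the loop deliberately re-runs the model over the full prefix at every position, which is exactly the N*(preload + generation) inefficiency the docstring notes.

import math

import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

MODEL = "/data/models/llama-2-7b-chat-hf"  # illustrative path from the examples above
tokenizer = LlamaTokenizer.from_pretrained(MODEL)
model = LlamaForCausalLM.from_pretrained(MODEL)
model.eval()

text = open("./vllm/tests/prompts/wiki.test.raw").read()
ids = tokenizer(text, return_tensors="pt").input_ids[:, :2048]  # --context-size=2048

total_logprob = 0.0
with torch.no_grad():
    # The inefficient loop: for each position, re-run the model over the
    # whole prefix just to read the logprob of the one observed next token.
    for i in range(1, ids.shape[1]):
        logits = model(ids[:, :i]).logits[0, -1]      # logits for the next token
        logprobs = torch.log_softmax(logits, dim=-1)  # full logprob vector
        total_logprob += logprobs[ids[0, i]].item()   # logprob of the observed token

ppl = math.exp(-total_logprob / (ids.shape[1] - 1))  # PPL = exp(-mean logprob)
print("PPL ~", ppl)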