From c45547bc8b27e8af19b32e2ab71386969f435f7f Mon Sep 17 00:00:00 2001
From: Alexei-V-Ivanov-AMD <156011006+Alexei-V-Ivanov-AMD@users.noreply.github.com>
Date: Tue, 19 Mar 2024 16:07:32 -0500
Subject: [PATCH] Update description of measure_ppl_MC_small.py

Added invocation examples to the description.

---
 benchmarks/measure_ppl_MC_small.py | 32 ++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/benchmarks/measure_ppl_MC_small.py b/benchmarks/measure_ppl_MC_small.py
index 2b344058d056b..f1301776d6b27 100755
--- a/benchmarks/measure_ppl_MC_small.py
+++ b/benchmarks/measure_ppl_MC_small.py
@@ -1,13 +1,29 @@
 #!/usr/bin/env python3
-#
-# This is a quick hack that produces PPL measurement by
-# iteratively dumping the logprob vector for the single next symbol
-# that is to be generated over the preloaded context.
-# It is actually an *inefficient* procedure because for the
-# N-token string it takes N*(preload + generation) time instead of
-# preload + N*generation
-#
+"""
+This is a quick hack that produces a PPL measurement by
+iteratively dumping the logprob vector for the single next symbol
+that is to be generated over the preloaded context.
+
+It is an *inefficient* procedure because for an
+N-token string it takes N*(preload + generation) time instead of
+preload + N*generation.
+
+Quick correctness validation tips:
+
+Running the llama-2-7b model
+( ./vllm/benchmarks/measure_ppl_MC_small.py --model=/data/models/llama-2-7b-chat-hf --data=./vllm/tests/prompts/wiki.test.raw --context-size=2048 --batch-size=1 -tp=1 )
+should result in PPL~6.447469639345
+
+Running the llama-2-13b model
+( ./vllm/benchmarks/measure_ppl_MC_small.py --model=/data/models/llama-2-13b-chat-hf --data=./vllm/tests/prompts/wiki.test.raw --context-size=2048 --batch-size=1 -tp=1 )
+should result in PPL~5.675290252052
+
+Running the llama-2-70b model
+( ./vllm/benchmarks/measure_ppl_MC_small.py --model=/data/models/llama-2-70b-chat-hf --data=./vllm/tests/prompts/wiki.test.raw --context-size=2048 --batch-size=1 -tp=1 )
+should result in PPL~4.2067624908705
+
+"""
 import numpy as np
 from transformers import LlamaForCausalLM, LlamaTokenizer
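
For reference, below is a minimal standalone sketch of the measurement the docstring describes, written against the same transformers imports the file already uses. The model path, data file, and 2048-token context size are taken from the invocation examples above and are assumptions, not part of the script itself; the loop deliberately re-runs the model over the full prefix at every position, which is exactly the N*(preload + generation) inefficiency the docstring notes.

import math

import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

MODEL = "/data/models/llama-2-7b-chat-hf"  # illustrative path from the examples above
tokenizer = LlamaTokenizer.from_pretrained(MODEL)
model = LlamaForCausalLM.from_pretrained(MODEL)
model.eval()

text = open("./vllm/tests/prompts/wiki.test.raw").read()
ids = tokenizer(text, return_tensors="pt").input_ids[:, :2048]  # --context-size=2048

total_logprob = 0.0
with torch.no_grad():
    # The inefficient loop: for each position, re-run the model over the
    # whole prefix just to read the logprob of the one observed next token.
    for i in range(1, ids.shape[1]):
        logits = model(ids[:, :i]).logits[0, -1]      # logits for the next token
        logprobs = torch.log_softmax(logits, dim=-1)  # full logprob vector
        total_logprob += logprobs[ids[0, i]].item()   # logprob of the observed token

ppl = math.exp(-total_logprob / (ids.shape[1] - 1))  # PPL = exp(-mean logprob)
print("PPL ~", ppl)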