From 4c1fde7aacfe8dd3da61c8868ff72e5d71ff1b75 Mon Sep 17 00:00:00 2001
From: AllentDan <AllentDan@yeah.net>
Date: Tue, 31 Oct 2023 13:09:24 +0800
Subject: [PATCH] fix benchmark serving computation mistake

---
 benchmark/profile_restful_api.py |  1 +
 benchmark/profile_serving.py     | 13 +++++++------
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/benchmark/profile_restful_api.py b/benchmark/profile_restful_api.py
index d1f6ebf80e..607cc467ca 100644
--- a/benchmark/profile_restful_api.py
+++ b/benchmark/profile_restful_api.py
@@ -115,6 +115,7 @@ def read_dataset(tokenizer_path: str, dataset_path: str, samples: int,
         print(f'elapsed time for read data: '
               f'{round(time.perf_counter() - start, 2)} s')
 
+    print('start tokenization. This takes a while, please wait...')
     start = time.perf_counter()
     tokenizer = Tokenizer(tokenizer_path)
     prompts_token_lens = [len(tokenizer.encode(prompt)) for prompt in prompts]
diff --git a/benchmark/profile_serving.py b/benchmark/profile_serving.py
index 4580757eeb..ea2edcf9f9 100644
--- a/benchmark/profile_serving.py
+++ b/benchmark/profile_serving.py
@@ -84,6 +84,7 @@ def read_dataset(tokenizer_path: str, dataset_path: str, samples: int,
         completions = [completion for _, completion in dataset]
         print(f'elapsed time for read data: '
               f'{round(time.perf_counter() - start, 2)} s')
+    print('start tokenization. This takes a while, please wait...')
 
     start = time.perf_counter()
     tokenizer = Tokenizer(tokenizer_path)
@@ -124,7 +125,6 @@ def main(tritonserver_addr: str,
     res_que = mp.Queue()
 
     procs = []
-    _start = time.perf_counter()
     for i in range(concurrency):
         chatbot = Chatbot(tritonserver_addr=tritonserver_addr,
                           display=False,
@@ -134,13 +134,18 @@ def main(tritonserver_addr: str,
         proc = mp.Process(target=infer,
                           args=(chatbot, i + 1, req_que, res_que))
         procs.append(proc)
-        proc.start()
 
     # read data and put it to queue
     n_req = read_dataset(tokenizer_path, dataset_path, samples, session_len,
                          req_que)
     for i in range(concurrency):
         req_que.put([None, None, None])
+    _start = time.perf_counter()
+    for proc in procs:
+        proc.start()
+    for proc in procs:
+        proc.join()
+    _end = time.perf_counter()
 
     stats = []
     for i in range(concurrency):
@@ -150,7 +155,6 @@ def main(tritonserver_addr: str,
               f'stats: \n{_stats}\n{"-" * 50}\n')
         stats.append(np.array(_stats))
 
-    _end = time.perf_counter()
     elapsed_time = _end - _start
 
     stats = np.concatenate(stats).reshape(-1, 3)
@@ -170,9 +174,6 @@ def main(tritonserver_addr: str,
           f'req throughput: {req_throughput:.3f} req/s\n'
           f'{"-" * 50}\n')
 
-    for proc in procs:
-        proc.join()
-
 
 if __name__ == '__main__':
     fire.Fire(main)