diff --git a/benchmark/profile_restful_api.py b/benchmark/profile_restful_api.py
index 607cc467ca..249245fc3a 100644
--- a/benchmark/profile_restful_api.py
+++ b/benchmark/profile_restful_api.py
@@ -54,7 +54,7 @@ def infer(server_addr: str, session_id: int, req_queue: mp.Queue,
             f'input_seqlen {input_seqlen}, output_seqlen {output_seqlen}')
         timestamps = []
         tokens = []
-        start = time.perf_counter()
+        timestamps.append(time.perf_counter())
         for res, token in get_streaming_response(
                 prompt,
                 server_addr,
@@ -65,7 +65,7 @@ def infer(server_addr: str, session_id: int, req_queue: mp.Queue,
             timestamps.append(time.perf_counter())
             tokens.append(token)
 
-        first_token_latency = timestamps[1] - start
+        first_token_latency = timestamps[1] - timestamps[0]
         token_latency = timestamps[-1] - timestamps[0]
         token = tokens[-1] - tokens[0]
         stats.append([first_token_latency, token, token_latency])
diff --git a/benchmark/profile_serving.py b/benchmark/profile_serving.py
index ea2edcf9f9..5a613dbfbe 100644
--- a/benchmark/profile_serving.py
+++ b/benchmark/profile_serving.py
@@ -17,7 +17,7 @@ def infer(chatbot, session_id: int, req_que: mp.Queue, res_que: mp.Queue):
                                                     [None, None, None]):
         timestamps = []
         tokens = []
-        start = time.perf_counter()
+        timestamps.append(time.perf_counter())
         for status, res, token in chatbot.stream_infer(
                 session_id,
                 prompt,
@@ -27,7 +27,7 @@ def infer(chatbot, session_id: int, req_que: mp.Queue, res_que: mp.Queue):
             timestamps.append(time.perf_counter())
             tokens.append(token)
 
-        first_token_latency = np.round(timestamps[1] - start, 3)
+        first_token_latency = np.round(timestamps[1] - timestamps[0], 3)
         token_latency = np.round(timestamps[-1] - timestamps[0], 3)
         token = tokens[-1] - tokens[0]
         stats.append([first_token_latency, token, token_latency])
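
Note on the change: before this patch, timestamps[0] was the arrival time of the first streamed chunk, so token_latency measured last chunk minus first chunk; the patch records the request start as timestamps[0], so first_token_latency and token_latency now share the same reference point and the separate start variable goes away. Below is a minimal, self-contained sketch of the resulting measurement pattern; fake_stream is a hypothetical stand-in for get_streaming_response / chatbot.stream_infer and yields a cumulative token count, as the scripts assume.

import time


def fake_stream(n_tokens: int, delay: float = 0.01):
    # Hypothetical streaming endpoint: yields a running token count,
    # sleeping briefly to mimic per-token generation latency.
    total = 0
    for _ in range(n_tokens):
        time.sleep(delay)
        total += 1
        yield total


timestamps = []
tokens = []
# Record the request start as timestamps[0], the pattern the patch
# adopts, so every latency below is a difference within one list.
timestamps.append(time.perf_counter())
for token in fake_stream(5):
    timestamps.append(time.perf_counter())
    tokens.append(token)

# timestamps[1] is the first streamed chunk; timestamps[0] is the start,
# so both latencies are measured from the same reference point.
first_token_latency = timestamps[1] - timestamps[0]
token_latency = timestamps[-1] - timestamps[0]
token = tokens[-1] - tokens[0]
print(f'first token: {first_token_latency:.3f}s, '
      f'total: {token_latency:.3f}s, tokens: {token}')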