
Commit

fix
AllentDan committed Nov 6, 2023
1 parent 2e41177 commit 4bee521
Showing 3 changed files with 9 additions and 10 deletions.
6 changes: 3 additions & 3 deletions benchmark/profile_restful_api.py
@@ -34,8 +34,8 @@ def infer(server_addr: str, session_id: int, req_queue: Queue, res_que: Queue):

 first_token_latency = np.round(timestamps[1] - timestamps[0], 3)
 token_latency = np.round(timestamps[-1] - timestamps[0], 3)
-generated_tokens = tokens[-1] - tokens[0]
-total_tokens = tokens[-1]
+generated_tokens = tokens[-1]
+total_tokens = tokens[-1] + input_seqlen
 stats.append([
     first_token_latency, generated_tokens, total_tokens, token_latency
 ])
@@ -151,7 +151,7 @@ def main(server_addr: str,
 first_token_latency_max = np.max(stats[:, 0], axis=0)
 first_token_latency_ave = np.mean(stats[:, 0], axis=0)
 generated_token_throughput = np.sum(stats[:, 1], axis=0) / elapsed_time
-total_token_throughput = np.sum(stats[:, 1], axis=0) / elapsed_time
+total_token_throughput = np.sum(stats[:, 2], axis=0) / elapsed_time
 rqs = n_req / elapsed_time
 rqm = rqs * 60

7 changes: 3 additions & 4 deletions benchmark/profile_serving.py
@@ -26,11 +26,10 @@ def infer(chatbot, session_id: int, req_que: mp.Queue, res_que: mp.Queue):
 sequence_end=True):
 timestamps.append(time.perf_counter())
 tokens.append(token)
-
 first_token_latency = np.round(timestamps[1] - timestamps[0], 3)
 token_latency = np.round(timestamps[-1] - timestamps[0], 3)
-generated_tokens = tokens[-1] - tokens[0]
-total_tokens = tokens[-1]
+generated_tokens = tokens[-1]
+total_tokens = tokens[-1] + input_seqlen
 stats.append([
     first_token_latency, generated_tokens, total_tokens, token_latency
 ])
@@ -164,7 +163,7 @@ def main(tritonserver_addr: str,
 first_token_latency_max = np.max(stats[:, 0], axis=0)
 first_token_latency_ave = np.mean(stats[:, 0], axis=0)
 generated_token_throughput = np.sum(stats[:, 1], axis=0) / elapsed_time
-total_token_throughput = np.sum(stats[:, 1], axis=0) / elapsed_time
+total_token_throughput = np.sum(stats[:, 2], axis=0) / elapsed_time
 rqs = n_req / elapsed_time
 rqm = rqs * 60

6 changes: 3 additions & 3 deletions benchmark/profile_throughput.py
@@ -91,8 +91,8 @@ def _inference(self, req_queue: Queue, res_queue: Queue, session_id: int):
 tokens.append(token)
 first_token_latency = np.round(timestamps[1] - timestamps[0], 3)
 token_latency = np.round(timestamps[-1] - timestamps[0], 3)
-generated_tokens = tokens[-1] - tokens[0]
-total_tokens = tokens[-1]
+generated_tokens = tokens[-1]
+total_tokens = tokens[-1] + len(input_ids)
 stats.append([
     first_token_latency, generated_tokens, total_tokens,
     token_latency
@@ -140,7 +140,7 @@ def process_request(self, requests, concurrency: int = 1):
 first_token_latency_max = np.max(stats[:, 0], axis=0)
 first_token_latency_ave = np.mean(stats[:, 0], axis=0)
 generated_token_throughput = np.sum(stats[:, 1], axis=0) / elapsed_time
-total_token_throughput = np.sum(stats[:, 1], axis=0) / elapsed_time
+total_token_throughput = np.sum(stats[:, 2], axis=0) / elapsed_time
 rqs = len(requests) / elapsed_time
 rqm = rqs * 60
 print(

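The same accounting fix appears in all three benchmark scripts: the final entry of tokens is now treated as the generated-token count, the prompt length is added back to form the total, and the two throughput figures sum different columns of the stats array. Below is a minimal, self-contained sketch of that computation; variable names mirror the diff, but the numeric values (a five-step stream, a 32-token prompt, 0.8 s elapsed) are invented here purely for illustration.

import numpy as np

# Toy per-request data, illustrative only: cumulative token counts reported
# by the streaming endpoint, the prompt length, and the benchmark wall time.
tokens = [1, 2, 3, 4, 5]   # cumulative generated-token count at each step
input_seqlen = 32          # prompt (input) length for this request
elapsed_time = 0.8         # wall-clock seconds for the whole run

# Per-request accounting after the fix: generated tokens are the final
# cumulative count, and the total adds the prompt length back in.
generated_tokens = tokens[-1]
total_tokens = tokens[-1] + input_seqlen

# stats rows are [first_token_latency, generated_tokens, total_tokens,
# token_latency]; a single-row example keeps the column layout visible.
stats = np.array([[0.05, generated_tokens, total_tokens, 0.40]])

# The fix makes the two throughput figures read different columns:
# column 1 (generated only) vs. column 2 (generated + prompt).
generated_token_throughput = np.sum(stats[:, 1], axis=0) / elapsed_time
total_token_throughput = np.sum(stats[:, 2], axis=0) / elapsed_time

print(generated_token_throughput, total_token_throughput)

With these toy numbers the generated throughput is 6.25 tokens/s and the total throughput 46.25 tokens/s, which is exactly the distinction the original code collapsed by summing column 1 for both metrics.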