Skip to content

Commit

Permalink
benchmark-script
Browse files Browse the repository at this point in the history
  • Loading branch information
chyroc committed Mar 4, 2025
1 parent 61c83d3 commit 2d8569c
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 27 deletions.
43 changes: 34 additions & 9 deletions examples/benchmark_ark_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,35 @@ def get_current_time_ms():
return int(time.time() * 1000)


def cal_latency(current: int, latency_list: List[int]) -> str:
    """Format latency statistics (P99 / P90 / average) for a list of samples.

    Args:
        current: Latency (ms) of the most recent request, echoed back so each
            log line shows the single sample next to the running aggregate.
        latency_list: All latency samples (ms) collected so far.

    Returns:
        A summary like "P99=120ms, P90=95ms, AVG=80.50ms, CURRENT=88ms",
        or "No latency data" when there are no samples yet.
    """
    if not latency_list:
        return "No latency data"

    # Sort once; percentiles and the average all read from this.
    sorted_latency = sorted(latency_list)
    length = len(sorted_latency)

    def percentile(fraction: float) -> int:
        # Nearest-rank index, clamped into [0, length - 1] so tiny sample
        # sets (including a single sample) never index out of range.
        index = round(length * fraction) - 1
        return sorted_latency[max(0, min(index, length - 1))]

    p99 = percentile(0.99)
    p90 = percentile(0.90)
    avg = sum(sorted_latency) / length

    return f"P99={p99}ms, P90={p90}ms, AVG={avg:.2f}ms, CURRENT={current}ms"


def test_latency(ep: str, token: str, text: str):
Expand All @@ -35,20 +56,24 @@ def test_latency(ep: str, token: str, text: str):
continue

if chunk.choices[0].delta.content:
return "", chunk.choices[0].delta.content, get_current_time_ms() - start
return (
stream.response.headers["x-request-id"],
chunk.choices[0].delta.content,
get_current_time_ms() - start,
)


async def main():
ep = os.getenv("ARK_EP")
token = os.getenv("ARK_TOKEN")
text = os.getenv("COZE_TEXT") or "讲个笑话"

times = 50
times = 100
text_latency = []
for i in range(times):
logid, text, latency = test_latency(ep, token, text)
text_latency.append(latency)
print(f"[latency.ark.text] {i}, latency: {cal_latency(text_latency)} ms, log: {logid}, text: {text}")
print(f"[latency.ark.text] {i}, latency: {cal_latency(latency, text_latency)}, log: {logid}, text: {text}")


if __name__ == "__main__":
Expand Down
37 changes: 29 additions & 8 deletions examples/benchmark_text_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,35 @@ def get_current_time_ms():
kwargs = json.loads(os.getenv("COZE_KWARGS") or "{}")


def cal_latency(current: int, latency_list: List[int]) -> str:
    """Format latency statistics (P99 / P90 / average) for a list of samples.

    Args:
        current: Latency (ms) of the most recent request, echoed back so each
            log line shows the single sample next to the running aggregate.
        latency_list: All latency samples (ms) collected so far.

    Returns:
        A summary like "P99=120ms, P90=95ms, AVG=80.50ms, CURRENT=88ms",
        or "No latency data" when there are no samples yet.
    """
    if not latency_list:
        return "No latency data"

    # Sort once; percentiles and the average all read from this.
    sorted_latency = sorted(latency_list)
    length = len(sorted_latency)

    def percentile(fraction: float) -> int:
        # Nearest-rank index, clamped into [0, length - 1] so tiny sample
        # sets (including a single sample) never index out of range.
        index = round(length * fraction) - 1
        return sorted_latency[max(0, min(index, length - 1))]

    p99 = percentile(0.99)
    p90 = percentile(0.90)
    avg = sum(sorted_latency) / length

    return f"P99={p99}ms, P90={p90}ms, AVG={avg:.2f}ms, CURRENT={current}ms"


async def test_latency(coze: Coze, bot_id: str, text: str) -> (str, str, int):
Expand Down Expand Up @@ -91,12 +112,12 @@ async def main():
base_url=coze_api_base,
)

times = 50
times = 100
text_latency = []
for i in range(times):
logid, text, latency = await test_latency(coze, bot_id, text)
text_latency.append(latency)
print(f"[latency.text] {i}, latency: {cal_latency(text_latency)} ms, log: {logid}, text: {text}")
print(f"[latency.text] {i}, latency: {cal_latency(latency, text_latency)}, log: {logid}, text: {text}")


if __name__ == "__main__":
Expand Down
45 changes: 35 additions & 10 deletions examples/benchmark_websockets_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,14 +112,35 @@ async def generate_audio(coze: AsyncCoze, text: str) -> List[bytes]:
return [data for data in content._raw_response.iter_bytes(chunk_size=1024)]


def cal_latency(current: int, latency_list: List[int]) -> str:
    """Format latency statistics (P99 / P90 / average) for a list of samples.

    Args:
        current: Latency (ms) of the most recent request, echoed back so each
            log line shows the single sample next to the running aggregate.
        latency_list: All latency samples (ms) collected so far.

    Returns:
        A summary like "P99=120ms, P90=95ms, AVG=80.50ms, CURRENT=88ms",
        or "No latency data" when there are no samples yet.
    """
    if not latency_list:
        return "No latency data"

    # Sort once; percentiles and the average all read from this.
    sorted_latency = sorted(latency_list)
    length = len(sorted_latency)

    def percentile(fraction: float) -> int:
        # Nearest-rank index, clamped into [0, length - 1] so tiny sample
        # sets (including a single sample) never index out of range.
        index = round(length * fraction) - 1
        return sorted_latency[max(0, min(index, length - 1))]

    p99 = percentile(0.99)
    p90 = percentile(0.90)
    avg = sum(sorted_latency) / length

    return f"P99={p99}ms, P90={p90}ms, AVG={avg:.2f}ms, CURRENT={current}ms"


async def test_latency(coze: AsyncCoze, bot_id: str, audios: List[bytes]) -> AsyncWebsocketsChatEventHandlerSub:
Expand Down Expand Up @@ -172,11 +193,15 @@ async def main():
asr_latency = []
for i in range(times):
handler = await test_latency(coze, bot_id, audios)
asr_latency.append(handler.conversation_audio_transcript_completed - handler.input_audio_buffer_completed_at)
text_latency.append(handler.text_first_token - handler.input_audio_buffer_completed_at)
audio_latency.append(handler.audio_first_token - handler.input_audio_buffer_completed_at)
asr = handler.conversation_audio_transcript_completed - handler.input_audio_buffer_completed_at
text = handler.text_first_token - handler.input_audio_buffer_completed_at
audio = handler.audio_first_token - handler.input_audio_buffer_completed_at

asr_latency.append(asr)
text_latency.append(text)
audio_latency.append(audio)
print(
f"[latency.ws] {i}, asr: {cal_latency(asr_latency)}, text: {cal_latency(text_latency)} ms, audio: {cal_latency(audio_latency)} ms, log: {handler.logid}"
f"[latency.ws] {i}, asr: {cal_latency(asr, asr_latency)}, text: {cal_latency(text, text_latency)}, audio: {cal_latency(audio, audio_latency)}, log: {handler.logid}"
)


Expand Down

0 comments on commit 2d8569c

Please sign in to comment.