forked from microsoft/AIOpsLab
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathquantitative.py
63 lines (39 loc) · 2.13 KB
/
quantitative.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Helper functions for quantiative evaluation of solutions."""
import tiktoken
from aiopslab.session import SessionItem
# Constants
# Name of the model whose tiktoken encoding is used for the token counts below.
token_model = "gpt-3.5-turbo"
# Encoding object looked up once at import time and shared by the
# token-counting helpers in this module.
tokenizer = tiktoken.encoding_for_model(token_model)
def num_steps_taken(trace: list[SessionItem]) -> int:
    """Count the items in the trace whose role is "assistant".

    Args:
        trace: Session items to inspect.

    Returns:
        How many items have ``role == "assistant"``.
    """
    assistant_turns = 0
    for entry in trace:
        if entry.role == "assistant":
            assistant_turns += 1
    return assistant_turns
def out_tokens(trace: list[SessionItem]) -> int:
    """Return the (approx) total token count of the agent's output.

    The content of every item whose role is "assistant" is concatenated
    and encoded with the module-level ``tokenizer``.

    Args:
        trace: Session items to inspect.

    Returns:
        Number of tokens in the concatenated assistant content.
    """
    # NOTE: not dollar value, since depends on Agent's model
    agent_steps = "".join(item.content for item in trace if item.role == "assistant")
    # Agent output is arbitrary text and may contain special-token markers
    # such as "<|endoftext|>". By default tiktoken raises ValueError on
    # those; disallowed_special=() encodes them as ordinary text instead.
    return len(tokenizer.encode(agent_steps, disallowed_special=()))
def in_tokens(trace: list[SessionItem]) -> int:
    """Return the (approx) total token count of the env's input.

    The content of every item whose role is not "assistant" is
    concatenated and encoded with the module-level ``tokenizer``.

    Args:
        trace: Session items to inspect.

    Returns:
        Number of tokens in the concatenated non-assistant content.
    """
    # NOTE: not dollar value, since depends on Agent's model
    user_steps = "".join(item.content for item in trace if item.role != "assistant")
    # Environment output (logs, command results) is arbitrary text and may
    # contain special-token markers such as "<|endoftext|>". By default
    # tiktoken raises ValueError on those; disallowed_special=() encodes
    # them as ordinary text instead.
    return len(tokenizer.encode(user_steps, disallowed_special=()))
def is_exact_match(pred: int | str | list, target: int | str | list) -> bool:
"""Return True if the prediction is an exact match to the target."""
return pred == target
def is_exact_match_lower(pred: str, target: str) -> bool:
    """Report whether the prediction equals the target after normalisation.

    Both strings have surrounding whitespace stripped and are lowercased
    before being compared.

    Args:
        pred: Predicted string.
        target: Expected string.

    Returns:
        True if the normalised strings are equal, otherwise False.
    """
    normalised_pred = pred.strip().lower()
    normalised_target = target.strip().lower()
    return normalised_pred == normalised_target
def is_in_range(pred: int | float, target: int | float, tolerance: float) -> bool:
"""Return True if the prediction is within the target range."""
return target - tolerance <= pred <= target + tolerance
def is_subset(pred: list, target: list) -> bool:
    """Report whether every predicted element also appears in the target.

    Both lists are converted to sets first, so order and duplicates are
    ignored.

    Args:
        pred: Predicted elements.
        target: Expected elements.

    Returns:
        True if the set of ``pred`` is a subset of the set of ``target``.
    """
    predicted = set(pred)
    expected = set(target)
    return predicted <= expected
def is_superset(pred: list, target: list) -> bool:
    """Report whether the prediction contains every element of the target.

    Both lists are converted to sets first, so order and duplicates are
    ignored.

    Args:
        pred: Predicted elements.
        target: Expected elements.

    Returns:
        True if the set of ``pred`` is a superset of the set of ``target``.
    """
    predicted = set(pred)
    expected = set(target)
    return predicted >= expected
# TODO: once observability is set up, use metrics, traces, logs,
# and wrk2's logs to also observe the (side) effects of agents' actions,
# e.g., latency, throughput, etc.