clean up
enyst committed Feb 22, 2025
1 parent c59abb5 commit dba25f5
Showing 2 changed files with 28 additions and 67 deletions.
35 changes: 7 additions & 28 deletions openhands/llm/metrics.py
@@ -30,21 +30,17 @@ class TokensUsage(BaseModel):
 
 class Metrics:
     """Metrics class can record various metrics during running and evaluation.
-    Currently, we define the following metrics:
-      accumulated_cost: the total cost (USD $) of the current LLM.
-      response_latency: the time taken for each LLM completion call.
-      accrued token usage: the total tokens used across all completions.
+    We track:
+      - accumulated_cost and costs
+      - A list of ResponseLatency
+      - A list of TokensUsage (one per call).
     """
 
     def __init__(self, model_name: str = 'default') -> None:
         self._accumulated_cost: float = 0.0
         self._costs: list[Cost] = []
         self._response_latencies: list[ResponseLatency] = []
         self.model_name = model_name
-        self._accumulated_prompt_tokens = 0
-        self._accumulated_completion_tokens = 0
-        self._accumulated_cache_read_tokens = 0
-        self._accumulated_cache_write_tokens = 0
         self._tokens_usages: list[TokensUsage] = []
 
     @property
@@ -92,13 +88,7 @@ def add_tokens_usage(
         cache_write_tokens: int,
         response_id: str,
     ) -> None:
-        # accumulate
-        self._accumulated_prompt_tokens += prompt_tokens
-        self._accumulated_completion_tokens += completion_tokens
-        self._accumulated_cache_read_tokens += cache_read_tokens
-        self._accumulated_cache_write_tokens += cache_write_tokens
-
-        # record this individual usage
+        """Add a single usage record."""
         self._tokens_usages.append(
             TokensUsage(
                 model=self.model_name,
@@ -111,38 +101,27 @@ def add_tokens_usage(
         )
 
     def merge(self, other: 'Metrics') -> None:
+        """Merge 'other' metrics into this one."""
         self._accumulated_cost += other.accumulated_cost
         self._costs += other._costs
         self._response_latencies += other._response_latencies
-        self._accumulated_prompt_tokens += other._accumulated_prompt_tokens
-        self._accumulated_completion_tokens += other._accumulated_completion_tokens
-        self._accumulated_cache_read_tokens += other._accumulated_cache_read_tokens
-        self._accumulated_cache_write_tokens += other._accumulated_cache_write_tokens
         self._tokens_usages += other._tokens_usages
 
     def get(self) -> dict:
         """Return the metrics in a dictionary."""
         return {
             'accumulated_cost': self._accumulated_cost,
             'costs': [cost.model_dump() for cost in self._costs],
-            'accumulated_prompt_tokens': self._accumulated_prompt_tokens,
-            'accumulated_completion_tokens': self._accumulated_completion_tokens,
-            'accumulated_cache_read_tokens': self._accumulated_cache_read_tokens,
-            'accumulated_cache_write_tokens': self._accumulated_cache_write_tokens,
-            'tokens_usages': [usage.model_dump() for usage in self._tokens_usages],
             'response_latencies': [
                 latency.model_dump() for latency in self._response_latencies
             ],
+            'tokens_usages': [usage.model_dump() for usage in self._tokens_usages],
         }
 
     def reset(self):
         self._accumulated_cost = 0.0
         self._costs = []
         self._response_latencies = []
-        self._accumulated_prompt_tokens = 0
-        self._accumulated_completion_tokens = 0
-        self._accumulated_cache_read_tokens = 0
-        self._accumulated_cache_write_tokens = 0
         self._tokens_usages = []
 
     def log(self):
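With the accumulated_* counters removed, the per-call TokensUsage records are the single source of truth for token counts. A minimal sketch of how a caller could still derive the old totals from Metrics.get(), assuming Metrics as shown in the diff above (total_tokens is a hypothetical helper, not part of this commit):

def total_tokens(metrics) -> dict[str, int]:
    """Sum per-call usage records into accumulated totals (hypothetical helper)."""
    totals = {
        'prompt_tokens': 0,
        'completion_tokens': 0,
        'cache_read_tokens': 0,
        'cache_write_tokens': 0,
    }
    # Each entry in 'tokens_usages' is a TokensUsage.model_dump() dict
    # carrying the four token fields asserted in the tests below.
    for usage in metrics.get()['tokens_usages']:
        for key in totals:
            totals[key] += usage[key]
    return totals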
60 changes: 21 additions & 39 deletions tests/unit/test_llm.py
@@ -463,46 +463,28 @@ def test_llm_token_usage(mock_litellm_completion, default_config):
 
     llm = LLM(config=default_config)
 
-    # First call: usage_1
-    _ = llm.completion(messages=[{'role': 'user', 'content': 'Hello usage!'}])
+    # First call
+    llm.completion(messages=[{'role': 'user', 'content': 'Hello usage!'}])
 
-    # Check that the metrics tracked these tokens for the first response
-    assert llm.metrics.get()['accumulated_prompt_tokens'] == 12
-    assert llm.metrics.get()['accumulated_completion_tokens'] == 3
-    assert llm.metrics.get()['accumulated_cache_read_tokens'] == 2
-    assert llm.metrics.get()['accumulated_cache_write_tokens'] == 5
-
-    # Also verify tokens_usages has a single entry with the exact usage
+    # Verify we have exactly one usage record after first call
     tokens_usage_list = llm.metrics.get()['tokens_usages']
     assert len(tokens_usage_list) == 1
-    usage_entry = tokens_usage_list[0]
-    assert usage_entry['prompt_tokens'] == 12
-    assert usage_entry['completion_tokens'] == 3
-    assert usage_entry['cache_read_tokens'] == 2
-    assert usage_entry['cache_write_tokens'] == 5
-    # Check the response_id
-    assert usage_entry['response_id'] == 'test-response-usage'
-
-    # Second call: usage_2
-    _ = llm.completion(messages=[{'role': 'user', 'content': 'Hello again!'}])
-
-    # Now check accumulated totals
-    metrics_dict = llm.metrics.get()
-    # Prompt tokens = 12 + 7 = 19
-    assert metrics_dict['accumulated_prompt_tokens'] == 19
-    # Completion tokens = 3 + 2 = 5
-    assert metrics_dict['accumulated_completion_tokens'] == 5
-    # Cache read = 2 + 1 = 3
-    assert metrics_dict['accumulated_cache_read_tokens'] == 3
-    # Cache write = 5 + 3 = 8
-    assert metrics_dict['accumulated_cache_write_tokens'] == 8
-
-    # Also verify we have two usage records now
-    tokens_usage_list = metrics_dict['tokens_usages']
+    usage_entry_1 = tokens_usage_list[0]
+    assert usage_entry_1['prompt_tokens'] == 12
+    assert usage_entry_1['completion_tokens'] == 3
+    assert usage_entry_1['cache_read_tokens'] == 2
+    assert usage_entry_1['cache_write_tokens'] == 5
+    assert usage_entry_1['response_id'] == 'test-response-usage'
+
+    # Second call
+    llm.completion(messages=[{'role': 'user', 'content': 'Hello again!'}])
+
+    # Now we expect two usage records total
+    tokens_usage_list = llm.metrics.get()['tokens_usages']
     assert len(tokens_usage_list) == 2
-    latest_entry = tokens_usage_list[-1]
-    assert latest_entry['prompt_tokens'] == 7
-    assert latest_entry['completion_tokens'] == 2
-    assert latest_entry['cache_read_tokens'] == 1
-    assert latest_entry['cache_write_tokens'] == 3
-    assert latest_entry['response_id'] == 'test-response-usage-2'
+    usage_entry_2 = tokens_usage_list[-1]
+    assert usage_entry_2['prompt_tokens'] == 7
+    assert usage_entry_2['completion_tokens'] == 2
+    assert usage_entry_2['cache_read_tokens'] == 1
+    assert usage_entry_2['cache_write_tokens'] == 3
+    assert usage_entry_2['response_id'] == 'test-response-usage-2'
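For reference, the reshaped API can also be exercised directly on Metrics, without going through LLM.completion. A short sketch, assuming the Metrics class and add_tokens_usage signature shown in the diff above (the model name and response_id values are made up for illustration):

from openhands.llm.metrics import Metrics

metrics = Metrics(model_name='example-model')
metrics.add_tokens_usage(
    prompt_tokens=12,
    completion_tokens=3,
    cache_read_tokens=2,
    cache_write_tokens=5,
    response_id='example-response-1',
)

# get() now exposes only the per-call records; totals are derived, not stored.
data = metrics.get()
assert len(data['tokens_usages']) == 1
assert data['tokens_usages'][0]['prompt_tokens'] == 12
assert data['tokens_usages'][0]['response_id'] == 'example-response-1'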
