track used tokens
enyst committed Feb 22, 2025
1 parent 70b21d1 commit d80c376
Showing 2 changed files with 74 additions and 9 deletions.
27 changes: 18 additions & 9 deletions openhands/llm/llm.py
@@ -500,17 +500,17 @@ def _post_completion(self, response: ModelResponse) -> float:

         if usage:
             # keep track of the input and output tokens
-            input_tokens = usage.get('prompt_tokens')
-            output_tokens = usage.get('completion_tokens')
+            prompt_tokens = usage.get('prompt_tokens', 0)
+            completion_tokens = usage.get('completion_tokens', 0)
 
-            if input_tokens:
-                stats += 'Input tokens: ' + str(input_tokens)
+            if prompt_tokens:
+                stats += 'Input tokens: ' + str(prompt_tokens)
 
-            if output_tokens:
+            if completion_tokens:
                 stats += (
-                    (' | ' if input_tokens else '')
+                    (' | ' if prompt_tokens else '')
                     + 'Output tokens: '
-                    + str(output_tokens)
+                    + str(completion_tokens)
                     + '\n'
                 )

@@ -519,7 +519,7 @@ def _post_completion(self, response: ModelResponse) -> float:
                 'prompt_tokens_details'
             )
             cache_hit_tokens = (
-                prompt_tokens_details.cached_tokens if prompt_tokens_details else None
+                prompt_tokens_details.cached_tokens if prompt_tokens_details else 0
             )
             if cache_hit_tokens:
                 stats += 'Input tokens (cache hit): ' + str(cache_hit_tokens) + '\n'
@@ -528,10 +528,19 @@ def _post_completion(self, response: ModelResponse) -> float:
             # but litellm doesn't separate them in the usage stats
             # so we can read it from the provider-specific extra field
             model_extra = usage.get('model_extra', {})
-            cache_write_tokens = model_extra.get('cache_creation_input_tokens')
+            cache_write_tokens = model_extra.get('cache_creation_input_tokens', 0)
             if cache_write_tokens:
                 stats += 'Input tokens (cache write): ' + str(cache_write_tokens) + '\n'
 
+            # Record in metrics
+            # We'll treat cache_hit_tokens as "cache read" and cache_write_tokens as "cache write"
+            self.metrics.add_tokens_usage(
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                cache_read_tokens=cache_hit_tokens,
+                cache_write_tokens=cache_write_tokens,
+            )
+
         # log the stats
         if stats:
             logger.debug(stats)
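For reference, a minimal, self-contained sketch of how the four counters recorded above map onto a litellm-style usage payload. The fields mirror those read in the diff (prompt_tokens, completion_tokens, prompt_tokens_details.cached_tokens, model_extra['cache_creation_input_tokens']); the numbers and the SimpleNamespace stand-in are invented for illustration and are not part of the commit.

# Illustrative sketch only: a fake usage payload and the same extraction logic as in the diff above.
from types import SimpleNamespace

usage = {
    'prompt_tokens': 1200,
    'completion_tokens': 350,
    'prompt_tokens_details': SimpleNamespace(cached_tokens=800),  # cache read
    'model_extra': {'cache_creation_input_tokens': 400},  # Anthropic-specific cache write
}

prompt_tokens = usage.get('prompt_tokens', 0)
completion_tokens = usage.get('completion_tokens', 0)

prompt_tokens_details = usage.get('prompt_tokens_details')
cache_hit_tokens = (
    prompt_tokens_details.cached_tokens if prompt_tokens_details else 0
)
cache_write_tokens = usage.get('model_extra', {}).get('cache_creation_input_tokens', 0)

print(prompt_tokens, completion_tokens, cache_hit_tokens, cache_write_tokens)
# -> 1200 350 800 400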
56 changes: 56 additions & 0 deletions openhands/llm/metrics.py
@@ -17,18 +17,35 @@ class ResponseLatency(BaseModel):
     response_id: str
 
 
+class TokensUsage(BaseModel):
+    """Metric tracking detailed token usage per completion call."""
+
+    model: str
+    prompt_tokens: int
+    completion_tokens: int
+    cache_read_tokens: int
+    cache_write_tokens: int
+    timestamp: float = Field(default_factory=time.time)
+
+
 class Metrics:
     """Metrics class can record various metrics during running and evaluation.
     Currently, we define the following metrics:
     accumulated_cost: the total cost (USD $) of the current LLM.
     response_latency: the time taken for each LLM completion call.
+    accrued token usage: the total tokens used across all completions.
     """
 
     def __init__(self, model_name: str = 'default') -> None:
         self._accumulated_cost: float = 0.0
         self._costs: list[Cost] = []
         self._response_latencies: list[ResponseLatency] = []
         self.model_name = model_name
+        self._accumulated_prompt_tokens = 0
+        self._accumulated_completion_tokens = 0
+        self._accumulated_cache_read_tokens = 0
+        self._accumulated_cache_write_tokens = 0
+        self._tokens_usages: list[TokensUsage] = []
 
     @property
     def accumulated_cost(self) -> float:
@@ -67,16 +84,50 @@ def add_response_latency(self, value: float, response_id: str) -> None:
             )
         )
 
+    def add_tokens_usage(
+        self,
+        prompt_tokens: int,
+        completion_tokens: int,
+        cache_read_tokens: int,
+        cache_write_tokens: int,
+    ) -> None:
+        # accumulate
+        self._accumulated_prompt_tokens += prompt_tokens
+        self._accumulated_completion_tokens += completion_tokens
+        self._accumulated_cache_read_tokens += cache_read_tokens
+        self._accumulated_cache_write_tokens += cache_write_tokens
+
+        # record this individual usage
+        self._tokens_usages.append(
+            TokensUsage(
+                model=self.model_name,
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                cache_read_tokens=cache_read_tokens,
+                cache_write_tokens=cache_write_tokens,
+            )
+        )
+
     def merge(self, other: 'Metrics') -> None:
         self._accumulated_cost += other.accumulated_cost
         self._costs += other._costs
         self._response_latencies += other._response_latencies
+        self._accumulated_prompt_tokens += other._accumulated_prompt_tokens
+        self._accumulated_completion_tokens += other._accumulated_completion_tokens
+        self._accumulated_cache_read_tokens += other._accumulated_cache_read_tokens
+        self._accumulated_cache_write_tokens += other._accumulated_cache_write_tokens
+        self._tokens_usages += other._tokens_usages
 
     def get(self) -> dict:
         """Return the metrics in a dictionary."""
         return {
             'accumulated_cost': self._accumulated_cost,
             'costs': [cost.model_dump() for cost in self._costs],
+            'accumulated_prompt_tokens': self._accumulated_prompt_tokens,
+            'accumulated_completion_tokens': self._accumulated_completion_tokens,
+            'accumulated_cache_read_tokens': self._accumulated_cache_read_tokens,
+            'accumulated_cache_write_tokens': self._accumulated_cache_write_tokens,
+            'tokens_usages': [usage.model_dump() for usage in self._tokens_usages],
             'response_latencies': [
                 latency.model_dump() for latency in self._response_latencies
             ],
@@ -86,6 +137,11 @@ def reset(self):
         self._accumulated_cost = 0.0
         self._costs = []
         self._response_latencies = []
+        self._accumulated_prompt_tokens = 0
+        self._accumulated_completion_tokens = 0
+        self._accumulated_cache_read_tokens = 0
+        self._accumulated_cache_write_tokens = 0
+        self._tokens_usages = []
 
     def log(self):
         """Log the metrics."""
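A rough usage sketch of the new metrics API, assuming the import path openhands.llm.metrics from the changed file; the model name and token counts below are invented for illustration.

# Sketch only: exercise Metrics.add_tokens_usage and get() as added in this commit.
from openhands.llm.metrics import Metrics

metrics = Metrics(model_name='some-model')  # hypothetical model name
metrics.add_tokens_usage(
    prompt_tokens=1200,
    completion_tokens=350,
    cache_read_tokens=800,
    cache_write_tokens=400,
)
metrics.add_tokens_usage(
    prompt_tokens=900,
    completion_tokens=120,
    cache_read_tokens=0,
    cache_write_tokens=0,
)

snapshot = metrics.get()
# snapshot['accumulated_prompt_tokens'] == 2100
# snapshot['accumulated_completion_tokens'] == 470
# snapshot['tokens_usages'] holds one TokensUsage dump per call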
