add response_id
enyst committed Feb 22, 2025
1 parent d80c376 commit bd9fc55
Showing 3 changed files with 40 additions and 1 deletion.
openhands/llm/llm.py (2 additions, 0 deletions)
@@ -497,6 +497,7 @@ def _post_completion(self, response: ModelResponse) -> float:
      stats += 'Response Latency: %.3f seconds\n' % latest_latency.latency

      usage: Usage | None = response.get('usage')
+     response_id = response.get('id', 'unknown')

      if usage:
          # keep track of the input and output tokens
@@ -539,6 +540,7 @@ def _post_completion(self, response: ModelResponse) -> float:
      completion_tokens=completion_tokens,
      cache_read_tokens=cache_hit_tokens,
      cache_write_tokens=cache_write_tokens,
+     response_id=response_id,
  )

  # log the stats
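For context, _post_completion now reads the provider-assigned id off the completion response (falling back to 'unknown') and passes it to the metrics layer alongside the token counts. A minimal standalone sketch of that pattern follows; the plain dict and the record_usage helper are illustrative stand-ins, not the real ModelResponse or Metrics API:

# Sketch only: 'response' stands in for litellm's ModelResponse and
# record_usage stands in for Metrics.add_tokens_usage.
def record_usage(prompt_tokens: int, completion_tokens: int, response_id: str) -> None:
    print(f'{response_id}: {prompt_tokens} prompt / {completion_tokens} completion tokens')

response = {
    'id': 'chatcmpl-abc123',
    'usage': {'prompt_tokens': 12, 'completion_tokens': 3},
}

usage = response.get('usage')
response_id = response.get('id', 'unknown')  # 'unknown' when the provider omits an id

if usage:
    record_usage(
        prompt_tokens=usage.get('prompt_tokens', 0),
        completion_tokens=usage.get('completion_tokens', 0),
        response_id=response_id,
    )
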
openhands/llm/metrics.py (3 additions, 1 deletion)
@@ -25,7 +25,7 @@ class TokensUsage(BaseModel):
      completion_tokens: int
      cache_read_tokens: int
      cache_write_tokens: int
-     timestamp: float = Field(default_factory=time.time)
+     response_id: str


  class Metrics:
@@ -90,6 +90,7 @@ def add_tokens_usage(
      completion_tokens: int,
      cache_read_tokens: int,
      cache_write_tokens: int,
+     response_id: str,
  ) -> None:
      # accumulate
      self._accumulated_prompt_tokens += prompt_tokens
@@ -105,6 +106,7 @@
          completion_tokens=completion_tokens,
          cache_read_tokens=cache_read_tokens,
          cache_write_tokens=cache_write_tokens,
+         response_id=response_id,
      )
  )

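Written out as a self-contained sketch, the updated per-call record looks roughly like this; the real model lives in openhands/llm/metrics.py and may carry fields this hunk does not show, and the example values mirror the unit test below:

from pydantic import BaseModel


class TokensUsage(BaseModel):
    """Token counts for one completion call, keyed by the provider's response id."""

    prompt_tokens: int
    completion_tokens: int
    cache_read_tokens: int
    cache_write_tokens: int
    response_id: str


usage = TokensUsage(
    prompt_tokens=12,
    completion_tokens=3,
    cache_read_tokens=2,
    cache_write_tokens=5,
    response_id='test-response-usage',
)
print(usage.response_id, usage.prompt_tokens, usage.cache_write_tokens)

Tying each usage entry to the provider's response id makes a per-call record traceable back to the exact API response, which a wall-clock timestamp alone could not do.
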
tests/unit/test_llm.py (35 additions, 0 deletions; the test_llm_token_usage test below is new in full)
@@ -429,3 +429,38 @@ def test_get_token_count_error_handling(
    mock_logger.error.assert_called_once_with(
        'Error getting token count for\n model gpt-4o\nToken counting failed'
    )


@patch('openhands.llm.llm.litellm_completion')
def test_llm_token_usage(mock_litellm_completion, default_config):
    # This mock response includes usage details with prompt_tokens,
    # completion_tokens, prompt_tokens_details.cached_tokens, and model_extra.cache_creation_input_tokens
    mock_response = {
        'id': 'test-response-usage',
        'choices': [{'message': {'content': 'Usage test response'}}],
        'usage': {
            'prompt_tokens': 12,
            'completion_tokens': 3,
            'prompt_tokens_details': {'cached_tokens': 2},
            'model_extra': {'cache_creation_input_tokens': 5},
        },
    }
    mock_litellm_completion.return_value = mock_response

    llm = LLM(config=default_config)
    _ = llm.completion(messages=[{'role': 'user', 'content': 'Hello usage!'}])

    # Check that the metrics tracked these tokens
    assert llm.metrics.get()['accumulated_prompt_tokens'] == 12
    assert llm.metrics.get()['accumulated_completion_tokens'] == 3
    assert llm.metrics.get()['accumulated_cache_read_tokens'] == 2
    assert llm.metrics.get()['accumulated_cache_write_tokens'] == 5

    # Also verify tokens_usages has a single entry with the exact usage
    tokens_usage_list = llm.metrics.get()['tokens_usages']
    assert len(tokens_usage_list) == 1
    usage_entry = tokens_usage_list[0]
    assert usage_entry['prompt_tokens'] == 12
    assert usage_entry['completion_tokens'] == 3
    assert usage_entry['cache_read_tokens'] == 2
    assert usage_entry['cache_write_tokens'] == 5
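
The test pins the accumulated counters and the single tokens_usages entry. Assuming Metrics.get() serializes every TokensUsage field (which the model change above suggests), the new id could be pinned the same way; this extra assertion is hypothetical, not part of the commit:

    # Hypothetical follow-up assertion, assuming get() exposes response_id per entry:
    assert usage_entry['response_id'] == 'test-response-usage'

To run just this test with a standard pytest setup: pytest tests/unit/test_llm.py -k test_llm_token_usage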
