Apply ruff formatting
openhands-agent committed Feb 24, 2025
1 parent c90a5f2 commit e7b260f
Showing 12 changed files with 381 additions and 52 deletions.
14 changes: 11 additions & 3 deletions openhands/core/config/condenser_config.py
@@ -26,8 +26,10 @@ class RecentEventsCondenserConfig(BaseModel):
     """Configuration for RecentEventsCondenser."""
 
     type: Literal['recent'] = Field('recent')
+
+    # at least one event by default, because the best guess is that it is the user task
     keep_first: int = Field(
-        default=0,
+        default=1,
         description='The number of initial events to condense.',
         ge=0,
     )
@@ -43,6 +45,8 @@ class LLMSummarizingCondenserConfig(BaseModel):
     llm_config: LLMConfig = Field(
         ..., description='Configuration for the LLM to use for condensing.'
     )
+
+    # at least one event by default, because the best guess is that it's the user task
     keep_first: int = Field(
         default=1,
         description='The number of initial events to condense.',
@@ -62,8 +66,10 @@ class AmortizedForgettingCondenserConfig(BaseModel):
         description='Maximum size of the condensed history before triggering forgetting.',
         ge=2,
     )
+
+    # at least one event by default, because the best guess is that it's the user task
     keep_first: int = Field(
-        default=0,
+        default=1,
         description='Number of initial events to always keep in history.',
         ge=0,
     )
@@ -81,8 +87,10 @@ class LLMAttentionCondenserConfig(BaseModel):
         description='Maximum size of the condensed history before triggering forgetting.',
         ge=2,
     )
+
+    # at least one event by default, because the best guess is that it's the user task
     keep_first: int = Field(
-        default=0,
+        default=1,
         description='Number of initial events to always keep in history.',
         ge=0,
    )
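With this change the condenser configs keep at least the first event by default, on the assumption that it holds the original user task. A minimal sketch of the effect (assuming the config classes above are importable as shown and their remaining fields keep their defaults):

```python
from openhands.core.config.condenser_config import RecentEventsCondenserConfig

config = RecentEventsCondenserConfig()  # type='recent'
assert config.keep_first == 1           # the first event (likely the user task) is now kept by default

# The previous behavior is still available explicitly:
no_keep = RecentEventsCondenserConfig(keep_first=0)
```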
45 changes: 45 additions & 0 deletions openhands/core/message_utils.py
@@ -29,6 +29,7 @@
 from openhands.events.observation.error import ErrorObservation
 from openhands.events.observation.observation import Observation
 from openhands.events.serialization.event import truncate_content
+from openhands.llm.metrics import Metrics, TokenUsage


def events_to_messages(
@@ -362,3 +363,47 @@ def apply_prompt_caching(messages: list[Message]) -> None:
                 -1
             ].cache_prompt = True  # Last item inside the message content
             break
+
+
+def get_token_usage_for_event(event: Event, metrics: Metrics) -> TokenUsage | None:
+    """
+    Returns at most one token usage record for the `model_response.id` in this event's
+    `tool_call_metadata`.
+    If no response_id is found, or none match in metrics.token_usages, returns None.
+    """
+    if event.tool_call_metadata and event.tool_call_metadata.model_response:
+        response_id = event.tool_call_metadata.model_response.get('id')
+        if response_id:
+            return next(
+                (
+                    usage
+                    for usage in metrics.token_usages
+                    if usage.response_id == response_id
+                ),
+                None,
+            )
+    return None
+
+
+def get_token_usage_for_event_id(
+    events: list[Event], event_id: int, metrics: Metrics
+) -> TokenUsage | None:
+    """
+    Starting from the event with .id == event_id and moving backwards in `events`,
+    find the first TokenUsage record (if any) associated with a response_id from
+    tool_call_metadata.model_response.id.
+    Returns the first match found, or None if none is found.
+    """
+    # find the index of the event with the given id
+    idx = next((i for i, e in enumerate(events) if e.id == event_id), None)
+    if idx is None:
+        return None
+
+    # search backward from idx down to 0
+    for i in range(idx, -1, -1):
+        usage = get_token_usage_for_event(events[i], metrics)
+        if usage is not None:
+            return usage
+    return None
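The two helpers above look up per-call token usage by matching `tool_call_metadata.model_response.id` against `Metrics.token_usages`. A rough usage sketch; the `events` list, `metrics` object, and the event id are illustrative and assumed to come from an existing agent session:

```python
from openhands.core.message_utils import get_token_usage_for_event_id

# `events` and `metrics` are assumed to come from an existing agent session.
usage = get_token_usage_for_event_id(events, event_id=42, metrics=metrics)
if usage is not None:
    print(usage.prompt_tokens, usage.completion_tokens, usage.cache_read_tokens)
else:
    print('No token usage recorded for this event or any earlier one.')
```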
29 changes: 20 additions & 9 deletions openhands/llm/llm.py
@@ -497,20 +497,21 @@ def _post_completion(self, response: ModelResponse) -> float:
         stats += 'Response Latency: %.3f seconds\n' % latest_latency.latency
 
         usage: Usage | None = response.get('usage')
+        response_id = response.get('id', 'unknown')
 
         if usage:
             # keep track of the input and output tokens
-            input_tokens = usage.get('prompt_tokens')
-            output_tokens = usage.get('completion_tokens')
+            prompt_tokens = usage.get('prompt_tokens', 0)
+            completion_tokens = usage.get('completion_tokens', 0)
 
-            if input_tokens:
-                stats += 'Input tokens: ' + str(input_tokens)
+            if prompt_tokens:
+                stats += 'Input tokens: ' + str(prompt_tokens)
 
-            if output_tokens:
+            if completion_tokens:
                 stats += (
-                    (' | ' if input_tokens else '')
+                    (' | ' if prompt_tokens else '')
                     + 'Output tokens: '
-                    + str(output_tokens)
+                    + str(completion_tokens)
                     + '\n'
                 )
 
@@ -519,7 +520,7 @@ def _post_completion(self, response: ModelResponse) -> float:
                 'prompt_tokens_details'
             )
             cache_hit_tokens = (
-                prompt_tokens_details.cached_tokens if prompt_tokens_details else None
+                prompt_tokens_details.cached_tokens if prompt_tokens_details else 0
             )
             if cache_hit_tokens:
                 stats += 'Input tokens (cache hit): ' + str(cache_hit_tokens) + '\n'
@@ -528,10 +529,20 @@ def _post_completion(self, response: ModelResponse) -> float:
             # but litellm doesn't separate them in the usage stats
             # so we can read it from the provider-specific extra field
             model_extra = usage.get('model_extra', {})
-            cache_write_tokens = model_extra.get('cache_creation_input_tokens')
+            cache_write_tokens = model_extra.get('cache_creation_input_tokens', 0)
             if cache_write_tokens:
                 stats += 'Input tokens (cache write): ' + str(cache_write_tokens) + '\n'
+
+            # Record in metrics
+            # We'll treat cache_hit_tokens as "cache read" and cache_write_tokens as "cache write"
+            self.metrics.add_token_usage(
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                cache_read_tokens=cache_hit_tokens,
+                cache_write_tokens=cache_write_tokens,
+                response_id=response_id,
+            )
 
         # log the stats
         if stats:
             logger.debug(stats)
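After this change every completion call records one token-usage entry on the LLM's metrics, keyed by the litellm response id. A minimal read-back sketch; `llm` is assumed to be an existing openhands `LLM` instance that has completed at least one call:

```python
# Assumes `llm` is an existing openhands LLM instance with at least one completed call.
if llm.metrics.token_usages:
    last = llm.metrics.token_usages[-1]
    print(
        f'{last.response_id}: {last.prompt_tokens} prompt, '
        f'{last.completion_tokens} completion, '
        f'{last.cache_read_tokens} cache read, {last.cache_write_tokens} cache write'
    )
```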
56 changes: 52 additions & 4 deletions openhands/llm/metrics.py
@@ -17,18 +17,31 @@ class ResponseLatency(BaseModel):
     response_id: str
 
 
+class TokenUsage(BaseModel):
+    """Metric tracking detailed token usage per completion call."""
+
+    model: str
+    prompt_tokens: int
+    completion_tokens: int
+    cache_read_tokens: int
+    cache_write_tokens: int
+    response_id: str
+
+
 class Metrics:
     """Metrics class can record various metrics during running and evaluation.
-    Currently, we define the following metrics:
-    accumulated_cost: the total cost (USD $) of the current LLM.
-    response_latency: the time taken for each LLM completion call.
+    We track:
+      - accumulated_cost and costs
+      - A list of ResponseLatency
+      - A list of TokenUsage (one per call).
     """
 
     def __init__(self, model_name: str = 'default') -> None:
         self._accumulated_cost: float = 0.0
         self._costs: list[Cost] = []
         self._response_latencies: list[ResponseLatency] = []
         self.model_name = model_name
+        self._token_usages: list[TokenUsage] = []
 
     @property
     def accumulated_cost(self) -> float:
@@ -54,6 +67,16 @@ def response_latencies(self) -> list[ResponseLatency]:
     def response_latencies(self, value: list[ResponseLatency]) -> None:
         self._response_latencies = value
 
+    @property
+    def token_usages(self) -> list[TokenUsage]:
+        if not hasattr(self, '_token_usages'):
+            self._token_usages = []
+        return self._token_usages
+
+    @token_usages.setter
+    def token_usages(self, value: list[TokenUsage]) -> None:
+        self._token_usages = value
+
     def add_cost(self, value: float) -> None:
         if value < 0:
             raise ValueError('Added cost cannot be negative.')
@@ -67,10 +90,33 @@ def add_response_latency(self, value: float, response_id: str) -> None:
             )
         )
 
+    def add_token_usage(
+        self,
+        prompt_tokens: int,
+        completion_tokens: int,
+        cache_read_tokens: int,
+        cache_write_tokens: int,
+        response_id: str,
+    ) -> None:
+        """Add a single usage record."""
+        self._token_usages.append(
+            TokenUsage(
+                model=self.model_name,
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                cache_read_tokens=cache_read_tokens,
+                cache_write_tokens=cache_write_tokens,
+                response_id=response_id,
+            )
+        )
+
     def merge(self, other: 'Metrics') -> None:
         """Merge 'other' metrics into this one."""
         self._accumulated_cost += other.accumulated_cost
         self._costs += other._costs
-        self._response_latencies += other._response_latencies
+        # use the property so older pickled objects that lack the field won't crash
+        self.token_usages += other.token_usages
+        self.response_latencies += other.response_latencies
 
     def get(self) -> dict:
         """Return the metrics in a dictionary."""
@@ -80,12 +126,14 @@ def get(self) -> dict:
             'response_latencies': [
                 latency.model_dump() for latency in self._response_latencies
             ],
+            'token_usages': [usage.model_dump() for usage in self._token_usages],
         }
 
     def reset(self):
         self._accumulated_cost = 0.0
         self._costs = []
         self._response_latencies = []
+        self._token_usages = []
 
     def log(self):
         """Log the metrics."""
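A short usage sketch of the extended `Metrics` surface defined above: recording a usage entry, merging two instances, and serializing. The model name, token counts, and response ids are illustrative:

```python
from openhands.llm.metrics import Metrics

m1 = Metrics(model_name='gpt-4o')  # illustrative model name
m1.add_token_usage(
    prompt_tokens=1200,
    completion_tokens=300,
    cache_read_tokens=1000,
    cache_write_tokens=0,
    response_id='resp-123',        # illustrative response id
)

m2 = Metrics(model_name='gpt-4o')
m2.add_token_usage(
    prompt_tokens=500,
    completion_tokens=80,
    cache_read_tokens=0,
    cache_write_tokens=400,
    response_id='resp-124',
)

m1.merge(m2)                       # appends m2's usage records via the property
print(m1.get()['token_usages'])    # two serialized TokenUsage dicts
```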
2 changes: 1 addition & 1 deletion openhands/memory/condenser/impl/llm_attention_condenser.py
@@ -18,7 +18,7 @@ class ImportantEventSelection(BaseModel):
 class LLMAttentionCondenser(RollingCondenser):
     """Rolling condenser strategy that uses an LLM to select the most important events when condensing the history."""
 
-    def __init__(self, llm: LLM, max_size: int = 100, keep_first: int = 0):
+    def __init__(self, llm: LLM, max_size: int = 100, keep_first: int = 1):
         if keep_first >= max_size // 2:
             raise ValueError(
                 f'keep_first ({keep_first}) must be less than half of max_size ({max_size})'
2 changes: 1 addition & 1 deletion openhands/memory/condenser/impl/recent_events_condenser.py
@@ -8,7 +8,7 @@
 class RecentEventsCondenser(Condenser):
     """A condenser that only keeps a certain number of the most recent events."""
 
-    def __init__(self, keep_first: int = 0, max_events: int = 10):
+    def __init__(self, keep_first: int = 1, max_events: int = 10):
         self.keep_first = keep_first
         self.max_events = max_events

2 changes: 0 additions & 2 deletions openhands/server/routes/github.py
@@ -1,5 +1,3 @@
-from typing import Union
-
 from fastapi import APIRouter, Depends, status
 from fastapi.responses import JSONResponse
 from pydantic import SecretStr
8 changes: 1 addition & 7 deletions openhands/server/routes/settings.py
@@ -1,8 +1,5 @@
-from typing import Annotated, cast
-
-from fastapi import APIRouter, Depends, Request, Response, status
+from fastapi import APIRouter, Request, Response, status
 from fastapi.responses import JSONResponse
-from fastapi.routing import APIRoute
 from pydantic import SecretStr
 
 from openhands.core.logger import openhands_logger as logger
@@ -144,9 +141,6 @@ async def store_settings(
     )
 
 
-
-
-
 def convert_to_settings(settings_with_token_data: POSTSettingsModel) -> Settings:
     """Convert POSTSettingsModel to Settings.