class BrowserOutputCondenserConfig(BaseModel):
    """Settings for the condenser that masks older browser output observations.

    Attributes are documented inline; `attention_window` is validated to be
    at least 1.
    """

    # Literal tag identifying this condenser kind; always 'browser_output_masking'.
    type: Literal['browser_output_masking'] = Field(default='browser_output_masking')

    # How many of the newest browser output observations are left unmasked.
    attention_window: int = Field(
        ge=1,
        default=1,
        description='The number of most recent browser output observations that will not be masked.',
    )
from __future__ import annotations

from openhands.core.config.condenser_config import BrowserOutputCondenserConfig
from openhands.events.event import Event
from openhands.events.observation import BrowserOutputObservation
from openhands.events.observation.agent import AgentCondensationObservation
from openhands.memory.condenser.condenser import Condenser


class BrowserOutputCondenser(Condenser):
    """A condenser that masks browser output observations outside of a recent attention window.

    Only browser outputs are masked; every other event is passed through
    untouched. Browser observations carry screenshots and accessibility trees,
    which are very large and consume many tokens, so all but the most recent
    `attention_window` of them are replaced with a short placeholder that
    keeps only the URL.
    """

    def __init__(self, attention_window: int = 1):
        """Create the condenser.

        Args:
            attention_window: Number of most recent browser output
                observations to keep unmasked.
        """
        self.attention_window = attention_window
        super().__init__()

    def condense(self, events: list[Event]) -> list[Event]:
        """Replace the content of browser observations outside of the attention window with a placeholder.

        Args:
            events: The event history, oldest first.

        Returns:
            A new list of the same length and order. Browser output
            observations older than the newest `attention_window` ones are
            replaced by an `AgentCondensationObservation`; all other events
            are the original objects.
        """
        results: list[Event] = []
        browser_outputs_seen = 0

        # Walk newest-to-oldest so the most recent browser outputs are the
        # ones that fall inside the attention window.
        for event in reversed(events):
            if not isinstance(event, BrowserOutputObservation):
                results.append(event)
                continue

            if browser_outputs_seen < self.attention_window:
                results.append(event)
            else:
                results.append(
                    AgentCondensationObservation(
                        f'Current URL: {event.url}\nContent Omitted'
                    )
                )
            browser_outputs_seen += 1

        # Restore chronological order.
        results.reverse()
        return results

    @classmethod
    def from_config(
        cls, config: BrowserOutputCondenserConfig
    ) -> BrowserOutputCondenser:
        """Build a condenser from its config.

        Every config field except `type` is forwarded as a keyword argument
        to the constructor. `cls` is used rather than the hard-coded class
        name so that subclasses get an instance of their own type.
        """
        return cls(**config.model_dump(exclude={'type'}))


BrowserOutputCondenser.register_config(BrowserOutputCondenserConfig)
def test_browser_output_condenser_from_config():
    """Test that BrowserOutputCondenser objects can be made from config."""
    attention_window = 5
    config = BrowserOutputCondenserConfig(attention_window=attention_window)
    condenser = Condenser.from_config(config)

    assert isinstance(condenser, BrowserOutputCondenser)
    assert condenser.attention_window == attention_window


def test_browser_output_condenser_respects_attention_window(mock_state):
    """Test that BrowserOutputCondenser only masks events outside the attention window."""
    attention_window = 3
    condenser = BrowserOutputCondenser(attention_window=attention_window)

    events = [
        BrowserOutputObservation('Observation 1', url='', trigger_by_action=''),
        BrowserOutputObservation('Observation 2', url='', trigger_by_action=''),
        create_test_event('Event 3'),
        create_test_event('Event 4'),
        BrowserOutputObservation('Observation 3', url='', trigger_by_action=''),
        BrowserOutputObservation('Observation 4', url='', trigger_by_action=''),
    ]

    mock_state.history = events
    result = condenser.condensed_history(mock_state)

    assert len(result) == len(events)

    # Count of browser outputs from the current position to the end of the
    # history (inclusive). Derived from the fixture so the test stays correct
    # if the event list above is edited.
    remaining = sum(isinstance(event, BrowserOutputObservation) for event in events)

    for event, condensed_event in zip(events, result):
        if not isinstance(event, BrowserOutputObservation):
            # Non-browser events must pass through unchanged.
            assert event == condensed_event
            continue

        if remaining > attention_window:
            # Older than the newest `attention_window` browser outputs: masked.
            assert isinstance(condensed_event, AgentCondensationObservation)
            assert 'Content Omitted' in str(condensed_event)
        else:
            assert event == condensed_event
        remaining -= 1