From abac25cc4cef80409f7e98e73b90ead77888aab3 Mon Sep 17 00:00:00 2001
From: Engel Nyst
Date: Sun, 23 Feb 2025 17:20:31 +0100
Subject: [PATCH] Keep the first user message by default in condensers (#6888)

---
 openhands/core/config/condenser_config.py         | 14 ++++-
 .../condenser/impl/llm_attention_condenser.py     |  2 +-
 .../condenser/impl/recent_events_condenser.py     |  2 +-
 tests/unit/test_condenser.py                      | 55 +++++++++++++++----
 4 files changed, 57 insertions(+), 16 deletions(-)

diff --git a/openhands/core/config/condenser_config.py b/openhands/core/config/condenser_config.py
index 926bd1f383a6..aca1d090d79e 100644
--- a/openhands/core/config/condenser_config.py
+++ b/openhands/core/config/condenser_config.py
@@ -26,8 +26,10 @@ class RecentEventsCondenserConfig(BaseModel):
     """Configuration for RecentEventsCondenser."""
 
     type: Literal['recent'] = Field('recent')
+
+    # at least one event by default, because the best guess is that it's the user task
     keep_first: int = Field(
-        default=0,
+        default=1,
         description='The number of initial events to always keep in history.',
         ge=0,
     )
@@ -43,6 +45,8 @@ class LLMSummarizingCondenserConfig(BaseModel):
     llm_config: LLMConfig = Field(
         ..., description='Configuration for the LLM to use for condensing.'
     )
+
+    # at least one event by default, because the best guess is that it's the user task
     keep_first: int = Field(
         default=1,
         description='The number of initial events to always keep in history.',
@@ -62,8 +66,10 @@ class AmortizedForgettingCondenserConfig(BaseModel):
         description='Maximum size of the condensed history before triggering forgetting.',
         ge=2,
     )
+
+    # at least one event by default, because the best guess is that it's the user task
     keep_first: int = Field(
-        default=0,
+        default=1,
         description='Number of initial events to always keep in history.',
         ge=0,
     )
@@ -81,8 +87,10 @@ class LLMAttentionCondenserConfig(BaseModel):
         description='Maximum size of the condensed history before triggering forgetting.',
         ge=2,
     )
+
+    # at least one event by default, because the best guess is that it's the user task
     keep_first: int = Field(
-        default=0,
+        default=1,
         description='Number of initial events to always keep in history.',
         ge=0,
     )
diff --git a/openhands/memory/condenser/impl/llm_attention_condenser.py b/openhands/memory/condenser/impl/llm_attention_condenser.py
index 98d0455283ce..9a638c071d18 100644
--- a/openhands/memory/condenser/impl/llm_attention_condenser.py
+++ b/openhands/memory/condenser/impl/llm_attention_condenser.py
@@ -18,7 +18,7 @@ class ImportantEventSelection(BaseModel):
 class LLMAttentionCondenser(RollingCondenser):
     """Rolling condenser strategy that uses an LLM to select the most important events when condensing the history."""
 
-    def __init__(self, llm: LLM, max_size: int = 100, keep_first: int = 0):
+    def __init__(self, llm: LLM, max_size: int = 100, keep_first: int = 1):
         if keep_first >= max_size // 2:
             raise ValueError(
                 f'keep_first ({keep_first}) must be less than half of max_size ({max_size})'
             )
diff --git a/openhands/memory/condenser/impl/recent_events_condenser.py b/openhands/memory/condenser/impl/recent_events_condenser.py
index 2ccfd409f35c..a7790483637f 100644
--- a/openhands/memory/condenser/impl/recent_events_condenser.py
+++ b/openhands/memory/condenser/impl/recent_events_condenser.py
@@ -8,7 +8,7 @@ class RecentEventsCondenser(Condenser):
     """A condenser that only keeps a certain number of the most recent events."""
 
-    def __init__(self, keep_first: int = 0, max_events: int = 10):
+    def __init__(self, keep_first: int = 1, max_events: int = 10):
         self.keep_first = keep_first
         self.max_events = max_events
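Note: the defaults above give head-plus-tail selection: the first keep_first events survive every condensation, and the remaining budget goes to the most recent events. Below is a minimal standalone sketch of that slicing; `condense` is a hypothetical stand-in (plain strings replace OpenHands events) that mirrors the expectations asserted in the updated tests, not the RecentEventsCondenser API itself.

    # Sketch of the keep-first slicing; `condense` is a hypothetical helper,
    # not the library API. It mirrors test_recent_events_condenser below.
    def condense(events: list[str], keep_first: int = 1, max_events: int = 10) -> list[str]:
        if len(events) <= max_events:
            return list(events)  # small histories pass through unchanged
        head = events[:keep_first]  # always keep the first event(s): the likely user task
        tail_budget = max(0, max_events - len(head))  # remaining room for recent events
        return head + events[len(events) - tail_budget:]

    events = [f'Event {i}' for i in range(1, 6)]
    assert condense(events, keep_first=1, max_events=3) == ['Event 1', 'Event 4', 'Event 5']
    assert condense(events, keep_first=2, max_events=3) == ['Event 1', 'Event 2', 'Event 5']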
diff --git a/tests/unit/test_condenser.py b/tests/unit/test_condenser.py
index fd1e922a103a..99561ae63c7f 100644
--- a/tests/unit/test_condenser.py
+++ b/tests/unit/test_condenser.py
@@ -38,7 +38,7 @@ def create_test_event(
     event = Event()
     event._message = message
     event.timestamp = timestamp if timestamp else datetime.now()
-    if id:
+    if id is not None:
         event._id = id
     event._source = EventSource.USER
     return event
@@ -186,13 +186,14 @@ def test_recent_events_condenser():
     assert result == events
 
     # If max_events is smaller than the number of events, only keep the last few.
-    max_events = 2
+    max_events = 3
     condenser = RecentEventsCondenser(max_events=max_events)
     result = condenser.condensed_history(mock_state)
 
     assert len(result) == max_events
-    assert result[0]._message == 'Event 4'
-    assert result[1]._message == 'Event 5'
+    assert result[0]._message == 'Event 1'  # kept from keep_first
+    assert result[1]._message == 'Event 4'  # kept from max_events
+    assert result[2]._message == 'Event 5'  # kept from max_events
 
     # If the keep_first flag is set, the first event will always be present.
     keep_first = 1
@@ -211,9 +212,9 @@ def test_recent_events_condenser():
     result = condenser.condensed_history(mock_state)
 
     assert len(result) == max_events
-    assert result[0]._message == 'Event 1'
-    assert result[1]._message == 'Event 2'
-    assert result[2]._message == 'Event 5'
+    assert result[0]._message == 'Event 1'  # kept from keep_first
+    assert result[1]._message == 'Event 2'  # kept from keep_first
+    assert result[2]._message == 'Event 5'  # kept from max_events
 
 
 def test_llm_summarization_condenser_from_config():
@@ -539,7 +540,7 @@ def test_llm_attention_condenser_forgets_when_larger_than_max_size(
 ):
     """Test that the LLMAttentionCondenser forgets events when the context grows too large."""
     max_size = 2
-    condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)
+    condenser = LLMAttentionCondenser(max_size=max_size, keep_first=0, llm=mock_llm)
 
     for i in range(max_size * 10):
         event = create_test_event(f'Event {i}', id=i)
@@ -560,7 +561,7 @@ def test_llm_attention_condenser_forgets_when_larger_than_max_size(
 def test_llm_attention_condenser_handles_events_outside_history(mock_llm, mock_state):
     """Test that the LLMAttentionCondenser handles event IDs that aren't from the event history."""
     max_size = 2
-    condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)
+    condenser = LLMAttentionCondenser(max_size=max_size, keep_first=0, llm=mock_llm)
 
     for i in range(max_size * 10):
         event = create_test_event(f'Event {i}', id=i)
@@ -580,7 +581,7 @@ def test_llm_attention_condenser_handles_events_outside_history(mock_llm, mock_s
 def test_llm_attention_condenser_handles_too_many_events(mock_llm, mock_state):
     """Test that the LLMAttentionCondenser handles when the response contains too many event IDs."""
     max_size = 2
-    condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)
+    condenser = LLMAttentionCondenser(max_size=max_size, keep_first=0, llm=mock_llm)
 
     for i in range(max_size * 10):
         event = create_test_event(f'Event {i}', id=i)
@@ -600,7 +601,9 @@ def test_llm_attention_condenser_handles_too_few_events(mock_llm, mock_state):
     """Test that the LLMAttentionCondenser handles when the response contains too few event IDs."""
     max_size = 2
-    condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)
+    # We must pass keep_first=0 here: with max_size = 2, the new default
+    # keep_first=1 would be rejected, since keep_first (1) >= max_size // 2 (1).
+    condenser = LLMAttentionCondenser(max_size=max_size, keep_first=0, llm=mock_llm)
 
     for i in range(max_size * 10):
         event = create_test_event(f'Event {i}', id=i)
@@ -614,3 +617,33 @@
 
         # The number of results should bounce back and forth between 1, 2, 1, 2, ...
        assert len(results) == (i % 2) + 1
+
+
+# The new default keep_first=1 requires max_size > 2; exercise that combination.
+def test_llm_attention_condenser_handles_keep_first_for_larger_max_size(
+    mock_llm, mock_state
+):
+    """Test that LLMAttentionCondenser works when keep_first=1 is allowed (keep_first must be less than half of max_size)."""
+    max_size = 4  # so keep_first=1 < (max_size // 2) = 2
+    condenser = LLMAttentionCondenser(max_size=max_size, keep_first=1, llm=mock_llm)
+
+    for i in range(max_size * 2):
+        # Append new events, then check what survives condensation.
+        event = create_test_event(f'Event {i}', id=i)
+        mock_state.history.append(event)
+
+        mock_llm.set_mock_response_content(
+            ImportantEventSelection(ids=[]).model_dump_json()
+        )
+
+        results = condenser.condensed_history(mock_state)
+
+        # The first event is always kept; the tail grows until max_size is exceeded.
+        if len(mock_state.history) <= max_size:
+            # No condensation needed yet
+            assert len(results) == len(mock_state.history)
+        else:
+            # The first event is kept, plus some events from the tail
+            assert results[0].id == 0
+            assert len(results) <= max_size
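Note: every test above that uses max_size = 2 now has to pass keep_first=0 explicitly, because the constructor rejects keep_first >= max_size // 2. A minimal sketch of that interaction follows; `check_args` is a hypothetical helper that mirrors the guard in LLMAttentionCondenser.__init__ shown earlier in this patch, not a library function.

    # Hypothetical stand-in for the validation in LLMAttentionCondenser.__init__.
    def check_args(keep_first: int, max_size: int) -> None:
        if keep_first >= max_size // 2:
            raise ValueError(
                f'keep_first ({keep_first}) must be less than half of max_size ({max_size})'
            )

    check_args(keep_first=0, max_size=2)  # ok: the max_size=2 tests opt out of keep_first
    check_args(keep_first=1, max_size=4)  # ok: 1 < 4 // 2, as in the new test
    try:
        check_args(keep_first=1, max_size=2)  # the new default trips the guard
    except ValueError as err:
        print(err)  # keep_first (1) must be less than half of max_size (2)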