From 212787c07276dc732b07cc3f72c74e0861427b51 Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 14 Dec 2024 16:01:02 +0000 Subject: [PATCH 01/16] refactor: remove unused almost_stuck functionality - Remove almost_stuck field from State class - Remove almost_stuck counter from StuckDetector - Simplify stuck detection logic to focus on actual loop detection - Update tests to remove almost_stuck assertions --- openhands/controller/agent_controller.py | 2 +- openhands/controller/state/state.py | 2 +- openhands/controller/stuck.py | 36 ++++-------------------- tests/unit/test_is_stuck.py | 21 -------------- 4 files changed, 8 insertions(+), 53 deletions(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 8078855330cf..9caaa90eb963 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -319,7 +319,7 @@ async def _handle_message_action(self, action: MessageAction) -> None: def _reset(self) -> None: """Resets the agent controller""" - self.almost_stuck = 0 + self._pending_action = None self.agent.reset() diff --git a/openhands/controller/state/state.py b/openhands/controller/state/state.py index 73aa4e666ed7..b2e07b294106 100644 --- a/openhands/controller/state/state.py +++ b/openhands/controller/state/state.py @@ -94,7 +94,7 @@ class State: end_id: int = -1 # truncation_id tracks where to load history after context window truncation truncation_id: int = -1 - almost_stuck: int = 0 + delegates: dict[tuple[int, int], tuple[str, str]] = field(default_factory=dict) # NOTE: This will never be used by the controller, but it can be used by different # evaluation tasks to store extra data needed to track the progress/state of the task. diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py index 0eb0f4c893ca..79ac39390239 100644 --- a/openhands/controller/stuck.py +++ b/openhands/controller/stuck.py @@ -81,43 +81,19 @@ def _is_stuck_repeating_action_observation(self, last_actions, last_observations # it takes 4 actions and 4 observations to detect a loop # assert len(last_actions) == 4 and len(last_observations) == 4 - # reset almost_stuck reminder - self.state.almost_stuck = 0 - - # almost stuck? if two actions, obs are the same, we're almost stuck - if len(last_actions) >= 2 and len(last_observations) >= 2: + # Check for a loop of 4 identical action-observation pairs + if len(last_actions) == 4 and len(last_observations) == 4: actions_equal = all( - self._eq_no_pid(last_actions[0], action) for action in last_actions[:2] + self._eq_no_pid(last_actions[0], action) for action in last_actions ) observations_equal = all( self._eq_no_pid(last_observations[0], observation) - for observation in last_observations[:2] + for observation in last_observations ) - # the last two actions and obs are the same? if actions_equal and observations_equal: - self.state.almost_stuck = 2 - - # the last three actions and observations are the same? - if len(last_actions) >= 3 and len(last_observations) >= 3: - if ( - actions_equal - and observations_equal - and self._eq_no_pid(last_actions[0], last_actions[2]) - and self._eq_no_pid(last_observations[0], last_observations[2]) - ): - self.state.almost_stuck = 1 - - if len(last_actions) == 4 and len(last_observations) == 4: - if ( - actions_equal - and observations_equal - and self._eq_no_pid(last_actions[0], last_actions[3]) - and self._eq_no_pid(last_observations[0], last_observations[3]) - ): - logger.warning('Action, Observation loop detected') - self.state.almost_stuck = 0 - return True + logger.warning('Action, Observation loop detected') + return True return False diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py index 197d6d8462b7..c34246b86140 100644 --- a/tests/unit/test_is_stuck.py +++ b/tests/unit/test_is_stuck.py @@ -149,7 +149,6 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect # 8 events assert stuck_detector.is_stuck() is False - assert stuck_detector.state.almost_stuck == 2 cmd_action_3 = CmdRunAction(command='ls') cmd_action_3._id = 3 @@ -160,20 +159,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect # 10 events assert len(state.history) == 10 - assert ( - len(state.history) == 10 - ) # Adjusted since history is a list and the controller is not running - - # FIXME are we still testing this without this test? - # assert ( - # len( - # get_pairs_from_events(state.history) - # ) - # == 5 - # ) - assert stuck_detector.is_stuck() is False - assert stuck_detector.state.almost_stuck == 1 cmd_action_4 = CmdRunAction(command='ls') cmd_action_4._id = 4 @@ -184,16 +170,9 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect # 12 events assert len(state.history) == 12 - # assert ( - # len( - # get_pairs_from_events(state.history) - # ) - # == 6 - # ) with patch('logging.Logger.warning') as mock_warning: assert stuck_detector.is_stuck() is True - assert stuck_detector.state.almost_stuck == 0 mock_warning.assert_called_once_with('Action, Observation loop detected') def test_is_stuck_repeating_action_error(self, stuck_detector: StuckDetector): From 1e739bddb74076e8e9bdad079570596a8da9005b Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 14 Dec 2024 16:46:07 +0000 Subject: [PATCH 02/16] fix: improve stuck detection in UI mode - Add UI mode awareness to stuck detection - Only consider history after last user message in UI mode - Keep existing behavior in headless mode - Add comprehensive tests for both modes Fix: #5480 --- openhands/controller/agent_controller.py | 9 ++- openhands/controller/stuck.py | 22 +++++-- tests/unit/test_is_stuck.py | 84 ++++++++++++++++++------ 3 files changed, 87 insertions(+), 28 deletions(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 9caaa90eb963..d91e4bdf6b7c 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -902,17 +902,20 @@ def _apply_conversation_window(self, events: list[Event]) -> list[Event]: return kept_events - def _is_stuck(self) -> bool: + def _is_stuck(self, ui_mode: bool | None = None) -> bool: """Checks if the agent or its delegate is stuck in a loop. + Args: + ui_mode: Optional override for UI mode. If not provided, uses not self.headless_mode. + Returns: bool: True if the agent is stuck, False otherwise. """ # check if delegate stuck - if self.delegate and self.delegate._is_stuck(): + if self.delegate and self.delegate._is_stuck(ui_mode): return True - return self._stuck_detector.is_stuck() + return self._stuck_detector.is_stuck(ui_mode if ui_mode is not None else not self.headless_mode) def __repr__(self): return ( diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py index 79ac39390239..fb520f161d6c 100644 --- a/openhands/controller/stuck.py +++ b/openhands/controller/stuck.py @@ -24,16 +24,26 @@ class StuckDetector: def __init__(self, state: State): self.state = state - def is_stuck(self): - # filter out MessageAction with source='user' from history + def is_stuck(self, ui_mode: bool = False): + if ui_mode: + # In UI mode, only look at history after the last user message + last_user_msg_idx = -1 + for i, event in enumerate(self.state.history): + if isinstance(event, MessageAction) and event.source == EventSource.USER: + last_user_msg_idx = i + + history_to_check = self.state.history[last_user_msg_idx + 1:] + else: + # In headless mode, look at all history + history_to_check = self.state.history + + # Filter out user messages and null events filtered_history = [ event - for event in self.state.history + for event in history_to_check if not ( (isinstance(event, MessageAction) and event.source == EventSource.USER) - or - # there might be some NullAction or NullObservation in the history at least for now - isinstance(event, (NullAction, NullObservation)) + or isinstance(event, (NullAction, NullObservation)) ) ] diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py index c34246b86140..f4778e70b023 100644 --- a/tests/unit/test_is_stuck.py +++ b/tests/unit/test_is_stuck.py @@ -112,7 +112,53 @@ def test_history_too_short(self, stuck_detector: StuckDetector): # cmd_observation._cause = cmd_action._id state.history.append(cmd_observation) - assert stuck_detector.is_stuck() is False + assert stuck_detector.is_stuck(ui_mode=False) is False + assert stuck_detector.is_stuck(ui_mode=True) is False + + def test_ui_mode_resets_after_user_message(self, stuck_detector: StuckDetector): + state = stuck_detector.state + + # First add some actions that would be stuck in non-UI mode + for i in range(4): + cmd_action = CmdRunAction(command='ls') + cmd_action._id = i + state.history.append(cmd_action) + cmd_observation = CmdOutputObservation(content='', command='ls', command_id=i) + cmd_observation._cause = cmd_action._id + state.history.append(cmd_observation) + + # In non-UI mode, this should be stuck + assert stuck_detector.is_stuck(ui_mode=False) is True + + # Add a user message + message_action = MessageAction(content='Hello', wait_for_response=False) + message_action._source = EventSource.USER + state.history.append(message_action) + + # In UI mode, this should not be stuck because we ignore history before user message + assert stuck_detector.is_stuck(ui_mode=True) is False + + # Add two more identical actions - still not stuck because we need at least 3 + for i in range(2): + cmd_action = CmdRunAction(command='ls') + cmd_action._id = i + 4 + state.history.append(cmd_action) + cmd_observation = CmdOutputObservation(content='', command='ls', command_id=i + 4) + cmd_observation._cause = cmd_action._id + state.history.append(cmd_observation) + + assert stuck_detector.is_stuck(ui_mode=True) is False + + # Add two more identical actions - now it should be stuck + for i in range(2): + cmd_action = CmdRunAction(command='ls') + cmd_action._id = i + 6 + state.history.append(cmd_action) + cmd_observation = CmdOutputObservation(content='', command='ls', command_id=i + 6) + cmd_observation._cause = cmd_action._id + state.history.append(cmd_observation) + + assert stuck_detector.is_stuck(ui_mode=True) is True def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetector): state = stuck_detector.state @@ -148,7 +194,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect state.history.append(message_null_observation) # 8 events - assert stuck_detector.is_stuck() is False + assert stuck_detector.is_stuck(ui_mode=False) is False cmd_action_3 = CmdRunAction(command='ls') cmd_action_3._id = 3 @@ -159,7 +205,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect # 10 events assert len(state.history) == 10 - assert stuck_detector.is_stuck() is False + assert stuck_detector.is_stuck(ui_mode=False) is False cmd_action_4 = CmdRunAction(command='ls') cmd_action_4._id = 4 @@ -172,7 +218,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect assert len(state.history) == 12 with patch('logging.Logger.warning') as mock_warning: - assert stuck_detector.is_stuck() is True + assert stuck_detector.is_stuck(ui_mode=False) is True mock_warning.assert_called_once_with('Action, Observation loop detected') def test_is_stuck_repeating_action_error(self, stuck_detector: StuckDetector): @@ -224,7 +270,7 @@ def test_is_stuck_repeating_action_error(self, stuck_detector: StuckDetector): # 12 events with patch('logging.Logger.warning') as mock_warning: - assert stuck_detector.is_stuck() is True + assert stuck_detector.is_stuck(ui_mode=False) is True mock_warning.assert_called_once_with( 'Action, ErrorObservation loop detected' ) @@ -238,7 +284,7 @@ def test_is_stuck_invalid_syntax_error(self, stuck_detector: StuckDetector): ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck() is True + assert stuck_detector.is_stuck(ui_mode=False) is True def test_is_not_stuck_invalid_syntax_error_random_lines( self, stuck_detector: StuckDetector @@ -251,7 +297,7 @@ def test_is_not_stuck_invalid_syntax_error_random_lines( ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck() is False + assert stuck_detector.is_stuck(ui_mode=False) is False def test_is_not_stuck_invalid_syntax_error_only_three_incidents( self, stuck_detector: StuckDetector @@ -265,7 +311,7 @@ def test_is_not_stuck_invalid_syntax_error_only_three_incidents( ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck() is False + assert stuck_detector.is_stuck(ui_mode=False) is False def test_is_stuck_incomplete_input_error(self, stuck_detector: StuckDetector): state = stuck_detector.state @@ -276,7 +322,7 @@ def test_is_stuck_incomplete_input_error(self, stuck_detector: StuckDetector): ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck() is True + assert stuck_detector.is_stuck(ui_mode=False) is True def test_is_not_stuck_incomplete_input_error(self, stuck_detector: StuckDetector): state = stuck_detector.state @@ -287,7 +333,7 @@ def test_is_not_stuck_incomplete_input_error(self, stuck_detector: StuckDetector ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck() is False + assert stuck_detector.is_stuck(ui_mode=False) is False def test_is_not_stuck_ipython_unterminated_string_error_random_lines( self, stuck_detector: StuckDetector @@ -296,7 +342,7 @@ def test_is_not_stuck_ipython_unterminated_string_error_random_lines( self._impl_unterminated_string_error_events(state, random_line=True) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck() is False + assert stuck_detector.is_stuck(ui_mode=False) is False def test_is_not_stuck_ipython_unterminated_string_error_only_three_incidents( self, stuck_detector: StuckDetector @@ -307,7 +353,7 @@ def test_is_not_stuck_ipython_unterminated_string_error_only_three_incidents( ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck() is False + assert stuck_detector.is_stuck(ui_mode=False) is False def test_is_stuck_ipython_unterminated_string_error( self, stuck_detector: StuckDetector @@ -316,7 +362,7 @@ def test_is_stuck_ipython_unterminated_string_error( self._impl_unterminated_string_error_events(state, random_line=False) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck() is True + assert stuck_detector.is_stuck(ui_mode=False) is True def test_is_not_stuck_ipython_syntax_error_not_at_end( self, stuck_detector: StuckDetector @@ -361,7 +407,7 @@ def test_is_not_stuck_ipython_syntax_error_not_at_end( state.history.append(ipython_observation_4) with patch('logging.Logger.warning') as mock_warning: - assert stuck_detector.is_stuck() is False + assert stuck_detector.is_stuck(ui_mode=False) is False mock_warning.assert_not_called() def test_is_stuck_repeating_action_observation_pattern( @@ -430,7 +476,7 @@ def test_is_stuck_repeating_action_observation_pattern( state.history.append(read_observation_3) with patch('logging.Logger.warning') as mock_warning: - assert stuck_detector.is_stuck() is True + assert stuck_detector.is_stuck(ui_mode=False) is True mock_warning.assert_called_once_with('Action, Observation pattern detected') def test_is_stuck_not_stuck(self, stuck_detector: StuckDetector): @@ -496,7 +542,7 @@ def test_is_stuck_not_stuck(self, stuck_detector: StuckDetector): # read_observation_3._cause = read_action_3._id state.history.append(read_observation_3) - assert stuck_detector.is_stuck() is False + assert stuck_detector.is_stuck(ui_mode=False) is False def test_is_stuck_monologue(self, stuck_detector): state = stuck_detector.state @@ -526,7 +572,7 @@ def test_is_stuck_monologue(self, stuck_detector): message_action_6._source = EventSource.AGENT state.history.append(message_action_6) - assert stuck_detector.is_stuck() + assert stuck_detector.is_stuck(ui_mode=False) # Add an observation event between the repeated message actions cmd_output_observation = CmdOutputObservation( @@ -546,7 +592,7 @@ def test_is_stuck_monologue(self, stuck_detector): state.history.append(message_action_8) with patch('logging.Logger.warning'): - assert not stuck_detector.is_stuck() + assert not stuck_detector.is_stuck(ui_mode=False) class TestAgentController: @@ -563,4 +609,4 @@ def controller(self): def test_is_stuck_delegate_stuck(self, controller: AgentController): controller.delegate = Mock() controller.delegate._is_stuck.return_value = True - assert controller._is_stuck() is True + assert controller._is_stuck(ui_mode=False) is True From 40bb3184c769f01394000cfad386f6d08edbf289 Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 14 Dec 2024 16:57:56 +0000 Subject: [PATCH 03/16] fix: improve stuck detection in interactive mode - Use headless_mode flag to determine stuck detection behavior - In interactive mode (not headless), only consider history after last user message - Keep existing behavior in headless mode - Add comprehensive tests for both modes Fix: #5480 --- openhands/controller/agent_controller.py | 9 ++-- openhands/controller/stuck.py | 15 +++++-- tests/unit/test_is_stuck.py | 52 ++++++++++++------------ 3 files changed, 41 insertions(+), 35 deletions(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index d91e4bdf6b7c..7bc095ed99b3 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -902,20 +902,17 @@ def _apply_conversation_window(self, events: list[Event]) -> list[Event]: return kept_events - def _is_stuck(self, ui_mode: bool | None = None) -> bool: + def _is_stuck(self) -> bool: """Checks if the agent or its delegate is stuck in a loop. - Args: - ui_mode: Optional override for UI mode. If not provided, uses not self.headless_mode. - Returns: bool: True if the agent is stuck, False otherwise. """ # check if delegate stuck - if self.delegate and self.delegate._is_stuck(ui_mode): + if self.delegate and self.delegate._is_stuck(): return True - return self._stuck_detector.is_stuck(ui_mode if ui_mode is not None else not self.headless_mode) + return self._stuck_detector.is_stuck(not self.headless_mode) def __repr__(self): return ( diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py index fb520f161d6c..d90cc0d5c73c 100644 --- a/openhands/controller/stuck.py +++ b/openhands/controller/stuck.py @@ -24,9 +24,18 @@ class StuckDetector: def __init__(self, state: State): self.state = state - def is_stuck(self, ui_mode: bool = False): - if ui_mode: - # In UI mode, only look at history after the last user message + def is_stuck(self, interactive_mode: bool = False): + """Checks if the agent is stuck in a loop. + + Args: + interactive_mode: If True (not headless), only consider history after last user message. + If False (headless), consider all history. + + Returns: + bool: True if the agent is stuck in a loop, False otherwise. + """ + if interactive_mode: + # In interactive mode, only look at history after the last user message last_user_msg_idx = -1 for i, event in enumerate(self.state.history): if isinstance(event, MessageAction) and event.source == EventSource.USER: diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py index f4778e70b023..1ab8927387f5 100644 --- a/tests/unit/test_is_stuck.py +++ b/tests/unit/test_is_stuck.py @@ -112,10 +112,10 @@ def test_history_too_short(self, stuck_detector: StuckDetector): # cmd_observation._cause = cmd_action._id state.history.append(cmd_observation) - assert stuck_detector.is_stuck(ui_mode=False) is False - assert stuck_detector.is_stuck(ui_mode=True) is False + assert stuck_detector.is_stuck(interactive_mode=False) is False + assert stuck_detector.is_stuck(interactive_mode=True) is False - def test_ui_mode_resets_after_user_message(self, stuck_detector: StuckDetector): + def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckDetector): state = stuck_detector.state # First add some actions that would be stuck in non-UI mode @@ -128,15 +128,15 @@ def test_ui_mode_resets_after_user_message(self, stuck_detector: StuckDetector): state.history.append(cmd_observation) # In non-UI mode, this should be stuck - assert stuck_detector.is_stuck(ui_mode=False) is True + assert stuck_detector.is_stuck(interactive_mode=False) is True # Add a user message message_action = MessageAction(content='Hello', wait_for_response=False) message_action._source = EventSource.USER state.history.append(message_action) - # In UI mode, this should not be stuck because we ignore history before user message - assert stuck_detector.is_stuck(ui_mode=True) is False + # In interactive mode, this should not be stuck because we ignore history before user message + assert stuck_detector.is_stuck(interactive_mode=True) is False # Add two more identical actions - still not stuck because we need at least 3 for i in range(2): @@ -147,7 +147,7 @@ def test_ui_mode_resets_after_user_message(self, stuck_detector: StuckDetector): cmd_observation._cause = cmd_action._id state.history.append(cmd_observation) - assert stuck_detector.is_stuck(ui_mode=True) is False + assert stuck_detector.is_stuck(interactive_mode=True) is False # Add two more identical actions - now it should be stuck for i in range(2): @@ -158,7 +158,7 @@ def test_ui_mode_resets_after_user_message(self, stuck_detector: StuckDetector): cmd_observation._cause = cmd_action._id state.history.append(cmd_observation) - assert stuck_detector.is_stuck(ui_mode=True) is True + assert stuck_detector.is_stuck(interactive_mode=True) is True def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetector): state = stuck_detector.state @@ -194,7 +194,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect state.history.append(message_null_observation) # 8 events - assert stuck_detector.is_stuck(ui_mode=False) is False + assert stuck_detector.is_stuck(interactive_mode=False) is False cmd_action_3 = CmdRunAction(command='ls') cmd_action_3._id = 3 @@ -205,7 +205,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect # 10 events assert len(state.history) == 10 - assert stuck_detector.is_stuck(ui_mode=False) is False + assert stuck_detector.is_stuck(interactive_mode=False) is False cmd_action_4 = CmdRunAction(command='ls') cmd_action_4._id = 4 @@ -218,7 +218,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect assert len(state.history) == 12 with patch('logging.Logger.warning') as mock_warning: - assert stuck_detector.is_stuck(ui_mode=False) is True + assert stuck_detector.is_stuck(interactive_mode=False) is True mock_warning.assert_called_once_with('Action, Observation loop detected') def test_is_stuck_repeating_action_error(self, stuck_detector: StuckDetector): @@ -270,7 +270,7 @@ def test_is_stuck_repeating_action_error(self, stuck_detector: StuckDetector): # 12 events with patch('logging.Logger.warning') as mock_warning: - assert stuck_detector.is_stuck(ui_mode=False) is True + assert stuck_detector.is_stuck(interactive_mode=False) is True mock_warning.assert_called_once_with( 'Action, ErrorObservation loop detected' ) @@ -284,7 +284,7 @@ def test_is_stuck_invalid_syntax_error(self, stuck_detector: StuckDetector): ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(ui_mode=False) is True + assert stuck_detector.is_stuck(interactive_mode=False) is True def test_is_not_stuck_invalid_syntax_error_random_lines( self, stuck_detector: StuckDetector @@ -297,7 +297,7 @@ def test_is_not_stuck_invalid_syntax_error_random_lines( ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(ui_mode=False) is False + assert stuck_detector.is_stuck(interactive_mode=False) is False def test_is_not_stuck_invalid_syntax_error_only_three_incidents( self, stuck_detector: StuckDetector @@ -311,7 +311,7 @@ def test_is_not_stuck_invalid_syntax_error_only_three_incidents( ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(ui_mode=False) is False + assert stuck_detector.is_stuck(interactive_mode=False) is False def test_is_stuck_incomplete_input_error(self, stuck_detector: StuckDetector): state = stuck_detector.state @@ -322,7 +322,7 @@ def test_is_stuck_incomplete_input_error(self, stuck_detector: StuckDetector): ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(ui_mode=False) is True + assert stuck_detector.is_stuck(interactive_mode=False) is True def test_is_not_stuck_incomplete_input_error(self, stuck_detector: StuckDetector): state = stuck_detector.state @@ -333,7 +333,7 @@ def test_is_not_stuck_incomplete_input_error(self, stuck_detector: StuckDetector ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(ui_mode=False) is False + assert stuck_detector.is_stuck(interactive_mode=False) is False def test_is_not_stuck_ipython_unterminated_string_error_random_lines( self, stuck_detector: StuckDetector @@ -342,7 +342,7 @@ def test_is_not_stuck_ipython_unterminated_string_error_random_lines( self._impl_unterminated_string_error_events(state, random_line=True) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(ui_mode=False) is False + assert stuck_detector.is_stuck(interactive_mode=False) is False def test_is_not_stuck_ipython_unterminated_string_error_only_three_incidents( self, stuck_detector: StuckDetector @@ -353,7 +353,7 @@ def test_is_not_stuck_ipython_unterminated_string_error_only_three_incidents( ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(ui_mode=False) is False + assert stuck_detector.is_stuck(interactive_mode=False) is False def test_is_stuck_ipython_unterminated_string_error( self, stuck_detector: StuckDetector @@ -362,7 +362,7 @@ def test_is_stuck_ipython_unterminated_string_error( self._impl_unterminated_string_error_events(state, random_line=False) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(ui_mode=False) is True + assert stuck_detector.is_stuck(interactive_mode=False) is True def test_is_not_stuck_ipython_syntax_error_not_at_end( self, stuck_detector: StuckDetector @@ -407,7 +407,7 @@ def test_is_not_stuck_ipython_syntax_error_not_at_end( state.history.append(ipython_observation_4) with patch('logging.Logger.warning') as mock_warning: - assert stuck_detector.is_stuck(ui_mode=False) is False + assert stuck_detector.is_stuck(interactive_mode=False) is False mock_warning.assert_not_called() def test_is_stuck_repeating_action_observation_pattern( @@ -476,7 +476,7 @@ def test_is_stuck_repeating_action_observation_pattern( state.history.append(read_observation_3) with patch('logging.Logger.warning') as mock_warning: - assert stuck_detector.is_stuck(ui_mode=False) is True + assert stuck_detector.is_stuck(interactive_mode=False) is True mock_warning.assert_called_once_with('Action, Observation pattern detected') def test_is_stuck_not_stuck(self, stuck_detector: StuckDetector): @@ -542,7 +542,7 @@ def test_is_stuck_not_stuck(self, stuck_detector: StuckDetector): # read_observation_3._cause = read_action_3._id state.history.append(read_observation_3) - assert stuck_detector.is_stuck(ui_mode=False) is False + assert stuck_detector.is_stuck(interactive_mode=False) is False def test_is_stuck_monologue(self, stuck_detector): state = stuck_detector.state @@ -572,7 +572,7 @@ def test_is_stuck_monologue(self, stuck_detector): message_action_6._source = EventSource.AGENT state.history.append(message_action_6) - assert stuck_detector.is_stuck(ui_mode=False) + assert stuck_detector.is_stuck(interactive_mode=False) # Add an observation event between the repeated message actions cmd_output_observation = CmdOutputObservation( @@ -592,7 +592,7 @@ def test_is_stuck_monologue(self, stuck_detector): state.history.append(message_action_8) with patch('logging.Logger.warning'): - assert not stuck_detector.is_stuck(ui_mode=False) + assert not stuck_detector.is_stuck(interactive_mode=False) class TestAgentController: @@ -609,4 +609,4 @@ def controller(self): def test_is_stuck_delegate_stuck(self, controller: AgentController): controller.delegate = Mock() controller.delegate._is_stuck.return_value = True - assert controller._is_stuck(ui_mode=False) is True + assert controller._is_stuck() is True From 8589b0a9e3bdb784cf21924ad6247890a2f183de Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 14 Dec 2024 17:11:32 +0000 Subject: [PATCH 04/16] refactor: simplify stuck detection to use headless_mode directly - Use not_headless parameter to match AgentController's headless_mode - Remove unnecessary interactive_mode concept - Update tests to use consistent terminology - Keep behavior the same, just clearer naming --- openhands/controller/stuck.py | 9 +++--- tests/unit/test_is_stuck.py | 52 +++++++++++++++++------------------ 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py index d90cc0d5c73c..d753897c8c83 100644 --- a/openhands/controller/stuck.py +++ b/openhands/controller/stuck.py @@ -24,17 +24,18 @@ class StuckDetector: def __init__(self, state: State): self.state = state - def is_stuck(self, interactive_mode: bool = False): + def is_stuck(self, not_headless: bool = False): """Checks if the agent is stuck in a loop. Args: - interactive_mode: If True (not headless), only consider history after last user message. - If False (headless), consider all history. + not_headless: Matches AgentController's not headless_mode. + If True: Consider only history after last user message + If False: Consider all history (headless mode) Returns: bool: True if the agent is stuck in a loop, False otherwise. """ - if interactive_mode: + if not_headless: # In interactive mode, only look at history after the last user message last_user_msg_idx = -1 for i, event in enumerate(self.state.history): diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py index 1ab8927387f5..af6bdca5298e 100644 --- a/tests/unit/test_is_stuck.py +++ b/tests/unit/test_is_stuck.py @@ -112,10 +112,10 @@ def test_history_too_short(self, stuck_detector: StuckDetector): # cmd_observation._cause = cmd_action._id state.history.append(cmd_observation) - assert stuck_detector.is_stuck(interactive_mode=False) is False - assert stuck_detector.is_stuck(interactive_mode=True) is False + assert stuck_detector.is_stuck(not_headless=False) is False + assert stuck_detector.is_stuck(not_headless=True) is False - def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckDetector): + def test_not_headless_resets_after_user_message(self, stuck_detector: StuckDetector): state = stuck_detector.state # First add some actions that would be stuck in non-UI mode @@ -127,16 +127,16 @@ def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckD cmd_observation._cause = cmd_action._id state.history.append(cmd_observation) - # In non-UI mode, this should be stuck - assert stuck_detector.is_stuck(interactive_mode=False) is True + # In headless mode, this should be stuck + assert stuck_detector.is_stuck(not_headless=False) is True # Add a user message message_action = MessageAction(content='Hello', wait_for_response=False) message_action._source = EventSource.USER state.history.append(message_action) - # In interactive mode, this should not be stuck because we ignore history before user message - assert stuck_detector.is_stuck(interactive_mode=True) is False + # In not-headless mode, this should not be stuck because we ignore history before user message + assert stuck_detector.is_stuck(not_headless=True) is False # Add two more identical actions - still not stuck because we need at least 3 for i in range(2): @@ -147,7 +147,7 @@ def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckD cmd_observation._cause = cmd_action._id state.history.append(cmd_observation) - assert stuck_detector.is_stuck(interactive_mode=True) is False + assert stuck_detector.is_stuck(not_headless=True) is False # Add two more identical actions - now it should be stuck for i in range(2): @@ -158,7 +158,7 @@ def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckD cmd_observation._cause = cmd_action._id state.history.append(cmd_observation) - assert stuck_detector.is_stuck(interactive_mode=True) is True + assert stuck_detector.is_stuck(not_headless=True) is True def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetector): state = stuck_detector.state @@ -194,7 +194,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect state.history.append(message_null_observation) # 8 events - assert stuck_detector.is_stuck(interactive_mode=False) is False + assert stuck_detector.is_stuck(not_headless=False) is False cmd_action_3 = CmdRunAction(command='ls') cmd_action_3._id = 3 @@ -205,7 +205,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect # 10 events assert len(state.history) == 10 - assert stuck_detector.is_stuck(interactive_mode=False) is False + assert stuck_detector.is_stuck(not_headless=False) is False cmd_action_4 = CmdRunAction(command='ls') cmd_action_4._id = 4 @@ -218,7 +218,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect assert len(state.history) == 12 with patch('logging.Logger.warning') as mock_warning: - assert stuck_detector.is_stuck(interactive_mode=False) is True + assert stuck_detector.is_stuck(not_headless=False) is True mock_warning.assert_called_once_with('Action, Observation loop detected') def test_is_stuck_repeating_action_error(self, stuck_detector: StuckDetector): @@ -270,7 +270,7 @@ def test_is_stuck_repeating_action_error(self, stuck_detector: StuckDetector): # 12 events with patch('logging.Logger.warning') as mock_warning: - assert stuck_detector.is_stuck(interactive_mode=False) is True + assert stuck_detector.is_stuck(not_headless=False) is True mock_warning.assert_called_once_with( 'Action, ErrorObservation loop detected' ) @@ -284,7 +284,7 @@ def test_is_stuck_invalid_syntax_error(self, stuck_detector: StuckDetector): ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(interactive_mode=False) is True + assert stuck_detector.is_stuck(not_headless=False) is True def test_is_not_stuck_invalid_syntax_error_random_lines( self, stuck_detector: StuckDetector @@ -297,7 +297,7 @@ def test_is_not_stuck_invalid_syntax_error_random_lines( ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(interactive_mode=False) is False + assert stuck_detector.is_stuck(not_headless=False) is False def test_is_not_stuck_invalid_syntax_error_only_three_incidents( self, stuck_detector: StuckDetector @@ -311,7 +311,7 @@ def test_is_not_stuck_invalid_syntax_error_only_three_incidents( ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(interactive_mode=False) is False + assert stuck_detector.is_stuck(not_headless=False) is False def test_is_stuck_incomplete_input_error(self, stuck_detector: StuckDetector): state = stuck_detector.state @@ -322,7 +322,7 @@ def test_is_stuck_incomplete_input_error(self, stuck_detector: StuckDetector): ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(interactive_mode=False) is True + assert stuck_detector.is_stuck(not_headless=False) is True def test_is_not_stuck_incomplete_input_error(self, stuck_detector: StuckDetector): state = stuck_detector.state @@ -333,7 +333,7 @@ def test_is_not_stuck_incomplete_input_error(self, stuck_detector: StuckDetector ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(interactive_mode=False) is False + assert stuck_detector.is_stuck(not_headless=False) is False def test_is_not_stuck_ipython_unterminated_string_error_random_lines( self, stuck_detector: StuckDetector @@ -342,7 +342,7 @@ def test_is_not_stuck_ipython_unterminated_string_error_random_lines( self._impl_unterminated_string_error_events(state, random_line=True) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(interactive_mode=False) is False + assert stuck_detector.is_stuck(not_headless=False) is False def test_is_not_stuck_ipython_unterminated_string_error_only_three_incidents( self, stuck_detector: StuckDetector @@ -353,7 +353,7 @@ def test_is_not_stuck_ipython_unterminated_string_error_only_three_incidents( ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(interactive_mode=False) is False + assert stuck_detector.is_stuck(not_headless=False) is False def test_is_stuck_ipython_unterminated_string_error( self, stuck_detector: StuckDetector @@ -362,7 +362,7 @@ def test_is_stuck_ipython_unterminated_string_error( self._impl_unterminated_string_error_events(state, random_line=False) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(interactive_mode=False) is True + assert stuck_detector.is_stuck(not_headless=False) is True def test_is_not_stuck_ipython_syntax_error_not_at_end( self, stuck_detector: StuckDetector @@ -407,7 +407,7 @@ def test_is_not_stuck_ipython_syntax_error_not_at_end( state.history.append(ipython_observation_4) with patch('logging.Logger.warning') as mock_warning: - assert stuck_detector.is_stuck(interactive_mode=False) is False + assert stuck_detector.is_stuck(not_headless=False) is False mock_warning.assert_not_called() def test_is_stuck_repeating_action_observation_pattern( @@ -476,7 +476,7 @@ def test_is_stuck_repeating_action_observation_pattern( state.history.append(read_observation_3) with patch('logging.Logger.warning') as mock_warning: - assert stuck_detector.is_stuck(interactive_mode=False) is True + assert stuck_detector.is_stuck(not_headless=False) is True mock_warning.assert_called_once_with('Action, Observation pattern detected') def test_is_stuck_not_stuck(self, stuck_detector: StuckDetector): @@ -542,7 +542,7 @@ def test_is_stuck_not_stuck(self, stuck_detector: StuckDetector): # read_observation_3._cause = read_action_3._id state.history.append(read_observation_3) - assert stuck_detector.is_stuck(interactive_mode=False) is False + assert stuck_detector.is_stuck(not_headless=False) is False def test_is_stuck_monologue(self, stuck_detector): state = stuck_detector.state @@ -572,7 +572,7 @@ def test_is_stuck_monologue(self, stuck_detector): message_action_6._source = EventSource.AGENT state.history.append(message_action_6) - assert stuck_detector.is_stuck(interactive_mode=False) + assert stuck_detector.is_stuck(not_headless=False) # Add an observation event between the repeated message actions cmd_output_observation = CmdOutputObservation( @@ -592,7 +592,7 @@ def test_is_stuck_monologue(self, stuck_detector): state.history.append(message_action_8) with patch('logging.Logger.warning'): - assert not stuck_detector.is_stuck(interactive_mode=False) + assert not stuck_detector.is_stuck(not_headless=False) class TestAgentController: From 9f17d105b40e52b20fbb0ebcb30f121e3c9ee2a4 Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Sat, 14 Dec 2024 18:15:01 +0100 Subject: [PATCH 05/16] Update openhands/controller/stuck.py --- openhands/controller/stuck.py | 1 + 1 file changed, 1 insertion(+) diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py index d753897c8c83..7827ac3b9e9f 100644 --- a/openhands/controller/stuck.py +++ b/openhands/controller/stuck.py @@ -53,6 +53,7 @@ def is_stuck(self, not_headless: bool = False): for event in history_to_check if not ( (isinstance(event, MessageAction) and event.source == EventSource.USER) + # there might be some NullAction or NullObservation in the history at least for now or isinstance(event, (NullAction, NullObservation)) ) ] From 0f6c731eecb0bb4a141baa24533f55c45255f4a3 Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 14 Dec 2024 17:23:25 +0000 Subject: [PATCH 06/16] refactor: use headless_mode consistently - Use headless_mode parameter to match AgentController - Remove confusing double negative (not_headless) - Keep behavior the same, just clearer naming --- openhands/controller/stuck.py | 10 ++++---- tests/unit/test_is_stuck.py | 48 +++++++++++++++++------------------ 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py index 7827ac3b9e9f..9c5419bbe8b9 100644 --- a/openhands/controller/stuck.py +++ b/openhands/controller/stuck.py @@ -24,18 +24,18 @@ class StuckDetector: def __init__(self, state: State): self.state = state - def is_stuck(self, not_headless: bool = False): + def is_stuck(self, headless_mode: bool = True): """Checks if the agent is stuck in a loop. Args: - not_headless: Matches AgentController's not headless_mode. - If True: Consider only history after last user message - If False: Consider all history (headless mode) + headless_mode: Matches AgentController's headless_mode. + If True: Consider all history (automated/testing) + If False: Consider only history after last user message (interactive) Returns: bool: True if the agent is stuck in a loop, False otherwise. """ - if not_headless: + if not headless_mode: # In interactive mode, only look at history after the last user message last_user_msg_idx = -1 for i, event in enumerate(self.state.history): diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py index af6bdca5298e..9bb3c44e1248 100644 --- a/tests/unit/test_is_stuck.py +++ b/tests/unit/test_is_stuck.py @@ -112,10 +112,10 @@ def test_history_too_short(self, stuck_detector: StuckDetector): # cmd_observation._cause = cmd_action._id state.history.append(cmd_observation) - assert stuck_detector.is_stuck(not_headless=False) is False - assert stuck_detector.is_stuck(not_headless=True) is False + assert stuck_detector.is_stuck(headless_mode=True) is False + assert stuck_detector.is_stuck(headless_mode=False) is False - def test_not_headless_resets_after_user_message(self, stuck_detector: StuckDetector): + def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckDetector): state = stuck_detector.state # First add some actions that would be stuck in non-UI mode @@ -128,7 +128,7 @@ def test_not_headless_resets_after_user_message(self, stuck_detector: StuckDetec state.history.append(cmd_observation) # In headless mode, this should be stuck - assert stuck_detector.is_stuck(not_headless=False) is True + assert stuck_detector.is_stuck(headless_mode=True) is True # Add a user message message_action = MessageAction(content='Hello', wait_for_response=False) @@ -136,7 +136,7 @@ def test_not_headless_resets_after_user_message(self, stuck_detector: StuckDetec state.history.append(message_action) # In not-headless mode, this should not be stuck because we ignore history before user message - assert stuck_detector.is_stuck(not_headless=True) is False + assert stuck_detector.is_stuck(headless_mode=False) is False # Add two more identical actions - still not stuck because we need at least 3 for i in range(2): @@ -147,7 +147,7 @@ def test_not_headless_resets_after_user_message(self, stuck_detector: StuckDetec cmd_observation._cause = cmd_action._id state.history.append(cmd_observation) - assert stuck_detector.is_stuck(not_headless=True) is False + assert stuck_detector.is_stuck(headless_mode=False) is False # Add two more identical actions - now it should be stuck for i in range(2): @@ -158,7 +158,7 @@ def test_not_headless_resets_after_user_message(self, stuck_detector: StuckDetec cmd_observation._cause = cmd_action._id state.history.append(cmd_observation) - assert stuck_detector.is_stuck(not_headless=True) is True + assert stuck_detector.is_stuck(headless_mode=False) is True def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetector): state = stuck_detector.state @@ -194,7 +194,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect state.history.append(message_null_observation) # 8 events - assert stuck_detector.is_stuck(not_headless=False) is False + assert stuck_detector.is_stuck(headless_mode=True) is False cmd_action_3 = CmdRunAction(command='ls') cmd_action_3._id = 3 @@ -205,7 +205,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect # 10 events assert len(state.history) == 10 - assert stuck_detector.is_stuck(not_headless=False) is False + assert stuck_detector.is_stuck(headless_mode=True) is False cmd_action_4 = CmdRunAction(command='ls') cmd_action_4._id = 4 @@ -218,7 +218,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect assert len(state.history) == 12 with patch('logging.Logger.warning') as mock_warning: - assert stuck_detector.is_stuck(not_headless=False) is True + assert stuck_detector.is_stuck(headless_mode=True) is True mock_warning.assert_called_once_with('Action, Observation loop detected') def test_is_stuck_repeating_action_error(self, stuck_detector: StuckDetector): @@ -270,7 +270,7 @@ def test_is_stuck_repeating_action_error(self, stuck_detector: StuckDetector): # 12 events with patch('logging.Logger.warning') as mock_warning: - assert stuck_detector.is_stuck(not_headless=False) is True + assert stuck_detector.is_stuck(headless_mode=True) is True mock_warning.assert_called_once_with( 'Action, ErrorObservation loop detected' ) @@ -284,7 +284,7 @@ def test_is_stuck_invalid_syntax_error(self, stuck_detector: StuckDetector): ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(not_headless=False) is True + assert stuck_detector.is_stuck(headless_mode=True) is True def test_is_not_stuck_invalid_syntax_error_random_lines( self, stuck_detector: StuckDetector @@ -297,7 +297,7 @@ def test_is_not_stuck_invalid_syntax_error_random_lines( ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(not_headless=False) is False + assert stuck_detector.is_stuck(headless_mode=True) is False def test_is_not_stuck_invalid_syntax_error_only_three_incidents( self, stuck_detector: StuckDetector @@ -311,7 +311,7 @@ def test_is_not_stuck_invalid_syntax_error_only_three_incidents( ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(not_headless=False) is False + assert stuck_detector.is_stuck(headless_mode=True) is False def test_is_stuck_incomplete_input_error(self, stuck_detector: StuckDetector): state = stuck_detector.state @@ -322,7 +322,7 @@ def test_is_stuck_incomplete_input_error(self, stuck_detector: StuckDetector): ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(not_headless=False) is True + assert stuck_detector.is_stuck(headless_mode=True) is True def test_is_not_stuck_incomplete_input_error(self, stuck_detector: StuckDetector): state = stuck_detector.state @@ -333,7 +333,7 @@ def test_is_not_stuck_incomplete_input_error(self, stuck_detector: StuckDetector ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(not_headless=False) is False + assert stuck_detector.is_stuck(headless_mode=True) is False def test_is_not_stuck_ipython_unterminated_string_error_random_lines( self, stuck_detector: StuckDetector @@ -342,7 +342,7 @@ def test_is_not_stuck_ipython_unterminated_string_error_random_lines( self._impl_unterminated_string_error_events(state, random_line=True) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(not_headless=False) is False + assert stuck_detector.is_stuck(headless_mode=True) is False def test_is_not_stuck_ipython_unterminated_string_error_only_three_incidents( self, stuck_detector: StuckDetector @@ -353,7 +353,7 @@ def test_is_not_stuck_ipython_unterminated_string_error_only_three_incidents( ) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(not_headless=False) is False + assert stuck_detector.is_stuck(headless_mode=True) is False def test_is_stuck_ipython_unterminated_string_error( self, stuck_detector: StuckDetector @@ -362,7 +362,7 @@ def test_is_stuck_ipython_unterminated_string_error( self._impl_unterminated_string_error_events(state, random_line=False) with patch('logging.Logger.warning'): - assert stuck_detector.is_stuck(not_headless=False) is True + assert stuck_detector.is_stuck(headless_mode=True) is True def test_is_not_stuck_ipython_syntax_error_not_at_end( self, stuck_detector: StuckDetector @@ -407,7 +407,7 @@ def test_is_not_stuck_ipython_syntax_error_not_at_end( state.history.append(ipython_observation_4) with patch('logging.Logger.warning') as mock_warning: - assert stuck_detector.is_stuck(not_headless=False) is False + assert stuck_detector.is_stuck(headless_mode=True) is False mock_warning.assert_not_called() def test_is_stuck_repeating_action_observation_pattern( @@ -476,7 +476,7 @@ def test_is_stuck_repeating_action_observation_pattern( state.history.append(read_observation_3) with patch('logging.Logger.warning') as mock_warning: - assert stuck_detector.is_stuck(not_headless=False) is True + assert stuck_detector.is_stuck(headless_mode=True) is True mock_warning.assert_called_once_with('Action, Observation pattern detected') def test_is_stuck_not_stuck(self, stuck_detector: StuckDetector): @@ -542,7 +542,7 @@ def test_is_stuck_not_stuck(self, stuck_detector: StuckDetector): # read_observation_3._cause = read_action_3._id state.history.append(read_observation_3) - assert stuck_detector.is_stuck(not_headless=False) is False + assert stuck_detector.is_stuck(headless_mode=True) is False def test_is_stuck_monologue(self, stuck_detector): state = stuck_detector.state @@ -572,7 +572,7 @@ def test_is_stuck_monologue(self, stuck_detector): message_action_6._source = EventSource.AGENT state.history.append(message_action_6) - assert stuck_detector.is_stuck(not_headless=False) + assert stuck_detector.is_stuck(headless_mode=True) # Add an observation event between the repeated message actions cmd_output_observation = CmdOutputObservation( @@ -592,7 +592,7 @@ def test_is_stuck_monologue(self, stuck_detector): state.history.append(message_action_8) with patch('logging.Logger.warning'): - assert not stuck_detector.is_stuck(not_headless=False) + assert not stuck_detector.is_stuck(headless_mode=True) class TestAgentController: From 860d3e280374eb2413102bd4d058b3699a9603b2 Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Sat, 14 Dec 2024 18:27:16 +0100 Subject: [PATCH 07/16] Update openhands/controller/agent_controller.py --- openhands/controller/agent_controller.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 7bc095ed99b3..b2de6dd7d5ac 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -912,7 +912,7 @@ def _is_stuck(self) -> bool: if self.delegate and self.delegate._is_stuck(): return True - return self._stuck_detector.is_stuck(not self.headless_mode) + return self._stuck_detector.is_stuck(self.headless_mode) def __repr__(self): return ( From b0616e3d17c5b2708032805f8d96449b4d011ae1 Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Sat, 14 Dec 2024 18:28:08 +0100 Subject: [PATCH 08/16] Update tests/unit/test_is_stuck.py --- tests/unit/test_is_stuck.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py index 9bb3c44e1248..87e2e2be87a8 100644 --- a/tests/unit/test_is_stuck.py +++ b/tests/unit/test_is_stuck.py @@ -113,7 +113,6 @@ def test_history_too_short(self, stuck_detector: StuckDetector): state.history.append(cmd_observation) assert stuck_detector.is_stuck(headless_mode=True) is False - assert stuck_detector.is_stuck(headless_mode=False) is False def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckDetector): state = stuck_detector.state From ab4d504e146e1f0f1432d5c994a6d0df2f8284ba Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Sat, 14 Dec 2024 18:29:36 +0100 Subject: [PATCH 09/16] Update tests/unit/test_is_stuck.py --- tests/unit/test_is_stuck.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py index 87e2e2be87a8..666c4c3530ad 100644 --- a/tests/unit/test_is_stuck.py +++ b/tests/unit/test_is_stuck.py @@ -128,7 +128,8 @@ def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckD # In headless mode, this should be stuck assert stuck_detector.is_stuck(headless_mode=True) is True - + # with the UI, it will ALSO be stuck initially + assert stuck_detector.is_stuck(headless_mode=False) is True # Add a user message message_action = MessageAction(content='Hello', wait_for_response=False) message_action._source = EventSource.USER From 1f952c8b85b67c783dc8ca728facc86342a6915f Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Sat, 14 Dec 2024 18:32:11 +0100 Subject: [PATCH 10/16] Update tests/unit/test_is_stuck.py --- tests/unit/test_is_stuck.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py index 666c4c3530ad..412379c22681 100644 --- a/tests/unit/test_is_stuck.py +++ b/tests/unit/test_is_stuck.py @@ -137,7 +137,10 @@ def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckD # In not-headless mode, this should not be stuck because we ignore history before user message assert stuck_detector.is_stuck(headless_mode=False) is False - + + # But in headless mode, this should be still stuck because user messages do not count + assert stuck_detector.is_stuck(headless_mode=True) is True + # Add two more identical actions - still not stuck because we need at least 3 for i in range(2): cmd_action = CmdRunAction(command='ls') From 16c82b385fb66a50af226ff3bdccc85000710bb9 Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Sat, 14 Dec 2024 18:35:11 +0100 Subject: [PATCH 11/16] Update openhands/controller/stuck.py --- openhands/controller/stuck.py | 1 + 1 file changed, 1 insertion(+) diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py index 9c5419bbe8b9..2e002c62cc4a 100644 --- a/openhands/controller/stuck.py +++ b/openhands/controller/stuck.py @@ -52,6 +52,7 @@ def is_stuck(self, headless_mode: bool = True): event for event in history_to_check if not ( + # this is a problem! in headless mode we ignore these, but will this work with UI? (isinstance(event, MessageAction) and event.source == EventSource.USER) # there might be some NullAction or NullObservation in the history at least for now or isinstance(event, (NullAction, NullObservation)) From e8636e1c6901dc953b863722e78fc7aacacb56ca Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 14 Dec 2024 17:44:43 +0000 Subject: [PATCH 12/16] perf: optimize last user message search - Use reversed() to find last user message - Stop searching once found (break) - Same behavior, just more efficient --- openhands/controller/stuck.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py index 2e002c62cc4a..74f1bf762335 100644 --- a/openhands/controller/stuck.py +++ b/openhands/controller/stuck.py @@ -38,9 +38,10 @@ def is_stuck(self, headless_mode: bool = True): if not headless_mode: # In interactive mode, only look at history after the last user message last_user_msg_idx = -1 - for i, event in enumerate(self.state.history): + for i, event in enumerate(reversed(self.state.history)): if isinstance(event, MessageAction) and event.source == EventSource.USER: - last_user_msg_idx = i + last_user_msg_idx = len(self.state.history) - i - 1 + break history_to_check = self.state.history[last_user_msg_idx + 1:] else: From c18003785f80419ee9c31dbba7aa081186f23dfc Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 14 Dec 2024 17:51:13 +0000 Subject: [PATCH 13/16] docs: explain elegant user message filtering The same filter works perfectly in both modes: - In headless: actively filters user messages - In non-headless: no-op (already sliced after last user message) --- openhands/controller/stuck.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py index 74f1bf762335..53b38c77ea9b 100644 --- a/openhands/controller/stuck.py +++ b/openhands/controller/stuck.py @@ -53,7 +53,9 @@ def is_stuck(self, headless_mode: bool = True): event for event in history_to_check if not ( - # this is a problem! in headless mode we ignore these, but will this work with UI? + # Filter works elegantly in both modes: + # - In headless: actively filters out user messages from full history + # - In non-headless: no-op since we already sliced after last user message (isinstance(event, MessageAction) and event.source == EventSource.USER) # there might be some NullAction or NullObservation in the history at least for now or isinstance(event, (NullAction, NullObservation)) From 282e59e2cbe18ae83149121b37543b4499325642 Mon Sep 17 00:00:00 2001 From: OpenHands Bot Date: Sat, 14 Dec 2024 17:53:43 +0000 Subject: [PATCH 14/16] =?UTF-8?q?=F0=9F=A4=96=20Auto-fix=20Python=20lintin?= =?UTF-8?q?g=20issues?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- openhands/controller/stuck.py | 7 +++++-- tests/unit/test_is_stuck.py | 30 +++++++++++++++++++----------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py index 53b38c77ea9b..dbb18a9b4f2c 100644 --- a/openhands/controller/stuck.py +++ b/openhands/controller/stuck.py @@ -39,11 +39,14 @@ def is_stuck(self, headless_mode: bool = True): # In interactive mode, only look at history after the last user message last_user_msg_idx = -1 for i, event in enumerate(reversed(self.state.history)): - if isinstance(event, MessageAction) and event.source == EventSource.USER: + if ( + isinstance(event, MessageAction) + and event.source == EventSource.USER + ): last_user_msg_idx = len(self.state.history) - i - 1 break - history_to_check = self.state.history[last_user_msg_idx + 1:] + history_to_check = self.state.history[last_user_msg_idx + 1 :] else: # In headless mode, look at all history history_to_check = self.state.history diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py index 412379c22681..80a2acad4843 100644 --- a/tests/unit/test_is_stuck.py +++ b/tests/unit/test_is_stuck.py @@ -114,18 +114,22 @@ def test_history_too_short(self, stuck_detector: StuckDetector): assert stuck_detector.is_stuck(headless_mode=True) is False - def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckDetector): + def test_interactive_mode_resets_after_user_message( + self, stuck_detector: StuckDetector + ): state = stuck_detector.state - + # First add some actions that would be stuck in non-UI mode for i in range(4): cmd_action = CmdRunAction(command='ls') cmd_action._id = i state.history.append(cmd_action) - cmd_observation = CmdOutputObservation(content='', command='ls', command_id=i) + cmd_observation = CmdOutputObservation( + content='', command='ls', command_id=i + ) cmd_observation._cause = cmd_action._id state.history.append(cmd_observation) - + # In headless mode, this should be stuck assert stuck_detector.is_stuck(headless_mode=True) is True # with the UI, it will ALSO be stuck initially @@ -134,11 +138,11 @@ def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckD message_action = MessageAction(content='Hello', wait_for_response=False) message_action._source = EventSource.USER state.history.append(message_action) - + # In not-headless mode, this should not be stuck because we ignore history before user message assert stuck_detector.is_stuck(headless_mode=False) is False - # But in headless mode, this should be still stuck because user messages do not count + # But in headless mode, this should be still stuck because user messages do not count assert stuck_detector.is_stuck(headless_mode=True) is True # Add two more identical actions - still not stuck because we need at least 3 @@ -146,21 +150,25 @@ def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckD cmd_action = CmdRunAction(command='ls') cmd_action._id = i + 4 state.history.append(cmd_action) - cmd_observation = CmdOutputObservation(content='', command='ls', command_id=i + 4) + cmd_observation = CmdOutputObservation( + content='', command='ls', command_id=i + 4 + ) cmd_observation._cause = cmd_action._id state.history.append(cmd_observation) - + assert stuck_detector.is_stuck(headless_mode=False) is False - + # Add two more identical actions - now it should be stuck for i in range(2): cmd_action = CmdRunAction(command='ls') cmd_action._id = i + 6 state.history.append(cmd_action) - cmd_observation = CmdOutputObservation(content='', command='ls', command_id=i + 6) + cmd_observation = CmdOutputObservation( + content='', command='ls', command_id=i + 6 + ) cmd_observation._cause = cmd_action._id state.history.append(cmd_observation) - + assert stuck_detector.is_stuck(headless_mode=False) is True def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetector): From 32913183d7d32124404b8a478b4964293e3d2ed4 Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Sat, 14 Dec 2024 19:00:02 +0100 Subject: [PATCH 15/16] Update tests/unit/test_is_stuck.py --- tests/unit/test_is_stuck.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py index 80a2acad4843..89f71e3627a1 100644 --- a/tests/unit/test_is_stuck.py +++ b/tests/unit/test_is_stuck.py @@ -132,6 +132,7 @@ def test_interactive_mode_resets_after_user_message( # In headless mode, this should be stuck assert stuck_detector.is_stuck(headless_mode=True) is True + # with the UI, it will ALSO be stuck initially assert stuck_detector.is_stuck(headless_mode=False) is True # Add a user message From 63cb544d5479f2989bc9c476b5e948ae85d32c51 Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Sat, 14 Dec 2024 19:00:22 +0100 Subject: [PATCH 16/16] Update tests/unit/test_is_stuck.py --- tests/unit/test_is_stuck.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py index 89f71e3627a1..4b9a4e820ee5 100644 --- a/tests/unit/test_is_stuck.py +++ b/tests/unit/test_is_stuck.py @@ -135,6 +135,7 @@ def test_interactive_mode_resets_after_user_message( # with the UI, it will ALSO be stuck initially assert stuck_detector.is_stuck(headless_mode=False) is True + # Add a user message message_action = MessageAction(content='Hello', wait_for_response=False) message_action._source = EventSource.USER