From 212787c07276dc732b07cc3f72c74e0861427b51 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Sat, 14 Dec 2024 16:01:02 +0000
Subject: [PATCH 01/16] refactor: remove unused almost_stuck functionality

- Remove almost_stuck field from State class
- Remove almost_stuck counter from StuckDetector
- Simplify stuck detection logic to focus on actual loop detection
- Update tests to remove almost_stuck assertions
---
 openhands/controller/agent_controller.py |  2 +-
 openhands/controller/state/state.py      |  2 +-
 openhands/controller/stuck.py            | 36 ++++--------------------
 tests/unit/test_is_stuck.py              | 21 --------------
 4 files changed, 8 insertions(+), 53 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 8078855330cf..9caaa90eb963 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -319,7 +319,7 @@ async def _handle_message_action(self, action: MessageAction) -> None:
 
     def _reset(self) -> None:
         """Resets the agent controller"""
-        self.almost_stuck = 0
+
         self._pending_action = None
         self.agent.reset()
 
diff --git a/openhands/controller/state/state.py b/openhands/controller/state/state.py
index 73aa4e666ed7..b2e07b294106 100644
--- a/openhands/controller/state/state.py
+++ b/openhands/controller/state/state.py
@@ -94,7 +94,7 @@ class State:
     end_id: int = -1
     # truncation_id tracks where to load history after context window truncation
     truncation_id: int = -1
-    almost_stuck: int = 0
+
     delegates: dict[tuple[int, int], tuple[str, str]] = field(default_factory=dict)
     # NOTE: This will never be used by the controller, but it can be used by different
     # evaluation tasks to store extra data needed to track the progress/state of the task.
diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py
index 0eb0f4c893ca..79ac39390239 100644
--- a/openhands/controller/stuck.py
+++ b/openhands/controller/stuck.py
@@ -81,43 +81,19 @@ def _is_stuck_repeating_action_observation(self, last_actions, last_observations
         # it takes 4 actions and 4 observations to detect a loop
         # assert len(last_actions) == 4 and len(last_observations) == 4
 
-        # reset almost_stuck reminder
-        self.state.almost_stuck = 0
-
-        # almost stuck? if two actions, obs are the same, we're almost stuck
-        if len(last_actions) >= 2 and len(last_observations) >= 2:
+        # Check for a loop of 4 identical action-observation pairs
+        if len(last_actions) == 4 and len(last_observations) == 4:
             actions_equal = all(
-                self._eq_no_pid(last_actions[0], action) for action in last_actions[:2]
+                self._eq_no_pid(last_actions[0], action) for action in last_actions
             )
             observations_equal = all(
                 self._eq_no_pid(last_observations[0], observation)
-                for observation in last_observations[:2]
+                for observation in last_observations
             )
 
-            # the last two actions and obs are the same?
             if actions_equal and observations_equal:
-                self.state.almost_stuck = 2
-
-            # the last three actions and observations are the same?
-            if len(last_actions) >= 3 and len(last_observations) >= 3:
-                if (
-                    actions_equal
-                    and observations_equal
-                    and self._eq_no_pid(last_actions[0], last_actions[2])
-                    and self._eq_no_pid(last_observations[0], last_observations[2])
-                ):
-                    self.state.almost_stuck = 1
-
-            if len(last_actions) == 4 and len(last_observations) == 4:
-                if (
-                    actions_equal
-                    and observations_equal
-                    and self._eq_no_pid(last_actions[0], last_actions[3])
-                    and self._eq_no_pid(last_observations[0], last_observations[3])
-                ):
-                    logger.warning('Action, Observation loop detected')
-                    self.state.almost_stuck = 0
-                    return True
+                logger.warning('Action, Observation loop detected')
+                return True
 
         return False
 
diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py
index 197d6d8462b7..c34246b86140 100644
--- a/tests/unit/test_is_stuck.py
+++ b/tests/unit/test_is_stuck.py
@@ -149,7 +149,6 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect
         # 8 events
 
         assert stuck_detector.is_stuck() is False
-        assert stuck_detector.state.almost_stuck == 2
 
         cmd_action_3 = CmdRunAction(command='ls')
         cmd_action_3._id = 3
@@ -160,20 +159,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect
         # 10 events
 
         assert len(state.history) == 10
-        assert (
-            len(state.history) == 10
-        )  # Adjusted since history is a list and the controller is not running
-
-        # FIXME are we still testing this without this test?
-        # assert (
-        #    len(
-        #        get_pairs_from_events(state.history)
-        #    )
-        #    == 5
-        # )
-
         assert stuck_detector.is_stuck() is False
-        assert stuck_detector.state.almost_stuck == 1
 
         cmd_action_4 = CmdRunAction(command='ls')
         cmd_action_4._id = 4
@@ -184,16 +170,9 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect
         # 12 events
 
         assert len(state.history) == 12
-        # assert (
-        #    len(
-        #        get_pairs_from_events(state.history)
-        #    )
-        #    == 6
-        # )
 
         with patch('logging.Logger.warning') as mock_warning:
             assert stuck_detector.is_stuck() is True
-            assert stuck_detector.state.almost_stuck == 0
             mock_warning.assert_called_once_with('Action, Observation loop detected')
 
     def test_is_stuck_repeating_action_error(self, stuck_detector: StuckDetector):

From 1e739bddb74076e8e9bdad079570596a8da9005b Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Sat, 14 Dec 2024 16:46:07 +0000
Subject: [PATCH 02/16] fix: improve stuck detection in UI mode

- Add UI mode awareness to stuck detection
- Only consider history after last user message in UI mode
- Keep existing behavior in headless mode
- Add comprehensive tests for both modes

Fix: #5480
---
 openhands/controller/agent_controller.py |  9 ++-
 openhands/controller/stuck.py            | 22 +++++--
 tests/unit/test_is_stuck.py              | 84 ++++++++++++++++++------
 3 files changed, 87 insertions(+), 28 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 9caaa90eb963..d91e4bdf6b7c 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -902,17 +902,20 @@ def _apply_conversation_window(self, events: list[Event]) -> list[Event]:
 
         return kept_events
 
-    def _is_stuck(self) -> bool:
+    def _is_stuck(self, ui_mode: bool | None = None) -> bool:
         """Checks if the agent or its delegate is stuck in a loop.
 
+        Args:
+            ui_mode: Optional override for UI mode. If not provided, uses not self.headless_mode.
+
         Returns:
             bool: True if the agent is stuck, False otherwise.
         """
         # check if delegate stuck
-        if self.delegate and self.delegate._is_stuck():
+        if self.delegate and self.delegate._is_stuck(ui_mode):
             return True
 
-        return self._stuck_detector.is_stuck()
+        return self._stuck_detector.is_stuck(ui_mode if ui_mode is not None else not self.headless_mode)
 
     def __repr__(self):
         return (
diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py
index 79ac39390239..fb520f161d6c 100644
--- a/openhands/controller/stuck.py
+++ b/openhands/controller/stuck.py
@@ -24,16 +24,26 @@ class StuckDetector:
     def __init__(self, state: State):
         self.state = state
 
-    def is_stuck(self):
-        # filter out MessageAction with source='user' from history
+    def is_stuck(self, ui_mode: bool = False):
+        if ui_mode:
+            # In UI mode, only look at history after the last user message
+            last_user_msg_idx = -1
+            for i, event in enumerate(self.state.history):
+                if isinstance(event, MessageAction) and event.source == EventSource.USER:
+                    last_user_msg_idx = i
+
+            history_to_check = self.state.history[last_user_msg_idx + 1:]
+        else:
+            # In headless mode, look at all history
+            history_to_check = self.state.history
+
+        # Filter out user messages and null events
         filtered_history = [
             event
-            for event in self.state.history
+            for event in history_to_check
             if not (
                 (isinstance(event, MessageAction) and event.source == EventSource.USER)
-                or
-                # there might be some NullAction or NullObservation in the history at least for now
-                isinstance(event, (NullAction, NullObservation))
+                or isinstance(event, (NullAction, NullObservation))
             )
         ]
 
diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py
index c34246b86140..f4778e70b023 100644
--- a/tests/unit/test_is_stuck.py
+++ b/tests/unit/test_is_stuck.py
@@ -112,7 +112,53 @@ def test_history_too_short(self, stuck_detector: StuckDetector):
         # cmd_observation._cause = cmd_action._id
         state.history.append(cmd_observation)
 
-        assert stuck_detector.is_stuck() is False
+        assert stuck_detector.is_stuck(ui_mode=False) is False
+        assert stuck_detector.is_stuck(ui_mode=True) is False
+
+    def test_ui_mode_resets_after_user_message(self, stuck_detector: StuckDetector):
+        state = stuck_detector.state
+        
+        # First add some actions that would be stuck in non-UI mode
+        for i in range(4):
+            cmd_action = CmdRunAction(command='ls')
+            cmd_action._id = i
+            state.history.append(cmd_action)
+            cmd_observation = CmdOutputObservation(content='', command='ls', command_id=i)
+            cmd_observation._cause = cmd_action._id
+            state.history.append(cmd_observation)
+        
+        # In non-UI mode, this should be stuck
+        assert stuck_detector.is_stuck(ui_mode=False) is True
+        
+        # Add a user message
+        message_action = MessageAction(content='Hello', wait_for_response=False)
+        message_action._source = EventSource.USER
+        state.history.append(message_action)
+        
+        # In UI mode, this should not be stuck because we ignore history before user message
+        assert stuck_detector.is_stuck(ui_mode=True) is False
+        
+        # Add two more identical actions - still not stuck because we need at least 3
+        for i in range(2):
+            cmd_action = CmdRunAction(command='ls')
+            cmd_action._id = i + 4
+            state.history.append(cmd_action)
+            cmd_observation = CmdOutputObservation(content='', command='ls', command_id=i + 4)
+            cmd_observation._cause = cmd_action._id
+            state.history.append(cmd_observation)
+        
+        assert stuck_detector.is_stuck(ui_mode=True) is False
+        
+        # Add two more identical actions - now it should be stuck
+        for i in range(2):
+            cmd_action = CmdRunAction(command='ls')
+            cmd_action._id = i + 6
+            state.history.append(cmd_action)
+            cmd_observation = CmdOutputObservation(content='', command='ls', command_id=i + 6)
+            cmd_observation._cause = cmd_action._id
+            state.history.append(cmd_observation)
+        
+        assert stuck_detector.is_stuck(ui_mode=True) is True
 
     def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetector):
         state = stuck_detector.state
@@ -148,7 +194,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect
         state.history.append(message_null_observation)
         # 8 events
 
-        assert stuck_detector.is_stuck() is False
+        assert stuck_detector.is_stuck(ui_mode=False) is False
 
         cmd_action_3 = CmdRunAction(command='ls')
         cmd_action_3._id = 3
@@ -159,7 +205,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect
         # 10 events
 
         assert len(state.history) == 10
-        assert stuck_detector.is_stuck() is False
+        assert stuck_detector.is_stuck(ui_mode=False) is False
 
         cmd_action_4 = CmdRunAction(command='ls')
         cmd_action_4._id = 4
@@ -172,7 +218,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect
         assert len(state.history) == 12
 
         with patch('logging.Logger.warning') as mock_warning:
-            assert stuck_detector.is_stuck() is True
+            assert stuck_detector.is_stuck(ui_mode=False) is True
             mock_warning.assert_called_once_with('Action, Observation loop detected')
 
     def test_is_stuck_repeating_action_error(self, stuck_detector: StuckDetector):
@@ -224,7 +270,7 @@ def test_is_stuck_repeating_action_error(self, stuck_detector: StuckDetector):
         # 12 events
 
         with patch('logging.Logger.warning') as mock_warning:
-            assert stuck_detector.is_stuck() is True
+            assert stuck_detector.is_stuck(ui_mode=False) is True
             mock_warning.assert_called_once_with(
                 'Action, ErrorObservation loop detected'
             )
@@ -238,7 +284,7 @@ def test_is_stuck_invalid_syntax_error(self, stuck_detector: StuckDetector):
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck() is True
+            assert stuck_detector.is_stuck(ui_mode=False) is True
 
     def test_is_not_stuck_invalid_syntax_error_random_lines(
         self, stuck_detector: StuckDetector
@@ -251,7 +297,7 @@ def test_is_not_stuck_invalid_syntax_error_random_lines(
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck() is False
+            assert stuck_detector.is_stuck(ui_mode=False) is False
 
     def test_is_not_stuck_invalid_syntax_error_only_three_incidents(
         self, stuck_detector: StuckDetector
@@ -265,7 +311,7 @@ def test_is_not_stuck_invalid_syntax_error_only_three_incidents(
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck() is False
+            assert stuck_detector.is_stuck(ui_mode=False) is False
 
     def test_is_stuck_incomplete_input_error(self, stuck_detector: StuckDetector):
         state = stuck_detector.state
@@ -276,7 +322,7 @@ def test_is_stuck_incomplete_input_error(self, stuck_detector: StuckDetector):
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck() is True
+            assert stuck_detector.is_stuck(ui_mode=False) is True
 
     def test_is_not_stuck_incomplete_input_error(self, stuck_detector: StuckDetector):
         state = stuck_detector.state
@@ -287,7 +333,7 @@ def test_is_not_stuck_incomplete_input_error(self, stuck_detector: StuckDetector
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck() is False
+            assert stuck_detector.is_stuck(ui_mode=False) is False
 
     def test_is_not_stuck_ipython_unterminated_string_error_random_lines(
         self, stuck_detector: StuckDetector
@@ -296,7 +342,7 @@ def test_is_not_stuck_ipython_unterminated_string_error_random_lines(
         self._impl_unterminated_string_error_events(state, random_line=True)
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck() is False
+            assert stuck_detector.is_stuck(ui_mode=False) is False
 
     def test_is_not_stuck_ipython_unterminated_string_error_only_three_incidents(
         self, stuck_detector: StuckDetector
@@ -307,7 +353,7 @@ def test_is_not_stuck_ipython_unterminated_string_error_only_three_incidents(
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck() is False
+            assert stuck_detector.is_stuck(ui_mode=False) is False
 
     def test_is_stuck_ipython_unterminated_string_error(
         self, stuck_detector: StuckDetector
@@ -316,7 +362,7 @@ def test_is_stuck_ipython_unterminated_string_error(
         self._impl_unterminated_string_error_events(state, random_line=False)
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck() is True
+            assert stuck_detector.is_stuck(ui_mode=False) is True
 
     def test_is_not_stuck_ipython_syntax_error_not_at_end(
         self, stuck_detector: StuckDetector
@@ -361,7 +407,7 @@ def test_is_not_stuck_ipython_syntax_error_not_at_end(
         state.history.append(ipython_observation_4)
 
         with patch('logging.Logger.warning') as mock_warning:
-            assert stuck_detector.is_stuck() is False
+            assert stuck_detector.is_stuck(ui_mode=False) is False
             mock_warning.assert_not_called()
 
     def test_is_stuck_repeating_action_observation_pattern(
@@ -430,7 +476,7 @@ def test_is_stuck_repeating_action_observation_pattern(
         state.history.append(read_observation_3)
 
         with patch('logging.Logger.warning') as mock_warning:
-            assert stuck_detector.is_stuck() is True
+            assert stuck_detector.is_stuck(ui_mode=False) is True
             mock_warning.assert_called_once_with('Action, Observation pattern detected')
 
     def test_is_stuck_not_stuck(self, stuck_detector: StuckDetector):
@@ -496,7 +542,7 @@ def test_is_stuck_not_stuck(self, stuck_detector: StuckDetector):
         # read_observation_3._cause = read_action_3._id
         state.history.append(read_observation_3)
 
-        assert stuck_detector.is_stuck() is False
+        assert stuck_detector.is_stuck(ui_mode=False) is False
 
     def test_is_stuck_monologue(self, stuck_detector):
         state = stuck_detector.state
@@ -526,7 +572,7 @@ def test_is_stuck_monologue(self, stuck_detector):
         message_action_6._source = EventSource.AGENT
         state.history.append(message_action_6)
 
-        assert stuck_detector.is_stuck()
+        assert stuck_detector.is_stuck(ui_mode=False)
 
         # Add an observation event between the repeated message actions
         cmd_output_observation = CmdOutputObservation(
@@ -546,7 +592,7 @@ def test_is_stuck_monologue(self, stuck_detector):
         state.history.append(message_action_8)
 
         with patch('logging.Logger.warning'):
-            assert not stuck_detector.is_stuck()
+            assert not stuck_detector.is_stuck(ui_mode=False)
 
 
 class TestAgentController:
@@ -563,4 +609,4 @@ def controller(self):
     def test_is_stuck_delegate_stuck(self, controller: AgentController):
         controller.delegate = Mock()
         controller.delegate._is_stuck.return_value = True
-        assert controller._is_stuck() is True
+        assert controller._is_stuck(ui_mode=False) is True

From 40bb3184c769f01394000cfad386f6d08edbf289 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Sat, 14 Dec 2024 16:57:56 +0000
Subject: [PATCH 03/16] fix: improve stuck detection in interactive mode

- Use headless_mode flag to determine stuck detection behavior
- In interactive mode (not headless), only consider history after last user message
- Keep existing behavior in headless mode
- Add comprehensive tests for both modes

Fix: #5480
---
 openhands/controller/agent_controller.py |  9 ++--
 openhands/controller/stuck.py            | 15 +++++--
 tests/unit/test_is_stuck.py              | 52 ++++++++++++------------
 3 files changed, 41 insertions(+), 35 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index d91e4bdf6b7c..7bc095ed99b3 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -902,20 +902,17 @@ def _apply_conversation_window(self, events: list[Event]) -> list[Event]:
 
         return kept_events
 
-    def _is_stuck(self, ui_mode: bool | None = None) -> bool:
+    def _is_stuck(self) -> bool:
         """Checks if the agent or its delegate is stuck in a loop.
 
-        Args:
-            ui_mode: Optional override for UI mode. If not provided, uses not self.headless_mode.
-
         Returns:
             bool: True if the agent is stuck, False otherwise.
         """
         # check if delegate stuck
-        if self.delegate and self.delegate._is_stuck(ui_mode):
+        if self.delegate and self.delegate._is_stuck():
             return True
 
-        return self._stuck_detector.is_stuck(ui_mode if ui_mode is not None else not self.headless_mode)
+        return self._stuck_detector.is_stuck(not self.headless_mode)
 
     def __repr__(self):
         return (
diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py
index fb520f161d6c..d90cc0d5c73c 100644
--- a/openhands/controller/stuck.py
+++ b/openhands/controller/stuck.py
@@ -24,9 +24,18 @@ class StuckDetector:
     def __init__(self, state: State):
         self.state = state
 
-    def is_stuck(self, ui_mode: bool = False):
-        if ui_mode:
-            # In UI mode, only look at history after the last user message
+    def is_stuck(self, interactive_mode: bool = False):
+        """Checks if the agent is stuck in a loop.
+
+        Args:
+            interactive_mode: If True (not headless), only consider history after last user message.
+                           If False (headless), consider all history.
+
+        Returns:
+            bool: True if the agent is stuck in a loop, False otherwise.
+        """
+        if interactive_mode:
+            # In interactive mode, only look at history after the last user message
             last_user_msg_idx = -1
             for i, event in enumerate(self.state.history):
                 if isinstance(event, MessageAction) and event.source == EventSource.USER:
diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py
index f4778e70b023..1ab8927387f5 100644
--- a/tests/unit/test_is_stuck.py
+++ b/tests/unit/test_is_stuck.py
@@ -112,10 +112,10 @@ def test_history_too_short(self, stuck_detector: StuckDetector):
         # cmd_observation._cause = cmd_action._id
         state.history.append(cmd_observation)
 
-        assert stuck_detector.is_stuck(ui_mode=False) is False
-        assert stuck_detector.is_stuck(ui_mode=True) is False
+        assert stuck_detector.is_stuck(interactive_mode=False) is False
+        assert stuck_detector.is_stuck(interactive_mode=True) is False
 
-    def test_ui_mode_resets_after_user_message(self, stuck_detector: StuckDetector):
+    def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckDetector):
         state = stuck_detector.state
         
         # First add some actions that would be stuck in non-UI mode
@@ -128,15 +128,15 @@ def test_ui_mode_resets_after_user_message(self, stuck_detector: StuckDetector):
             state.history.append(cmd_observation)
         
         # In non-UI mode, this should be stuck
-        assert stuck_detector.is_stuck(ui_mode=False) is True
+        assert stuck_detector.is_stuck(interactive_mode=False) is True
         
         # Add a user message
         message_action = MessageAction(content='Hello', wait_for_response=False)
         message_action._source = EventSource.USER
         state.history.append(message_action)
         
-        # In UI mode, this should not be stuck because we ignore history before user message
-        assert stuck_detector.is_stuck(ui_mode=True) is False
+        # In interactive mode, this should not be stuck because we ignore history before user message
+        assert stuck_detector.is_stuck(interactive_mode=True) is False
         
         # Add two more identical actions - still not stuck because we need at least 3
         for i in range(2):
@@ -147,7 +147,7 @@ def test_ui_mode_resets_after_user_message(self, stuck_detector: StuckDetector):
             cmd_observation._cause = cmd_action._id
             state.history.append(cmd_observation)
         
-        assert stuck_detector.is_stuck(ui_mode=True) is False
+        assert stuck_detector.is_stuck(interactive_mode=True) is False
         
         # Add two more identical actions - now it should be stuck
         for i in range(2):
@@ -158,7 +158,7 @@ def test_ui_mode_resets_after_user_message(self, stuck_detector: StuckDetector):
             cmd_observation._cause = cmd_action._id
             state.history.append(cmd_observation)
         
-        assert stuck_detector.is_stuck(ui_mode=True) is True
+        assert stuck_detector.is_stuck(interactive_mode=True) is True
 
     def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetector):
         state = stuck_detector.state
@@ -194,7 +194,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect
         state.history.append(message_null_observation)
         # 8 events
 
-        assert stuck_detector.is_stuck(ui_mode=False) is False
+        assert stuck_detector.is_stuck(interactive_mode=False) is False
 
         cmd_action_3 = CmdRunAction(command='ls')
         cmd_action_3._id = 3
@@ -205,7 +205,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect
         # 10 events
 
         assert len(state.history) == 10
-        assert stuck_detector.is_stuck(ui_mode=False) is False
+        assert stuck_detector.is_stuck(interactive_mode=False) is False
 
         cmd_action_4 = CmdRunAction(command='ls')
         cmd_action_4._id = 4
@@ -218,7 +218,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect
         assert len(state.history) == 12
 
         with patch('logging.Logger.warning') as mock_warning:
-            assert stuck_detector.is_stuck(ui_mode=False) is True
+            assert stuck_detector.is_stuck(interactive_mode=False) is True
             mock_warning.assert_called_once_with('Action, Observation loop detected')
 
     def test_is_stuck_repeating_action_error(self, stuck_detector: StuckDetector):
@@ -270,7 +270,7 @@ def test_is_stuck_repeating_action_error(self, stuck_detector: StuckDetector):
         # 12 events
 
         with patch('logging.Logger.warning') as mock_warning:
-            assert stuck_detector.is_stuck(ui_mode=False) is True
+            assert stuck_detector.is_stuck(interactive_mode=False) is True
             mock_warning.assert_called_once_with(
                 'Action, ErrorObservation loop detected'
             )
@@ -284,7 +284,7 @@ def test_is_stuck_invalid_syntax_error(self, stuck_detector: StuckDetector):
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(ui_mode=False) is True
+            assert stuck_detector.is_stuck(interactive_mode=False) is True
 
     def test_is_not_stuck_invalid_syntax_error_random_lines(
         self, stuck_detector: StuckDetector
@@ -297,7 +297,7 @@ def test_is_not_stuck_invalid_syntax_error_random_lines(
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(ui_mode=False) is False
+            assert stuck_detector.is_stuck(interactive_mode=False) is False
 
     def test_is_not_stuck_invalid_syntax_error_only_three_incidents(
         self, stuck_detector: StuckDetector
@@ -311,7 +311,7 @@ def test_is_not_stuck_invalid_syntax_error_only_three_incidents(
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(ui_mode=False) is False
+            assert stuck_detector.is_stuck(interactive_mode=False) is False
 
     def test_is_stuck_incomplete_input_error(self, stuck_detector: StuckDetector):
         state = stuck_detector.state
@@ -322,7 +322,7 @@ def test_is_stuck_incomplete_input_error(self, stuck_detector: StuckDetector):
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(ui_mode=False) is True
+            assert stuck_detector.is_stuck(interactive_mode=False) is True
 
     def test_is_not_stuck_incomplete_input_error(self, stuck_detector: StuckDetector):
         state = stuck_detector.state
@@ -333,7 +333,7 @@ def test_is_not_stuck_incomplete_input_error(self, stuck_detector: StuckDetector
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(ui_mode=False) is False
+            assert stuck_detector.is_stuck(interactive_mode=False) is False
 
     def test_is_not_stuck_ipython_unterminated_string_error_random_lines(
         self, stuck_detector: StuckDetector
@@ -342,7 +342,7 @@ def test_is_not_stuck_ipython_unterminated_string_error_random_lines(
         self._impl_unterminated_string_error_events(state, random_line=True)
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(ui_mode=False) is False
+            assert stuck_detector.is_stuck(interactive_mode=False) is False
 
     def test_is_not_stuck_ipython_unterminated_string_error_only_three_incidents(
         self, stuck_detector: StuckDetector
@@ -353,7 +353,7 @@ def test_is_not_stuck_ipython_unterminated_string_error_only_three_incidents(
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(ui_mode=False) is False
+            assert stuck_detector.is_stuck(interactive_mode=False) is False
 
     def test_is_stuck_ipython_unterminated_string_error(
         self, stuck_detector: StuckDetector
@@ -362,7 +362,7 @@ def test_is_stuck_ipython_unterminated_string_error(
         self._impl_unterminated_string_error_events(state, random_line=False)
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(ui_mode=False) is True
+            assert stuck_detector.is_stuck(interactive_mode=False) is True
 
     def test_is_not_stuck_ipython_syntax_error_not_at_end(
         self, stuck_detector: StuckDetector
@@ -407,7 +407,7 @@ def test_is_not_stuck_ipython_syntax_error_not_at_end(
         state.history.append(ipython_observation_4)
 
         with patch('logging.Logger.warning') as mock_warning:
-            assert stuck_detector.is_stuck(ui_mode=False) is False
+            assert stuck_detector.is_stuck(interactive_mode=False) is False
             mock_warning.assert_not_called()
 
     def test_is_stuck_repeating_action_observation_pattern(
@@ -476,7 +476,7 @@ def test_is_stuck_repeating_action_observation_pattern(
         state.history.append(read_observation_3)
 
         with patch('logging.Logger.warning') as mock_warning:
-            assert stuck_detector.is_stuck(ui_mode=False) is True
+            assert stuck_detector.is_stuck(interactive_mode=False) is True
             mock_warning.assert_called_once_with('Action, Observation pattern detected')
 
     def test_is_stuck_not_stuck(self, stuck_detector: StuckDetector):
@@ -542,7 +542,7 @@ def test_is_stuck_not_stuck(self, stuck_detector: StuckDetector):
         # read_observation_3._cause = read_action_3._id
         state.history.append(read_observation_3)
 
-        assert stuck_detector.is_stuck(ui_mode=False) is False
+        assert stuck_detector.is_stuck(interactive_mode=False) is False
 
     def test_is_stuck_monologue(self, stuck_detector):
         state = stuck_detector.state
@@ -572,7 +572,7 @@ def test_is_stuck_monologue(self, stuck_detector):
         message_action_6._source = EventSource.AGENT
         state.history.append(message_action_6)
 
-        assert stuck_detector.is_stuck(ui_mode=False)
+        assert stuck_detector.is_stuck(interactive_mode=False)
 
         # Add an observation event between the repeated message actions
         cmd_output_observation = CmdOutputObservation(
@@ -592,7 +592,7 @@ def test_is_stuck_monologue(self, stuck_detector):
         state.history.append(message_action_8)
 
         with patch('logging.Logger.warning'):
-            assert not stuck_detector.is_stuck(ui_mode=False)
+            assert not stuck_detector.is_stuck(interactive_mode=False)
 
 
 class TestAgentController:
@@ -609,4 +609,4 @@ def controller(self):
     def test_is_stuck_delegate_stuck(self, controller: AgentController):
         controller.delegate = Mock()
         controller.delegate._is_stuck.return_value = True
-        assert controller._is_stuck(ui_mode=False) is True
+        assert controller._is_stuck() is True

From 8589b0a9e3bdb784cf21924ad6247890a2f183de Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Sat, 14 Dec 2024 17:11:32 +0000
Subject: [PATCH 04/16] refactor: simplify stuck detection to use headless_mode
 directly

- Use not_headless parameter to match AgentController's headless_mode
- Remove unnecessary interactive_mode concept
- Update tests to use consistent terminology
- Keep behavior the same, just clearer naming
---
 openhands/controller/stuck.py |  9 +++---
 tests/unit/test_is_stuck.py   | 52 +++++++++++++++++------------------
 2 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py
index d90cc0d5c73c..d753897c8c83 100644
--- a/openhands/controller/stuck.py
+++ b/openhands/controller/stuck.py
@@ -24,17 +24,18 @@ class StuckDetector:
     def __init__(self, state: State):
         self.state = state
 
-    def is_stuck(self, interactive_mode: bool = False):
+    def is_stuck(self, not_headless: bool = False):
         """Checks if the agent is stuck in a loop.
 
         Args:
-            interactive_mode: If True (not headless), only consider history after last user message.
-                           If False (headless), consider all history.
+            not_headless: Matches AgentController's not headless_mode.
+                         If True: Consider only history after last user message
+                         If False: Consider all history (headless mode)
 
         Returns:
             bool: True if the agent is stuck in a loop, False otherwise.
         """
-        if interactive_mode:
+        if not_headless:
             # In interactive mode, only look at history after the last user message
             last_user_msg_idx = -1
             for i, event in enumerate(self.state.history):
diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py
index 1ab8927387f5..af6bdca5298e 100644
--- a/tests/unit/test_is_stuck.py
+++ b/tests/unit/test_is_stuck.py
@@ -112,10 +112,10 @@ def test_history_too_short(self, stuck_detector: StuckDetector):
         # cmd_observation._cause = cmd_action._id
         state.history.append(cmd_observation)
 
-        assert stuck_detector.is_stuck(interactive_mode=False) is False
-        assert stuck_detector.is_stuck(interactive_mode=True) is False
+        assert stuck_detector.is_stuck(not_headless=False) is False
+        assert stuck_detector.is_stuck(not_headless=True) is False
 
-    def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckDetector):
+    def test_not_headless_resets_after_user_message(self, stuck_detector: StuckDetector):
         state = stuck_detector.state
         
         # First add some actions that would be stuck in non-UI mode
@@ -127,16 +127,16 @@ def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckD
             cmd_observation._cause = cmd_action._id
             state.history.append(cmd_observation)
         
-        # In non-UI mode, this should be stuck
-        assert stuck_detector.is_stuck(interactive_mode=False) is True
+        # In headless mode, this should be stuck
+        assert stuck_detector.is_stuck(not_headless=False) is True
         
         # Add a user message
         message_action = MessageAction(content='Hello', wait_for_response=False)
         message_action._source = EventSource.USER
         state.history.append(message_action)
         
-        # In interactive mode, this should not be stuck because we ignore history before user message
-        assert stuck_detector.is_stuck(interactive_mode=True) is False
+        # In not-headless mode, this should not be stuck because we ignore history before user message
+        assert stuck_detector.is_stuck(not_headless=True) is False
         
         # Add two more identical actions - still not stuck because we need at least 3
         for i in range(2):
@@ -147,7 +147,7 @@ def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckD
             cmd_observation._cause = cmd_action._id
             state.history.append(cmd_observation)
         
-        assert stuck_detector.is_stuck(interactive_mode=True) is False
+        assert stuck_detector.is_stuck(not_headless=True) is False
         
         # Add two more identical actions - now it should be stuck
         for i in range(2):
@@ -158,7 +158,7 @@ def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckD
             cmd_observation._cause = cmd_action._id
             state.history.append(cmd_observation)
         
-        assert stuck_detector.is_stuck(interactive_mode=True) is True
+        assert stuck_detector.is_stuck(not_headless=True) is True
 
     def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetector):
         state = stuck_detector.state
@@ -194,7 +194,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect
         state.history.append(message_null_observation)
         # 8 events
 
-        assert stuck_detector.is_stuck(interactive_mode=False) is False
+        assert stuck_detector.is_stuck(not_headless=False) is False
 
         cmd_action_3 = CmdRunAction(command='ls')
         cmd_action_3._id = 3
@@ -205,7 +205,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect
         # 10 events
 
         assert len(state.history) == 10
-        assert stuck_detector.is_stuck(interactive_mode=False) is False
+        assert stuck_detector.is_stuck(not_headless=False) is False
 
         cmd_action_4 = CmdRunAction(command='ls')
         cmd_action_4._id = 4
@@ -218,7 +218,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect
         assert len(state.history) == 12
 
         with patch('logging.Logger.warning') as mock_warning:
-            assert stuck_detector.is_stuck(interactive_mode=False) is True
+            assert stuck_detector.is_stuck(not_headless=False) is True
             mock_warning.assert_called_once_with('Action, Observation loop detected')
 
     def test_is_stuck_repeating_action_error(self, stuck_detector: StuckDetector):
@@ -270,7 +270,7 @@ def test_is_stuck_repeating_action_error(self, stuck_detector: StuckDetector):
         # 12 events
 
         with patch('logging.Logger.warning') as mock_warning:
-            assert stuck_detector.is_stuck(interactive_mode=False) is True
+            assert stuck_detector.is_stuck(not_headless=False) is True
             mock_warning.assert_called_once_with(
                 'Action, ErrorObservation loop detected'
             )
@@ -284,7 +284,7 @@ def test_is_stuck_invalid_syntax_error(self, stuck_detector: StuckDetector):
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(interactive_mode=False) is True
+            assert stuck_detector.is_stuck(not_headless=False) is True
 
     def test_is_not_stuck_invalid_syntax_error_random_lines(
         self, stuck_detector: StuckDetector
@@ -297,7 +297,7 @@ def test_is_not_stuck_invalid_syntax_error_random_lines(
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(interactive_mode=False) is False
+            assert stuck_detector.is_stuck(not_headless=False) is False
 
     def test_is_not_stuck_invalid_syntax_error_only_three_incidents(
         self, stuck_detector: StuckDetector
@@ -311,7 +311,7 @@ def test_is_not_stuck_invalid_syntax_error_only_three_incidents(
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(interactive_mode=False) is False
+            assert stuck_detector.is_stuck(not_headless=False) is False
 
     def test_is_stuck_incomplete_input_error(self, stuck_detector: StuckDetector):
         state = stuck_detector.state
@@ -322,7 +322,7 @@ def test_is_stuck_incomplete_input_error(self, stuck_detector: StuckDetector):
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(interactive_mode=False) is True
+            assert stuck_detector.is_stuck(not_headless=False) is True
 
     def test_is_not_stuck_incomplete_input_error(self, stuck_detector: StuckDetector):
         state = stuck_detector.state
@@ -333,7 +333,7 @@ def test_is_not_stuck_incomplete_input_error(self, stuck_detector: StuckDetector
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(interactive_mode=False) is False
+            assert stuck_detector.is_stuck(not_headless=False) is False
 
     def test_is_not_stuck_ipython_unterminated_string_error_random_lines(
         self, stuck_detector: StuckDetector
@@ -342,7 +342,7 @@ def test_is_not_stuck_ipython_unterminated_string_error_random_lines(
         self._impl_unterminated_string_error_events(state, random_line=True)
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(interactive_mode=False) is False
+            assert stuck_detector.is_stuck(not_headless=False) is False
 
     def test_is_not_stuck_ipython_unterminated_string_error_only_three_incidents(
         self, stuck_detector: StuckDetector
@@ -353,7 +353,7 @@ def test_is_not_stuck_ipython_unterminated_string_error_only_three_incidents(
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(interactive_mode=False) is False
+            assert stuck_detector.is_stuck(not_headless=False) is False
 
     def test_is_stuck_ipython_unterminated_string_error(
         self, stuck_detector: StuckDetector
@@ -362,7 +362,7 @@ def test_is_stuck_ipython_unterminated_string_error(
         self._impl_unterminated_string_error_events(state, random_line=False)
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(interactive_mode=False) is True
+            assert stuck_detector.is_stuck(not_headless=False) is True
 
     def test_is_not_stuck_ipython_syntax_error_not_at_end(
         self, stuck_detector: StuckDetector
@@ -407,7 +407,7 @@ def test_is_not_stuck_ipython_syntax_error_not_at_end(
         state.history.append(ipython_observation_4)
 
         with patch('logging.Logger.warning') as mock_warning:
-            assert stuck_detector.is_stuck(interactive_mode=False) is False
+            assert stuck_detector.is_stuck(not_headless=False) is False
             mock_warning.assert_not_called()
 
     def test_is_stuck_repeating_action_observation_pattern(
@@ -476,7 +476,7 @@ def test_is_stuck_repeating_action_observation_pattern(
         state.history.append(read_observation_3)
 
         with patch('logging.Logger.warning') as mock_warning:
-            assert stuck_detector.is_stuck(interactive_mode=False) is True
+            assert stuck_detector.is_stuck(not_headless=False) is True
             mock_warning.assert_called_once_with('Action, Observation pattern detected')
 
     def test_is_stuck_not_stuck(self, stuck_detector: StuckDetector):
@@ -542,7 +542,7 @@ def test_is_stuck_not_stuck(self, stuck_detector: StuckDetector):
         # read_observation_3._cause = read_action_3._id
         state.history.append(read_observation_3)
 
-        assert stuck_detector.is_stuck(interactive_mode=False) is False
+        assert stuck_detector.is_stuck(not_headless=False) is False
 
     def test_is_stuck_monologue(self, stuck_detector):
         state = stuck_detector.state
@@ -572,7 +572,7 @@ def test_is_stuck_monologue(self, stuck_detector):
         message_action_6._source = EventSource.AGENT
         state.history.append(message_action_6)
 
-        assert stuck_detector.is_stuck(interactive_mode=False)
+        assert stuck_detector.is_stuck(not_headless=False)
 
         # Add an observation event between the repeated message actions
         cmd_output_observation = CmdOutputObservation(
@@ -592,7 +592,7 @@ def test_is_stuck_monologue(self, stuck_detector):
         state.history.append(message_action_8)
 
         with patch('logging.Logger.warning'):
-            assert not stuck_detector.is_stuck(interactive_mode=False)
+            assert not stuck_detector.is_stuck(not_headless=False)
 
 
 class TestAgentController:

From 9f17d105b40e52b20fbb0ebcb30f121e3c9ee2a4 Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Sat, 14 Dec 2024 18:15:01 +0100
Subject: [PATCH 05/16] Update openhands/controller/stuck.py

---
 openhands/controller/stuck.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py
index d753897c8c83..7827ac3b9e9f 100644
--- a/openhands/controller/stuck.py
+++ b/openhands/controller/stuck.py
@@ -53,6 +53,7 @@ def is_stuck(self, not_headless: bool = False):
             for event in history_to_check
             if not (
                 (isinstance(event, MessageAction) and event.source == EventSource.USER)
+                # there might be some NullAction or NullObservation in the history at least for now
                 or isinstance(event, (NullAction, NullObservation))
             )
         ]

From 0f6c731eecb0bb4a141baa24533f55c45255f4a3 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Sat, 14 Dec 2024 17:23:25 +0000
Subject: [PATCH 06/16] refactor: use headless_mode consistently

- Use headless_mode parameter to match AgentController
- Remove confusing double negative (not_headless)
- Keep behavior the same, just clearer naming
---
 openhands/controller/stuck.py | 10 ++++----
 tests/unit/test_is_stuck.py   | 48 +++++++++++++++++------------------
 2 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py
index 7827ac3b9e9f..9c5419bbe8b9 100644
--- a/openhands/controller/stuck.py
+++ b/openhands/controller/stuck.py
@@ -24,18 +24,18 @@ class StuckDetector:
     def __init__(self, state: State):
         self.state = state
 
-    def is_stuck(self, not_headless: bool = False):
+    def is_stuck(self, headless_mode: bool = True):
         """Checks if the agent is stuck in a loop.
 
         Args:
-            not_headless: Matches AgentController's not headless_mode.
-                         If True: Consider only history after last user message
-                         If False: Consider all history (headless mode)
+            headless_mode: Matches AgentController's headless_mode.
+                          If True: Consider all history (automated/testing)
+                          If False: Consider only history after last user message (interactive)
 
         Returns:
             bool: True if the agent is stuck in a loop, False otherwise.
         """
-        if not_headless:
+        if not headless_mode:
             # In interactive mode, only look at history after the last user message
             last_user_msg_idx = -1
             for i, event in enumerate(self.state.history):
diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py
index af6bdca5298e..9bb3c44e1248 100644
--- a/tests/unit/test_is_stuck.py
+++ b/tests/unit/test_is_stuck.py
@@ -112,10 +112,10 @@ def test_history_too_short(self, stuck_detector: StuckDetector):
         # cmd_observation._cause = cmd_action._id
         state.history.append(cmd_observation)
 
-        assert stuck_detector.is_stuck(not_headless=False) is False
-        assert stuck_detector.is_stuck(not_headless=True) is False
+        assert stuck_detector.is_stuck(headless_mode=True) is False
+        assert stuck_detector.is_stuck(headless_mode=False) is False
 
-    def test_not_headless_resets_after_user_message(self, stuck_detector: StuckDetector):
+    def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckDetector):
         state = stuck_detector.state
         
         # First add some actions that would be stuck in non-UI mode
@@ -128,7 +128,7 @@ def test_not_headless_resets_after_user_message(self, stuck_detector: StuckDetec
             state.history.append(cmd_observation)
         
         # In headless mode, this should be stuck
-        assert stuck_detector.is_stuck(not_headless=False) is True
+        assert stuck_detector.is_stuck(headless_mode=True) is True
         
         # Add a user message
         message_action = MessageAction(content='Hello', wait_for_response=False)
@@ -136,7 +136,7 @@ def test_not_headless_resets_after_user_message(self, stuck_detector: StuckDetec
         state.history.append(message_action)
         
         # In not-headless mode, this should not be stuck because we ignore history before user message
-        assert stuck_detector.is_stuck(not_headless=True) is False
+        assert stuck_detector.is_stuck(headless_mode=False) is False
         
         # Add two more identical actions - still not stuck because we need at least 3
         for i in range(2):
@@ -147,7 +147,7 @@ def test_not_headless_resets_after_user_message(self, stuck_detector: StuckDetec
             cmd_observation._cause = cmd_action._id
             state.history.append(cmd_observation)
         
-        assert stuck_detector.is_stuck(not_headless=True) is False
+        assert stuck_detector.is_stuck(headless_mode=False) is False
         
         # Add two more identical actions - now it should be stuck
         for i in range(2):
@@ -158,7 +158,7 @@ def test_not_headless_resets_after_user_message(self, stuck_detector: StuckDetec
             cmd_observation._cause = cmd_action._id
             state.history.append(cmd_observation)
         
-        assert stuck_detector.is_stuck(not_headless=True) is True
+        assert stuck_detector.is_stuck(headless_mode=False) is True
 
     def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetector):
         state = stuck_detector.state
@@ -194,7 +194,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect
         state.history.append(message_null_observation)
         # 8 events
 
-        assert stuck_detector.is_stuck(not_headless=False) is False
+        assert stuck_detector.is_stuck(headless_mode=True) is False
 
         cmd_action_3 = CmdRunAction(command='ls')
         cmd_action_3._id = 3
@@ -205,7 +205,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect
         # 10 events
 
         assert len(state.history) == 10
-        assert stuck_detector.is_stuck(not_headless=False) is False
+        assert stuck_detector.is_stuck(headless_mode=True) is False
 
         cmd_action_4 = CmdRunAction(command='ls')
         cmd_action_4._id = 4
@@ -218,7 +218,7 @@ def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetect
         assert len(state.history) == 12
 
         with patch('logging.Logger.warning') as mock_warning:
-            assert stuck_detector.is_stuck(not_headless=False) is True
+            assert stuck_detector.is_stuck(headless_mode=True) is True
             mock_warning.assert_called_once_with('Action, Observation loop detected')
 
     def test_is_stuck_repeating_action_error(self, stuck_detector: StuckDetector):
@@ -270,7 +270,7 @@ def test_is_stuck_repeating_action_error(self, stuck_detector: StuckDetector):
         # 12 events
 
         with patch('logging.Logger.warning') as mock_warning:
-            assert stuck_detector.is_stuck(not_headless=False) is True
+            assert stuck_detector.is_stuck(headless_mode=True) is True
             mock_warning.assert_called_once_with(
                 'Action, ErrorObservation loop detected'
             )
@@ -284,7 +284,7 @@ def test_is_stuck_invalid_syntax_error(self, stuck_detector: StuckDetector):
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(not_headless=False) is True
+            assert stuck_detector.is_stuck(headless_mode=True) is True
 
     def test_is_not_stuck_invalid_syntax_error_random_lines(
         self, stuck_detector: StuckDetector
@@ -297,7 +297,7 @@ def test_is_not_stuck_invalid_syntax_error_random_lines(
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(not_headless=False) is False
+            assert stuck_detector.is_stuck(headless_mode=True) is False
 
     def test_is_not_stuck_invalid_syntax_error_only_three_incidents(
         self, stuck_detector: StuckDetector
@@ -311,7 +311,7 @@ def test_is_not_stuck_invalid_syntax_error_only_three_incidents(
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(not_headless=False) is False
+            assert stuck_detector.is_stuck(headless_mode=True) is False
 
     def test_is_stuck_incomplete_input_error(self, stuck_detector: StuckDetector):
         state = stuck_detector.state
@@ -322,7 +322,7 @@ def test_is_stuck_incomplete_input_error(self, stuck_detector: StuckDetector):
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(not_headless=False) is True
+            assert stuck_detector.is_stuck(headless_mode=True) is True
 
     def test_is_not_stuck_incomplete_input_error(self, stuck_detector: StuckDetector):
         state = stuck_detector.state
@@ -333,7 +333,7 @@ def test_is_not_stuck_incomplete_input_error(self, stuck_detector: StuckDetector
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(not_headless=False) is False
+            assert stuck_detector.is_stuck(headless_mode=True) is False
 
     def test_is_not_stuck_ipython_unterminated_string_error_random_lines(
         self, stuck_detector: StuckDetector
@@ -342,7 +342,7 @@ def test_is_not_stuck_ipython_unterminated_string_error_random_lines(
         self._impl_unterminated_string_error_events(state, random_line=True)
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(not_headless=False) is False
+            assert stuck_detector.is_stuck(headless_mode=True) is False
 
     def test_is_not_stuck_ipython_unterminated_string_error_only_three_incidents(
         self, stuck_detector: StuckDetector
@@ -353,7 +353,7 @@ def test_is_not_stuck_ipython_unterminated_string_error_only_three_incidents(
         )
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(not_headless=False) is False
+            assert stuck_detector.is_stuck(headless_mode=True) is False
 
     def test_is_stuck_ipython_unterminated_string_error(
         self, stuck_detector: StuckDetector
@@ -362,7 +362,7 @@ def test_is_stuck_ipython_unterminated_string_error(
         self._impl_unterminated_string_error_events(state, random_line=False)
 
         with patch('logging.Logger.warning'):
-            assert stuck_detector.is_stuck(not_headless=False) is True
+            assert stuck_detector.is_stuck(headless_mode=True) is True
 
     def test_is_not_stuck_ipython_syntax_error_not_at_end(
         self, stuck_detector: StuckDetector
@@ -407,7 +407,7 @@ def test_is_not_stuck_ipython_syntax_error_not_at_end(
         state.history.append(ipython_observation_4)
 
         with patch('logging.Logger.warning') as mock_warning:
-            assert stuck_detector.is_stuck(not_headless=False) is False
+            assert stuck_detector.is_stuck(headless_mode=True) is False
             mock_warning.assert_not_called()
 
     def test_is_stuck_repeating_action_observation_pattern(
@@ -476,7 +476,7 @@ def test_is_stuck_repeating_action_observation_pattern(
         state.history.append(read_observation_3)
 
         with patch('logging.Logger.warning') as mock_warning:
-            assert stuck_detector.is_stuck(not_headless=False) is True
+            assert stuck_detector.is_stuck(headless_mode=True) is True
             mock_warning.assert_called_once_with('Action, Observation pattern detected')
 
     def test_is_stuck_not_stuck(self, stuck_detector: StuckDetector):
@@ -542,7 +542,7 @@ def test_is_stuck_not_stuck(self, stuck_detector: StuckDetector):
         # read_observation_3._cause = read_action_3._id
         state.history.append(read_observation_3)
 
-        assert stuck_detector.is_stuck(not_headless=False) is False
+        assert stuck_detector.is_stuck(headless_mode=True) is False
 
     def test_is_stuck_monologue(self, stuck_detector):
         state = stuck_detector.state
@@ -572,7 +572,7 @@ def test_is_stuck_monologue(self, stuck_detector):
         message_action_6._source = EventSource.AGENT
         state.history.append(message_action_6)
 
-        assert stuck_detector.is_stuck(not_headless=False)
+        assert stuck_detector.is_stuck(headless_mode=True)
 
         # Add an observation event between the repeated message actions
         cmd_output_observation = CmdOutputObservation(
@@ -592,7 +592,7 @@ def test_is_stuck_monologue(self, stuck_detector):
         state.history.append(message_action_8)
 
         with patch('logging.Logger.warning'):
-            assert not stuck_detector.is_stuck(not_headless=False)
+            assert not stuck_detector.is_stuck(headless_mode=True)
 
 
 class TestAgentController:

From 860d3e280374eb2413102bd4d058b3699a9603b2 Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Sat, 14 Dec 2024 18:27:16 +0100
Subject: [PATCH 07/16] Update openhands/controller/agent_controller.py

---
 openhands/controller/agent_controller.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 7bc095ed99b3..b2de6dd7d5ac 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -912,7 +912,7 @@ def _is_stuck(self) -> bool:
         if self.delegate and self.delegate._is_stuck():
             return True
 
-        return self._stuck_detector.is_stuck(not self.headless_mode)
+        return self._stuck_detector.is_stuck(self.headless_mode)
 
     def __repr__(self):
         return (

From b0616e3d17c5b2708032805f8d96449b4d011ae1 Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Sat, 14 Dec 2024 18:28:08 +0100
Subject: [PATCH 08/16] Update tests/unit/test_is_stuck.py

---
 tests/unit/test_is_stuck.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py
index 9bb3c44e1248..87e2e2be87a8 100644
--- a/tests/unit/test_is_stuck.py
+++ b/tests/unit/test_is_stuck.py
@@ -113,7 +113,6 @@ def test_history_too_short(self, stuck_detector: StuckDetector):
         state.history.append(cmd_observation)
 
         assert stuck_detector.is_stuck(headless_mode=True) is False
-        assert stuck_detector.is_stuck(headless_mode=False) is False
 
     def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckDetector):
         state = stuck_detector.state

From ab4d504e146e1f0f1432d5c994a6d0df2f8284ba Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Sat, 14 Dec 2024 18:29:36 +0100
Subject: [PATCH 09/16] Update tests/unit/test_is_stuck.py

---
 tests/unit/test_is_stuck.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py
index 87e2e2be87a8..666c4c3530ad 100644
--- a/tests/unit/test_is_stuck.py
+++ b/tests/unit/test_is_stuck.py
@@ -128,7 +128,8 @@ def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckD
         
         # In headless mode, this should be stuck
         assert stuck_detector.is_stuck(headless_mode=True) is True
-        
+        # with the UI, it will ALSO be stuck initially
+        assert stuck_detector.is_stuck(headless_mode=False) is True
         # Add a user message
         message_action = MessageAction(content='Hello', wait_for_response=False)
         message_action._source = EventSource.USER

From 1f952c8b85b67c783dc8ca728facc86342a6915f Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Sat, 14 Dec 2024 18:32:11 +0100
Subject: [PATCH 10/16] Update tests/unit/test_is_stuck.py

---
 tests/unit/test_is_stuck.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py
index 666c4c3530ad..412379c22681 100644
--- a/tests/unit/test_is_stuck.py
+++ b/tests/unit/test_is_stuck.py
@@ -137,7 +137,10 @@ def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckD
         
         # In not-headless mode, this should not be stuck because we ignore history before user message
         assert stuck_detector.is_stuck(headless_mode=False) is False
-        
+
+         # But in headless mode, this should be still stuck because user messages do not count
+        assert stuck_detector.is_stuck(headless_mode=True) is True
+
         # Add two more identical actions - still not stuck because we need at least 3
         for i in range(2):
             cmd_action = CmdRunAction(command='ls')

From 16c82b385fb66a50af226ff3bdccc85000710bb9 Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Sat, 14 Dec 2024 18:35:11 +0100
Subject: [PATCH 11/16] Update openhands/controller/stuck.py

---
 openhands/controller/stuck.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py
index 9c5419bbe8b9..2e002c62cc4a 100644
--- a/openhands/controller/stuck.py
+++ b/openhands/controller/stuck.py
@@ -52,6 +52,7 @@ def is_stuck(self, headless_mode: bool = True):
             event
             for event in history_to_check
             if not (
+                # this is a problem! in headless mode we ignore these, but will this work with UI?
                 (isinstance(event, MessageAction) and event.source == EventSource.USER)
                 # there might be some NullAction or NullObservation in the history at least for now
                 or isinstance(event, (NullAction, NullObservation))

From e8636e1c6901dc953b863722e78fc7aacacb56ca Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Sat, 14 Dec 2024 17:44:43 +0000
Subject: [PATCH 12/16] perf: optimize last user message search

- Use reversed() to find last user message
- Stop searching once found (break)
- Same behavior, just more efficient
---
 openhands/controller/stuck.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py
index 2e002c62cc4a..74f1bf762335 100644
--- a/openhands/controller/stuck.py
+++ b/openhands/controller/stuck.py
@@ -38,9 +38,10 @@ def is_stuck(self, headless_mode: bool = True):
         if not headless_mode:
             # In interactive mode, only look at history after the last user message
             last_user_msg_idx = -1
-            for i, event in enumerate(self.state.history):
+            for i, event in enumerate(reversed(self.state.history)):
                 if isinstance(event, MessageAction) and event.source == EventSource.USER:
-                    last_user_msg_idx = i
+                    last_user_msg_idx = len(self.state.history) - i - 1
+                    break
 
             history_to_check = self.state.history[last_user_msg_idx + 1:]
         else:

From c18003785f80419ee9c31dbba7aa081186f23dfc Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Sat, 14 Dec 2024 17:51:13 +0000
Subject: [PATCH 13/16] docs: explain elegant user message filtering

The same filter works perfectly in both modes:
- In headless: actively filters user messages
- In non-headless: no-op (already sliced after last user message)
---
 openhands/controller/stuck.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py
index 74f1bf762335..53b38c77ea9b 100644
--- a/openhands/controller/stuck.py
+++ b/openhands/controller/stuck.py
@@ -53,7 +53,9 @@ def is_stuck(self, headless_mode: bool = True):
             event
             for event in history_to_check
             if not (
-                # this is a problem! in headless mode we ignore these, but will this work with UI?
+                # Filter works elegantly in both modes:
+                # - In headless: actively filters out user messages from full history
+                # - In non-headless: no-op since we already sliced after last user message
                 (isinstance(event, MessageAction) and event.source == EventSource.USER)
                 # there might be some NullAction or NullObservation in the history at least for now
                 or isinstance(event, (NullAction, NullObservation))

From 282e59e2cbe18ae83149121b37543b4499325642 Mon Sep 17 00:00:00 2001
From: OpenHands Bot <openhands@all-hands.dev>
Date: Sat, 14 Dec 2024 17:53:43 +0000
Subject: [PATCH 14/16] =?UTF-8?q?=F0=9F=A4=96=20Auto-fix=20Python=20lintin?=
 =?UTF-8?q?g=20issues?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 openhands/controller/stuck.py |  7 +++++--
 tests/unit/test_is_stuck.py   | 30 +++++++++++++++++++-----------
 2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py
index 53b38c77ea9b..dbb18a9b4f2c 100644
--- a/openhands/controller/stuck.py
+++ b/openhands/controller/stuck.py
@@ -39,11 +39,14 @@ def is_stuck(self, headless_mode: bool = True):
             # In interactive mode, only look at history after the last user message
             last_user_msg_idx = -1
             for i, event in enumerate(reversed(self.state.history)):
-                if isinstance(event, MessageAction) and event.source == EventSource.USER:
+                if (
+                    isinstance(event, MessageAction)
+                    and event.source == EventSource.USER
+                ):
                     last_user_msg_idx = len(self.state.history) - i - 1
                     break
 
-            history_to_check = self.state.history[last_user_msg_idx + 1:]
+            history_to_check = self.state.history[last_user_msg_idx + 1 :]
         else:
             # In headless mode, look at all history
             history_to_check = self.state.history
diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py
index 412379c22681..80a2acad4843 100644
--- a/tests/unit/test_is_stuck.py
+++ b/tests/unit/test_is_stuck.py
@@ -114,18 +114,22 @@ def test_history_too_short(self, stuck_detector: StuckDetector):
 
         assert stuck_detector.is_stuck(headless_mode=True) is False
 
-    def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckDetector):
+    def test_interactive_mode_resets_after_user_message(
+        self, stuck_detector: StuckDetector
+    ):
         state = stuck_detector.state
-        
+
         # First add some actions that would be stuck in non-UI mode
         for i in range(4):
             cmd_action = CmdRunAction(command='ls')
             cmd_action._id = i
             state.history.append(cmd_action)
-            cmd_observation = CmdOutputObservation(content='', command='ls', command_id=i)
+            cmd_observation = CmdOutputObservation(
+                content='', command='ls', command_id=i
+            )
             cmd_observation._cause = cmd_action._id
             state.history.append(cmd_observation)
-        
+
         # In headless mode, this should be stuck
         assert stuck_detector.is_stuck(headless_mode=True) is True
         # with the UI, it will ALSO be stuck initially
@@ -134,11 +138,11 @@ def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckD
         message_action = MessageAction(content='Hello', wait_for_response=False)
         message_action._source = EventSource.USER
         state.history.append(message_action)
-        
+
         # In not-headless mode, this should not be stuck because we ignore history before user message
         assert stuck_detector.is_stuck(headless_mode=False) is False
 
-         # But in headless mode, this should be still stuck because user messages do not count
+        # But in headless mode, this should be still stuck because user messages do not count
         assert stuck_detector.is_stuck(headless_mode=True) is True
 
         # Add two more identical actions - still not stuck because we need at least 3
@@ -146,21 +150,25 @@ def test_interactive_mode_resets_after_user_message(self, stuck_detector: StuckD
             cmd_action = CmdRunAction(command='ls')
             cmd_action._id = i + 4
             state.history.append(cmd_action)
-            cmd_observation = CmdOutputObservation(content='', command='ls', command_id=i + 4)
+            cmd_observation = CmdOutputObservation(
+                content='', command='ls', command_id=i + 4
+            )
             cmd_observation._cause = cmd_action._id
             state.history.append(cmd_observation)
-        
+
         assert stuck_detector.is_stuck(headless_mode=False) is False
-        
+
         # Add two more identical actions - now it should be stuck
         for i in range(2):
             cmd_action = CmdRunAction(command='ls')
             cmd_action._id = i + 6
             state.history.append(cmd_action)
-            cmd_observation = CmdOutputObservation(content='', command='ls', command_id=i + 6)
+            cmd_observation = CmdOutputObservation(
+                content='', command='ls', command_id=i + 6
+            )
             cmd_observation._cause = cmd_action._id
             state.history.append(cmd_observation)
-        
+
         assert stuck_detector.is_stuck(headless_mode=False) is True
 
     def test_is_stuck_repeating_action_observation(self, stuck_detector: StuckDetector):

From 32913183d7d32124404b8a478b4964293e3d2ed4 Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Sat, 14 Dec 2024 19:00:02 +0100
Subject: [PATCH 15/16] Update tests/unit/test_is_stuck.py

---
 tests/unit/test_is_stuck.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py
index 80a2acad4843..89f71e3627a1 100644
--- a/tests/unit/test_is_stuck.py
+++ b/tests/unit/test_is_stuck.py
@@ -132,6 +132,7 @@ def test_interactive_mode_resets_after_user_message(
 
         # In headless mode, this should be stuck
         assert stuck_detector.is_stuck(headless_mode=True) is True
+
         # with the UI, it will ALSO be stuck initially
         assert stuck_detector.is_stuck(headless_mode=False) is True
         # Add a user message

From 63cb544d5479f2989bc9c476b5e948ae85d32c51 Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Sat, 14 Dec 2024 19:00:22 +0100
Subject: [PATCH 16/16] Update tests/unit/test_is_stuck.py

---
 tests/unit/test_is_stuck.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py
index 89f71e3627a1..4b9a4e820ee5 100644
--- a/tests/unit/test_is_stuck.py
+++ b/tests/unit/test_is_stuck.py
@@ -135,6 +135,7 @@ def test_interactive_mode_resets_after_user_message(
 
         # with the UI, it will ALSO be stuck initially
         assert stuck_detector.is_stuck(headless_mode=False) is True
+
         # Add a user message
         message_action = MessageAction(content='Hello', wait_for_response=False)
         message_action._source = EventSource.USER