diff --git a/frontend/src/state/chat-slice.ts b/frontend/src/state/chat-slice.ts index 47d2b651754d..da3d56427fea 100644 --- a/frontend/src/state/chat-slice.ts +++ b/frontend/src/state/chat-slice.ts @@ -149,9 +149,8 @@ export const chatSlice = createSlice({ } else if (observationID === "run_ipython") { // For IPython, we consider it successful if there's no error message const ipythonObs = observation.payload as IPythonObservation; - causeMessage.success = !ipythonObs.message - .toLowerCase() - .includes("error"); + // Success is the inverse of the observation's error flag. NOTE(review): confirm the payload exposes "error" + causeMessage.success = !ipythonObs.error; } if (observationID === "run" || observationID === "run_ipython") { diff --git a/openhands/events/observation/commands.py b/openhands/events/observation/commands.py index b522b5c47283..3fc1c61cdadd 100644 --- a/openhands/events/observation/commands.py +++ b/openhands/events/observation/commands.py @@ -40,7 +40,13 @@ class IPythonRunCellObservation(Observation): @property def error(self) -> bool: - return False # IPython cells do not return exit codes + # Heuristic: case-sensitive substring scan of self.content for common error markers + error_indicators = [ + 'ERROR:', + 'Error:', + 'Exception:', + ] + return any(indicator in self.content for indicator in error_indicators) @property def message(self) -> str: @@ -48,7 +54,7 @@ def message(self) -> str: @property def success(self) -> bool: - return True # IPython cells are always considered successful + return not self.error def __str__(self) -> str: return f'**IPythonRunCellObservation**\n{self.content}' diff --git a/tests/unit/test_observation_serialization.py b/tests/unit/test_observation_serialization.py index 67a95449b719..3173a850d5db 100644 --- a/tests/unit/test_observation_serialization.py +++ b/tests/unit/test_observation_serialization.py @@ -40,6 +40,32 @@ def serialization_deserialization( # Additional tests for various observation subclasses can be included here +def test_ipython_error_detection(): + from 
openhands.events.observation import IPythonRunCellObservation + + # Test error detection for various error patterns + error_cases = [ + 'ERROR: Something went wrong', + 'Error: Invalid syntax', + 'Exception: Division by zero', + ] + for error_content in error_cases: + obs = IPythonRunCellObservation(content=error_content, code='print("test")') + serialized = event_to_dict(obs) + assert ( + serialized['success'] is False + ), f'Failed to detect error in: {error_content}' + assert obs.error is True, f'Failed to detect error in: {error_content}' + + # Test success case + obs = IPythonRunCellObservation( + content='Hello World!', code='print("Hello World!")' + ) + serialized = event_to_dict(obs) + assert serialized['success'] is True, 'Failed to detect success' + assert obs.error is False, 'Failed to detect success' + + def test_success_field_serialization(): # Test success=True obs = CmdOutputObservation(