diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py index 8bd23939dd58..a28670cc9449 100644 --- a/evaluation/swe_bench/run_infer.py +++ b/evaluation/swe_bench/run_infer.py @@ -4,6 +4,9 @@ import tempfile from typing import Any +# Set environment variable to indicate SWE Bench context +os.environ['SWE_BENCH_RUN'] = 'true' + import pandas as pd import toml from datasets import load_dataset diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py index 484cedd53e2f..cc644e7e6571 100644 --- a/openhands/agenthub/codeact_agent/codeact_agent.py +++ b/openhands/agenthub/codeact_agent/codeact_agent.py @@ -513,12 +513,18 @@ def _get_messages(self, state: State) -> list[Message]: '\n\nENVIRONMENT REMINDER:\n' f'- You have {state.max_iterations - state.iteration} turns left to complete the task\n' '- When finished reply with \n' - '\n\n' - '- You MUST generate only one action per turn!\n' - '- A patch is a set of changes to the source code of the codebase that you are given\n' - '- You MUST generate a patch that attempts to fix the issue described in the \n' - '\n' ) + + # Add SWE Bench specific instructions only when running in that context + if os.environ.get('SWE_BENCH_RUN', 'false').lower() == 'true': + reminder_text += ( + '\n\n' + '- You MUST generate only one action per turn!\n' + '- A patch is a set of changes to the source code of the codebase that you are given\n' + '- You MUST generate a patch that attempts to fix the issue described in the \n' + '\n' + ) + latest_user_message.content.append(TextContent(text=reminder_text)) return messages