diff --git a/evaluation/benchmarks/swe_bench/prompt.py b/evaluation/benchmarks/swe_bench/prompt.py deleted file mode 100644 index d74463891a07..000000000000 --- a/evaluation/benchmarks/swe_bench/prompt.py +++ /dev/null @@ -1,28 +0,0 @@ -CODEACT_SWE_PROMPT = """Now, you're going to solve this issue on your own. Your terminal session has started and you're in the repository's root directory. You can use any bash commands or the special interface to help you. Edit all the files you need to and run any checks or tests that you want. -Remember, YOU CAN ONLY ENTER ONE COMMAND AT A TIME. You should always wait for feedback after every command. -When you're satisfied with all of the changes you've made, you can use the "finish" tool to finish the interaction. -Note however that you cannot use any interactive session commands (e.g. vim) in this environment, but you can write scripts and run them. E.g. you can write a python script and then run it with `python .py`. - -NOTE ABOUT THE EDIT COMMAND: Indentation really matters! When editing a file, make sure to insert appropriate indentation before each line! - -IMPORTANT TIPS: -1. Always start by trying to replicate the bug that the issues discusses. - If the issue includes code for reproducing the bug, we recommend that you re-implement that in your environment, and run it to make sure you can reproduce the bug. - Then start trying to fix it. - When you think you've fixed the bug, re-run the bug reproduction script to make sure that the bug has indeed been fixed. - - If the bug reproduction script does not print anything when it successfully runs, we recommend adding a print("Script completed successfully, no errors.") command at the end of the file, - so that you can be sure that the script indeed ran fine all the way through. - -2. If you run a command and it doesn't work, try running a different command. A command that did not work once will not work the second time unless you modify it! - -3. 
If you open a file and need to get to an area around a specific line that is not in the first 100 lines, say line 583, don't just use the scroll_down command multiple times. Instead, use the goto 583 command. It's much quicker. - -4. If the bug reproduction script requires inputting/reading a specific file, such as buggy-input.png, and you'd like to understand how to input that file, conduct a search in the existing repo code, to see whether someone else has already done that. Do this by running the command: find_file("buggy-input.png") If that doesn't work, use the linux 'find' command. - -5. Always make sure to look at the currently open file and the current working directory (which appears right after the currently open file). The currently open file might be in a different directory than the working directory! Note that some commands, such as 'create', open files, so they might change the current open file. - -6. When editing files, it is easy to accidentally specify a wrong line number or to write code with incorrect indentation. Always check the code after you issue an edit to make sure that it reflects what you wanted to accomplish. If it didn't, issue another command to fix it. 
- -[Current directory: /workspace/{workspace_dir_name}] -""" diff --git a/evaluation/benchmarks/swe_bench/run_infer.py b/evaluation/benchmarks/swe_bench/run_infer.py index 3ffc08d29bfb..01111f75d126 100644 --- a/evaluation/benchmarks/swe_bench/run_infer.py +++ b/evaluation/benchmarks/swe_bench/run_infer.py @@ -9,7 +9,7 @@ from datasets import load_dataset import openhands.agenthub -from evaluation.benchmarks.swe_bench.prompt import CODEACT_SWE_PROMPT + from evaluation.utils.shared import ( EvalException, EvalMetadata, @@ -45,7 +45,6 @@ AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = { 'CodeActAgent': codeact_user_response, - 'CodeActSWEAgent': codeact_user_response, } @@ -56,39 +55,27 @@ def _get_swebench_workspace_dir_name(instance: pd.Series) -> str: def get_instruction(instance: pd.Series, metadata: EvalMetadata): workspace_dir_name = _get_swebench_workspace_dir_name(instance) # Prepare instruction - if metadata.agent_class == 'CodeActSWEAgent': - instruction = ( - 'We are currently solving the following issue within our repository. Here is the issue text:\n' - '--- BEGIN ISSUE ---\n' - f'{instance.problem_statement}\n' - '--- END ISSUE ---\n\n' - ) - if USE_HINT_TEXT and instance.hints_text: - instruction += ( - f'--- BEGIN HINTS ---\n{instance.hints_text}\n--- END HINTS ---\n' - ) - instruction += CODEACT_SWE_PROMPT.format(workspace_dir_name=workspace_dir_name) - else: - # Instruction based on Anthropic's official trajectory - # https://github.com/eschluntz/swe-bench-experiments/tree/main/evaluation/verified/20241022_tools_claude-3-5-sonnet-updated/trajs - instruction = ( - '\n' - f'/workspace/{workspace_dir_name}\n' - '\n' - f"I've uploaded a python code repository in the directory {workspace_dir_name}. 
Consider the following PR description:\n\n" - f'\n' - f'{instance.problem_statement}\n' - '\n\n' - 'Can you help me implement the necessary changes to the repository so that the requirements specified in the are met?\n' - "I've already taken care of all changes to any of the test files described in the . This means you DON'T have to modify the testing logic or any of the tests in any way!\n" - 'Your task is to make the minimal changes to non-tests files in the /workspace directory to ensure the is satisfied.\n' - 'Follow these steps to resolve the issue:\n' - '1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n' - '2. Create a script to reproduce the error and execute it with `python ` using the BashTool, to confirm the error\n' - '3. Edit the sourcecode of the repo to resolve the issue\n' - '4. Rerun your reproduce script and confirm that the error is fixed!\n' - '5. Think about edgecases and make sure your fix handles them as well\n' - "Your thinking should be thorough and so it's fine if it's very long.\n" + + # Instruction based on Anthropic's official trajectory + # https://github.com/eschluntz/swe-bench-experiments/tree/main/evaluation/verified/20241022_tools_claude-3-5-sonnet-updated/trajs + instruction = ( + '\n' + f'/workspace/{workspace_dir_name}\n' + '\n' + f"I've uploaded a python code repository in the directory {workspace_dir_name}. Consider the following PR description:\n\n" + f'\n' + f'{instance.problem_statement}\n' + '\n\n' + 'Can you help me implement the necessary changes to the repository so that the requirements specified in the are met?\n' + "I've already taken care of all changes to any of the test files described in the . This means you DON'T have to modify the testing logic or any of the tests in any way!\n" + 'Your task is to make the minimal changes to non-tests files in the /workspace directory to ensure the is satisfied.\n' + 'Follow these steps to resolve the issue:\n' + '1. 
As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n' + '2. Create a script to reproduce the error and execute it with `python ` using the BashTool, to confirm the error\n' + '3. Edit the sourcecode of the repo to resolve the issue\n' + '4. Rerun your reproduce script and confirm that the error is fixed!\n' + '5. Think about edgecases and make sure your fix handles them as well\n' + "Your thinking should be thorough and so it's fine if it's very long.\n" ) if RUN_WITH_BROWSING: diff --git a/openhands/agenthub/__init__.py b/openhands/agenthub/__init__.py index 0076976c27ed..85ae41d425d0 100644 --- a/openhands/agenthub/__init__.py +++ b/openhands/agenthub/__init__.py @@ -10,7 +10,6 @@ from openhands.agenthub import ( # noqa: E402 browsing_agent, codeact_agent, - codeact_swe_agent, delegator_agent, dummy_agent, planner_agent, @@ -18,7 +17,6 @@ __all__ = [ 'codeact_agent', - 'codeact_swe_agent', 'planner_agent', 'delegator_agent', 'dummy_agent', diff --git a/openhands/agenthub/codeact_swe_agent/README.md b/openhands/agenthub/codeact_swe_agent/README.md deleted file mode 100644 index ec88537aaab1..000000000000 --- a/openhands/agenthub/codeact_swe_agent/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# CodeAct (SWE Edit Specialized) - -This agent is an adaptation of the original [SWE Agent](https://swe-agent.com/) based on CodeAct using the `agentskills` library of OpenHands. - -Its intended use is **solving GitHub issues**. - -It removes web-browsing and GitHub capability from the original CodeAct agent to avoid confusion to the agent. 
diff --git a/openhands/agenthub/codeact_swe_agent/__init__.py b/openhands/agenthub/codeact_swe_agent/__init__.py deleted file mode 100644 index ef5233786194..000000000000 --- a/openhands/agenthub/codeact_swe_agent/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from openhands.agenthub.codeact_swe_agent.codeact_swe_agent import CodeActSWEAgent -from openhands.controller.agent import Agent - -Agent.register('CodeActSWEAgent', CodeActSWEAgent) diff --git a/openhands/agenthub/codeact_swe_agent/action_parser.py b/openhands/agenthub/codeact_swe_agent/action_parser.py deleted file mode 100644 index c77c1404a6e6..000000000000 --- a/openhands/agenthub/codeact_swe_agent/action_parser.py +++ /dev/null @@ -1,110 +0,0 @@ -import re - -from openhands.controller.action_parser import ActionParser -from openhands.events.action import ( - Action, - AgentFinishAction, - CmdRunAction, - IPythonRunCellAction, - MessageAction, -) - - -class CodeActSWEActionParserFinish(ActionParser): - """Parser action: - - AgentFinishAction() - end the interaction - """ - - def __init__( - self, - ): - self.finish_command = None - - def check_condition(self, action_str: str) -> bool: - self.finish_command = re.search(r'.*', action_str, re.DOTALL) - return self.finish_command is not None - - def parse(self, action_str: str) -> Action: - assert ( - self.finish_command is not None - ), 'self.finish_command should not be None when parse is called' - thought = action_str.replace(self.finish_command.group(0), '').strip() - return AgentFinishAction(thought=thought) - - -class CodeActSWEActionParserCmdRun(ActionParser): - """Parser action: - - CmdRunAction(command) - bash command to run - - AgentFinishAction() - end the interaction - """ - - def __init__( - self, - ): - self.bash_command = None - - def check_condition(self, action_str: str) -> bool: - self.bash_command = re.search( - r'(.*?)', action_str, re.DOTALL - ) - return self.bash_command is not None - - def parse(self, action_str: str) -> Action: - assert ( 
- self.bash_command is not None - ), 'self.bash_command should not be None when parse is called' - thought = action_str.replace(self.bash_command.group(0), '').strip() - # a command was found - command_group = self.bash_command.group(1).strip() - if command_group.strip() == 'exit': - return AgentFinishAction() - return CmdRunAction(command=command_group, thought=thought) - - -class CodeActSWEActionParserIPythonRunCell(ActionParser): - """Parser action: - - IPythonRunCellAction(code) - IPython code to run - """ - - def __init__( - self, - ): - self.python_code = None - self.jupyter_kernel_init_code: str = 'from agentskills import *' - - def check_condition(self, action_str: str) -> bool: - self.python_code = re.search( - r'(.*?)', action_str, re.DOTALL - ) - return self.python_code is not None - - def parse(self, action_str: str) -> Action: - assert ( - self.python_code is not None - ), 'self.python_code should not be None when parse is called' - code_group = self.python_code.group(1).strip() - thought = action_str.replace(self.python_code.group(0), '').strip() - return IPythonRunCellAction( - code=code_group, - thought=thought, - kernel_init_code=self.jupyter_kernel_init_code, - ) - - -class CodeActSWEActionParserMessage(ActionParser): - """Parser action: - - MessageAction(content) - Message action to run (e.g. 
ask for clarification) - """ - - def __init__( - self, - ): - pass - - def check_condition(self, action_str: str) -> bool: - # We assume the LLM is GOOD enough that when it returns pure natural language - # it wants to talk to the user - return True - - def parse(self, action_str: str) -> Action: - return MessageAction(content=action_str, wait_for_response=True) diff --git a/openhands/agenthub/codeact_swe_agent/codeact_swe_agent.py b/openhands/agenthub/codeact_swe_agent/codeact_swe_agent.py deleted file mode 100644 index 8d403d357e03..000000000000 --- a/openhands/agenthub/codeact_swe_agent/codeact_swe_agent.py +++ /dev/null @@ -1,224 +0,0 @@ -from openhands.agenthub.codeact_swe_agent.prompt import ( - COMMAND_DOCS, - SWE_EXAMPLE, - SYSTEM_PREFIX, - SYSTEM_SUFFIX, -) -from openhands.agenthub.codeact_swe_agent.response_parser import ( - CodeActSWEResponseParser, -) -from openhands.controller.agent import Agent -from openhands.controller.state.state import State -from openhands.core.config import AgentConfig -from openhands.core.message import ImageContent, Message, TextContent -from openhands.events.action import ( - Action, - AgentFinishAction, - CmdRunAction, - IPythonRunCellAction, - MessageAction, -) -from openhands.events.observation import ( - CmdOutputObservation, - IPythonRunCellObservation, -) -from openhands.events.observation.error import ErrorObservation -from openhands.events.observation.observation import Observation -from openhands.events.serialization.event import truncate_content -from openhands.llm.llm import LLM -from openhands.runtime.plugins import ( - AgentSkillsRequirement, - JupyterRequirement, - PluginRequirement, -) - - -def get_system_message() -> str: - return f'{SYSTEM_PREFIX}\n\n{COMMAND_DOCS}\n\n{SYSTEM_SUFFIX}' - - -def get_in_context_example() -> str: - return SWE_EXAMPLE - - -class CodeActSWEAgent(Agent): - VERSION = '1.6' - """ - This agent is an adaptation of the original [SWE Agent](https://swe-agent.com/) based on CodeAct 1.5 
using the `agentskills` library of OpenHands. - - It is intended use is **solving Github issues**. - - It removes web-browsing and Github capability from the original CodeAct agent to avoid confusion to the agent. - """ - - sandbox_plugins: list[PluginRequirement] = [ - # NOTE: AgentSkillsRequirement need to go before JupyterRequirement, since - # AgentSkillsRequirement provides a lot of Python functions, - # and it needs to be initialized before Jupyter for Jupyter to use those functions. - AgentSkillsRequirement(), - JupyterRequirement(), - ] - - system_message: str = get_system_message() - in_context_example: str = f"Here is an example of how you can interact with the environment for task solving:\n{get_in_context_example()}\n\nNOW, LET'S START!" - - response_parser = CodeActSWEResponseParser() - - def __init__( - self, - llm: LLM, - config: AgentConfig, - ) -> None: - """Initializes a new instance of the CodeActSWEAgent class. - - Parameters: - - llm (LLM): The llm to be used by this agent - """ - super().__init__(llm, config) - self.reset() - - def action_to_str(self, action: Action) -> str: - if isinstance(action, CmdRunAction): - return ( - f'{action.thought}\n\n{action.command}\n' - ) - elif isinstance(action, IPythonRunCellAction): - return f'{action.thought}\n\n{action.code}\n' - elif isinstance(action, MessageAction): - return action.content - return '' - - def get_action_message(self, action: Action) -> Message | None: - if isinstance(action, (CmdRunAction, IPythonRunCellAction, MessageAction)): - content = [TextContent(text=self.action_to_str(action))] - - if ( - self.llm.vision_is_active() - and isinstance(action, MessageAction) - and action.image_urls - ): - content.append(ImageContent(image_urls=action.image_urls)) - - return Message( - role='user' if action.source == 'user' else 'assistant', content=content - ) - - return None - - def get_observation_message(self, obs: Observation) -> Message | None: - max_message_chars = 
self.llm.config.max_message_chars - if isinstance(obs, CmdOutputObservation): - text = 'OBSERVATION:\n' + truncate_content( - obs.content + obs.interpreter_details, max_message_chars - ) - text += ( - f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]' - ) - return Message(role='user', content=[TextContent(text=text)]) - elif isinstance(obs, IPythonRunCellObservation): - text = 'OBSERVATION:\n' + obs.content - # replace base64 images with a placeholder - splitted = text.split('\n') - for i, line in enumerate(splitted): - if '![image](data:image/png;base64,' in line: - splitted[i] = ( - '![image](data:image/png;base64, ...) already displayed to user' - ) - text = '\n'.join(splitted) - text = truncate_content(text, max_message_chars) - return Message(role='user', content=[TextContent(text=text)]) - elif isinstance(obs, ErrorObservation): - text = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars) - text += '\n[Error occurred in processing last action]' - return Message(role='user', content=[TextContent(text=text)]) - else: - # If an observation message is not returned, it will cause an error - # when the LLM tries to return the next message - raise ValueError(f'Unknown observation type: {type(obs)}') - - def reset(self) -> None: - """Resets the CodeAct Agent.""" - super().reset() - - def step(self, state: State) -> Action: - """Performs one step using the CodeAct Agent. - This includes gathering info on previous steps and prompting the model to make a command to execute. - - Parameters: - - state (State): used to get updated info and background commands - - Returns: - - CmdRunAction(command) - bash command to run - - IPythonRunCellAction(code) - IPython code to run - - MessageAction(content) - Message action to run (e.g. 
ask for clarification) - - AgentFinishAction() - end the interaction - """ - # if we're done, go back - last_user_message = state.get_last_user_message() - if last_user_message and last_user_message.content.strip() == '/exit': - return AgentFinishAction() - - # prepare what we want to send to the LLM - messages: list[Message] = self._get_messages(state) - response = self.llm.completion( - messages=self.llm.format_messages_for_llm(messages), - stop=[ - '', - '', - ], - ) - - return self.response_parser.parse(response) - - def _get_messages(self, state: State) -> list[Message]: - messages: list[Message] = [ - Message(role='system', content=[TextContent(text=self.system_message)]), - Message(role='user', content=[TextContent(text=self.in_context_example)]), - ] - - for event in state.history: - # create a regular message from an event - if isinstance(event, Action): - message = self.get_action_message(event) - elif isinstance(event, Observation): - message = self.get_observation_message(event) - else: - raise ValueError(f'Unknown event type: {type(event)}') - - # add regular message - if message: - # handle error if the message is the SAME role as the previous message - # litellm.exceptions.BadRequestError: litellm.BadRequestError: OpenAIException - Error code: 400 - {'detail': 'Only supports u/a/u/a/u...'} - # there should not have two consecutive messages from the same role - if messages and messages[-1].role == message.role: - messages[-1].content.extend(message.content) - else: - messages.append(message) - - # the latest user message is important: - # we want to remind the agent of the environment constraints - latest_user_message = next( - (m for m in reversed(messages) if m.role == 'user'), None - ) - - # Get the last user text inside content - if latest_user_message: - latest_user_message_text = next( - ( - t - for t in reversed(latest_user_message.content) - if isinstance(t, TextContent) - ) - ) - # add a reminder to the prompt - reminder_text = 
f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task. When finished reply with .' - - if latest_user_message_text: - latest_user_message_text.text = ( - latest_user_message_text.text + reminder_text - ) - else: - latest_user_message_text = TextContent(text=reminder_text) - latest_user_message.content.append(latest_user_message_text) - - return messages diff --git a/openhands/agenthub/codeact_swe_agent/prompt.py b/openhands/agenthub/codeact_swe_agent/prompt.py deleted file mode 100644 index 1a2ffabad95f..000000000000 --- a/openhands/agenthub/codeact_swe_agent/prompt.py +++ /dev/null @@ -1,455 +0,0 @@ -from openhands.runtime.plugins import AgentSkillsRequirement - -_AGENT_SKILLS_DOCS = AgentSkillsRequirement.documentation - -COMMAND_DOCS = ( - '\nApart from the standard Python library, the assistant can also use the following functions (already imported) in environment:\n' - f'{_AGENT_SKILLS_DOCS}' - "Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run." -) - -# ======= SYSTEM MESSAGE ======= -MINIMAL_SYSTEM_PREFIX = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. -The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "" tag, for example: - -print("Hello World!") - -The assistant can execute bash commands on behalf of the user by wrapping them with and . -For example, you can list the files in the current directory by ls . 
-""" - -PIP_INSTALL_PREFIX = """The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: %pip install [package needed] and should always import packages and define variables before starting to use them.""" - -SYSTEM_PREFIX = MINIMAL_SYSTEM_PREFIX + PIP_INSTALL_PREFIX - -SYSTEM_SUFFIX = """The assistant's response should be concise. -The assistant should include ONLY ONE or in every one of the responses, unless the assistant is finished with the task or need more input or action from the user in order to proceed. -IMPORTANT: Whenever possible, execute the code for the user using or instead of providing it. -""" - -SWE_EXAMPLE = """ ---- START OF EXAMPLE --- - -USER: We are currently solving the following issue within our repository. Here is the issue text: ---- BEGIN ISSUE --- -TimeDelta serialization precision -Hi there! - -I just found quite strange behaviour of `TimeDelta` field serialization - -```python3 -from marshmallow.fields import TimeDelta -from datetime import timedelta - -td_field = TimeDelta(precision="milliseconds") - -obj = dict() -obj["td_field"] = timedelta(milliseconds=345) - -print(td_field.serialize("td_field", obj)) -``` - -Output of this snippet is `344`, but it seems that `345` is correct. - -Looks like a rounding issue here: https://github.com/marshmallow-code/marshmallow/blob/dev/src/marshmallow/fields.py#L1474 ---- END ISSUE --- - -[Current directory: /workspace/marshmallow-code__marshmallow] - -ASSISTANT: -Let's first start by reproducing the results of the issue. The issue includes some example code for reproduction, which we can use. We'll create a new file called `reproduce.py` and paste the example code into it. - -create_file('/workspace/marshmallow-code__marshmallow/reproduce.py') - - -USER: -OBSERVATION: -[File: /workspace/marshmallow-code__marshmallow/reproduce.py (1 lines total)] -1| -[File /workspace/marshmallow-code__marshmallow/reproduce.py created.] 
- -ASSISTANT: -Now let's paste in the example code from the issue. - -EDITED_CODE = \"\"\"from marshmallow.fields import TimeDelta -from datetime import timedelta - -td_field = TimeDelta(precision="milliseconds") - -obj = dict() -obj["td_field"] = timedelta(milliseconds=345) - -print(td_field.serialize("td_field", obj)) -\"\"\" -edit_file(start=1, end=1, content=EDITED_CODE) - - -USER: -OBSERVATION: -[File: /workspace/marshmallow-code__marshmallow/reproduce.py (9 lines total)] -1|from marshmallow.fields import TimeDelta -2|from datetime import timedelta -3| -4|td_field = TimeDelta(precision="milliseconds") -5| -6|obj = dict() -7|obj["td_field"] = timedelta(milliseconds=345) -8| -9|print(td_field.serialize("td_field", obj)) -[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] - -ASSISTANT: -Now let's run the code to see if we see the same output as the issue. - -python reproduce.py - - -USER: -OBSERVATION: -344 - -ASSISTANT: -We are indeed seeing the same output as the issue. The issue suggests that we should look at line 1474 of the `fields.py` file to see if there is a rounding issue. Let's open the `fields.py` file and navigate to line 1474 to see the relevant code. First we should find the `fields.py` file in the repository. It is likely to be in the `src/marshmallow` directory, but we should check to be sure. - -ls -F - - -USER: -OBSERVATION: -AUTHORS.rst -CHANGELOG.rst -CODE_OF_CONDUCT.md -CONTRIBUTING.rst -LICENSE -MANIFEST.in -NOTICE -README.rst -RELEASING.md -azure-pipelines.yml -docs/ -examples/ -performance/ -pyproject.toml -reproduce.py -setup.cfg -setup.py -src/ -tests/ -tox.ini - -ASSISTANT: -It looks like the `src` directory is present, which suggests that the `fields.py` file is likely to be in the `src` directory. Let's use find_file to see where it is. 
- -find_file("fields.py", "/workspace/marshmallow-code__marshmallow/src") - - -USER: -OBSERVATION: -[Found 1 matches for "fields.py" in /workspace/marshmallow-code__marshmallow/src] -/workspace/marshmallow-code__marshmallow/src/marshmallow/fields.py - -ASSISTANT: -It looks like the `fields.py` file is present in the `./src/marshmallow/` directory. The issue also points to a specific URL with line number 1474. We should navigate to that line in fields.py to see the relevant code for the `TimeDelta` serialization. - -open_file("/workspace/marshmallow-code__marshmallow/src/marshmallow/fields.py", 1474) - - -USER: -OBSERVATION: -[File: /workspace/marshmallow-code__marshmallow/src/marshmallow/fields.py (1997 lines total)] -(1458 more lines above) -1459| self.WEEKS, -1460| ) -1461| -1462| if precision not in units: -1463| msg = 'The precision must be {} or "{}".'.format( -1464| ", ".join([f'"{each}"' for each in units[:-1]]), units[-1] -1465| ) -1466| raise ValueError(msg) -1467| -1468| self.precision = precision -1469| super().__init__(**kwargs) -1470| -1471| def _serialize(self, value, attr, obj, **kwargs): -1472| if value is None: -1473| return None -1474| base_unit = dt.timedelta(**{self.precision: 1}) -1475| return int(value.total_seconds() / base_unit.total_seconds()) -1476| -1477| def _deserialize(self, value, attr, data, **kwargs): -1478| try: -1479| value = int(value) -1480| except (TypeError, ValueError) as error: -1481| raise self.make_error("invalid") from error -1482| -1483| kwargs = {self.precision: value} -1484| -1485| try: -1486| return dt.timedelta(**kwargs) -1487| except OverflowError as error: -1488| raise self.make_error("invalid") from error -1489| -1490| -1491|class Mapping(Field): -1492| \"\"\"An abstract class for objects with key-value pairs. -1493| -1494| :param keys: A field class or instance for dict keys. -1495| :param values: A field class or instance for dict values. 
-1496| :param kwargs: The same keyword arguments that :class:`Field` receives. -1497| -1498| .. note:: -1499| When the structure of nested data is not known, you may omit the -1500| `keys` and `values` arguments to prevent content validation. -1501| -1502| .. versionadded:: 3.0.0rc4 -1503| \"\"\" -1504| -1505| mapping_type = dict -1506| -1507| #: Default error messages. -1508| default_error_messages = {"invalid": "Not a valid mapping type."} -1509| -1510| def __init__( -1511| self, -1512| keys: typing.Optional[typing.Union[Field, type]] = None, -1513| values: typing.Optional[typing.Union[Field, type]] = None, -1514| **kwargs, -1515| ): -1516| super().__init__(**kwargs) -1517| if keys is None: -1518| self.key_field = None -1519| else: -1520| try: -1521| self.key_field = resolve_field_instance(keys) -1522| except FieldInstanceResolutionError as error: -1523| raise ValueError( -1524| '"keys" must be a subclass or instance of ' -1525| "marshmallow.base.FieldABC." -1526| ) from error -1527| -1528| if values is None: -1529| self.value_field = None -1530| else: -1531| try: -1532| self.value_field = resolve_field_instance(values) -1533| except FieldInstanceResolutionError as error: -1534| raise ValueError( -1535| '"values" must be a subclass or instance of ' -1536| "marshmallow.base.FieldABC." 
-1537| ) from error -1538| if isinstance(self.value_field, Nested): -1539| self.only = self.value_field.only -1540| self.exclude = self.value_field.exclude -1541| -1542| def _bind_to_schema(self, field_name, schema): -1543| super()._bind_to_schema(field_name, schema) -1544| if self.value_field: -1545| self.value_field = copy.deepcopy(self.value_field) -1546| self.value_field._bind_to_schema(field_name, self) -1547| if isinstance(self.value_field, Nested): -1548| self.value_field.only = self.only -1549| self.value_field.exclude = self.exclude -1550| if self.key_field: -1551| self.key_field = copy.deepcopy(self.key_field) -1552| self.key_field._bind_to_schema(field_name, self) -1553| -1554| def _serialize(self, value, attr, obj, **kwargs): -1555| if value is None: -1556| return None -1557| if not self.value_field and not self.key_field: -1558| return self.mapping_type(value) -(439 more lines below) - -ASSISTANT: -We are now looking at the relevant section of the `fields.py` file where the `TimeDelta` serialization occurs. The issue suggests that there is a rounding problem near line 1474. The current implementation uses integer division, which truncates the decimal part and can lead to incorrect rounding on line 1475. To fix this issue, we should ensure that the division results in a float, and then round the result to the nearest integer before converting it to an int. This should preserve the milliseconds precision as expected. Let's make the necessary edit to the code. - -edit_file(start=1475, end=1475, content="return int(round(value.total_seconds() / base_unit.total_seconds())) # round to nearest int") - - -USER: -OBSERVATION: -[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.] 
- -ERRORS: -- E999 IndentationError: unexpected indent - -[This is how your edit would have looked if applied] -------------------------------------------------- -[File: /marshmallow-code__marshmallow/src/marshmallow/fields.py (1997 lines total)] -(1468 more lines above) -1469| super().__init__(**kwargs) -1470| -1471| def _serialize(self, value, attr, obj, **kwargs): -1472| if value is None: -1473| return None -1474| base_unit = dt.timedelta(**{self.precision: 1}) -1475|return int(round(value.total_seconds() / base_unit.total_seconds())) # round to nearest int -1476| -1477| def _deserialize(self, value, attr, data, **kwargs): -1478| try: -1479| value = int(value) -(518 more lines below) -------------------------------------------------- - -[This is the original code before your edit] -------------------------------------------------- -[File: /marshmallow-code__marshmallow/src/marshmallow/fields.py (1997 lines total)] -(1469 more lines above) -1470| -1471| def _serialize(self, value, attr, obj, **kwargs): -1472| if value is None: -1473| return None -1474| base_unit = dt.timedelta(**{self.precision: 1}) -1475| return int(value.total_seconds() / base_unit.total_seconds()) -1476| -1477| def _deserialize(self, value, attr, data, **kwargs): -1478| try: -1479| value = int(value) -1480| except (TypeError, ValueError) as error: -(517 more lines below) -------------------------------------------------- -Your changes have NOT been applied. Please fix your edit command and try again. -You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code. -DO NOT re-run the same failed edit command. Running it again will lead to the same error. - -ASSISTANT: -My edit command did not use the proper indentation, I will fix my syntax in this follow up edit command. 
- -EDITED_CODE=" return int(round(value.total_seconds() / base_unit.total_seconds())) # round to nearest int" -edit_file(start=1475, end=1475, content=EDITED_CODE) - - -USER: -OBSERVATION: -[File: /marshmallow-code__marshmallow/src/marshmallow/fields.py (1997 lines total)] -(1424 more lines above) -1425| -1426| :param precision: Influences how the integer is interpreted during -1427| (de)serialization. Must be 'days', 'seconds', 'microseconds', -1428| 'milliseconds', 'minutes', 'hours' or 'weeks'. -1429| :param kwargs: The same keyword arguments that :class:`Field` receives. -1430| -1431| .. versionchanged:: 2.0.0 -1432| Always serializes to an integer value to avoid rounding errors. -1433| Add `precision` parameter. -1434| \"\"\" -1435| -1436| DAYS = "days" -1437| SECONDS = "seconds" -1438| MICROSECONDS = "microseconds" -1439| MILLISECONDS = "milliseconds" -1440| MINUTES = "minutes" -1441| HOURS = "hours" -1442| WEEKS = "weeks" -1443| -1444| #: Default error messages. -1445| default_error_messages = { -1446| "invalid": "Not a valid period of time.", -1447| "format": "{input!r} cannot be formatted as a timedelta.", -1448| } -1449| -1450| def __init__(self, precision: str = SECONDS, **kwargs): -1451| precision = precision.lower() -1452| units = ( -1453| self.DAYS, -1454| self.SECONDS, -1455| self.MICROSECONDS, -1456| self.MILLISECONDS, -1457| self.MINUTES, -1458| self.HOURS, -1459| self.WEEKS, -1460| ) -1461| -1462| if precision not in units: -1463| msg = 'The precision must be {} or "{}".'.format( -1464| ", ".join([f'"{each}"' for each in units[:-1]]), units[-1] -1465| ) -1466| raise ValueError(msg) -1467| -1468| self.precision = precision -1469| super().__init__(**kwargs) -1470| -1471| def _serialize(self, value, attr, obj, **kwargs): -1472| if value is None: -1473| return None -1474| base_unit = dt.timedelta(**{self.precision: 1}) -1475| return int(round(value.total_seconds() / base_unit.total_seconds())) # round to nearest int -1476| -1477| def 
_deserialize(self, value, attr, data, **kwargs): -1478| try: -1479| value = int(value) -1480| except (TypeError, ValueError) as error: -1481| raise self.make_error("invalid") from error -1482| -1483| kwargs = {self.precision: value} -1484| -1485| try: -1486| return dt.timedelta(**kwargs) -1487| except OverflowError as error: -1488| raise self.make_error("invalid") from error -1489| -1490| -1491|class Mapping(Field): -1492| \"\"\"An abstract class for objects with key-value pairs. -1493| -1494| :param keys: A field class or instance for dict keys. -1495| :param values: A field class or instance for dict values. -1496| :param kwargs: The same keyword arguments that :class:`Field` receives. -1497| -1498| .. note:: -1499| When the structure of nested data is not known, you may omit the -1500| `keys` and `values` arguments to prevent content validation. -1501| -1502| .. versionadded:: 3.0.0rc4 -1503| \"\"\" -1504| -1505| mapping_type = dict -1506| -1507| #: Default error messages. -1508| default_error_messages = {"invalid": "Not a valid mapping type."} -1509| -1510| def __init__( -1511| self, -1512| keys: typing.Optional[typing.Union[Field, type]] = None, -1513| values: typing.Optional[typing.Union[Field, type]] = None, -1514| **kwargs, -1515| ): -1516| super().__init__(**kwargs) -1517| if keys is None: -1518| self.key_field = None -1519| else: -1520| try: -1521| self.key_field = resolve_field_instance(keys) -1522| except FieldInstanceResolutionError as error: -1523| raise ValueError( -1524| '"keys" must be a subclass or instance of ' -(473 more lines below) -[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] - -ASSISTANT: -The code has been updated to use the `round` function, which should fix the rounding issue. Before submitting the changes, it would be prudent to run the reproduce.py code again to ensure that this change has the desired effect. 
I see that "Current directory" is still `/marshmallow-code__marshmallow`, so we can just run `reproduce.py` directly. - -python reproduce.py - - -USER: -OBSERVATION: -345 - -ASSISTANT: -The output has changed from 344 to 345, which suggests that the rounding issue has been fixed. Let's remove the reproduce.py file since it is no longer needed. - -rm reproduce.py - - -USER: -OBSERVATION: - -ASSISTANT: -rm doesn't have any output when it deletes successfully, so that must have worked. Now that we have fixed the issue, we can exit the current task. - -exit - - ---- END OF EXAMPLE --- -""" diff --git a/openhands/agenthub/codeact_swe_agent/response_parser.py b/openhands/agenthub/codeact_swe_agent/response_parser.py deleted file mode 100644 index 147b8655f8f8..000000000000 --- a/openhands/agenthub/codeact_swe_agent/response_parser.py +++ /dev/null @@ -1,46 +0,0 @@ -from openhands.agenthub.codeact_swe_agent.action_parser import ( - CodeActSWEActionParserCmdRun, - CodeActSWEActionParserFinish, - CodeActSWEActionParserIPythonRunCell, - CodeActSWEActionParserMessage, -) -from openhands.controller.action_parser import ResponseParser -from openhands.events.action import Action - - -class CodeActSWEResponseParser(ResponseParser): - """Parser action: - - CmdRunAction(command) - bash command to run - - IPythonRunCellAction(code) - IPython code to run - - MessageAction(content) - Message action to run (e.g. 
ask for clarification) - - AgentFinishAction() - end the interaction - """ - - def __init__(self): - # Need pay attention to the item order in self.action_parsers - super().__init__() - self.action_parsers = [ - CodeActSWEActionParserFinish(), - CodeActSWEActionParserCmdRun(), - CodeActSWEActionParserIPythonRunCell(), - ] - self.default_parser = CodeActSWEActionParserMessage() - - def parse(self, response: str) -> Action: - action_str = self.parse_response(response) - return self.parse_action(action_str) - - def parse_response(self, response) -> str: - action = response.choices[0].message.content - if action is None: - return '' - for lang in ['bash', 'ipython']: - if f'<execute_{lang}>' in action and f'</execute_{lang}>' not in action: - action += f'</execute_{lang}>' - return action - - def parse_action(self, action_str: str) -> Action: - for action_parser in self.action_parsers: - if action_parser.check_condition(action_str): - return action_parser.parse(action_str) - return self.default_parser.parse(action_str)