Merge branch 'main' into add-port-mappings

All-Hands-AI · Dec 16, 2024 · 4bc7721 · 4bc7721
2 parents c7d3225 + 590ebb6
commit 4bc7721
Show file tree

Hide file tree

Showing 33 changed files with 550 additions and 1,019 deletions.
diff --git a/.github/workflows/ghcr-build.yml b/.github/workflows/ghcr-build.yml
@@ -68,9 +68,6 @@ jobs:
       - name: Set up Docker Buildx
         id: buildx
         uses: docker/setup-buildx-action@v3
-      - name: "Set up docker layer caching"
-        uses: satackey/action-docker-layer-caching@v0.0.11
-        continue-on-error: true
       - name: Build and push app image
         if: "!github.event.pull_request.head.repo.fork"
         run: |

diff --git a/.github/workflows/openhands-resolver.yml b/.github/workflows/openhands-resolver.yml
@@ -59,7 +59,6 @@ jobs:
       github.event_name == 'workflow_call' ||
       github.event.label.name == 'fix-me' ||
       github.event.label.name == 'fix-me-experimental' ||
-
       (
         ((github.event_name == 'issue_comment' || github.event_name == 'pull_request_review_comment') &&
         contains(github.event.comment.body, inputs.macro || '@openhands-agent') &&
@@ -140,15 +139,19 @@ jobs:
 
       - name: Set environment variables
         run: |
-          if [ -n "${{ github.event.review.body }}" ]; then
+          # Handle pull request events first
+          if [ -n "${{ github.event.pull_request.number }}" ]; then
+            echo "ISSUE_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
+            echo "ISSUE_TYPE=pr" >> $GITHUB_ENV
+          # Handle pull request review events
+          elif [ -n "${{ github.event.review.body }}" ]; then
             echo "ISSUE_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
             echo "ISSUE_TYPE=pr" >> $GITHUB_ENV
+          # Handle issue comment events that reference a PR
           elif [ -n "${{ github.event.issue.pull_request }}" ]; then
             echo "ISSUE_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
             echo "ISSUE_TYPE=pr" >> $GITHUB_ENV
-          elif [ -n "${{ github.event.pull_request.number }}" ]; then
-            echo "ISSUE_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
-            echo "ISSUE_TYPE=pr" >> $GITHUB_ENV
+          # Handle regular issue events
           else
             echo "ISSUE_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
             echo "ISSUE_TYPE=issue" >> $GITHUB_ENV

diff --git a/README.md b/README.md
@@ -45,10 +45,11 @@ system requirements and more information.
 ```bash
 docker pull docker.all-hands.dev/all-hands-ai/runtime:0.15-nikolaik
 
-docker run -it --pull=always \
+docker run -it --rm --pull=always \
     -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.15-nikolaik \
     -e LOG_ALL_EVENTS=true \
     -v /var/run/docker.sock:/var/run/docker.sock \
+    -v ~/.openhands:/home/openhands/.openhands \
     -p 3000:3000 \
     --add-host host.docker.internal:host-gateway \
     --name openhands-app \

diff --git a/config.template.toml b/config.template.toml
@@ -172,6 +172,10 @@ model = "gpt-4o"
 # If model is vision capable, this option allows to disable image processing (useful for cost reduction).
 #disable_vision = true
 
+# Custom tokenizer to use for token counting
+# https://docs.litellm.ai/docs/completion/token_usage
+#custom_tokenizer = ""
+
 [llm.gpt4o-mini]
 api_key = "your-api-key"
 model = "gpt-4o"

diff --git a/containers/app/Dockerfile b/containers/app/Dockerfile
@@ -42,6 +42,8 @@ ENV USE_HOST_NETWORK=false
 ENV WORKSPACE_BASE=/opt/workspace_base
 ENV OPENHANDS_BUILD_VERSION=$OPENHANDS_BUILD_VERSION
 ENV SANDBOX_USER_ID=0
+ENV FILE_STORE=local
+ENV FILE_STORE_PATH=~/.openhands
 RUN mkdir -p $WORKSPACE_BASE
 
 RUN apt-get update -y \

diff --git a/evaluation/benchmarks/swe_bench/prompt.py b/evaluation/benchmarks/swe_bench/prompt.py
diff --git a/evaluation/benchmarks/swe_bench/run_infer.py b/evaluation/benchmarks/swe_bench/run_infer.py
@@ -9,7 +9,7 @@
 from datasets import load_dataset
 
 import openhands.agenthub
-from evaluation.benchmarks.swe_bench.prompt import CODEACT_SWE_PROMPT
+
 from evaluation.utils.shared import (
     EvalException,
     EvalMetadata,
@@ -45,7 +45,6 @@
 
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
     'CodeActAgent': codeact_user_response,
-    'CodeActSWEAgent': codeact_user_response,
 }
 
 
@@ -56,39 +55,27 @@ def _get_swebench_workspace_dir_name(instance: pd.Series) -> str:
 def get_instruction(instance: pd.Series, metadata: EvalMetadata):
     workspace_dir_name = _get_swebench_workspace_dir_name(instance)
     # Prepare instruction
-    if metadata.agent_class == 'CodeActSWEAgent':
-        instruction = (
-            'We are currently solving the following issue within our repository. Here is the issue text:\n'
-            '--- BEGIN ISSUE ---\n'
-            f'{instance.problem_statement}\n'
-            '--- END ISSUE ---\n\n'
-        )
-        if USE_HINT_TEXT and instance.hints_text:
-            instruction += (
-                f'--- BEGIN HINTS ---\n{instance.hints_text}\n--- END HINTS ---\n'
-            )
-        instruction += CODEACT_SWE_PROMPT.format(workspace_dir_name=workspace_dir_name)
-    else:
-        # Instruction based on Anthropic's official trajectory
-        # https://github.com/eschluntz/swe-bench-experiments/tree/main/evaluation/verified/20241022_tools_claude-3-5-sonnet-updated/trajs
-        instruction = (
-            '<uploaded_files>\n'
-            f'/workspace/{workspace_dir_name}\n'
-            '</uploaded_files>\n'
-            f"I've uploaded a python code repository in the directory {workspace_dir_name}. Consider the following PR description:\n\n"
-            f'<pr_description>\n'
-            f'{instance.problem_statement}\n'
-            '</pr_description>\n\n'
-            'Can you help me implement the necessary changes to the repository so that the requirements specified in the <pr_description> are met?\n'
-            "I've already taken care of all changes to any of the test files described in the <pr_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
-            'Your task is to make the minimal changes to non-tests files in the /workspace directory to ensure the <pr_description> is satisfied.\n'
-            'Follow these steps to resolve the issue:\n'
-            '1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
-            '2. Create a script to reproduce the error and execute it with `python <filename.py>` using the BashTool, to confirm the error\n'
-            '3. Edit the sourcecode of the repo to resolve the issue\n'
-            '4. Rerun your reproduce script and confirm that the error is fixed!\n'
-            '5. Think about edgecases and make sure your fix handles them as well\n'
-            "Your thinking should be thorough and so it's fine if it's very long.\n"
+
+    # Instruction based on Anthropic's official trajectory
+    # https://github.com/eschluntz/swe-bench-experiments/tree/main/evaluation/verified/20241022_tools_claude-3-5-sonnet-updated/trajs
+    instruction = (
+        '<uploaded_files>\n'
+        f'/workspace/{workspace_dir_name}\n'
+        '</uploaded_files>\n'
+        f"I've uploaded a python code repository in the directory {workspace_dir_name}. Consider the following PR description:\n\n"
+        f'<pr_description>\n'
+        f'{instance.problem_statement}\n'
+        '</pr_description>\n\n'
+        'Can you help me implement the necessary changes to the repository so that the requirements specified in the <pr_description> are met?\n'
+        "I've already taken care of all changes to any of the test files described in the <pr_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
+        'Your task is to make the minimal changes to non-tests files in the /workspace directory to ensure the <pr_description> is satisfied.\n'
+        'Follow these steps to resolve the issue:\n'
+        '1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
+        '2. Create a script to reproduce the error and execute it with `python <filename.py>` using the BashTool, to confirm the error\n'
+        '3. Edit the sourcecode of the repo to resolve the issue\n'
+        '4. Rerun your reproduce script and confirm that the error is fixed!\n'
+        '5. Think about edgecases and make sure your fix handles them as well\n'
+        "Your thinking should be thorough and so it's fine if it's very long.\n"
         )
 
     if RUN_WITH_BROWSING:

diff --git a/evaluation/benchmarks/swe_bench/scripts/setup/instance_swe_entry.sh b/evaluation/benchmarks/swe_bench/scripts/setup/instance_swe_entry.sh
@@ -33,7 +33,7 @@ if [ -d /workspace/$WORKSPACE_NAME ]; then
     rm -rf /workspace/$WORKSPACE_NAME
 fi
 mkdir -p /workspace
-ln -s /testbed /workspace/$WORKSPACE_NAME
+mv /testbed /workspace/$WORKSPACE_NAME
 
 # Activate instance-specific environment
 . /opt/miniconda3/etc/profile.d/conda.sh

diff --git a/openhands/agenthub/__init__.py b/openhands/agenthub/__init__.py
@@ -10,15 +10,13 @@
 from openhands.agenthub import (  # noqa: E402
     browsing_agent,
     codeact_agent,
-    codeact_swe_agent,
     delegator_agent,
     dummy_agent,
     planner_agent,
 )
 
 __all__ = [
     'codeact_agent',
-    'codeact_swe_agent',
     'planner_agent',
     'delegator_agent',
     'dummy_agent',

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -166,6 +166,7 @@ def get_action_message(
 
             # Add the LLM message (assistant) that initiated the tool calls
             # (overwrites any previous message with the same response_id)
+            logger.debug(f'Tool calls type: {type(assistant_msg.tool_calls)}, value: {assistant_msg.tool_calls}')
             pending_tool_call_action_messages[llm_response.id] = Message(
                 role=assistant_msg.role,
                 # tool call content SHOULD BE a string

diff --git a/openhands/agenthub/codeact_swe_agent/README.md b/openhands/agenthub/codeact_swe_agent/README.md
diff --git a/openhands/agenthub/codeact_swe_agent/__init__.py b/openhands/agenthub/codeact_swe_agent/__init__.py
diff --git a/openhands/agenthub/codeact_swe_agent/action_parser.py b/openhands/agenthub/codeact_swe_agent/action_parser.py