Commit
Merge branch 'main' into kevin
SmartManoj committed Aug 28, 2024
2 parents 62c0494 + 653bc4e commit 5fc90b5
Showing 67 changed files with 3,378 additions and 2,530 deletions.
4 changes: 2 additions & 2 deletions .github/pull_request_template.md
@@ -1,4 +1,4 @@
**What is the problem that this fixes or functionality that this introduces? Does it fix any open issues?**
**Short description of the problem this fixes or functionality that this introduces. This may be used for the CHANGELOG**



@@ -8,4 +8,4 @@


---
**Other references**
**Link of any specific issues this addresses**
11 changes: 8 additions & 3 deletions .github/workflows/ghcr_app.yml
@@ -56,6 +56,11 @@ jobs:
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v3
- name: Build and export image
id: build
run: ./containers/build.sh openhands ${{ github.repository_owner }} --push
- name: Build and push app image
if: "!github.event.pull_request.head.repo.fork"
run: |
./containers/build.sh openhands ${{ github.repository_owner }} --push
- name: Build app image
if: "github.event.pull_request.head.repo.fork"
run: |
./containers/build.sh openhands image ${{ github.repository_owner }}
65 changes: 55 additions & 10 deletions .github/workflows/ghcr_runtime.yml
@@ -31,7 +31,13 @@ jobs:
packages: write
strategy:
matrix:
base_image: ['nikolaik/python-nodejs:python3.11-nodejs22', 'python:3.11-bookworm', 'node:22-bookworm']
base_image:
- image: 'nikolaik/python-nodejs:python3.11-nodejs22'
tag: nikolaik
- image: 'python:3.11-bookworm'
tag: python
- image: 'node:22-bookworm'
tag: node
steps:
- name: Checkout
uses: actions/checkout@v4
@@ -70,12 +76,27 @@ jobs:
- name: Install Python dependencies using Poetry
run: make install-python-dependencies
- name: Create source distribution and Dockerfile
run: poetry run python3 openhands/runtime/utils/runtime_build.py --base_image ${{ matrix.base_image }} --build_folder containers/runtime --force_rebuild
- name: Build and export image
id: build
run: poetry run python3 openhands/runtime/utils/runtime_build.py --base_image ${{ matrix.base_image.image }} --build_folder containers/runtime --force_rebuild

- name: Build and push runtime image ${{ matrix.base_image.image }}
if: "!github.event.pull_request.head.repo.fork"
run: |
suffix=$(echo "${{ matrix.base_image }}" | cut -d ':' -f 1 | cut -d '/' -f 1)
./containers/build.sh runtime ${{ github.repository_owner }} --push $suffix
./containers/build.sh runtime ${{ github.repository_owner }} --push ${{ matrix.base_image.tag }}
# Forked repos can't push to GHCR, so we need to upload the image as an artifact
- name: Build runtime image ${{ matrix.base_image.image }} for fork
uses: docker/build-push-action@v6
with:
tags: ghcr.io/all-hands-ai/runtime:${{ github.sha }}-${{ matrix.base_image.tag }}
outputs: type=docker,dest=/tmp/runtime-${{ matrix.base_image.tag }}.tar
context: containers/runtime
- name: Upload runtime image for fork
if: "github.event.pull_request.head.repo.fork"
uses: actions/upload-artifact@v4
with:
name: runtime-${{ matrix.base_image.tag }}
path: /tmp/runtime-${{ matrix.base_image.tag }}.tar


# Run unit tests with the EventStream runtime Docker images
test_runtime:
@@ -96,6 +117,19 @@ jobs:
haskell: true
large-packages: true
swap-storage: true

# Forked repos can't push to GHCR, so we need to download the image as an artifact
- name: Download runtime image for fork
if: "github.event.pull_request.head.repo.fork"
uses: actions/download-artifact@v4
with:
name: runtime-${{ matrix.base_image }}
path: /tmp
- name: Load runtime image for fork
if: "github.event.pull_request.head.repo.fork"
run: |
docker load --input /tmp/runtime-${{ matrix.base_image }}.tar
- name: Install poetry via pipx
run: pipx install poetry
- name: Set up Python
@@ -107,8 +141,7 @@
run: make install-python-dependencies
- name: Run runtime tests
run: |
git_hash=$(git rev-parse --short "$GITHUB_SHA")
image_name=ghcr.io/${{ github.repository_owner }}/runtime:$git_hash-${{ matrix.base_image }}
image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ github.sha }}-${{ matrix.base_image }}
image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
TEST_RUNTIME=eventstream \
@@ -132,6 +165,19 @@ jobs:
base_image: ['nikolaik', 'python', 'node']
steps:
- uses: actions/checkout@v4

# Forked repos can't push to GHCR, so we need to download the image as an artifact
- name: Download runtime image for fork
if: "github.event.pull_request.head.repo.fork"
uses: actions/download-artifact@v4
with:
name: runtime-${{ matrix.base_image }}
path: /tmp
- name: Load runtime image for fork
if: "github.event.pull_request.head.repo.fork"
run: |
docker load --input /tmp/runtime-${{ matrix.base_image }}.tar
- name: Install poetry via pipx
run: pipx install poetry
- name: Set up Python
@@ -143,8 +189,7 @@
run: make install-python-dependencies
- name: Run integration tests
run: |
git_hash=$(git rev-parse --short "$GITHUB_SHA")
image_name=ghcr.io/${{ github.repository_owner }}/runtime:$git_hash-${{ matrix.base_image }}
image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ github.sha }}-${{ matrix.base_image }}
image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
TEST_RUNTIME=eventstream \
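The test steps above now derive the runtime image tag directly from the full commit SHA plus the per-base-image suffix, instead of computing a short hash with `git rev-parse`. The following is a minimal sketch of that naming rule, written in Python purely for illustration (the real logic lives in the workflow shell snippets and `containers/build.sh`):

```python
def runtime_image_name(owner: str, github_sha: str, base_image_tag: str) -> str:
    """Build the GHCR runtime image reference the tests pull.

    Mirrors the shell step: ghcr.io/<owner>/runtime:<full-sha>-<tag>, lowercased,
    because GHCR repository paths must be lowercase (the workflow applies
    `tr '[:upper:]' '[:lower:]'` for the same reason).
    """
    return f"ghcr.io/{owner}/runtime:{github_sha}-{base_image_tag}".lower()


# Hypothetical example values:
# runtime_image_name("All-Hands-AI", "5fc90b5...", "nikolaik")
# -> "ghcr.io/all-hands-ai/runtime:5fc90b5...-nikolaik"
```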
2 changes: 1 addition & 1 deletion agenthub/codeact_agent/README.md
@@ -15,7 +15,7 @@ The conceptual idea is illustrated below. At each turn, the agent can:

To make the CodeAct agent more powerful with only access to `bash` action space, CodeAct agent leverages OpenHands's plugin system:
- [Jupyter plugin](https://github.com/All-Hands-AI/OpenHands/tree/main/openhands/runtime/plugins/jupyter): for IPython execution via bash command
- [SWE-agent tool plugin](https://github.com/All-Hands-AI/OpenHands/tree/main/openhands/runtime/plugins/swe_agent_commands): Powerful bash command line tools for software development tasks introduced by [swe-agent](https://github.com/princeton-nlp/swe-agent).
- [Agent Skills plugin](https://github.com/All-Hands-AI/OpenHands/tree/main/openhands/runtime/plugins/agent_skills): Powerful bash command line tools for software development tasks introduced by [swe-agent](https://github.com/princeton-nlp/swe-agent).

## Demo

65 changes: 38 additions & 27 deletions agenthub/codeact_agent/codeact_agent.py
@@ -197,30 +197,48 @@ def step(self, state: State) -> Action:

response = None
# prepare what we want to send to the LLM
messages: list[Message] = self._get_messages(state)
response = self.llm.completion(
messages=messages,
stop=[
messages = self._get_messages(state)

params = {
'messages': messages,
'stop': [
'</execute_ipython>',
'</execute_bash>',
'</execute_browse>',
],
temperature=0.0,
condense=True,
)
'temperature': 0.0,
'condense': True,
}

if self.llm.supports_prompt_caching:
params['extra_headers'] = {
'anthropic-beta': 'prompt-caching-2024-07-31',
}

response = self.llm.completion(**params)

return self.action_parser.parse(response)

def _get_messages(self, state: State) -> list[Message]:
messages: list[Message] = [
Message(
role='system',
content=[TextContent(text=self.prompt_manager.system_message)],
content=[
TextContent(
text=self.prompt_manager.system_message,
cache_prompt=self.llm.supports_prompt_caching, # Cache system prompt
)
],
condensable=False,
),
# Message(
# role='user',
# content=[TextContent(text=self.prompt_manager.initial_user_message)],
# content=[
# TextContent(
# text=self.prompt_manager.initial_user_message,
# cache_prompt=self.llm.supports_prompt_caching, # if the user asks the same query,
# )
# ],
# condensable=False,
# ),
]
@@ -264,30 +282,23 @@ def _get_messages(self, state: State) -> list[Message]:
else:
messages.append(message)

# Add caching to the last 2 user messages
if self.llm.supports_prompt_caching:
user_turns_processed = 0
for message in reversed(messages):
if message.role == 'user' and user_turns_processed < 2:
message.content[
-1
].cache_prompt = True # Last item inside the message content
user_turns_processed += 1

# the latest user message is important:
# we want to remind the agent of the environment constraints
latest_user_message = next(
(m for m in reversed(messages) if m.role == 'user'), None
)

# Get the last user text inside content
if latest_user_message:
latest_user_message_text = next(
(
t
for t in reversed(latest_user_message.content)
if isinstance(t, TextContent)
)
)
# add a reminder to the prompt
reminder_text = f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task. When finished reply with <finish></finish>.'

if latest_user_message_text:
latest_user_message_text.text = (
latest_user_message_text.text + reminder_text
)
else:
latest_user_message_text = TextContent(text=reminder_text)
latest_user_message.content.append(latest_user_message_text)
latest_user_message.content.append(TextContent(text=reminder_text))

return messages
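The prompt-caching changes in this file can be summarized in a small standalone sketch. The snippet below is illustrative only: it assumes hypothetical `Message`/`TextContent` dataclasses with a `cache_prompt` flag and mirrors the pattern in the diff — mark the system prompt as cacheable, mark the last two user turns, and pass the Anthropic prompt-caching beta header when the model supports it.

```python
from dataclasses import dataclass


# Minimal stand-ins for the Message/TextContent types used above (assumed shapes).
@dataclass
class TextContent:
    text: str
    cache_prompt: bool = False


@dataclass
class Message:
    role: str
    content: list  # list[TextContent]


def mark_cacheable(messages, supports_prompt_caching: bool, max_user_turns: int = 2):
    """Flag the system prompt and the last `max_user_turns` user messages for caching."""
    if not supports_prompt_caching:
        return messages
    user_turns_processed = 0
    for message in reversed(messages):
        if message.role == 'system':
            message.content[-1].cache_prompt = True
        elif message.role == 'user' and user_turns_processed < max_user_turns:
            # Cache marker goes on the last content item of the message.
            message.content[-1].cache_prompt = True
            user_turns_processed += 1
    return messages


def completion_params(messages, supports_prompt_caching: bool) -> dict:
    """Build the kwargs passed to llm.completion(), adding the beta header when supported."""
    params = {
        'messages': messages,
        'stop': ['</execute_ipython>', '</execute_bash>', '</execute_browse>'],
        'temperature': 0.0,
        'condense': True,
    }
    if supports_prompt_caching:
        params['extra_headers'] = {'anthropic-beta': 'prompt-caching-2024-07-31'}
    return params
```

Under these assumptions, calling `completion(**completion_params(mark_cacheable(msgs, True), True))` reproduces the argument shape the agent builds before invoking `self.llm.completion(**params)` in the diff above.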
1 change: 1 addition & 0 deletions containers/build.sh
@@ -20,6 +20,7 @@ cache_tag="$cache_tag_base"
if [[ -n $GITHUB_SHA ]]; then
git_hash=$(git rev-parse --short "$GITHUB_SHA")
tags+=("$git_hash")
tags+=("$GITHUB_SHA")
fi

if [[ -n $GITHUB_REF_NAME ]]; then
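The added line tags each image with the full `GITHUB_SHA` in addition to the existing short hash, which is what lets the updated workflows reference `${{ github.sha }}` directly. A rough Python sketch of the resulting commit-derived tags (assumption: a git checkout is available; the actual script also adds ref- and cache-based tags not shown here):

```python
import subprocess


def commit_tags(github_sha: str) -> list[str]:
    """Approximate the commit-derived tags build.sh appends when GITHUB_SHA is set."""
    short_hash = subprocess.run(
        ["git", "rev-parse", "--short", github_sha],
        capture_output=True, text=True, check=True,
    ).stdout.strip()
    return [short_hash, github_sha]  # e.g. ["5fc90b5", "<full 40-character sha>"]
```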
22 changes: 8 additions & 14 deletions docs/modules/usage/about.md
@@ -5,30 +5,26 @@ sidebar_position: 8
# 📚 Misc

## ⭐️ Research Strategy

Achieving full replication of production-grade applications with LLMs is a complex endeavor. Our strategy involves:

1. **Core Technical Research:** Focusing on foundational research to understand and improve the technical aspects of code generation and handling.
2. **Specialist Abilities:** Enhancing the effectiveness of core components through data curation, training methods, and more.
3. **Task Planning:** Developing capabilities for bug detection, codebase management, and optimization.
4. **Evaluation:** Establishing comprehensive evaluation metrics to better understand and improve our models.
1. **Core Technical Research:** Focusing on foundational research to understand and improve the technical aspects of code generation and handling
2. **Specialist Abilities:** Enhancing the effectiveness of core components through data curation, training methods, and more
3. **Task Planning:** Developing capabilities for bug detection, codebase management, and optimization
4. **Evaluation:** Establishing comprehensive evaluation metrics to better understand and improve our models

## 🚧 Default Agent

- Our default Agent is currently the CodeActAgent, which is capable of generating code and handling files. We're working on other Agent implementations, including [SWE Agent](https://swe-agent.com/). You can [read about our current set of agents here](./agents).
Our default Agent is currently the [CodeActAgent](agents), which is capable of generating code and handling files.

## 🤝 How to Contribute

OpenHands is a community-driven project, and we welcome contributions from everyone. Whether you're a developer, a researcher, or simply enthusiastic about advancing the field of software engineering with AI, there are many ways to get involved:

- **Code Contributions:** Help us develop the core functionalities, frontend interface, or sandboxing solutions.
- **Research and Evaluation:** Contribute to our understanding of LLMs in software engineering, participate in evaluating the models, or suggest improvements.
- **Feedback and Testing:** Use the OpenHands toolset, report bugs, suggest features, or provide feedback on usability.
- **Code Contributions:** Help us develop the core functionalities, frontend interface, or sandboxing solutions
- **Research and Evaluation:** Contribute to our understanding of LLMs in software engineering, participate in evaluating the models, or suggest improvements
- **Feedback and Testing:** Use the OpenHands toolset, report bugs, suggest features, or provide feedback on usability

For details, please check [this document](https://github.com/All-Hands-AI/OpenHands/blob/main/CONTRIBUTING.md).

## 🤖 Join Our Community

We have both Slack workspace for the collaboration on building OpenHands and Discord server for discussion about anything related, e.g., this project, LLM, agent, etc.

- [Slack workspace](https://join.slack.com/t/opendevin/shared_invite/zt-2oikve2hu-UDxHeo8nsE69y6T7yFX_BA)
@@ -41,13 +37,11 @@ If you would love to contribute, feel free to join our community. Let's simplify
[![Star History Chart](https://api.star-history.com/svg?repos=All-Hands-AI/OpenHands&type=Date)](https://star-history.com/#All-Hands-AI/OpenHands&Date)

## 🛠️ Built With

OpenHands is built using a combination of powerful frameworks and libraries, providing a robust foundation for its development. Here are the key technologies used in the project:

![FastAPI](https://img.shields.io/badge/FastAPI-black?style=for-the-badge) ![uvicorn](https://img.shields.io/badge/uvicorn-black?style=for-the-badge) ![LiteLLM](https://img.shields.io/badge/LiteLLM-black?style=for-the-badge) ![Docker](https://img.shields.io/badge/Docker-black?style=for-the-badge) ![Ruff](https://img.shields.io/badge/Ruff-black?style=for-the-badge) ![MyPy](https://img.shields.io/badge/MyPy-black?style=for-the-badge) ![LlamaIndex](https://img.shields.io/badge/LlamaIndex-black?style=for-the-badge) ![React](https://img.shields.io/badge/React-black?style=for-the-badge)

Please note that the selection of these technologies is in progress, and additional technologies may be added or existing ones may be removed as the project evolves. We strive to adopt the most suitable and efficient tools to enhance the capabilities of OpenHands.

## 📜 License

Distributed under the MIT License. See [our license](https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE) for more information.
79 changes: 5 additions & 74 deletions docs/modules/usage/agents.md
@@ -2,13 +2,11 @@
sidebar_position: 3
---

# 🧠 Agents and Capabilities

## CodeAct Agent

# 🧠 Main Agent and Capabilities
## CodeActAgent
### Description

This agent implements the CodeAct idea ([paper](https://arxiv.org/abs/2402.01030), [tweet](https://twitter.com/xingyaow_/status/1754556835703751087)) that consolidates LLM agents’ **act**ions into a unified **code** action space for both _simplicity_ and _performance_ (see paper for more details).
This agent implements the CodeAct idea ([paper](https://arxiv.org/abs/2402.01030), [tweet](https://twitter.com/xingyaow_/status/1754556835703751087)) that consolidates LLM agents’ **act**ions into a
unified **code** action space for both _simplicity_ and _performance_.

The conceptual idea is illustrated below. At each turn, the agent can:

@@ -20,74 +18,7 @@ The conceptual idea is illustrated below. At each turn, the agent can:

![image](https://github.com/All-Hands-AI/OpenHands/assets/38853559/92b622e3-72ad-4a61-8f41-8c040b6d5fb3)

### Plugin System

To make the CodeAct agent more powerful with only access to `bash` action space, CodeAct agent leverages OpenHands's plugin system:

- [Jupyter plugin](https://github.com/All-Hands-AI/OpenHands/tree/main/openhands/runtime/plugins/jupyter): for IPython execution via bash command
- [SWE-agent tool plugin](https://github.com/All-Hands-AI/OpenHands/tree/main/openhands/runtime/plugins/swe_agent_commands): Powerful bash command line tools for software development tasks introduced by [swe-agent](https://github.com/princeton-nlp/swe-agent).

### Demo

https://github.com/All-Hands-AI/OpenHands/assets/38853559/f592a192-e86c-4f48-ad31-d69282d5f6ac

_Example of CodeActAgent with `gpt-4-turbo-2024-04-09` performing a data science task (linear regression)_

### Actions

`Action`,
`CmdRunAction`,
`IPythonRunCellAction`,
`AgentEchoAction`,
`AgentFinishAction`,
`AgentTalkAction`

### Observations

`CmdOutputObservation`,
`IPythonRunCellObservation`,
`AgentMessageObservation`,
`UserMessageObservation`

### Methods

| Method | Description |
| --------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- |
| `__init__` | Initializes an agent with `llm` and a list of messages `list[Mapping[str, str]]` |
| `step` | Performs one step using the CodeAct Agent. This includes gathering info on previous steps and prompting the model to make a command to execute. |

## Planner Agent

### Description

The planner agent utilizes a special prompting strategy to create long term plans for solving problems.
The agent is given its previous action-observation pairs, current task, and hint based on last action taken at every step.

### Actions

`NullAction`,
`CmdRunAction`,
`BrowseURLAction`,
`GithubPushAction`,
`FileReadAction`,
`FileWriteAction`,
`AgentThinkAction`,
`AgentFinishAction`,
`AgentSummarizeAction`,
`AddTaskAction`,
`ModifyTaskAction`,

### Observations

`Observation`,
`NullObservation`,
`CmdOutputObservation`,
`FileReadObservation`,
`BrowserOutputObservation`

### Methods

| Method | Description |
| --------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `__init__` | Initializes an agent with `llm` |
| `step` | Checks to see if current step is completed, returns `AgentFinishAction` if True. Otherwise, creates a plan prompt and sends to model for inference, adding the result as the next action. |
_Example of CodeActAgent with `gpt-4-turbo-2024-04-09` performing a data science task (linear regression)_.