diff --git a/.devcontainer/README.MD b/.devcontainer/README.MD
deleted file mode 100644
index 001c24beb49b..000000000000
--- a/.devcontainer/README.MD
+++ /dev/null
@@ -1 +0,0 @@
-The files in this directory configure a development container for GitHub Codespaces.
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
deleted file mode 100644
index 6c97e3bf5523..000000000000
--- a/.devcontainer/devcontainer.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-	"name": "OpenHands Codespaces",
-	"image": "mcr.microsoft.com/devcontainers/universal",
-	"customizations":{
-        "vscode":{
-            "extensions": [
-                "ms-python.python"
-            ]
-        }
-    },
-	"onCreateCommand": "sh ./.devcontainer/on_create.sh",
-	"postCreateCommand": "make build",
-	"postStartCommand": "USE_HOST_NETWORK=True nohup bash -c 'make run &'"
-
-}
diff --git a/.devcontainer/on_create.sh b/.devcontainer/on_create.sh
deleted file mode 100644
index 43da1023236b..000000000000
--- a/.devcontainer/on_create.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env bash
-sudo apt update
-sudo apt install -y netcat
-sudo add-apt-repository -y ppa:deadsnakes/ppa
-sudo apt install -y python3.12
-curl -sSL https://install.python-poetry.org | python3.12 -
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index e7413f415f57..b4ce891d672f 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -18,7 +18,7 @@ updates:
           - "chromadb"
       browsergym:
         patterns:
-          - "browsergym"
+          - "browsergym*"
       security-all:
         applies-to: "security-updates"
         patterns:
diff --git a/.github/workflows/fe-unit-tests.yml b/.github/workflows/fe-unit-tests.yml
index b720bfe34c28..dd577b4b43c5 100644
--- a/.github/workflows/fe-unit-tests.yml
+++ b/.github/workflows/fe-unit-tests.yml
@@ -24,7 +24,8 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        node-version: [20]
+        node-version: [20, 22]
+      fail-fast: true
     steps:
       - name: Checkout
         uses: actions/checkout@v4
diff --git a/.github/workflows/openhands-resolver.yml b/.github/workflows/openhands-resolver.yml
index 2719c3773607..c19ee472b7dc 100644
--- a/.github/workflows/openhands-resolver.yml
+++ b/.github/workflows/openhands-resolver.yml
@@ -16,6 +16,10 @@ on:
         type: string
         default: "main"
         description: "Target branch to pull and create PR against"
+      LLM_MODEL:
+        required: false
+        type: string
+        default: "anthropic/claude-3-5-sonnet-20241022"
       base_container_image:
         required: false
         type: string
@@ -23,15 +27,15 @@ on:
         description: "Custom sandbox env"
     secrets:
       LLM_MODEL:
-        required: true
+        required: false
       LLM_API_KEY:
         required: true
       LLM_BASE_URL:
         required: false
       PAT_TOKEN:
-        required: true
+        required: false
       PAT_USERNAME:
-        required: true
+        required: false
 
   issues:
     types: [labeled]
@@ -55,7 +59,6 @@ jobs:
       github.event_name == 'workflow_call' ||
       github.event.label.name == 'fix-me' ||
       github.event.label.name == 'fix-me-experimental' ||
-
       (
         ((github.event_name == 'issue_comment' || github.event_name == 'pull_request_review_comment') &&
         contains(github.event.comment.body, inputs.macro || '@openhands-agent') &&
@@ -106,13 +109,14 @@ jobs:
 
       - name: Check required environment variables
         env:
-          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_MODEL: ${{ secrets.LLM_MODEL || inputs.LLM_MODEL }}
           LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
           LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
           PAT_TOKEN: ${{ secrets.PAT_TOKEN }}
           PAT_USERNAME: ${{ secrets.PAT_USERNAME }}
+          GITHUB_TOKEN: ${{ github.token }}
         run: |
-          required_vars=("LLM_MODEL" "LLM_API_KEY" "PAT_TOKEN" "PAT_USERNAME")
+          required_vars=("LLM_MODEL" "LLM_API_KEY")
           for var in "${required_vars[@]}"; do
             if [ -z "${!var}" ]; then
               echo "Error: Required environment variable $var is not set."
@@ -120,17 +124,34 @@ jobs:
             fi
           done
 
+          # Check optional variables and warn about fallbacks
+          if [ -z "$PAT_TOKEN" ]; then
+            echo "Warning: PAT_TOKEN is not set, falling back to GITHUB_TOKEN"
+          fi
+
+          if [ -z "$LLM_BASE_URL" ]; then
+            echo "Warning: LLM_BASE_URL is not set, will use default API endpoint"
+          fi
+
+          if [ -z "$PAT_USERNAME" ]; then
+            echo "Warning: PAT_USERNAME is not set, will use openhands-agent"
+          fi
+
       - name: Set environment variables
         run: |
-          if [ -n "${{ github.event.review.body }}" ]; then
+          # Handle pull request events first
+          if [ -n "${{ github.event.pull_request.number }}" ]; then
+            echo "ISSUE_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
+            echo "ISSUE_TYPE=pr" >> $GITHUB_ENV
+          # Handle pull request review events
+          elif [ -n "${{ github.event.review.body }}" ]; then
             echo "ISSUE_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
             echo "ISSUE_TYPE=pr" >> $GITHUB_ENV
+          # Handle issue comment events that reference a PR
           elif [ -n "${{ github.event.issue.pull_request }}" ]; then
             echo "ISSUE_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
             echo "ISSUE_TYPE=pr" >> $GITHUB_ENV
-          elif [ -n "${{ github.event.pull_request.number }}" ]; then
-            echo "ISSUE_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
-            echo "ISSUE_TYPE=pr" >> $GITHUB_ENV
+          # Handle regular issue events
           else
             echo "ISSUE_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
             echo "ISSUE_TYPE=issue" >> $GITHUB_ENV
@@ -143,7 +164,7 @@ jobs:
           fi
 
           echo "MAX_ITERATIONS=${{ inputs.max_iterations || 50 }}" >> $GITHUB_ENV
-          echo "SANDBOX_ENV_GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }}" >> $GITHUB_ENV
+          echo "SANDBOX_ENV_GITHUB_TOKEN=${{ secrets.PAT_TOKEN || github.token }}" >> $GITHUB_ENV
           echo "SANDBOX_ENV_BASE_CONTAINER_IMAGE=${{ inputs.base_container_image }}" >> $GITHUB_ENV
 
           # Set branch variables
@@ -152,7 +173,7 @@ jobs:
       - name: Comment on issue with start message
         uses: actions/github-script@v7
         with:
-          github-token: ${{secrets.GITHUB_TOKEN}}
+          github-token: ${{ secrets.PAT_TOKEN || github.token }}
           script: |
             const issueType = process.env.ISSUE_TYPE;
             github.rest.issues.createComment({
@@ -177,9 +198,9 @@ jobs:
 
       - name: Attempt to resolve issue
         env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          GITHUB_USERNAME: ${{ secrets.PAT_USERNAME }}
-          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          GITHUB_TOKEN: ${{ secrets.PAT_TOKEN || github.token }}
+          GITHUB_USERNAME: ${{ secrets.PAT_USERNAME || 'openhands-agent' }}
+          LLM_MODEL: ${{ secrets.LLM_MODEL || inputs.LLM_MODEL }}
           LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
           LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
           PYTHONPATH: ""
@@ -189,7 +210,7 @@ jobs:
             --issue-number ${{ env.ISSUE_NUMBER }} \
             --issue-type ${{ env.ISSUE_TYPE }} \
             --max-iterations ${{ env.MAX_ITERATIONS }} \
-            --comment-id ${{ env.COMMENT_ID }} \
+            --comment-id ${{ env.COMMENT_ID }}
 
       - name: Check resolution result
         id: check_result
@@ -211,9 +232,9 @@ jobs:
       - name: Create draft PR or push branch
         if: always() # Create PR or branch even if the previous steps fail
         env:
-          GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
-          GITHUB_USERNAME: ${{ secrets.PAT_USERNAME }}
-          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          GITHUB_TOKEN: ${{ secrets.PAT_TOKEN || github.token }}
+          GITHUB_USERNAME: ${{ secrets.PAT_USERNAME || 'openhands-agent' }}
+          LLM_MODEL: ${{ secrets.LLM_MODEL || inputs.LLM_MODEL }}
           LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
           LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
           PYTHONPATH: ""
@@ -221,7 +242,8 @@ jobs:
           if [ "${{ steps.check_result.outputs.RESOLUTION_SUCCESS }}" == "true" ]; then
             cd /tmp && python -m openhands.resolver.send_pull_request \
               --issue-number ${{ env.ISSUE_NUMBER }} \
-              --pr-type draft | tee pr_result.txt && \
+              --pr-type draft \
+              --reviewer ${{ github.actor }} | tee pr_result.txt && \
               grep "draft created" pr_result.txt | sed 's/.*\///g' > pr_number.txt
           else
             cd /tmp && python -m openhands.resolver.send_pull_request \
@@ -235,7 +257,7 @@ jobs:
         uses: actions/github-script@v7
         if: always() # Comment on issue even if the previous steps fail
         with:
-          github-token: ${{secrets.GITHUB_TOKEN}}
+          github-token: ${{ secrets.PAT_TOKEN || github.token }}
           script: |
             const fs = require('fs');
             const issueNumber = ${{ env.ISSUE_NUMBER }};
diff --git a/.github/workflows/py-unit-tests.yml b/.github/workflows/py-unit-tests.yml
index 6e624904a82f..28a14095d7c1 100644
--- a/.github/workflows/py-unit-tests.yml
+++ b/.github/workflows/py-unit-tests.yml
@@ -42,7 +42,7 @@ jobs:
       - name: Build Environment
         run: make build
       - name: Run Tests
-        run: poetry run pytest --forked --cov=openhands --cov-report=xml -svv ./tests/unit --ignore=tests/unit/test_memory.py
+        run: poetry run pytest --forked -n auto --cov=openhands --cov-report=xml -svv ./tests/unit --ignore=tests/unit/test_memory.py
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v4
         env:
diff --git a/README.md b/README.md
index 7396099de859..141420021bac 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,11 @@ call APIs, and yes—even copy code snippets from StackOverflow.
 
 Learn more at [docs.all-hands.dev](https://docs.all-hands.dev), or jump to the [Quick Start](#-quick-start).
 
+> [!IMPORTANT]
+> Using OpenHands for work? We'd love to chat! Fill out
+> [this short form](https://docs.google.com/forms/d/e/1FAIpQLSet3VbGaz8z32gW9Wm-Grl4jpt5WgMXPgJ4EDPVmCETCBpJtQ/viewform)
+> to join our Design Partner program, where you'll get early access to commercial features and the opportunity to provide input on our product roadmap.
+
 ![App screenshot](./docs/static/img/screenshot.png)
 
 ## ⚡ Quick Start
@@ -40,10 +45,11 @@ system requirements and more information.
 ```bash
 docker pull docker.all-hands.dev/all-hands-ai/runtime:0.15-nikolaik
 
-docker run -it --pull=always \
+docker run -it --rm --pull=always \
     -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.15-nikolaik \
     -e LOG_ALL_EVENTS=true \
     -v /var/run/docker.sock:/var/run/docker.sock \
+    -v ~/.openhands:/home/openhands/.openhands \
     -p 3000:3000 \
     --add-host host.docker.internal:host-gateway \
     --name openhands-app \
diff --git a/config.template.toml b/config.template.toml
index d19ff6085e3b..6f626e6bee90 100644
--- a/config.template.toml
+++ b/config.template.toml
@@ -95,10 +95,10 @@ workspace_base = "./workspace"
 # AWS secret access key
 #aws_secret_access_key = ""
 
-# API key to use
+# API key to use (For Headless / CLI only - In Web this is overridden by Session Init)
 api_key = "your-api-key"
 
-# API base URL
+# API base URL (For Headless / CLI only - In Web this is overridden by Session Init)
 #base_url = ""
 
 # API version
@@ -131,7 +131,7 @@ embedding_model = "local"
 # Maximum number of output tokens
 #max_output_tokens = 0
 
-# Model to use
+# Model to use (For Headless / CLI only - In Web this is overridden by Session Init)
 model = "gpt-4o"
 
 # Number of retries to attempt when an operation fails with the LLM.
@@ -217,6 +217,9 @@ llm_config = 'gpt3'
 # Use host network
 #use_host_network = false
 
+# Extra build arguments for the runtime
+#runtime_extra_build_args = ["--network=host", "--add-host=host.docker.internal:host-gateway"]
+
 # Enable auto linting after editing
 #enable_auto_lint = false
 
@@ -237,10 +240,10 @@ llm_config = 'gpt3'
 ##############################################################################
 [security]
 
-# Enable confirmation mode
+# Enable confirmation mode (For Headless / CLI only - In Web this is overridden by Session Init)
 #confirmation_mode = false
 
-# The security analyzer to use
+# The security analyzer to use (For Headless / CLI only - In Web this is overridden by Session Init)
 #security_analyzer = ""
 
 #################################### Eval ####################################
diff --git a/containers/app/Dockerfile b/containers/app/Dockerfile
index 266a9d6b3e40..67753cbe2c2d 100644
--- a/containers/app/Dockerfile
+++ b/containers/app/Dockerfile
@@ -42,6 +42,8 @@ ENV USE_HOST_NETWORK=false
 ENV WORKSPACE_BASE=/opt/workspace_base
 ENV OPENHANDS_BUILD_VERSION=$OPENHANDS_BUILD_VERSION
 ENV SANDBOX_USER_ID=0
+ENV FILE_STORE=local
+ENV FILE_STORE_PATH=~/.openhands
 RUN mkdir -p $WORKSPACE_BASE
 
 RUN apt-get update -y \
diff --git a/containers/dev/README.md b/containers/dev/README.md
index ed45e0c3a5ed..0fb1bbc3b38d 100644
--- a/containers/dev/README.md
+++ b/containers/dev/README.md
@@ -1,5 +1,8 @@
 # Develop in Docker
 
+> [!WARNING]
+> This is not officially supported and may not work.
+
 Install [Docker](https://docs.docker.com/engine/install/) on your host machine and run:
 
 ```bash
diff --git a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/troubleshooting/troubleshooting.md b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/troubleshooting/troubleshooting.md
index 96e8bd58c09d..2731b1990483 100644
--- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/troubleshooting/troubleshooting.md
+++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/troubleshooting/troubleshooting.md
@@ -9,7 +9,6 @@ Si vous trouvez plus d'informations ou une solution de contournement pour l'un d
 :::tip
 OpenHands ne prend en charge Windows que via [WSL](https://learn.microsoft.com/en-us/windows/wsl/install).
 Veuillez vous assurer d'exécuter toutes les commandes à l'intérieur de votre terminal WSL.
-Consultez les [Notes pour les utilisateurs de WSL sur Windows](troubleshooting/windows) pour des guides de dépannage.
 :::
 
 ## Problèmes courants
diff --git a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/troubleshooting/windows.md b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/troubleshooting/windows.md
deleted file mode 100644
index 467f9deb7624..000000000000
--- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/troubleshooting/windows.md
+++ /dev/null
@@ -1,66 +0,0 @@
-
-
-# Notes pour les utilisateurs de WSL sur Windows
-
-OpenHands ne prend en charge Windows que via [WSL](https://learn.microsoft.com/en-us/windows/wsl/install).
-Veuillez vous assurer d'exécuter toutes les commandes dans votre terminal WSL.
-
-## Dépannage
-
-### Recommandation : Ne pas exécuter en tant qu'utilisateur root
-
-Pour des raisons de sécurité, il est fortement recommandé de ne pas exécuter OpenHands en tant qu'utilisateur root, mais en tant qu'utilisateur avec un UID non nul.
-
-Références :
-
-* [Pourquoi il est mauvais de se connecter en tant que root](https://askubuntu.com/questions/16178/why-is-it-bad-to-log-in-as-root)
-* [Définir l'utilisateur par défaut dans WSL](https://www.tenforums.com/tutorials/128152-set-default-user-windows-subsystem-linux-distro-windows-10-a.html#option2)
-Astuce concernant la 2ème référence : pour les utilisateurs d'Ubuntu, la commande pourrait en fait être "ubuntupreview" au lieu de "ubuntu".
-
----
-### Erreur : 'docker' n'a pas pu être trouvé dans cette distribution WSL 2.
-
-Si vous utilisez Docker Desktop, assurez-vous de le démarrer avant d'appeler toute commande docker depuis WSL.
-Docker doit également avoir l'option d'intégration WSL activée.
-
----
-### Installation de Poetry
-
-* Si vous rencontrez des problèmes pour exécuter Poetry même après l'avoir installé pendant le processus de build, vous devrez peut-être ajouter son chemin binaire à votre environnement :
-
-```sh
-export PATH="$HOME/.local/bin:$PATH"
-```
-
-* Si make build s'arrête sur une erreur comme celle-ci :
-
-```sh
-ModuleNotFoundError: no module named <module-name>
-```
-
-Cela pourrait être un problème avec le cache de Poetry.
-Essayez d'exécuter ces 2 commandes l'une après l'autre :
-
-```sh
-rm -r ~/.cache/pypoetry
-make build
-```
-
----
-### L'objet NoneType n'a pas d'attribut 'request'
-
-Si vous rencontrez des problèmes liés au réseau, tels que `NoneType object has no attribute 'request'` lors de l'exécution de `make run`, vous devrez peut-être configurer les paramètres réseau de WSL2. Suivez ces étapes :
-
-* Ouvrez ou créez le fichier `.wslconfig` situé à `C:\Users\%username%\.wslconfig` sur votre machine hôte Windows.
-* Ajoutez la configuration suivante au fichier `.wslconfig` :
-
-```sh
-[wsl2]
-networkingMode=mirrored
-localhostForwarding=true
-```
-
-* Enregistrez le fichier `.wslconfig`.
-* Redémarrez complètement WSL2 en quittant toutes les instances WSL2 en cours d'exécution et en exécutant la commande `wsl --shutdown` dans votre invite de commande ou terminal.
-* Après avoir redémarré WSL, essayez d'exécuter à nouveau `make run`.
-Le problème de réseau devrait être résolu.
diff --git a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/troubleshooting/troubleshooting.md b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/troubleshooting/troubleshooting.md
index 30daa5e768de..21514a594e19 100644
--- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/troubleshooting/troubleshooting.md
+++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/troubleshooting/troubleshooting.md
@@ -7,7 +7,6 @@
 :::tip
 OpenHands 仅通过 [WSL](https://learn.microsoft.com/en-us/windows/wsl/install) 支持 Windows。
 请确保在您的 WSL 终端内运行所有命令。
-查看 [Windows 用户的 WSL 注意事项](troubleshooting/windows) 以获取一些故障排除指南。
 :::
 
 ## 常见问题
diff --git a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/troubleshooting/windows.md b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/troubleshooting/windows.md
deleted file mode 100644
index cb59ba2aee01..000000000000
--- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/troubleshooting/windows.md
+++ /dev/null
@@ -1,66 +0,0 @@
-以下是翻译后的内容:
-
-# 针对 Windows 上 WSL 用户的注意事项
-
-OpenHands 仅通过 [WSL](https://learn.microsoft.com/en-us/windows/wsl/install) 支持 Windows。
-请确保在您的 WSL 终端内运行所有命令。
-
-## 故障排除
-
-### 建议: 不要以 root 用户身份运行
-
-出于安全原因,强烈建议不要以 root 用户身份运行 OpenHands,而是以具有非零 UID 的用户身份运行。
-
-参考:
-
-* [为什么以 root 身份登录不好](https://askubuntu.com/questions/16178/why-is-it-bad-to-log-in-as-root)
-* [在 WSL 中设置默认用户](https://www.tenforums.com/tutorials/128152-set-default-user-windows-subsystem-linux-distro-windows-10-a.html#option2)
-关于第二个参考的提示:对于 Ubuntu 用户,命令实际上可能是 "ubuntupreview" 而不是 "ubuntu"。
-
----
-### 错误: 在此 WSL 2 发行版中找不到 'docker'。
-
-如果您正在使用 Docker Desktop,请确保在从 WSL 内部调用任何 docker 命令之前启动它。
-Docker 还需要激活 WSL 集成选项。
-
----
-### Poetry 安装
-
-* 如果您在构建过程中安装 Poetry 后仍然面临运行 Poetry 的问题,您可能需要将其二进制路径添加到环境中:
-
-```sh
-export PATH="$HOME/.local/bin:$PATH"
-```
-
-* 如果 make build 在如下错误上停止:
-
-```sh
-ModuleNotFoundError: no module named <module-name>
-```
-
-这可能是 Poetry 缓存的问题。
-尝试依次运行这两个命令:
-
-```sh
-rm -r ~/.cache/pypoetry
-make build
-```
-
----
-### NoneType 对象没有属性 'request'
-
-如果您在执行 `make run` 时遇到与网络相关的问题,例如 `NoneType 对象没有属性 'request'`,您可能需要配置 WSL2 网络设置。请按照以下步骤操作:
-
-* 在 Windows 主机上打开或创建位于 `C:\Users\%username%\.wslconfig` 的 `.wslconfig` 文件。
-* 将以下配置添加到 `.wslconfig` 文件中:
-
-```sh
-[wsl2]
-networkingMode=mirrored
-localhostForwarding=true
-```
-
-* 保存 `.wslconfig` 文件。
-* 通过退出任何正在运行的 WSL2 实例并在命令提示符或终端中执行 `wsl --shutdown` 命令来完全重启 WSL2。
-* 重新启动 WSL 后,再次尝试执行 `make run`。
-网络问题应该得到解决。
diff --git a/docs/modules/usage/how-to/gui-mode.md b/docs/modules/usage/how-to/gui-mode.md
index df5a070c01e5..f32e1acfe679 100644
--- a/docs/modules/usage/how-to/gui-mode.md
+++ b/docs/modules/usage/how-to/gui-mode.md
@@ -23,10 +23,75 @@ OpenHands provides a user-friendly Graphical User Interface (GUI) mode for inter
 
 OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if it is available. This can happen in two ways:
 
-1. Locally (OSS): The user directly inputs their GitHub token.
-2. Online (SaaS): The token is obtained through GitHub OAuth authentication.
-
-When you reach the `/app` route, the app checks if a token is present. If it finds one, it sets it in the environment for the agent to use.
+1. **Locally (OSS)**: The user directly inputs their GitHub token
+2. **Online (SaaS)**: The token is obtained through GitHub OAuth authentication
+
+#### Setting Up a Local GitHub Token
+
+1. **Generate a Personal Access Token (PAT)**:
+   - Go to GitHub Settings > Developer Settings > Personal Access Tokens > Tokens (classic)
+   - Click "Generate new token (classic)"
+   - Required scopes:
+     - `repo` (Full control of private repositories)
+     - `workflow` (Update GitHub Action workflows)
+     - `read:org` (Read organization data)
+
+2. **Enter Token in OpenHands**:
+   - Click the Settings button (gear icon) in the top right
+   - Navigate to the "GitHub" section
+   - Paste your token in the "GitHub Token" field
+   - Click "Save" to apply the changes
+
+#### Organizational Token Policies
+
+If you're working with organizational repositories, additional setup may be required:
+
+1. **Check Organization Requirements**:
+   - Organization admins may enforce specific token policies
+   - Some organizations require tokens to be created with SSO enabled
+   - Review your organization's [token policy settings](https://docs.github.com/en/organizations/managing-programmatic-access-to-your-organization/setting-a-personal-access-token-policy-for-your-organization)
+
+2. **Verify Organization Access**:
+   - Go to your token settings on GitHub
+   - Look for the organization under "Organization access"
+   - If required, click "Enable SSO" next to your organization
+   - Complete the SSO authorization process
+
+#### OAuth Authentication (Online Mode)
+
+When using OpenHands in online mode, the GitHub OAuth flow works as follows:
+
+1. OpenHands requests the following permissions:
+   - Repository access (read/write)
+   - Workflow management
+   - Organization read access
+
+2. You complete the following authentication steps:
+   - Click "Sign in with GitHub" when prompted
+   - Review the requested permissions
+   - Authorize OpenHands to access your GitHub account
+   - If using an organization, authorize organization access if prompted
+
+#### Troubleshooting
+
+Common issues and solutions:
+
+1. **Token Not Recognized**:
+   - Ensure the token is properly saved in settings
+   - Check that the token hasn't expired
+   - Verify the token has the required scopes
+   - Try regenerating the token
+
+2. **Organization Access Denied**:
+   - Check if SSO is required but not enabled
+   - Verify organization membership
+   - Contact organization admin if token policies are blocking access
+
+3. **Verifying Token Works**:
+   - The app will show a green checkmark if the token is valid
+   - Try accessing a repository to confirm permissions
+   - Check the browser console for any error messages
+   - Use the "Test Connection" button in settings if available
 
 ### Advanced Settings
 
diff --git a/docs/modules/usage/how-to/headless-mode.md b/docs/modules/usage/how-to/headless-mode.md
index dfd4dd5e3e14..ff5e622de6a4 100644
--- a/docs/modules/usage/how-to/headless-mode.md
+++ b/docs/modules/usage/how-to/headless-mode.md
@@ -55,5 +55,5 @@ docker run -it \
     --add-host host.docker.internal:host-gateway \
     --name openhands-app-$(date +%Y%m%d%H%M%S) \
     docker.all-hands.dev/all-hands-ai/openhands:0.15 \
-    python -m openhands.core.main -t "write a bash script that prints hi"
+    python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
 ```
diff --git a/docs/modules/usage/how-to/persist-session-data.md b/docs/modules/usage/how-to/persist-session-data.md
new file mode 100644
index 000000000000..f079531498fe
--- /dev/null
+++ b/docs/modules/usage/how-to/persist-session-data.md
@@ -0,0 +1,16 @@
+# Persisting Session Data
+
+With the standard installation, session data is stored in memory. Currently, if the OpenHands service is restarted,
+previous sessions become invalid (a new secret is generated) and are therefore not recoverable.
+
+## How to Persist Session Data
+
+### Development Workflow
+In the `config.toml` file, specify the following:
+```toml
+[core]
+...
+file_store="local"
+file_store_path="/absolute/path/to/openhands/cache/directory"
+jwt_secret="secretpass"
+```
diff --git a/docs/modules/usage/micro-agents.md b/docs/modules/usage/micro-agents.md
new file mode 100644
index 000000000000..4bdf69413698
--- /dev/null
+++ b/docs/modules/usage/micro-agents.md
@@ -0,0 +1,213 @@
+# Micro-Agents
+
+OpenHands uses specialized micro-agents to handle specific tasks and contexts efficiently. These micro-agents are small, focused components that provide specialized behavior and knowledge for particular scenarios.
+
+## Overview
+
+Micro-agents are defined in markdown files under the `openhands/agenthub/codeact_agent/micro/` directory. Each micro-agent is configured with:
+
+- A unique name
+- The agent type (typically CodeActAgent)
+- Trigger keywords that activate the agent
+- Specific instructions and capabilities
+
+## Available Micro-Agents
+
+### GitHub Agent
+**File**: `github.md`  
+**Triggers**: `github`, `git`
+
+The GitHub agent specializes in GitHub API interactions and repository management. It:
+- Has access to a `GITHUB_TOKEN` for API authentication
+- Follows strict guidelines for repository interactions
+- Handles branch management and pull requests
+- Uses the GitHub API instead of web browser interactions
+
+Key features:
+- Branch protection (prevents direct pushes to main/master)
+- Automated PR creation
+- Git configuration management
+- API-first approach for GitHub operations
+
+### NPM Agent
+**File**: `npm.md`  
+**Triggers**: `npm`
+
+Specializes in handling npm package management with specific focus on:
+- Non-interactive shell operations
+- Automated confirmation handling using the Unix `yes` command
+- Package installation automation
+
+### Custom Micro-Agents
+
+You can create your own micro-agents by adding new markdown files to the micro-agents directory. Each file should follow this structure:
+
+```markdown
+---
+name: agent_name
+agent: CodeActAgent
+triggers:
+- trigger_word1
+- trigger_word2
+---
+
+Instructions and capabilities for the micro-agent...
+```
+
+## Best Practices
+
+When working with micro-agents:
+
+1. **Use Appropriate Triggers**: Ensure your commands include the relevant trigger words to activate the correct micro-agent
+2. **Follow Agent Guidelines**: Each agent has specific instructions and limitations - respect these for optimal results
+3. **API-First Approach**: When available, use API endpoints rather than web interfaces
+4. **Automation Friendly**: Design commands that work well in non-interactive environments
+
+## Integration
+
+Micro-agents are automatically integrated into OpenHands' workflow. They:
+- Monitor incoming commands for their trigger words
+- Activate when relevant triggers are detected
+- Apply their specialized knowledge and capabilities
+- Follow their specific guidelines and restrictions
+
+## Example Usage
+
+```bash
+# GitHub agent example
+git checkout -b feature-branch
+git commit -m "Add new feature"
+git push origin feature-branch
+
+# NPM agent example
+yes | npm install package-name
+```
+
+For more information about specific agents, refer to their individual documentation files in the micro-agents directory.
+
+## Contributing a Micro-Agent
+
+To contribute a new micro-agent to OpenHands, follow these guidelines:
+
+### 1. Planning Your Micro-Agent
+
+Before creating a micro-agent, consider:
+- What specific problem or use case will it address?
+- What unique capabilities or knowledge should it have?
+- What trigger words make sense for activating it?
+- What constraints or guidelines should it follow?
+
+### 2. File Structure
+
+Create a new markdown file in `openhands/agenthub/codeact_agent/micro/` with a descriptive name (e.g., `docker.md` for a Docker-focused agent).
+
+### 3. Required Components
+
+Your micro-agent file must include:
+
+1. **Front Matter**: YAML metadata at the start of the file:
+```markdown
+---
+name: your_agent_name
+agent: CodeActAgent
+triggers:
+- trigger_word1
+- trigger_word2
+---
+```
+
+2. **Instructions**: Clear, specific guidelines for the agent's behavior:
+```markdown
+You are responsible for [specific task/domain].
+
+Key responsibilities:
+1. [Responsibility 1]
+2. [Responsibility 2]
+
+Guidelines:
+- [Guideline 1]
+- [Guideline 2]
+
+Examples of usage:
+[Example 1]
+[Example 2]
+```
+
+### 4. Best Practices for Micro-Agent Development
+
+1. **Clear Scope**: Keep the agent focused on a specific domain or task
+2. **Explicit Instructions**: Provide clear, unambiguous guidelines
+3. **Useful Examples**: Include practical examples of common use cases
+4. **Safety First**: Include necessary warnings and constraints
+5. **Integration Awareness**: Consider how the agent interacts with other components
+
+### 5. Testing Your Micro-Agent
+
+Before submitting:
+1. Test the agent with various prompts
+2. Verify trigger words activate the agent correctly
+3. Ensure instructions are clear and comprehensive
+4. Check for potential conflicts with existing agents
+
+### 6. Example Implementation
+
+Here's a template for a new micro-agent:
+
+````markdown
+---
+name: docker
+agent: CodeActAgent
+triggers:
+- docker
+- container
+---
+
+You are responsible for Docker container management and Dockerfile creation.
+
+Key responsibilities:
+1. Create and modify Dockerfiles
+2. Manage container lifecycle
+3. Handle Docker Compose configurations
+
+Guidelines:
+- Always use official base images when possible
+- Include necessary security considerations
+- Follow Docker best practices for layer optimization
+
+Examples:
+1. Creating a Dockerfile:
+   ```dockerfile
+   FROM node:18-alpine
+   WORKDIR /app
+   COPY package*.json ./
+   RUN npm install
+   COPY . .
+   CMD ["npm", "start"]
+   ```
+
+2. Docker Compose usage:
+   ```yaml
+   version: '3'
+   services:
+     web:
+       build: .
+       ports:
+         - "3000:3000"
+   ```
+
+Remember to:
+- Validate Dockerfile syntax
+- Check for security vulnerabilities
+- Optimize for build time and image size
+````
+
+### 7. Submission Process
+
+1. Create your micro-agent file in the correct directory
+2. Test thoroughly
+3. Submit a pull request with:
+   - The new micro-agent file
+   - Updated documentation if needed
+   - Description of the agent's purpose and capabilities
+
+Remember that micro-agents are a powerful way to extend OpenHands' capabilities in specific domains. Well-designed agents can significantly improve the system's ability to handle specialized tasks.
diff --git a/docs/modules/usage/prompting-best-practices.md b/docs/modules/usage/prompting-best-practices.md
index 9df608a7a2fa..0d89592165cb 100644
--- a/docs/modules/usage/prompting-best-practices.md
+++ b/docs/modules/usage/prompting-best-practices.md
@@ -2,6 +2,11 @@
 
 When working with OpenHands AI software developer, it's crucial to provide clear and effective prompts. This guide outlines best practices for creating prompts that will yield the most accurate and useful responses.
 
+## Table of Contents
+
+- [Characteristics of Good Prompts](#characteristics-of-good-prompts)
+- [Customizing Prompts for your Project](#customizing-prompts-for-your-project)
+
 ## Characteristics of Good Prompts
 
 Good prompts are:
@@ -39,3 +44,63 @@ Good prompts are:
 Remember, the more precise and informative your prompt is, the better the AI can assist you in developing or modifying the OpenHands software.
 
 See [Getting Started with OpenHands](./getting-started) for more examples of helpful prompts.
+
+## Customizing Prompts for your Project
+
+OpenHands can be customized to work more effectively with specific repositories by providing repository-specific context and guidelines. This section explains how to optimize OpenHands for your project.
+
+### Repository Configuration
+
+You can customize OpenHands' behavior for your repository by creating a `.openhands_instructions` file in your repository's root directory. This file should contain:
+
+1. **Repository Overview**: A brief description of your project's purpose and architecture
+2. **Directory Structure**: Key directories and their purposes
+3. **Development Guidelines**: Project-specific coding standards and practices
+4. **Testing Requirements**: How to run tests and what types of tests are required
+5. **Setup Instructions**: Steps needed to build and run the project
+
+Example `.openhands_instructions` file:
+```
+Repository: MyProject
+Description: A web application for task management
+
+Directory Structure:
+- src/: Main application code
+- tests/: Test files
+- docs/: Documentation
+
+Setup:
+- Run `npm install` to install dependencies
+- Use `npm run dev` for development
+- Run `npm test` for testing
+
+Guidelines:
+- Follow ESLint configuration
+- Write tests for all new features
+- Use TypeScript for new code
+```
+
+### Customizing Prompts
+
+When working with a customized repository:
+
+1. **Reference Project Standards**: Mention specific coding standards or patterns used in your project
+2. **Include Context**: Reference relevant documentation or existing implementations
+3. **Specify Testing Requirements**: Include project-specific testing requirements in your prompts
+
+Example customized prompt:
+```
+Add a new task completion feature to src/components/TaskList.tsx following our existing component patterns.
+Include unit tests in tests/components/ and update the documentation in docs/features/.
+The component should use our shared styling from src/styles/components.
+```
+
+### Best Practices for Repository Customization
+
+1. **Keep Instructions Updated**: Regularly update your `.openhands_instructions` file as your project evolves
+2. **Be Specific**: Include specific paths, patterns, and requirements unique to your project
+3. **Document Dependencies**: List all tools and dependencies required for development
+4. **Include Examples**: Provide examples of good code patterns from your project
+5. **Specify Conventions**: Document naming conventions, file organization, and code style preferences
+
+By customizing OpenHands for your repository, you'll get more accurate and consistent results that align with your project's standards and requirements.
diff --git a/docs/modules/usage/troubleshooting/troubleshooting.md b/docs/modules/usage/troubleshooting/troubleshooting.md
index e031e65b4623..b117411b51d7 100644
--- a/docs/modules/usage/troubleshooting/troubleshooting.md
+++ b/docs/modules/usage/troubleshooting/troubleshooting.md
@@ -1,180 +1,44 @@
 # 🚧 Troubleshooting
 
-There are some error messages that frequently get reported by users.
-We'll try to make the install process easier, but for now you can look for your error message below and see if there are any workarounds.
-If you find more information or a workaround for one of these issues, please open a *PR* to add details to this file.
-
 :::tip
-OpenHands only supports Windows via [WSL](https://learn.microsoft.com/en-us/windows/wsl/install).
-Please be sure to run all commands inside your WSL terminal.
-Check out [Notes for WSL on Windows Users](troubleshooting/windows) for some troubleshooting guides.
+OpenHands only supports Windows via WSL. Please be sure to run all commands inside your WSL terminal.
 :::
 
-## Common Issues
-
-* [Unable to connect to Docker](#unable-to-connect-to-docker)
-* [404 Resource not found](#404-resource-not-found)
-* [`make build` getting stuck on package installations](#make-build-getting-stuck-on-package-installations)
-* [Sessions are not restored](#sessions-are-not-restored)
-* [Connection to host.docker.internal timed out](#connection-to-host-docker-internal-timed-out)
-
-### Unable to connect to Docker
-
-[GitHub Issue](https://github.com/All-Hands-AI/OpenHands/issues/1226)
-
-**Symptoms**
-
-```bash
-Error creating controller. Please check Docker is running and visit `https://docs.all-hands.dev/modules/usage/troubleshooting` for more debugging information.
-```
-
-```bash
-docker.errors.DockerException: Error while fetching server API version: ('Connection aborted.', FileNotFoundError(2, 'No such file or directory'))
-```
-
-**Details**
-
-OpenHands uses a Docker container to do its work safely, without potentially breaking your machine.
+### Launch docker client failed
 
-**Workarounds**
+**Description**
 
-* Run `docker ps` to ensure that docker is running
-* Make sure you don't need `sudo` to run docker [see here](https://www.baeldung.com/linux/docker-run-without-sudo)
-* If you are on a Mac, check the [permissions requirements](https://docs.docker.com/desktop/mac/permission-requirements/) and in particular consider enabling the `Allow the default Docker socket to be used` under `Settings > Advanced` in Docker Desktop.
-* In addition, upgrade your Docker to the latest version under `Check for Updates`
-
----
-### `404 Resource not found`
-
-**Symptoms**
-
-```python
-Traceback (most recent call last):
-  File "/app/.venv/lib/python3.12/site-packages/litellm/llms/openai.py", line 414, in completion
-    raise e
-  File "/app/.venv/lib/python3.12/site-packages/litellm/llms/openai.py", line 373, in completion
-    response = openai_client.chat.completions.create(**data, timeout=timeout)  # type: ignore
-               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/app/.venv/lib/python3.12/site-packages/openai/_utils/_utils.py", line 277, in wrapper
-    return func(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^
-  File "/app/.venv/lib/python3.12/site-packages/openai/resources/chat/completions.py", line 579, in create
-    return self._post(
-           ^^^^^^^^^^^
-  File "/app/.venv/lib/python3.12/site-packages/openai/_base_client.py", line 1232, in post
-    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
-                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/app/.venv/lib/python3.12/site-packages/openai/_base_client.py", line 921, in request
-    return self._request(
-           ^^^^^^^^^^^^^^
-  File "/app/.venv/lib/python3.12/site-packages/openai/_base_client.py", line 1012, in _request
-    raise self._make_status_error_from_response(err.response) from None
-openai.NotFoundError: Error code: 404 - {'error': {'code': '404', 'message': 'Resource not found'}}
+When running OpenHands, the following error is seen:
 ```
-
-**Details**
-
-This happens when LiteLLM (our library for connecting to different LLM providers) can't find
-the API endpoint you're trying to connect to. Most often this happens for Azure or ollama users.
-
-**Workarounds**
-
-* Check that you've set `LLM_BASE_URL` properly
-* Check that the model is set properly, based on the [LiteLLM docs](https://docs.litellm.ai/docs/providers)
-  * If you're running inside the UI, be sure to set the `model` in the settings modal
-  * If you're running headless (via main.py) be sure to set `LLM_MODEL` in your env/config
-* Make sure you've followed any special instructions for your LLM provider
-  * [Azure](/modules/usage/llms/azure-llms)
-  * [Google](/modules/usage/llms/google-llms)
-* Make sure your API key is correct
-* See if you can connect to the LLM using `curl`
-* Try [connecting via LiteLLM directly](https://github.com/BerriAI/litellm) to test your setup
-
----
-### `make build` getting stuck on package installations
-
-**Symptoms**
-
-Package installation stuck on `Pending...` without any error message:
-
-```bash
-Package operations: 286 installs, 0 updates, 0 removals
-
-  - Installing certifi (2024.2.2): Pending...
-  - Installing h11 (0.14.0): Pending...
-  - Installing idna (3.7): Pending...
-  - Installing sniffio (1.3.1): Pending...
-  - Installing typing-extensions (4.11.0): Pending...
+Launch docker client failed. Please make sure you have installed docker and started docker desktop/daemon.
 ```
 
-**Details**
-
-In rare cases, `make build` can seemingly get stuck on package installations
-without any error message.
-
-**Workarounds**
-
-The package installer Poetry may miss a configuration setting for where credentials are to be looked up (keyring).
-
-First check with `env` if a value for `PYTHON_KEYRING_BACKEND` exists.
-If not, run the below command to set it to a known value and retry the build:
-
-```bash
-export PYTHON_KEYRING_BACKEND=keyring.backends.null.Keyring
-```
+**Resolution**
 
+Try these in order:
+* Confirm `docker` is running on your system. You should be able to run `docker ps` in the terminal successfully.
+* If using Docker Desktop, ensure `Settings > Advanced > Allow the default Docker socket to be used` is enabled.
+* Depending on your configuration you may need `Settings > Resources > Network > Enable host networking` enabled in Docker Desktop.
+* Reinstall Docker Desktop.
 ---
-### Sessions are not restored
-
-**Symptoms**
-
-OpenHands usually asks whether to resume or start a new session when opening the UI.
-But clicking "Resume" still starts a fresh new chat.
 
-**Details**
+## Development Workflow Specific
+### Error building runtime docker image
 
-With a standard installation as of today session data is stored in memory.
-Currently, if OpenHands's service is restarted, previous sessions become
-invalid (a new secret is generated) and thus not recoverable.
+**Description**
 
-**Workarounds**
-
-* Change configuration to make sessions persistent by editing the `config.toml`
-file (in OpenHands's root folder) by specifying a `file_store` and an
-absolute `file_store_path`:
-
-```toml
-file_store="local"
-file_store_path="/absolute/path/to/openhands/cache/directory"
+Attempts to start a new session fail, and errors with terms like the following appear in the logs:
 ```
-
-* Add a fixed jwt secret in your .bashrc, like below, so that previous session id's
-should stay accepted.
-
-```bash
-EXPORT JWT_SECRET=A_CONST_VALUE
+debian-security bookworm-security
+InRelease At least one invalid signature was encountered.
 ```
 
----
-### Connection to host docker internal timed out
-
-**Symptoms**
-
-When you start the server using the docker command from the main [README](https://github.com/All-Hands-AI/OpenHands/README.md), you get a long timeout
-followed by the a stack trace containing messages like:
-
-* `Connection to host.docker.internal timed out. (connect timeout=310)`
-* `Max retries exceeded with url: /alive`
-
-**Details**
-
-If Docker Engine is installed rather than Docker Desktop, the main command will not work as expected.
-Docker Desktop includes easy DNS configuration for connecting processes running in different containers
-which OpenHands makes use of when the main server is running inside a docker container.
-(Further details: https://forums.docker.com/t/difference-between-docker-desktop-and-docker-engine/124612)
-
-**Workarounds**
+This seems to happen when the hash of an existing external library changes and your local docker instance has
+cached a previous version. To work around this, please try the following:
 
-* [Install Docker Desktop](https://www.docker.com/products/docker-desktop/)
-* Run OpenHands in [Development Mode](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md),
-  So that the main server is not run inside a container, but still creates dockerized runtime sandboxes.
+* Stop any containers where the name has the prefix `openhands-runtime-` :
+  `docker ps --filter name=openhands-runtime- --filter status=running -aq | xargs docker stop`
+* Remove any containers where the name has the prefix `openhands-runtime-` :
+  `docker ps --filter name=openhands-runtime- -aq | xargs docker rm`
+* Stop and Remove any containers / images where the name has the prefix `openhands-runtime-`
+* Prune containers / images : `docker container prune -f && docker image prune -f`
diff --git a/docs/modules/usage/troubleshooting/windows.md b/docs/modules/usage/troubleshooting/windows.md
deleted file mode 100644
index c0196b75138b..000000000000
--- a/docs/modules/usage/troubleshooting/windows.md
+++ /dev/null
@@ -1,64 +0,0 @@
-# Notes for WSL on Windows Users
-
-OpenHands only supports Windows via [WSL](https://learn.microsoft.com/en-us/windows/wsl/install).
-Please be sure to run all commands inside your WSL terminal.
-
-## Troubleshooting
-
-### Recommendation: Do not run as root user
-
-For security reasons, it is highly recommended to not run OpenHands as the root user, but a user with a non-zero UID.
-
-References:
-
-* [Why it is bad to login as root](https://askubuntu.com/questions/16178/why-is-it-bad-to-log-in-as-root)
-* [Set default user in WSL](https://www.tenforums.com/tutorials/128152-set-default-user-windows-subsystem-linux-distro-windows-10-a.html#option2)
-Hint about the 2nd reference: for Ubuntu users, the command could actually be "ubuntupreview" instead of "ubuntu".
-
----
-### Error: 'docker' could not be found in this WSL 2 distro.
-
-If you are using Docker Desktop, make sure to start it before calling any docker command from inside WSL.
-Docker also needs to have the WSL integration option activated.
-
----
-### Poetry Installation
-
-* If you face issues running Poetry even after installing it during the build process, you may need to add its binary path to your environment:
-
-```sh
-export PATH="$HOME/.local/bin:$PATH"
-```
-
-* If make build stops on an error like this:
-
-```sh
-ModuleNotFoundError: no module named <module-name>
-```
-
-This could be an issue with Poetry's cache.
-Try to run these 2 commands after another:
-
-```sh
-rm -r ~/.cache/pypoetry
-make build
-```
-
----
-### NoneType object has no attribute 'request'
-
-If you are experiencing issues related to networking, such as `NoneType object has no attribute 'request'` when executing `make run`, you may need to configure your WSL2 networking settings. Follow these steps:
-
-* Open or create the `.wslconfig` file located at `C:\Users\%username%\.wslconfig` on your Windows host machine.
-* Add the following configuration to the `.wslconfig` file:
-
-```sh
-[wsl2]
-networkingMode=mirrored
-localhostForwarding=true
-```
-
-* Save the `.wslconfig` file.
-* Restart WSL2 completely by exiting any running WSL2 instances and executing the command `wsl --shutdown` in your command prompt or terminal.
-* After restarting WSL, attempt to execute `make run` again.
-The networking issue should be resolved.
diff --git a/docs/modules/usage/upgrade-guide.md b/docs/modules/usage/upgrade-guide.md
deleted file mode 100644
index 01e68d558b5f..000000000000
--- a/docs/modules/usage/upgrade-guide.md
+++ /dev/null
@@ -1,71 +0,0 @@
-# ⬆️ Upgrade Guide
-
-## 0.8.0 (2024-07-13)
-
-### Config breaking changes
-
-In this release we introduced a few breaking changes to backend configurations.
-If you have only been using OpenHands via frontend (web GUI), nothing needs
-to be taken care of.
-
-Here's a list of breaking changes in configs. They only apply to users who
-use OpenHands CLI via `main.py`. For more detail, see [#2756](https://github.com/All-Hands-AI/OpenHands/pull/2756).
-
-#### Removal of --model-name option from main.py
-
-Please note that `--model-name`, or `-m` option, no longer exists. You should set up the LLM
-configs in `config.toml` or via environmental variables.
-
-#### LLM config groups must be subgroups of 'llm'
-
-Prior to release 0.8, you can use arbitrary name for llm config in `config.toml`, e.g.
-
-```toml
-[gpt-4o]
-model="gpt-4o"
-api_key="<your_api_key>"
-```
-
-and then use `--llm-config` CLI argument to specify the desired LLM config group
-by name. This no longer works. Instead, the config group must be under `llm` group,
-e.g.:
-
-```toml
-[llm.gpt-4o]
-model="gpt-4o"
-api_key="<your_api_key>"
-```
-
-If you have a config group named `llm`, no need to change it, it will be used
-as the default LLM config group.
-
-#### 'agent' group no longer contains 'name' field
-
-Prior to release 0.8, you may or may not have a config group named `agent` that
-looks like this:
-
-```toml
-[agent]
-name="CodeActAgent"
-memory_max_threads=2
-```
-
-Note the `name` field is now removed. Instead, you should put `default_agent` field
-under `core` group, e.g.
-
-```toml
-[core]
-# other configs
-default_agent='CodeActAgent'
-
-[agent]
-llm_config='llm'
-memory_max_threads=2
-
-[agent.CodeActAgent]
-llm_config='gpt-4o'
-```
-
-Note that similar to `llm` subgroups, you can also define `agent` subgroups.
-Moreover, an agent can be associated with a specific LLM config group. For more
-detail, see the examples in `config.template.toml`.
diff --git a/docs/package-lock.json b/docs/package-lock.json
index 1cdb00ab818e..d83acb021786 100644
--- a/docs/package-lock.json
+++ b/docs/package-lock.json
@@ -17,8 +17,8 @@
         "prism-react-renderer": "^2.4.0",
         "react": "^18.3.1",
         "react-dom": "^18.3.1",
-        "react-icons": "^5.3.0",
-        "react-use": "^17.5.1"
+        "react-icons": "^5.4.0",
+        "react-use": "^17.6.0"
       },
       "devDependencies": {
         "@docusaurus/module-type-aliases": "^3.5.1",
@@ -15155,9 +15155,10 @@
       }
     },
     "node_modules/react-icons": {
-      "version": "5.3.0",
-      "resolved": "https://registry.npmjs.org/react-icons/-/react-icons-5.3.0.tgz",
-      "integrity": "sha512-DnUk8aFbTyQPSkCfF8dbX6kQjXA9DktMeJqfjrg6cK9vwQVMxmcA3BfP4QoiztVmEHtwlTgLFsPuH2NskKT6eg==",
+      "version": "5.4.0",
+      "resolved": "https://registry.npmjs.org/react-icons/-/react-icons-5.4.0.tgz",
+      "integrity": "sha512-7eltJxgVt7X64oHh6wSWNwwbKTCtMfK35hcjvJS0yxEAhPM8oUKdS3+kqaW1vicIltw+kR2unHaa12S9pPALoQ==",
+      "license": "MIT",
       "peerDependencies": {
         "react": "*"
       }
@@ -15263,9 +15264,9 @@
       }
     },
     "node_modules/react-use": {
-      "version": "17.5.1",
-      "resolved": "https://registry.npmjs.org/react-use/-/react-use-17.5.1.tgz",
-      "integrity": "sha512-LG/uPEVRflLWMwi3j/sZqR00nF6JGqTTDblkXK2nzXsIvij06hXl1V/MZIlwj1OKIQUtlh1l9jK8gLsRyCQxMg==",
+      "version": "17.6.0",
+      "resolved": "https://registry.npmjs.org/react-use/-/react-use-17.6.0.tgz",
+      "integrity": "sha512-OmedEScUMKFfzn1Ir8dBxiLLSOzhKe/dPZwVxcujweSj45aNM7BEGPb9BEVIgVEqEXx6f3/TsXzwIktNgUR02g==",
       "dependencies": {
         "@types/js-cookie": "^2.2.6",
         "@xobotyi/scrollbar-width": "^1.9.5",
diff --git a/docs/package.json b/docs/package.json
index 5c5b54d023f4..232898bd88b8 100644
--- a/docs/package.json
+++ b/docs/package.json
@@ -24,8 +24,8 @@
     "prism-react-renderer": "^2.4.0",
     "react": "^18.3.1",
     "react-dom": "^18.3.1",
-    "react-icons": "^5.3.0",
-    "react-use": "^17.5.1"
+    "react-icons": "^5.4.0",
+    "react-use": "^17.6.0"
   },
   "devDependencies": {
     "@docusaurus/module-type-aliases": "^3.5.1",
diff --git a/docs/sidebars.ts b/docs/sidebars.ts
index 44f8c7b85037..436bd63e909a 100644
--- a/docs/sidebars.ts
+++ b/docs/sidebars.ts
@@ -14,9 +14,20 @@ const sidebars: SidebarsConfig = {
       id: 'usage/getting-started',
     },
     {
-      type: 'doc',
-      label: 'Prompting Best Practices',
-      id: 'usage/prompting-best-practices',
+      type: 'category',
+      label: 'Prompting',
+      items: [
+        {
+          type: 'doc',
+          label: 'Best Practices',
+          id: 'usage/prompting-best-practices',
+        },
+        {
+          type: 'doc',
+          label: 'Micro-Agents',
+          id: 'usage/micro-agents',
+        },
+      ],
     },
     {
       type: 'category',
@@ -110,6 +121,11 @@ const sidebars: SidebarsConfig = {
           label: 'Custom Sandbox',
           id: 'usage/how-to/custom-sandbox-guide',
         },
+        {
+          type: 'doc',
+          label: 'Persist Session Data',
+          id: 'usage/how-to/persist-session-data',
+        },
       ],
     },
     {
diff --git a/evaluation/benchmarks/EDA/README.md b/evaluation/benchmarks/EDA/README.md
index fee875c5dd51..11de7ca36e13 100644
--- a/evaluation/benchmarks/EDA/README.md
+++ b/evaluation/benchmarks/EDA/README.md
@@ -4,12 +4,10 @@ This folder contains evaluation harness for evaluating agents on the Entity-dedu
 
 ## Setup Environment and LLM Configuration
 
-Please follow instruction [here](../README.md#setup) to setup your local development environment and LLM.
-
+Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
 
 ## Start the evaluation
 
-
 ```bash
 export OPENAI_API_KEY="sk-XXX"; # This is required for evaluation (to simulate another party of conversation)
 ./evaluation/benchmarks/EDA/scripts/run_infer.sh [model_config] [git-version] [agent] [dataset] [eval_limit]
@@ -37,7 +35,8 @@ For example,
 ```
 
 ## Reference
-```
+
+```bibtex
 @inproceedings{zhang2023entity,
   title={Probing the Multi-turn Planning Capabilities of LLMs via 20 Question Games},
   author={Zhang, Yizhe and Lu, Jiarui and Jaitly, Navdeep},
diff --git a/evaluation/benchmarks/agent_bench/README.md b/evaluation/benchmarks/agent_bench/README.md
index ea7da04e9f29..9ee8482eb39b 100644
--- a/evaluation/benchmarks/agent_bench/README.md
+++ b/evaluation/benchmarks/agent_bench/README.md
@@ -4,7 +4,7 @@ This folder contains evaluation harness for evaluating agents on the [AgentBench
 
 ## Setup Environment and LLM Configuration
 
-Please follow instruction [here](../README.md#setup) to setup your local development environment and LLM.
+Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
 
 ## Start the evaluation
 
diff --git a/evaluation/benchmarks/aider_bench/README.md b/evaluation/benchmarks/aider_bench/README.md
index 965fc06d7ecc..086cfe58160a 100644
--- a/evaluation/benchmarks/aider_bench/README.md
+++ b/evaluation/benchmarks/aider_bench/README.md
@@ -10,7 +10,7 @@ Hugging Face dataset based on the
 
 ## Setup Environment and LLM Configuration
 
-Please follow instruction [here](../README.md#setup) to setup your local
+Please follow the instructions [here](../../README.md#setup) to set up your local
 development environment and LLM.
 
 ## Start the evaluation
diff --git a/evaluation/benchmarks/biocoder/README.md b/evaluation/benchmarks/biocoder/README.md
index 035f2d20bf12..4cd1643fa98f 100644
--- a/evaluation/benchmarks/biocoder/README.md
+++ b/evaluation/benchmarks/biocoder/README.md
@@ -4,13 +4,14 @@ Implements evaluation of agents on BioCoder from the BioCoder benchmark introduc
 
 ## Setup Environment and LLM Configuration
 
-Please follow instruction [here](../README.md#setup) to setup your local development environment and LLM.
+Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
 
 ## BioCoder Docker Image
 
 In the openhands branch of the Biocoder repository, we have slightly modified our original Docker image to work with the OpenHands environment. In the Docker image are testing scripts (`/testing/start_test_openhands.py` and aux files in `/testing_files/`) to assist with evaluation. Additionally, we have installed all dependencies, including OpenJDK, mamba (with Python 3.6), and many system libraries. Notably, we have **not** packaged all repositories into the image, so they are downloaded at runtime.
 
 **Before first execution, pull our Docker image with the following command**
+
 ```bash
 docker pull public.ecr.aws/i5g0m1f6/eval_biocoder:v1.0
 ```
@@ -19,7 +20,6 @@ To reproduce this image, please see the Dockerfile_Openopenhands in the `biocode
 
 ## Start the evaluation
 
-
 ```bash
 ./evaluation/benchmarks/biocoder/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit]
 ```
@@ -47,7 +47,8 @@ with current OpenHands version, then your command would be:
 ```
 
 ## Reference
-```
+
+```bibtex
 @misc{tang2024biocoder,
       title={BioCoder: A Benchmark for Bioinformatics Code Generation with Large Language Models},
       author={Xiangru Tang and Bill Qian and Rick Gao and Jiakang Chen and Xinyun Chen and Mark Gerstein},
diff --git a/evaluation/benchmarks/bird/README.md b/evaluation/benchmarks/bird/README.md
index 90e3fa300cbd..41874fe99f59 100644
--- a/evaluation/benchmarks/bird/README.md
+++ b/evaluation/benchmarks/bird/README.md
@@ -4,7 +4,7 @@ Implements evaluation of agents on BIRD introduced in [Can LLM Already Serve as
 
 ## Setup Environment and LLM Configuration
 
-Please follow instruction [here](../README.md#setup) to setup your local development environment and LLM.
+Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
 
 ## Run Inference on Bird
 
@@ -22,8 +22,7 @@ like to evaluate. It could also be a release tag like `0.6.2`.
 
 For each problem, OpenHands is given a set number of iterations to fix the failing code. The history field shows each iteration's response to correct its code that fails any test case.
 
-
-```
+```json
 {
   "task_id": "0",
   "instruction": "You are a SQL expert and need to complete the following text-to-SQL tasks.\n\nCREATE TABLE frpm\n(\n    CDSCode                                       TEXT not null\n        primary key,\n    `Academic Year`                               TEXT  null,\n    `County Code`                                 TEXT  null,\n    `District Code`                               INTEGER         null,\n    `School Code`                                 TEXT  null,\n    `County Name`                                 TEXT null,\n    `District Name`                               TEXT null,\n    `School Name`                                 TEXT null,\n    `District Type`                               TEXT null,\n    `School Type`                                 TEXT null,\n    `Educational Option Type`                     TEXT null,\n    `NSLP Provision Status`                       TEXT null,\n    `Charter School (Y/N)`                        INTEGER    null,\n    `Charter School Number`                       TEXT  null,\n    `Charter Funding Type`                        TEXT null,\n    IRC                                           INTEGER    null,\n    `Low Grade`                                   TEXT  null,\n    `High Grade`                                  TEXT null,\n    `Enrollment (K-12)`                           REAL      null,\n    `Free Meal Count (K-12)`                      REAL       null,\n    `Percent (%) Eligible Free (K-12)`            REAL       null,\n    `FRPM Count (K-12)`                           REAL       null,\n    `Percent (%) Eligible FRPM (K-12)`            REAL       null,\n    `Enrollment (Ages 5-17)`                      REAL       null,\n    `Free Meal Count (Ages 5-17)`                 REAL       null,\n    `Percent (%) Eligible Free (Ages 5-17)`       REAL       null,\n    `FRPM Count (Ages 5-17)`                      REAL       null,\n    `Percent (%) Eligible FRPM (Ages 5-17)`       REAL       null,\n    `2013-14 CALPADS Fall 1 
Certification Status` INTEGER    null,\n    foreign key (CDSCode) references schools (CDSCode)\n);\n\nCREATE TABLE satscores\n(\n    cds         TEXT not null\n        primary key,\n    rtype       TEXT  not null,\n    sname       TEXT null,\n    dname       TEXT null,\n    cname       TEXT null,\n    enroll12    INTEGER         not null,\n    NumTstTakr  INTEGER          not null,\n    AvgScrRead  INTEGER          null,\n    AvgScrMath  INTEGER          null,\n    AvgScrWrite INTEGER          null,\n    NumGE1500   INTEGER          null,\n--     PctGE1500   double      null,\n        foreign key (cds) references schools (CDSCode)\n);\n\nCREATE TABLE schools\n(\n    CDSCode     TEXT not null\n        primary key,\n    NCESDist    TEXT  null,\n    NCESSchool  TEXT  null,\n    StatusType  TEXT  not null,\n    County      TEXT not null,\n    District    TEXT not null,\n    School      TEXT null,\n    Street      TEXT null,\n    StreetAbr   TEXT null,\n    City        TEXT null,\n    Zip         TEXT null,\n    State       TEXT  null,\n    MailStreet  TEXT null,\n    MailStrAbr  TEXT null,\n    MailCity    TEXT null,\n    MailZip     TEXT null,\n    MailState   TEXT  null,\n    Phone       TEXT null,\n    Ext         TEXT  null,\n    Website     TEXT null,\n    OpenDate    DATE        null,\n    ClosedDate  DATE        null,\n    Charter     INTEGER    null,\n    CharterNum  TEXT  null,\n    FundingType TEXT null,\n    DOC         TEXT  not null,\n    DOCType     TEXT not null,\n    SOC         TEXT  null,\n    SOCType     TEXT null,\n    EdOpsCode   TEXT  null,\n    EdOpsName   TEXT null,\n    EILCode     TEXT  null,\n    EILName     TEXT null,\n    GSoffered   TEXT null,\n    GSserved    TEXT  null,\n    Virtual     TEXT  null,\n    Magnet      INTEGER   null,\n    Latitude    REAL      null,\n    Longitude   REAL      null,\n    AdmFName1   TEXT null,\n    AdmLName1   TEXT null,\n    AdmEmail1   TEXT null,\n    AdmFName2   TEXT null,\n    AdmLName2   TEXT null,\n    
AdmEmail2   TEXT null,\n    AdmFName3   TEXT  null,\n    AdmLName3   TEXT null,\n    AdmEmail3   TEXT null,\n    LastUpdate  DATE        not null\n);\n\n-- External Knowledge: Eligible free rate for K-12 = `Free Meal Count (K-12)` / `Enrollment (K-12)`\n\n-- Using valid SQLite and understanding External Knowledge, answer the following questions for the tables provided above.\n\n-- Using valid SQLite, answer the following questions for the tables provided above.\nQuestion: What is the highest eligible free rate for K-12 students in the schools in Alameda County?\n\n\nPlease write the SQL in one line without line breaks.And write a new python file named 0.py to call the SQL you wrote.You need to follow the code template below:\n\n\n    import sqlite3\n    def execute_sql(db_path, sql):\n        with sqlite3.connect(db_path) as conn:\n            cursor = conn.cursor()\n            cursor.execute(sql)\n            result = cursor.fetchall()\n            return result\n\n    if __name__ == '__main__':\n        sql = \"\" # filling your SQL here\n        db_path = \"california_schools/california_schools.sqlite\"\n        print(db_path)\n        result = execute_sql(db_path, sql)\n        print(result)\n    \n\nEnvironment has been set up for you to start working.You may assume all necessary tools are installed.\n\nIMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.\nYou SHOULD INCLUDE PROPER INDENTATION in your edit commands.\nWhen you think you have fixed the issue through code changes, please finish the interaction using the "finish" tool.\n",
diff --git a/evaluation/benchmarks/browsing_delegation/README.md b/evaluation/benchmarks/browsing_delegation/README.md
index a06170f8b9e0..9ae349b81900 100644
--- a/evaluation/benchmarks/browsing_delegation/README.md
+++ b/evaluation/benchmarks/browsing_delegation/README.md
@@ -7,7 +7,7 @@ If so, the browsing performance upper-bound of CodeActAgent will be the performa
 
 ## Setup Environment and LLM Configuration
 
-Please follow instruction [here](../README.md#setup) to setup your local development environment and LLM.
+Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
 
 ## Run Inference
 
diff --git a/evaluation/benchmarks/commit0_bench/README.md b/evaluation/benchmarks/commit0_bench/README.md
index 78b58b02137f..9ac3a0e05dd3 100644
--- a/evaluation/benchmarks/commit0_bench/README.md
+++ b/evaluation/benchmarks/commit0_bench/README.md
@@ -4,19 +4,18 @@ This folder contains the evaluation harness that we built on top of the original
 
 The evaluation consists of three steps:
 
-1. Environment setup: [install python environment](../README.md#development-environment), [configure LLM config](../README.md#configure-openhands-and-your-llm).
+1. Environment setup: [install python environment](../../README.md#development-environment), [configure LLM config](../../README.md#configure-openhands-and-your-llm).
 2. [Run Evaluation](#run-inference-on-commit0-instances): Generate a edit patch for each Commit0 Repo, and get the evaluation results
 
 ## Setup Environment and LLM Configuration
 
-Please follow instruction [here](../README.md#setup) to setup your local development environment and LLM.
+Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
 
 ## OpenHands Commit0 Instance-level Docker Support
 
 OpenHands supports using the Commit0 Docker for **[inference](#run-inference-on-commit0-instances).
 This is now the default behavior.
 
-
 ## Run Inference on Commit0 Instances
 
 Make sure your Docker daemon is running, and you have ample disk space (at least 200-500GB, depends on the Commit0 set you are running on) for the [instance-level docker image](#openhands-commit0-instance-level-docker-support).
diff --git a/evaluation/benchmarks/gaia/README.md b/evaluation/benchmarks/gaia/README.md
index f592e5f7118d..9a7bbd7fa346 100644
--- a/evaluation/benchmarks/gaia/README.md
+++ b/evaluation/benchmarks/gaia/README.md
@@ -4,9 +4,10 @@ This folder contains evaluation harness for evaluating agents on the [GAIA bench
 
 ## Setup Environment and LLM Configuration
 
-Please follow instruction [here](../README.md#setup) to setup your local development environment and LLM.
+Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
 
 ## Run the evaluation
+
 We are using the GAIA dataset hosted on [Hugging Face](https://huggingface.co/datasets/gaia-benchmark/GAIA).
 Please accept the terms and make sure to have logged in on your computer by `huggingface-cli login` before running the evaluation.
 
@@ -41,6 +42,7 @@ For example,
 ## Get score
 
 Then you can get stats by running the following command:
+
 ```bash
 python ./evaluation/benchmarks/gaia/get_score.py \
 --file <path_to/output.json>
diff --git a/evaluation/benchmarks/gorilla/README.md b/evaluation/benchmarks/gorilla/README.md
index c6f1cde55b40..d5a076234a5a 100644
--- a/evaluation/benchmarks/gorilla/README.md
+++ b/evaluation/benchmarks/gorilla/README.md
@@ -4,7 +4,7 @@ This folder contains evaluation harness we built on top of the original [Gorilla
 
 ## Setup Environment and LLM Configuration
 
-Please follow instruction [here](../README.md#setup) to setup your local development environment and LLM.
+Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
 
 ## Run Inference on APIBench Instances
 
diff --git a/evaluation/benchmarks/gpqa/README.md b/evaluation/benchmarks/gpqa/README.md
index 235b9ab9b281..735584d4556e 100644
--- a/evaluation/benchmarks/gpqa/README.md
+++ b/evaluation/benchmarks/gpqa/README.md
@@ -3,6 +3,7 @@
 Implements the evaluation of agents on the GPQA benchmark introduced in [GPQA: A Graduate-Level Google-Proof Q&A Benchmark](https://arxiv.org/abs/2308.07124).
 
 This code implements the evaluation of agents on the GPQA Benchmark with Open Book setting.
+
 - The benchmark consists of 448 high-quality and extremely difficult multiple-choice questions in the domains of biology, physics, and chemistry. The questions are intentionally designed to be "Google-proof," meaning that even highly skilled non-expert validators achieve only 34% accuracy despite unrestricted access to the web.
 - Even experts in the corresponding domains achieve only 65% accuracy.
 - State-of-the-art AI systems achieve only 39% accuracy on this challenging dataset.
@@ -11,20 +12,24 @@ This code implements the evaluation of agents on the GPQA Benchmark with Open Bo
 Accurate solving of above graduate level questions would require both tool use (e.g., python for calculations) and web-search for finding related facts as information required for the questions might not be part of the LLM knowledge / training data.
 
 Further references:
-- https://arxiv.org/pdf/2311.12022
-- https://paperswithcode.com/dataset/gpqa
-- https://github.com/idavidrein/gpqa
+
+- <https://arxiv.org/pdf/2311.12022>
+- <https://paperswithcode.com/dataset/gpqa>
+- <https://github.com/idavidrein/gpqa>
 
 ## Setup Environment and LLM Configuration
 
-Please follow instruction [here](../README.md#setup) to setup your local development environment and LLM.
+Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
 
 ## Run Inference on GPQA Benchmark
+
 'gpqa_main', 'gqpa_diamond', 'gpqa_experts', 'gpqa_extended' -- data split options
 From the root of the OpenHands repo, run the following command:
+
 ```bash
 ./evaluation/benchmarks/gpqa/scripts/run_infer.sh [model_config_name] [git-version] [num_samples_eval] [data_split] [AgentClass]
 ```
+
 You can replace `model_config_name` with any model you set up in `config.toml`.
 
 - `model_config_name`: The model configuration name from `config.toml` that you want to evaluate.
diff --git a/evaluation/benchmarks/humanevalfix/README.md b/evaluation/benchmarks/humanevalfix/README.md
index 5f3ae58ee29d..60dabef1f609 100644
--- a/evaluation/benchmarks/humanevalfix/README.md
+++ b/evaluation/benchmarks/humanevalfix/README.md
@@ -4,7 +4,7 @@ Implements evaluation of agents on HumanEvalFix from the HumanEvalPack benchmark
 
 ## Setup Environment and LLM Configuration
 
-Please follow instruction [here](../README.md#setup) to setup your local development environment and LLM.
+Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
 
 ## Run Inference on HumanEvalFix
 
@@ -14,13 +14,11 @@ Please follow instruction [here](../README.md#setup) to setup your local develop
 
 You can replace `eval_gpt4_1106_preview` with any model you set up in `config.toml`.
 
-
 ## Examples
 
 For each problem, OpenHands is given a set number of iterations to fix the failing code. The history field shows each iteration's response to correct its code that fails any test case.
 
-
-```
+```json
 {
     "task_id": "Python/2",
     "instruction": "Please fix the function in Python__2.py such that all test cases pass.\nEnvironment has been set up for you to start working. You may assume all necessary tools are installed.\n\n# Problem Statement\ndef truncate_number(number: float) -> float:\n    return number % 1.0 + 1.0\n\n\n\n\n\n\ndef check(truncate_number):\n    assert truncate_number(3.5) == 0.5\n    assert abs(truncate_number(1.33) - 0.33) < 1e-6\n    assert abs(truncate_number(123.456) - 0.456) < 1e-6\n\ncheck(truncate_number)\n\nIMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.\nYou should NOT modify any existing test case files. If needed, you can add new test cases in a NEW file to reproduce the issue.\nYou SHOULD INCLUDE PROPER INDENTATION in your edit commands.\nWhen you think you have fixed the issue through code changes, please finish the interaction using the "finish" tool.\n",
diff --git a/evaluation/benchmarks/logic_reasoning/README.md b/evaluation/benchmarks/logic_reasoning/README.md
index d4e4d3e9a554..bba0076f25fa 100644
--- a/evaluation/benchmarks/logic_reasoning/README.md
+++ b/evaluation/benchmarks/logic_reasoning/README.md
@@ -4,9 +4,10 @@ This folder contains evaluation harness for evaluating agents on the logic reaso
 
 ## Setup Environment and LLM Configuration
 
-Please follow instruction [here](../README.md#setup) to setup your local development environment and LLM.
+Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
 
 ## Run Inference on logic_reasoning
+
 The following code will run inference on the first example of the ProofWriter dataset,
 
 ```bash
diff --git a/evaluation/benchmarks/miniwob/README.md b/evaluation/benchmarks/miniwob/README.md
index 5535e45a7dc0..3809925b3fd6 100644
--- a/evaluation/benchmarks/miniwob/README.md
+++ b/evaluation/benchmarks/miniwob/README.md
@@ -4,7 +4,7 @@ This folder contains evaluation for [MiniWoB++](https://miniwob.farama.org/) ben
 
 ## Setup Environment and LLM Configuration
 
-Please follow instruction [here](../README.md#setup) to setup your local development environment and LLM.
+Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
 
 ## Test if your environment works
 
@@ -42,7 +42,6 @@ poetry run python evaluation/benchmarks/miniwob/get_success_rate.py evaluation/e
 
 You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenHands/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).
 
-
 ## BrowsingAgent V1.0 result
 
 Tested on BrowsingAgent V1.0
diff --git a/evaluation/benchmarks/ml_bench/README.md b/evaluation/benchmarks/ml_bench/README.md
index 528edddc148a..e8b386205230 100644
--- a/evaluation/benchmarks/ml_bench/README.md
+++ b/evaluation/benchmarks/ml_bench/README.md
@@ -12,7 +12,7 @@ For more details on the ML-Bench task and dataset, please refer to the paper: [M
 
 ## Setup Environment and LLM Configuration
 
-Please follow instruction [here](../README.md#setup) to setup your local development environment and LLM.
+Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
 
 ## Run Inference on ML-Bench
 
diff --git a/evaluation/benchmarks/scienceagentbench/README.md b/evaluation/benchmarks/scienceagentbench/README.md
index 4d979177215b..5cb39da591af 100644
--- a/evaluation/benchmarks/scienceagentbench/README.md
+++ b/evaluation/benchmarks/scienceagentbench/README.md
@@ -1,10 +1,10 @@
 # ScienceAgentBench Evaluation with OpenHands
 
-This folder contains the evaluation harness for [ScienceAgentBench](https://osu-nlp-group.github.io/ScienceAgentBench/) (paper: https://arxiv.org/abs/2410.05080).
+This folder contains the evaluation harness for [ScienceAgentBench](https://osu-nlp-group.github.io/ScienceAgentBench/) (paper: <https://arxiv.org/abs/2410.05080>).
 
 ## Setup Environment and LLM Configuration
 
-Please follow instruction [here](../README.md#setup) to setup your local development environment and LLM.
+Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
 
 ## Setup ScienceAgentBench
 
@@ -45,6 +45,7 @@ After the inference is completed, you may use the following command to extract n
 ```bash
 python post_proc.py [log_fname]
 ```
+
 - `log_fname`, e.g. `evaluation/.../output.jsonl`, is the automatically saved trajectory log of an OpenHands agent.
 
 Output will be write to e.g. `evaluation/.../output.converted.jsonl`
diff --git a/evaluation/benchmarks/swe_bench/README.md b/evaluation/benchmarks/swe_bench/README.md
index b69a7389555c..7ed1e2688198 100644
--- a/evaluation/benchmarks/swe_bench/README.md
+++ b/evaluation/benchmarks/swe_bench/README.md
@@ -6,20 +6,19 @@ This folder contains the evaluation harness that we built on top of the original
 
 The evaluation consists of three steps:
 
-1. Environment setup: [install python environment](../README.md#development-environment), [configure LLM config](../README.md#configure-openhands-and-your-llm), and [pull docker](#openhands-swe-bench-instance-level-docker-support).
+1. Environment setup: [install python environment](../../README.md#development-environment), [configure LLM config](../../README.md#configure-openhands-and-your-llm), and [pull docker](#openhands-swe-bench-instance-level-docker-support).
 2. [Run inference](#run-inference-on-swe-bench-instances): Generate a edit patch for each Github issue
 3. [Evaluate patches using SWE-Bench docker](#evaluate-generated-patches)
 
 ## Setup Environment and LLM Configuration
 
-Please follow instruction [here](../README.md#setup) to setup your local development environment and LLM.
+Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
 
 ## OpenHands SWE-Bench Instance-level Docker Support
 
 OpenHands now support using the [official evaluation docker](https://github.com/princeton-nlp/SWE-bench/blob/main/docs/20240627_docker/README.md) for both **[inference](#run-inference-on-swe-bench-instances) and [evaluation](#evaluate-generated-patches)**.
 This is now the default behavior.
 
-
 ## Run Inference on SWE-Bench Instances
 
 Make sure your Docker daemon is running, and you have ample disk space (at least 200-500GB, depends on the SWE-Bench set you are running on) for the [instance-level docker image](#openhands-swe-bench-instance-level-docker-support).
@@ -52,7 +51,8 @@ default, it is set to 1.
 - `dataset_split`, split for the huggingface dataset. e.g., `test`, `dev`. Default to `test`.
 
 There are also two optional environment variables you can set.
-```
+
+```bash
 export USE_HINT_TEXT=true # if you want to use hint text in the evaluation. Default to false. Ignore this if you are not sure.
 export USE_INSTANCE_IMAGE=true # if you want to use instance-level docker images. Default to true
 ```
@@ -127,6 +127,7 @@ With `output.jsonl` file, you can run `eval_infer.sh` to evaluate generated patc
 **This evaluation is performed using the official dockerized evaluation announced [here](https://github.com/princeton-nlp/SWE-bench/blob/main/docs/20240627_docker/README.md).**
 
 > If you want to evaluate existing results, you should first run this to clone existing outputs
+>
 >```bash
 >git clone https://huggingface.co/spaces/OpenHands/evaluation evaluation/evaluation_outputs
 >```
@@ -143,6 +144,7 @@ Then you can run the following:
 ```
 
 The script now accepts optional arguments:
+
 - `instance_id`: Specify a single instance to evaluate (optional)
 - `dataset_name`: The name of the dataset to use (default: `"princeton-nlp/SWE-bench_Lite"`)
 - `split`: The split of the dataset to use (default: `"test"`)
@@ -179,7 +181,6 @@ To clean-up all existing runtimes that you've already started, run:
 ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/benchmarks/swe_bench/scripts/cleanup_remote_runtime.sh
 ```
 
-
 ## Visualize Results
 
 First you need to clone `https://huggingface.co/spaces/OpenHands/evaluation` and add your own running results from openhands into the `outputs` of the cloned repo.
@@ -189,6 +190,7 @@ git clone https://huggingface.co/spaces/OpenHands/evaluation
 ```
 
 **(optional) setup streamlit environment with conda**:
+
 ```bash
 cd evaluation
 conda create -n streamlit python=3.10
diff --git a/evaluation/benchmarks/swe_bench/scripts/eval/summarize_outputs.py b/evaluation/benchmarks/swe_bench/scripts/eval/summarize_outputs.py
index 0da6eb25b54c..b376a70aea28 100755
--- a/evaluation/benchmarks/swe_bench/scripts/eval/summarize_outputs.py
+++ b/evaluation/benchmarks/swe_bench/scripts/eval/summarize_outputs.py
@@ -128,6 +128,11 @@ def process_file(file_path):
                 for error, count in error_counter.items()
             },
         },
+        'costs': {
+            'main_agent': sum(main_agent_cost),
+            'editor': sum(editor_cost),
+            'total': sum(main_agent_cost) + sum(editor_cost),
+        },
         'statistics': {
             'avg_turns': sum(num_turns) / num_lines if num_lines > 0 else 0,
             'costs': {
@@ -251,6 +256,7 @@ def aggregate_directory(input_path) -> pd.DataFrame:
             print(
                 f"Number of unfinished runs: {result['unfinished_runs']['count']} / {result['total_instances']} ({result['unfinished_runs']['percentage']:.2f}%)"
             )
+            print(f"Total cost: {result['costs']['total']:.2f} USD")
             print('## Statistics')
             print(
                 f"Avg. num of turns per instance: {result['statistics']['avg_turns']:.2f}"
diff --git a/evaluation/benchmarks/swe_bench/scripts/eval/verify_costs.py b/evaluation/benchmarks/swe_bench/scripts/eval/verify_costs.py
new file mode 100644
index 000000000000..2e7b151e6873
--- /dev/null
+++ b/evaluation/benchmarks/swe_bench/scripts/eval/verify_costs.py
@@ -0,0 +1,104 @@
+import argparse
+
+import pandas as pd
+
+from openhands.core.logger import openhands_logger as logger
+
+
+def verify_instance_costs(row: pd.Series) -> float:
+    """Return one instance's total cost, halving systematically doubled entries.
+
+    Sums the per-step entries in ``metrics['costs']`` and warns when the result
+    differs from ``metrics['accumulated_cost']``. When every even/odd pair of
+    consecutive entries carries the same cost, the row is treated as affected by
+    https://github.com/All-Hands-AI/OpenHands/issues/5383 and each pair is
+    counted only once.
+
+    Args:
+        row: DataFrame row holding ``instance_id`` and a ``metrics`` mapping.
+    Returns:
+        float: The (possibly corrected) total cost; 0.0 if it cannot be computed.
+    """
+    try:
+        metrics = row.get('metrics')
+        if not metrics:
+            logger.warning(f"Instance {row['instance_id']}: No metrics found")
+            return 0.0
+
+        accumulated = metrics.get('accumulated_cost')
+        # `or []` also covers an explicit null `costs` value, not just a missing key.
+        costs = metrics.get('costs') or []
+
+        if accumulated is None:
+            logger.warning(
+                f"Instance {row['instance_id']}: No accumulated_cost in metrics"
+            )
+            return 0.0
+
+        # Walk the even/odd pairs (0-1, 2-3, ...); a trailing unpaired entry is
+        # skipped here. The first differing pair rules out systematic doubling.
+        has_duplicate = False
+        all_pairs_match = True
+        for i in range(0, len(costs) - 1, 2):
+            if abs(costs[i]['cost'] - costs[i + 1]['cost']) < 1e-6:
+                has_duplicate = True
+                logger.debug(
+                    f"Instance {row['instance_id']}: Possible buggy double-counting detected! "
+                    f"Steps {i} and {i+1} have identical costs: {costs[i]['cost']:.2f}"
+                )
+            else:
+                all_pairs_match = False
+                break
+
+        if len(costs) >= 2 and has_duplicate and all_pairs_match:
+            # Every pair was counted twice: halve the paired entries and add the
+            # trailing unpaired entry (odd-length lists only) at face value.
+            has_unpaired = len(costs) % 2 == 1
+            paired = costs[:-1] if has_unpaired else costs
+            unpaired_cost = costs[-1]['cost'] if has_unpaired else 0
+            total_cost = sum(entry['cost'] for entry in paired) / 2 + unpaired_cost
+        else:
+            total_cost = sum(cost_entry['cost'] for cost_entry in costs)
+
+        # Flag any disagreement with the recorded accumulated_cost.
+        if not abs(total_cost - accumulated) < 1e-6:
+            logger.warning(
+                f"Instance {row['instance_id']}: Cost mismatch: "
+                f"accumulated: {accumulated:.2f}, sum of costs: {total_cost:.2f}"
+            )
+
+        return total_cost
+
+    except Exception as e:
+        # Best effort: a malformed row is logged and contributes nothing to the total.
+        logger.error(
+            f"Error verifying costs for instance {row.get('instance_id', 'UNKNOWN')}: {e}"
+        )
+        return 0.0
+
+
+def main():
+    """Load an output.jsonl file and log the verified total cost of its instances."""
+    cli = argparse.ArgumentParser(description='Verify costs in SWE-bench output file')
+    cli.add_argument('input_filepath', type=str, help='Path to the output.jsonl file')
+    options = cli.parse_args()
+    input_filepath = options.input_filepath
+
+    try:
+        # One JSON object per line, i.e. one DataFrame row per instance.
+        instances = pd.read_json(input_filepath, lines=True)
+        logger.info(f'Loaded {len(instances)} instances from {input_filepath}')
+
+        # Row-wise verification yields one cost per instance; add them all up.
+        per_instance_costs = instances.apply(verify_instance_costs, axis=1)
+        grand_total = per_instance_costs.sum()
+        logger.info(f'Total verified cost across all instances: ${grand_total:.2f}')
+
+    except Exception as exc:
+        # Log for visibility, then propagate the original exception to the caller.
+        logger.error(f'Failed to process file: {exc}')
+        raise
+
+
+if __name__ == '__main__':
+    main()
diff --git a/evaluation/benchmarks/swe_bench/scripts/setup/instance_swe_entry.sh b/evaluation/benchmarks/swe_bench/scripts/setup/instance_swe_entry.sh
index 22427b001254..9cbf9cc00710 100755
--- a/evaluation/benchmarks/swe_bench/scripts/setup/instance_swe_entry.sh
+++ b/evaluation/benchmarks/swe_bench/scripts/setup/instance_swe_entry.sh
@@ -33,7 +33,7 @@ if [ -d /workspace/$WORKSPACE_NAME ]; then
     rm -rf /workspace/$WORKSPACE_NAME
 fi
 mkdir -p /workspace
-ln -s /testbed /workspace/$WORKSPACE_NAME
+mv /testbed /workspace/$WORKSPACE_NAME
 
 # Activate instance-specific environment
 . /opt/miniconda3/etc/profile.d/conda.sh
diff --git a/evaluation/benchmarks/toolqa/README.md b/evaluation/benchmarks/toolqa/README.md
index eda478f4489f..b6b25da43b0e 100644
--- a/evaluation/benchmarks/toolqa/README.md
+++ b/evaluation/benchmarks/toolqa/README.md
@@ -4,7 +4,7 @@ This folder contains an evaluation harness we built on top of the original [Tool
 
 ## Setup Environment and LLM Configuration
 
-Please follow instruction [here](../README.md#setup) to setup your local development environment and LLM.
+Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
 
 ## Run Inference on ToolQA Instances
 
diff --git a/evaluation/benchmarks/webarena/README.md b/evaluation/benchmarks/webarena/README.md
index 3e403d5a7f46..68f37c1a7b8f 100644
--- a/evaluation/benchmarks/webarena/README.md
+++ b/evaluation/benchmarks/webarena/README.md
@@ -4,7 +4,7 @@ This folder contains evaluation for [WebArena](https://github.com/web-arena-x/we
 
 ## Setup Environment and LLM Configuration
 
-Please follow instruction [here](../README.md#setup) to setup your local development environment and LLM.
+Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
 
 ## Setup WebArena Environment
 
diff --git a/frontend/__tests__/components/browser.test.tsx b/frontend/__tests__/components/browser.test.tsx
index eea2eb910a48..8fb7e0a31014 100644
--- a/frontend/__tests__/components/browser.test.tsx
+++ b/frontend/__tests__/components/browser.test.tsx
@@ -11,6 +11,7 @@ describe("Browser", () => {
         browser: {
           url: "https://example.com",
           screenshotSrc: "",
+          updateCount: 0,
         },
       },
     });
@@ -26,6 +27,7 @@ describe("Browser", () => {
           url: "https://example.com",
           screenshotSrc:
             "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mN0uGvyHwAFCAJS091fQwAAAABJRU5ErkJggg==",
+          updateCount: 0,
         },
       },
     });
diff --git a/frontend/__tests__/components/chat-message.test.tsx b/frontend/__tests__/components/chat-message.test.tsx
index 9b3156ee54fe..08fd090551d3 100644
--- a/frontend/__tests__/components/chat-message.test.tsx
+++ b/frontend/__tests__/components/chat-message.test.tsx
@@ -70,4 +70,12 @@ describe("ChatMessage", () => {
     );
     expect(screen.getByTestId("custom-component")).toBeInTheDocument();
   });
+
+  it("should apply correct styles to inline code", () => {
+    render(<ChatMessage type="assistant" message="Here is some `inline code` text" />);
+    const codeElement = screen.getByText("inline code");
+
+    expect(codeElement.tagName.toLowerCase()).toBe("code");
+    expect(codeElement.closest("article")).not.toBeNull();
+  });
 });
diff --git a/frontend/__tests__/components/chat/expandable-message.test.tsx b/frontend/__tests__/components/chat/expandable-message.test.tsx
new file mode 100644
index 000000000000..8eab988339de
--- /dev/null
+++ b/frontend/__tests__/components/chat/expandable-message.test.tsx
@@ -0,0 +1,60 @@
+import { describe, expect, it } from "vitest";
+import { screen } from "@testing-library/react";
+import { renderWithProviders } from "test-utils";
+import { ExpandableMessage } from "#/components/features/chat/expandable-message";
+
+describe("ExpandableMessage", () => {
+  // Each case finds the message text, then walks up to the nearest element
+  // matching this selector; the border class is asserted on that element.
+  const WRAPPER_SELECTOR = "div.flex.gap-2.items-center.justify-between";
+  const wrapperOf = (text: string) =>
+    screen.getByText(text).closest(WRAPPER_SELECTOR);
+  const queryStatusIcon = () => screen.queryByTestId("status-icon");
+
+  it("should render with neutral border for non-action messages", () => {
+    renderWithProviders(<ExpandableMessage message="Hello" type="thought" />);
+
+    expect(wrapperOf("Hello")).toHaveClass("border-neutral-300");
+    expect(queryStatusIcon()).not.toBeInTheDocument();
+  });
+
+  it("should render with neutral border for error messages", () => {
+    renderWithProviders(<ExpandableMessage message="Error occurred" type="error" />);
+
+    expect(wrapperOf("Error occurred")).toHaveClass("border-neutral-300");
+    expect(queryStatusIcon()).not.toBeInTheDocument();
+  });
+
+  it("should render with success icon for successful action messages", () => {
+    renderWithProviders(
+      <ExpandableMessage
+        message="Command executed successfully"
+        type="action"
+        success
+      />,
+    );
+
+    expect(wrapperOf("Command executed successfully")).toHaveClass("border-neutral-300");
+    expect(screen.getByTestId("status-icon")).toHaveClass("fill-success");
+  });
+
+  it("should render with error icon for failed action messages", () => {
+    renderWithProviders(
+      <ExpandableMessage
+        message="Command failed"
+        type="action"
+        success={false}
+      />,
+    );
+
+    expect(wrapperOf("Command failed")).toHaveClass("border-neutral-300");
+    expect(screen.getByTestId("status-icon")).toHaveClass("fill-danger");
+  });
+
+  it("should render with neutral border and no icon for action messages without success prop", () => {
+    renderWithProviders(<ExpandableMessage message="Running command" type="action" />);
+
+    expect(wrapperOf("Running command")).toHaveClass("border-neutral-300");
+    expect(queryStatusIcon()).not.toBeInTheDocument();
+  });
+});
diff --git a/frontend/__tests__/components/features/waitlist-modal.test.tsx b/frontend/__tests__/components/features/waitlist-modal.test.tsx
new file mode 100644
index 000000000000..b1f17bd26eac
--- /dev/null
+++ b/frontend/__tests__/components/features/waitlist-modal.test.tsx
@@ -0,0 +1,45 @@
+import { render, screen } from "@testing-library/react";
+import { it, describe, expect, vi } from "vitest";
+import userEvent from "@testing-library/user-event";
+import { WaitlistModal } from "#/components/features/waitlist/waitlist-modal";
+import * as CaptureConsent from "#/utils/handle-capture-consent";
+
+describe("WaitlistModal", () => {
+  // Lookups shared by the cases below; both throw if the element is absent.
+  const getTosCheckbox = () => screen.getByRole("checkbox");
+  const getGitHubButton = () =>
+    screen.getByRole("button", { name: "Connect to GitHub" });
+
+  it("should render a tos checkbox that is unchecked by default", () => {
+    render(<WaitlistModal ghToken={null} githubAuthUrl={null} />);
+
+    expect(getTosCheckbox()).not.toBeChecked();
+  });
+
+  it("should only enable the GitHub button if the tos checkbox is checked", async () => {
+    const user = userEvent.setup();
+    render(<WaitlistModal ghToken={null} githubAuthUrl={null} />);
+    const connectButton = getGitHubButton();
+
+    // Locked until the terms are accepted.
+    expect(connectButton).toBeDisabled();
+
+    await user.click(getTosCheckbox());
+
+    expect(connectButton).not.toBeDisabled();
+  });
+
+  it("should set user analytics consent to true when the user checks the tos checkbox", async () => {
+    const consentSpy = vi.spyOn(CaptureConsent, "handleCaptureConsent");
+
+    const user = userEvent.setup();
+    render(<WaitlistModal ghToken={null} githubAuthUrl="mock-url" />);
+
+    // Accept the terms first, then start the GitHub flow.
+    await user.click(getTosCheckbox());
+    await user.click(getGitHubButton());
+
+    // Consent is recorded as granted.
+    expect(consentSpy).toHaveBeenCalledWith(true);
+  });
+});
diff --git a/frontend/__tests__/components/interactive-chat-box.test.tsx b/frontend/__tests__/components/interactive-chat-box.test.tsx
index fa0d3a1b8e30..fe6ba329763b 100644
--- a/frontend/__tests__/components/interactive-chat-box.test.tsx
+++ b/frontend/__tests__/components/interactive-chat-box.test.tsx
@@ -1,4 +1,4 @@
-import { render, screen, within } from "@testing-library/react";
+import { render, screen, within, fireEvent } from "@testing-library/react";
 import userEvent from "@testing-library/user-event";
 import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
 import { InteractiveChatBox } from "#/components/features/chat/interactive-chat-box";
@@ -131,4 +131,60 @@ describe("InteractiveChatBox", () => {
     await user.click(stopButton);
     expect(onStopMock).toHaveBeenCalledOnce();
   });
+
+  it("should handle image upload and message submission correctly", async () => {
+    const user = userEvent.setup();
+    const onSubmit = vi.fn();
+    const onStop = vi.fn();
+    const onChange = vi.fn();
+
+    const { rerender } = render(
+      <InteractiveChatBox
+        onSubmit={onSubmit}
+        onStop={onStop}
+        onChange={onChange}
+        value="test message"
+      />
+    );
+
+    // Upload an image via the upload button - this should NOT clear the text input
+    const file = new File(["dummy content"], "test.png", { type: "image/png" });
+    const input = screen.getByTestId("upload-image-input");
+    await user.upload(input, file);
+
+    // Verify text input was not cleared
+    expect(screen.getByRole("textbox")).toHaveValue("test message");
+    expect(onChange).not.toHaveBeenCalledWith("");
+
+    // Submit the message with image
+    const submitButton = screen.getByRole("button", { name: "Send" });
+    await user.click(submitButton);
+
+    // Verify onSubmit was called with the message and image
+    expect(onSubmit).toHaveBeenCalledWith("test message", [file]);
+
+    // Verify onChange was called to clear the text input
+    expect(onChange).toHaveBeenCalledWith("");
+
+    // Simulate parent component updating the value prop
+    rerender(
+      <InteractiveChatBox
+        onSubmit={onSubmit}
+        onStop={onStop}
+        onChange={onChange}
+        value=""
+      />
+    );
+
+    // Verify the text input was cleared
+    expect(screen.getByRole("textbox")).toHaveValue("");
+
+    // Upload another image - this should NOT clear the text input
+    onChange.mockClear();
+    await user.upload(input, file);
+
+    // Verify text input is still empty and onChange was not called
+    expect(screen.getByRole("textbox")).toHaveValue("");
+    expect(onChange).not.toHaveBeenCalled();
+  });
 });
diff --git a/frontend/__tests__/routes/_oh.test.tsx b/frontend/__tests__/routes/_oh.test.tsx
index c97a9a825d56..8897b5dd8bd3 100644
--- a/frontend/__tests__/routes/_oh.test.tsx
+++ b/frontend/__tests__/routes/_oh.test.tsx
@@ -4,8 +4,9 @@ import { screen, waitFor, within } from "@testing-library/react";
 import { renderWithProviders } from "test-utils";
 import userEvent from "@testing-library/user-event";
 import MainApp from "#/routes/_oh/route";
-import * as CaptureConsent from "#/utils/handle-capture-consent";
 import i18n from "#/i18n";
+import * as CaptureConsent from "#/utils/handle-capture-consent";
+import OpenHands from "#/api/open-hands";
 
 describe("frontend/routes/_oh", () => {
   const RouteStub = createRoutesStub([{ Component: MainApp, path: "/" }]);
@@ -60,13 +61,20 @@ describe("frontend/routes/_oh", () => {
     });
   });
 
-  it("should capture the user's consent", async () => {
+  it("should render and capture the user's consent if oss mode", async () => {
     const user = userEvent.setup();
+    const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
     const handleCaptureConsentSpy = vi.spyOn(
       CaptureConsent,
       "handleCaptureConsent",
     );
 
+    getConfigSpy.mockResolvedValue({
+      APP_MODE: "oss",
+      GITHUB_CLIENT_ID: "test-id",
+      POSTHOG_CLIENT_KEY: "test-key",
+    });
+
     renderWithProviders(<RouteStub />);
 
     // The user has not consented to tracking
@@ -87,6 +95,23 @@ describe("frontend/routes/_oh", () => {
     ).not.toBeInTheDocument();
   });
 
+  it("should not render the user consent form if saas mode", async () => {
+    const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
+    getConfigSpy.mockResolvedValue({
+      APP_MODE: "saas",
+      GITHUB_CLIENT_ID: "test-id",
+      POSTHOG_CLIENT_KEY: "test-key",
+    });
+
+    renderWithProviders(<RouteStub />);
+
+    await waitFor(() => {
+      expect(
+        screen.queryByTestId("user-capture-consent-form"),
+      ).not.toBeInTheDocument();
+    });
+  });
+
   it("should not render the user consent form if the user has already made a decision", async () => {
     localStorage.setItem("analytics-consent", "true");
     renderWithProviders(<RouteStub />);
diff --git a/frontend/package-lock.json b/frontend/package-lock.json
index ea59e9e41a73..b2213074af30 100644
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "openhands-frontend",
-  "version": "0.15.0",
+  "version": "0.15.2",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "openhands-frontend",
-      "version": "0.15.0",
+      "version": "0.15.2",
       "dependencies": {
         "@monaco-editor/react": "^4.6.0",
         "@nextui-org/react": "^2.4.8",
@@ -51,7 +51,7 @@
         "@playwright/test": "^1.48.2",
         "@react-router/dev": "^7.0.1",
         "@tailwindcss/typography": "^0.5.15",
-        "@tanstack/eslint-plugin-query": "^5.60.1",
+        "@tanstack/eslint-plugin-query": "^5.62.1",
         "@testing-library/jest-dom": "^6.6.1",
         "@testing-library/react": "^16.0.1",
         "@testing-library/user-event": "^14.5.2",
@@ -78,7 +78,7 @@
         "husky": "^9.1.6",
         "jsdom": "^25.0.1",
         "lint-staged": "^15.2.10",
-        "msw": "^2.3.0-ws.rc-12",
+        "msw": "^2.6.6",
         "postcss": "^8.4.47",
         "prettier": "^3.3.3",
         "tailwindcss": "^3.4.14",
@@ -5503,9 +5503,9 @@
       }
     },
     "node_modules/@tanstack/eslint-plugin-query": {
-      "version": "5.61.4",
-      "resolved": "https://registry.npmjs.org/@tanstack/eslint-plugin-query/-/eslint-plugin-query-5.61.4.tgz",
-      "integrity": "sha512-QVVsY8hwrX9r6c8lLV48oY682SU2GeVlo0hWMSaOKkI05Yi4bXhw5jv7E2qkbjGrgA6DcVl3o/F0dT4wpT+/SQ==",
+      "version": "5.62.1",
+      "resolved": "https://registry.npmjs.org/@tanstack/eslint-plugin-query/-/eslint-plugin-query-5.62.1.tgz",
+      "integrity": "sha512-1886D5U+re1TW0wSH4/kUGG36yIoW5Wkz4twVEzlk3ZWmjF3XkRSWgB+Sc7n+Lyzt8usNV8ZqkZE6DA7IC47fQ==",
       "dev": true,
       "dependencies": {
         "@typescript-eslint/utils": "^8.15.0"
diff --git a/frontend/package.json b/frontend/package.json
index 7980387f71ba..827808729752 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -1,6 +1,6 @@
 {
   "name": "openhands-frontend",
-  "version": "0.15.0",
+  "version": "0.15.2",
   "private": true,
   "type": "module",
   "engines": {
@@ -78,7 +78,7 @@
     "@playwright/test": "^1.48.2",
     "@react-router/dev": "^7.0.1",
     "@tailwindcss/typography": "^0.5.15",
-    "@tanstack/eslint-plugin-query": "^5.60.1",
+    "@tanstack/eslint-plugin-query": "^5.62.1",
     "@testing-library/jest-dom": "^6.6.1",
     "@testing-library/react": "^16.0.1",
     "@testing-library/user-event": "^14.5.2",
@@ -105,7 +105,7 @@
     "husky": "^9.1.6",
     "jsdom": "^25.0.1",
     "lint-staged": "^15.2.10",
-    "msw": "^2.3.0-ws.rc-12",
+    "msw": "^2.6.6",
     "postcss": "^8.4.47",
     "prettier": "^3.3.3",
     "tailwindcss": "^3.4.14",
diff --git a/frontend/src/components/features/chat/chat-input.tsx b/frontend/src/components/features/chat/chat-input.tsx
index 815eb1933720..6cec5f42e04d 100644
--- a/frontend/src/components/features/chat/chat-input.tsx
+++ b/frontend/src/components/features/chat/chat-input.tsx
@@ -83,9 +83,13 @@ export function ChatInput({
   };
 
   const handleSubmitMessage = () => {
-    if (textareaRef.current?.value) {
-      onSubmit(textareaRef.current.value);
-      textareaRef.current.value = "";
+    const message = value || textareaRef.current?.value || "";
+    if (message) {
+      onSubmit(message);
+      onChange?.("");
+      if (textareaRef.current) {
+        textareaRef.current.value = "";
+      }
     }
   };
 
diff --git a/frontend/src/components/features/chat/expandable-message.tsx b/frontend/src/components/features/chat/expandable-message.tsx
index f42b3f0b13ad..6ebcaa3aeed5 100644
--- a/frontend/src/components/features/chat/expandable-message.tsx
+++ b/frontend/src/components/features/chat/expandable-message.tsx
@@ -6,17 +6,21 @@ import { code } from "../markdown/code";
 import { ol, ul } from "../markdown/list";
 import ArrowUp from "#/icons/angle-up-solid.svg?react";
 import ArrowDown from "#/icons/angle-down-solid.svg?react";
+import CheckCircle from "#/icons/check-circle-solid.svg?react";
+import XCircle from "#/icons/x-circle-solid.svg?react";
 
 interface ExpandableMessageProps {
   id?: string;
   message: string;
   type: string;
+  success?: boolean;
 }
 
 export function ExpandableMessage({
   id,
   message,
   type,
+  success,
 }: ExpandableMessageProps) {
   const { t, i18n } = useTranslation();
   const [showDetails, setShowDetails] = useState(true);
@@ -31,22 +35,14 @@ export function ExpandableMessage({
     }
   }, [id, message, i18n.language]);
 
-  const border = type === "error" ? "border-danger" : "border-neutral-300";
-  const textColor = type === "error" ? "text-danger" : "text-neutral-300";
-  let arrowClasses = "h-4 w-4 ml-2 inline";
-  if (type === "error") {
-    arrowClasses += " fill-danger";
-  } else {
-    arrowClasses += " fill-neutral-300";
-  }
+  const arrowClasses = "h-4 w-4 ml-2 inline fill-neutral-300";
+  const statusIconClasses = "h-4 w-4 ml-2 inline";
 
   return (
-    <div
-      className={`flex gap-2 items-center justify-start border-l-2 pl-2 my-2 py-2 ${border}`}
-    >
+    <div className="flex gap-2 items-center justify-between border-l-2 border-neutral-300 pl-2 my-2 py-2">
       <div className="text-sm leading-4 flex flex-col gap-2 max-w-full">
         {headline && (
-          <p className={`${textColor} font-bold`}>
+          <p className="text-neutral-300 font-bold">
             {headline}
             <button
               type="button"
@@ -75,6 +71,21 @@ export function ExpandableMessage({
           </Markdown>
         )}
       </div>
+      {type === "action" && success !== undefined && (
+        <div className="flex-shrink-0">
+          {success ? (
+            <CheckCircle
+              data-testid="status-icon"
+              className={`${statusIconClasses} fill-success`}
+            />
+          ) : (
+            <XCircle
+              data-testid="status-icon"
+              className={`${statusIconClasses} fill-danger`}
+            />
+          )}
+        </div>
+      )}
     </div>
   );
 }
diff --git a/frontend/src/components/features/chat/interactive-chat-box.tsx b/frontend/src/components/features/chat/interactive-chat-box.tsx
index e96339adf0a7..09dcf84b32d6 100644
--- a/frontend/src/components/features/chat/interactive-chat-box.tsx
+++ b/frontend/src/components/features/chat/interactive-chat-box.tsx
@@ -38,6 +38,9 @@ export function InteractiveChatBox({
   const handleSubmit = (message: string) => {
     onSubmit(message, images);
     setImages([]);
+    if (message) {
+      onChange?.("");
+    }
   };
 
   return (
diff --git a/frontend/src/components/features/chat/messages.tsx b/frontend/src/components/features/chat/messages.tsx
index 1f7eff016582..e1bd34637472 100644
--- a/frontend/src/components/features/chat/messages.tsx
+++ b/frontend/src/components/features/chat/messages.tsx
@@ -14,13 +14,13 @@ export function Messages({
 }: MessagesProps) {
   return messages.map((message, index) => {
     if (message.type === "error" || message.type === "action") {
-      console.log("expando", message);
       return (
         <ExpandableMessage
           key={index}
           type={message.type}
           id={message.translationID}
           message={message.content}
+          success={message.success}
         />
       );
     }
diff --git a/frontend/src/components/features/file-explorer/file-explorer.tsx b/frontend/src/components/features/file-explorer/file-explorer.tsx
index b033cfb62126..039fc1602003 100644
--- a/frontend/src/components/features/file-explorer/file-explorer.tsx
+++ b/frontend/src/components/features/file-explorer/file-explorer.tsx
@@ -1,5 +1,5 @@
 import React from "react";
-import { useSelector } from "react-redux";
+import { useDispatch, useSelector } from "react-redux";
 import { useTranslation } from "react-i18next";
 import AgentState from "#/types/agent-state";
 import { ExplorerTree } from "#/components/features/file-explorer/explorer-tree";
@@ -14,6 +14,7 @@ import { Dropzone } from "./dropzone";
 import { FileExplorerHeader } from "./file-explorer-header";
 import { useVSCodeUrl } from "#/hooks/query/use-vscode-url";
 import { OpenVSCodeButton } from "#/components/shared/buttons/open-vscode-button";
+import { addAssistantMessage } from "#/state/chat-slice";
 
 interface FileExplorerProps {
   isOpen: boolean;
@@ -22,15 +23,37 @@ interface FileExplorerProps {
 
 export function FileExplorer({ isOpen, onToggle }: FileExplorerProps) {
   const { t } = useTranslation();
+  const dispatch = useDispatch();
 
   const fileInputRef = React.useRef<HTMLInputElement | null>(null);
   const [isDragging, setIsDragging] = React.useState(false);
 
   const { curAgentState } = useSelector((state: RootState) => state.agent);
 
+  const agentIsReady =
+    curAgentState !== AgentState.INIT && curAgentState !== AgentState.LOADING;
+
   const { data: paths, refetch, error } = useListFiles();
   const { mutate: uploadFiles } = useUploadFiles();
-  const { refetch: getVSCodeUrl } = useVSCodeUrl();
+  const { data: vscodeUrl } = useVSCodeUrl({ enabled: agentIsReady });
+
+  const handleOpenVSCode = () => {
+    if (vscodeUrl?.vscode_url) {
+      dispatch(
+        addAssistantMessage(
+          "You opened VS Code. Please inform the agent of any changes you made to the workspace or environment. To avoid conflicts, it's best to pause the agent before making any changes.",
+        ),
+      );
+      window.open(vscodeUrl.vscode_url, "_blank");
+    } else if (vscodeUrl?.error) {
+      toast.error(
+        `open-vscode-error-${new Date().getTime()}`,
+        t(I18nKey.EXPLORER$VSCODE_SWITCHING_ERROR_MESSAGE, {
+          error: vscodeUrl.error,
+        }),
+      );
+    }
+  };
 
   const selectFileInput = () => {
     fileInputRef.current?.click(); // Trigger the file browser
@@ -142,11 +165,8 @@ export function FileExplorer({ isOpen, onToggle }: FileExplorerProps) {
           )}
           {isOpen && (
             <OpenVSCodeButton
-              onClick={getVSCodeUrl}
-              isDisabled={
-                curAgentState === AgentState.INIT ||
-                curAgentState === AgentState.LOADING
-              }
+              onClick={handleOpenVSCode}
+              isDisabled={!agentIsReady}
             />
           )}
         </div>
diff --git a/frontend/src/components/features/file-explorer/tree-node.tsx b/frontend/src/components/features/file-explorer/tree-node.tsx
index 524f56578e1c..1410e38a9c7b 100644
--- a/frontend/src/components/features/file-explorer/tree-node.tsx
+++ b/frontend/src/components/features/file-explorer/tree-node.tsx
@@ -1,10 +1,12 @@
 import React from "react";
+import { useSelector } from "react-redux";
 
 import { useFiles } from "#/context/files";
 import { cn } from "#/utils/utils";
 import { useListFiles } from "#/hooks/query/use-list-files";
 import { useListFile } from "#/hooks/query/use-list-file";
 import { Filename } from "./filename";
+import { RootState } from "#/store";
 
 interface TreeNodeProps {
   path: string;
@@ -20,6 +22,7 @@ function TreeNode({ path, defaultOpen = false }: TreeNodeProps) {
     selectedPath,
   } = useFiles();
   const [isOpen, setIsOpen] = React.useState(defaultOpen);
+  const { curAgentState } = useSelector((state: RootState) => state.agent);
 
   const isDirectory = path.endsWith("/");
 
@@ -39,6 +42,12 @@ function TreeNode({ path, defaultOpen = false }: TreeNodeProps) {
     }
   }, [fileContent, path]);
 
+  React.useEffect(() => {
+    if (selectedPath === path && !isDirectory) {
+      refetch();
+    }
+  }, [curAgentState, selectedPath, path, isDirectory]);
+
   const fileParts = path.split("/");
   const filename =
     fileParts[fileParts.length - 1] || fileParts[fileParts.length - 2];
diff --git a/frontend/src/components/features/markdown/code.tsx b/frontend/src/components/features/markdown/code.tsx
index a7522722d5ea..d68191d4766e 100644
--- a/frontend/src/components/features/markdown/code.tsx
+++ b/frontend/src/components/features/markdown/code.tsx
@@ -17,7 +17,20 @@ export function code({
   const match = /language-(\w+)/.exec(className || ""); // get the language
 
   if (!match) {
-    return <code className={className}>{children}</code>;
+    return (
+      <code
+        className={className}
+        style={{
+          backgroundColor: "#2a3038",
+          padding: "0.2em 0.4em",
+          borderRadius: "4px",
+          color: "#e6edf3",
+          border: "1px solid #30363d",
+        }}
+      >
+        {children}
+      </code>
+    );
   }
 
   return (
diff --git a/frontend/src/components/features/project-menu/ProjectMenuCard.tsx b/frontend/src/components/features/project-menu/ProjectMenuCard.tsx
index b8fd755a6050..bb4074907682 100644
--- a/frontend/src/components/features/project-menu/ProjectMenuCard.tsx
+++ b/frontend/src/components/features/project-menu/ProjectMenuCard.tsx
@@ -1,9 +1,7 @@
 import React from "react";
-import { useDispatch } from "react-redux";
 import toast from "react-hot-toast";
 import posthog from "posthog-js";
 import EllipsisH from "#/icons/ellipsis-h.svg?react";
-import { addUserMessage } from "#/state/chat-slice";
 import { createChatMessage } from "#/services/chat-service";
 import { ProjectMenuCardContextMenu } from "./project.menu-card-context-menu";
 import { ProjectMenuDetailsPlaceholder } from "./project-menu-details-placeholder";
@@ -28,7 +26,6 @@ export function ProjectMenuCard({
   githubData,
 }: ProjectMenuCardProps) {
   const { send } = useWsClient();
-  const dispatch = useDispatch();
 
   const [contextMenuIsOpen, setContextMenuIsOpen] = React.useState(false);
   const [connectToGitHubModalOpen, setConnectToGitHubModalOpen] =
@@ -56,7 +53,6 @@ Please push the changes to GitHub and open a pull request.
     );
 
     send(event); // send to socket
-    dispatch(addUserMessage(rawEvent)); // display in chat interface
     setContextMenuIsOpen(false);
   };
 
diff --git a/frontend/src/components/features/sidebar/sidebar.tsx b/frontend/src/components/features/sidebar/sidebar.tsx
index 468c5c156882..fa11bc3fb366 100644
--- a/frontend/src/components/features/sidebar/sidebar.tsx
+++ b/frontend/src/components/features/sidebar/sidebar.tsx
@@ -54,13 +54,13 @@ export function Sidebar() {
 
   return (
     <>
-      <aside className="px-1 flex flex-col gap-1">
+      <aside className="h-[40px] md:h-auto px-1 flex flex-row md:flex-col gap-1">
         <div className="w-[34px] h-[34px] flex items-center justify-center">
           {user.isLoading && <LoadingSpinner size="small" />}
           {!user.isLoading && <AllHandsLogoButton onClick={handleClickLogo} />}
         </div>
 
-        <nav className="py-[18px] flex flex-col items-center gap-[18px]">
+        <nav className="md:py-[18px] flex flex-row md:flex-col items-center gap-[18px]">
           <UserActions
             user={user.data ? { avatar_url: user.data.avatar_url } : undefined}
             onLogout={logout}
diff --git a/frontend/src/components/features/waitlist/tos-checkbox.tsx b/frontend/src/components/features/waitlist/tos-checkbox.tsx
new file mode 100644
index 000000000000..2a780776fb3d
--- /dev/null
+++ b/frontend/src/components/features/waitlist/tos-checkbox.tsx
@@ -0,0 +1,22 @@
+interface TOSCheckboxProps {
+  onChange: () => void;
+}
+
+export function TOSCheckbox({ onChange }: TOSCheckboxProps) {
+  return (
+    <label className="flex items-center gap-2">
+      <input type="checkbox" onChange={onChange} />
+      <span>
+        I accept the{" "}
+        <a
+          href="https://www.all-hands.dev/tos"
+          target="_blank"
+          rel="noopener noreferrer"
+          className="underline underline-offset-2 text-blue-500 hover:text-blue-700"
+        >
+          terms of service
+        </a>
+      </span>
+    </label>
+  );
+}
diff --git a/frontend/src/components/features/waitlist/waitlist-modal.tsx b/frontend/src/components/features/waitlist/waitlist-modal.tsx
index ba221da68a73..486bf1855e33 100644
--- a/frontend/src/components/features/waitlist/waitlist-modal.tsx
+++ b/frontend/src/components/features/waitlist/waitlist-modal.tsx
@@ -1,3 +1,4 @@
+import React from "react";
 import GitHubLogo from "#/assets/branding/github-logo.svg?react";
 import AllHandsLogo from "#/assets/branding/all-hands-logo.svg?react";
 import { JoinWaitlistAnchor } from "./join-waitlist-anchor";
@@ -5,6 +6,8 @@ import { WaitlistMessage } from "./waitlist-message";
 import { ModalBackdrop } from "#/components/shared/modals/modal-backdrop";
 import { ModalButton } from "#/components/shared/buttons/modal-button";
 import { ModalBody } from "#/components/shared/modals/modal-body";
+import { TOSCheckbox } from "./tos-checkbox";
+import { handleCaptureConsent } from "#/utils/handle-capture-consent";
 
 interface WaitlistModalProps {
   ghToken: string | null;
@@ -12,22 +15,30 @@ interface WaitlistModalProps {
 }
 
 export function WaitlistModal({ ghToken, githubAuthUrl }: WaitlistModalProps) {
+  const [isTosAccepted, setIsTosAccepted] = React.useState(false);
+
+  const handleGitHubAuth = () => {
+    if (githubAuthUrl) {
+      handleCaptureConsent(true);
+      window.location.href = githubAuthUrl;
+    }
+  };
+
   return (
     <ModalBackdrop>
       <ModalBody>
         <AllHandsLogo width={68} height={46} />
         <WaitlistMessage content={ghToken ? "waitlist" : "sign-in"} />
 
+        <TOSCheckbox onChange={() => setIsTosAccepted((prev) => !prev)} />
+
         {!ghToken && (
           <ModalButton
+            disabled={!isTosAccepted}
             text="Connect to GitHub"
             icon={<GitHubLogo width={20} height={20} />}
             className="bg-[#791B80] w-full"
-            onClick={() => {
-              if (githubAuthUrl) {
-                window.location.href = githubAuthUrl;
-              }
-            }}
+            onClick={handleGitHubAuth}
           />
         )}
         {ghToken && <JoinWaitlistAnchor />}
diff --git a/frontend/src/components/layout/container.tsx b/frontend/src/components/layout/container.tsx
index 15047cccd779..f2dcb39a3f11 100644
--- a/frontend/src/components/layout/container.tsx
+++ b/frontend/src/components/layout/container.tsx
@@ -5,7 +5,7 @@ import { NavTab } from "./nav-tab";
 interface ContainerProps {
   label?: string;
   labels?: {
-    label: string;
+    label: string | React.ReactNode;
     to: string;
     icon?: React.ReactNode;
     isBeta?: boolean;
@@ -39,7 +39,7 @@ export function Container({
           {label}
         </div>
       )}
-      <div className="overflow-scroll h-full rounded-b-xl">{children}</div>
+      <div className="overflow-hidden h-full rounded-b-xl">{children}</div>
     </div>
   );
 }
diff --git a/frontend/src/components/layout/count-badge.tsx b/frontend/src/components/layout/count-badge.tsx
new file mode 100644
index 000000000000..359ae3b71aee
--- /dev/null
+++ b/frontend/src/components/layout/count-badge.tsx
@@ -0,0 +1,7 @@
+export function CountBadge({ count }: { count: number }) {
+  return (
+    <span className="text-[11px] leading-5 text-root-primary bg-neutral-400 px-1 rounded-xl">
+      {count}
+    </span>
+  );
+}
diff --git a/frontend/src/components/layout/nav-tab.tsx b/frontend/src/components/layout/nav-tab.tsx
index 930a5437713e..a9f363e39532 100644
--- a/frontend/src/components/layout/nav-tab.tsx
+++ b/frontend/src/components/layout/nav-tab.tsx
@@ -4,7 +4,7 @@ import { BetaBadge } from "./beta-badge";
 
 interface NavTabProps {
   to: string;
-  label: string;
+  label: string | React.ReactNode;
   icon: React.ReactNode;
   isBeta?: boolean;
 }
diff --git a/frontend/src/context/ws-client-provider.tsx b/frontend/src/context/ws-client-provider.tsx
index fcbc435616c6..36028d89c169 100644
--- a/frontend/src/context/ws-client-provider.tsx
+++ b/frontend/src/context/ws-client-provider.tsx
@@ -38,6 +38,7 @@ interface WsClientProviderProps {
   enabled: boolean;
   token: string | null;
   ghToken: string | null;
+  selectedRepository: string | null;
   settings: Settings | null;
 }
 
@@ -45,12 +46,14 @@ export function WsClientProvider({
   enabled,
   token,
   ghToken,
+  selectedRepository,
   settings,
   children,
 }: React.PropsWithChildren<WsClientProviderProps>) {
   const sioRef = React.useRef<Socket | null>(null);
   const tokenRef = React.useRef<string | null>(token);
   const ghTokenRef = React.useRef<string | null>(ghToken);
+  const selectedRepositoryRef = React.useRef<string | null>(selectedRepository);
   const disconnectRef = React.useRef<ReturnType<typeof setTimeout> | null>(
     null,
   );
@@ -81,8 +84,11 @@ export function WsClientProvider({
     if (ghToken) {
       initEvent.github_token = ghToken;
     }
+    if (selectedRepository) {
+      initEvent.selected_repository = selectedRepository;
+    }
     const lastEvent = lastEventRef.current;
-    if (lastEvent && !Number.isNaN(parseInt(lastEvent.id as string, 10))) {
+    if (lastEvent) {
       initEvent.latest_event_id = lastEvent.id;
     }
     send(initEvent);
@@ -93,7 +99,9 @@ export function WsClientProvider({
       messageRateHandler.record(new Date().getTime());
     }
     setEvents((prevEvents) => [...prevEvents, event]);
-    lastEventRef.current = event;
+    if (!Number.isNaN(parseInt(event.id as string, 10))) {
+      lastEventRef.current = event;
+    }
     const extras = event.extras as Record<string, unknown>;
     if (extras?.agent_state === AgentState.INIT) {
       setStatus(WsClientProviderStatus.ACTIVE);
@@ -156,6 +164,7 @@ export function WsClientProvider({
     sioRef.current = sio;
     tokenRef.current = token;
     ghTokenRef.current = ghToken;
+    selectedRepositoryRef.current = selectedRepository;
 
     return () => {
       sio.off("connect", handleConnect);
@@ -164,7 +173,7 @@ export function WsClientProvider({
       sio.off("connect_failed", handleError);
       sio.off("disconnect", handleDisconnect);
     };
-  }, [enabled, token, ghToken]);
+  }, [enabled, token, ghToken, selectedRepository]);
 
   // Strict mode mounts and unmounts each component twice, so we have to wait in the destructor
   // before actually disconnecting the socket and cancel the operation if the component gets remounted.
diff --git a/frontend/src/hooks/query/use-vscode-url.ts b/frontend/src/hooks/query/use-vscode-url.ts
index 9c913e57cf4b..9024876114c4 100644
--- a/frontend/src/hooks/query/use-vscode-url.ts
+++ b/frontend/src/hooks/query/use-vscode-url.ts
@@ -1,43 +1,13 @@
 import { useQuery } from "@tanstack/react-query";
-import React from "react";
-import { useTranslation } from "react-i18next";
-import { useDispatch } from "react-redux";
-import toast from "#/utils/toast";
-import { addAssistantMessage } from "#/state/chat-slice";
-import { I18nKey } from "#/i18n/declaration";
 import OpenHands from "#/api/open-hands";
 
-export const useVSCodeUrl = () => {
-  const { t } = useTranslation();
-  const dispatch = useDispatch();
-
+export const useVSCodeUrl = (config: { enabled: boolean }) => {
   const data = useQuery({
     queryKey: ["vscode_url"],
     queryFn: OpenHands.getVSCodeUrl,
-    enabled: false,
+    enabled: config.enabled,
+    refetchOnMount: false,
   });
 
-  const { data: vscodeUrlObject, isFetching } = data;
-
-  React.useEffect(() => {
-    if (isFetching) return;
-
-    if (vscodeUrlObject?.vscode_url) {
-      dispatch(
-        addAssistantMessage(
-          "You opened VS Code. Please inform the agent of any changes you made to the workspace or environment. To avoid conflicts, it's best to pause the agent before making any changes.",
-        ),
-      );
-      window.open(vscodeUrlObject.vscode_url, "_blank");
-    } else if (vscodeUrlObject?.error) {
-      toast.error(
-        `open-vscode-error-${new Date().getTime()}`,
-        t(I18nKey.EXPLORER$VSCODE_SWITCHING_ERROR_MESSAGE, {
-          error: vscodeUrlObject.error,
-        }),
-      );
-    }
-  }, [vscodeUrlObject, isFetching]);
-
   return data;
 };
diff --git a/frontend/src/i18n/translation.json b/frontend/src/i18n/translation.json
index fe32717180ab..c86c62f2f55c 100644
--- a/frontend/src/i18n/translation.json
+++ b/frontend/src/i18n/translation.json
@@ -2017,6 +2017,9 @@
   "ACTION_MESSAGE$WRITE": {
     "en": "Writing to a file"
   },
+  "ACTION_MESSAGE$BROWSE": {
+    "en": "Browsing the web"
+  },
   "OBSERVATION_MESSAGE$RUN": {
     "en": "Ran a bash command"
   },
@@ -2029,6 +2032,9 @@
   "OBSERVATION_MESSAGE$WRITE": {
     "en": "Wrote to a file"
   },
+  "OBSERVATION_MESSAGE$BROWSE": {
+    "en": "Browsing completed"
+  },
   "EXPANDABLE_MESSAGE$SHOW_DETAILS": {
     "en": "Show details"
   },
diff --git a/frontend/src/icons/check-circle-solid.svg b/frontend/src/icons/check-circle-solid.svg
new file mode 100644
index 000000000000..a07362b4ab3f
--- /dev/null
+++ b/frontend/src/icons/check-circle-solid.svg
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512">
+  <path d="M256 512A256 256 0 1 0 256 0a256 256 0 1 0 0 512zM369 209L241 337c-9.4 9.4-24.6 9.4-33.9 0l-64-64c-9.4-9.4-9.4-24.6 0-33.9s24.6-9.4 33.9 0l47 47L335 175c9.4-9.4 24.6-9.4 33.9 0s9.4 24.6 0 33.9z"/>
+</svg>
diff --git a/frontend/src/icons/x-circle-solid.svg b/frontend/src/icons/x-circle-solid.svg
new file mode 100644
index 000000000000..f673bbf0b1e5
--- /dev/null
+++ b/frontend/src/icons/x-circle-solid.svg
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512">
+  <path d="M256 512A256 256 0 1 0 256 0a256 256 0 1 0 0 512zM175 175c9.4-9.4 24.6-9.4 33.9 0l47 47 47-47c9.4-9.4 24.6-9.4 33.9 0s9.4 24.6 0 33.9l-47 47 47 47c9.4 9.4 9.4 24.6 0 33.9s-24.6 9.4-33.9 0l-47-47-47 47c-9.4 9.4-24.6 9.4-33.9 0s-9.4-24.6 0-33.9l47-47-47-47c-9.4-9.4-9.4-24.6 0-33.9z"/>
+</svg>
diff --git a/frontend/src/index.css b/frontend/src/index.css
index a8d6571ed1e1..b5814513e8bd 100644
--- a/frontend/src/index.css
+++ b/frontend/src/index.css
@@ -32,8 +32,11 @@ code {
   margin: 0;
   font-size: 85%;
   white-space: break-spaces;
-  background-color: var(--bg-neutral-muted);
-  border-radius: 6px;
+  background-color: #2a3038;
+  border-radius: 4px;
+  color: #e6edf3;
+  border: 1px solid #30363d;
+  letter-spacing: -0.2px;
 }
 
 .markdown-body pre code {
diff --git a/frontend/src/message.d.ts b/frontend/src/message.d.ts
index 5b70e39c8f56..65bd7e0cb193 100644
--- a/frontend/src/message.d.ts
+++ b/frontend/src/message.d.ts
@@ -4,6 +4,7 @@ type Message = {
   timestamp: string;
   imageUrls?: string[];
   type?: "thought" | "error" | "action";
+  success?: boolean;
   pending?: boolean;
   translationID?: string;
   eventID?: number;
diff --git a/frontend/src/routes/_oh.app/hooks/use-ws-status-change.ts b/frontend/src/routes/_oh.app/hooks/use-ws-status-change.ts
index c08cbe87db23..789af7dadfec 100644
--- a/frontend/src/routes/_oh.app/hooks/use-ws-status-change.ts
+++ b/frontend/src/routes/_oh.app/hooks/use-ws-status-change.ts
@@ -6,7 +6,6 @@ import {
   WsClientProviderStatus,
 } from "#/context/ws-client-provider";
 import { createChatMessage } from "#/services/chat-service";
-import { getCloneRepoCommand } from "#/services/terminal-service";
 import { setCurrentAgentState } from "#/state/agent-slice";
 import { addUserMessage } from "#/state/chat-slice";
 import {
@@ -37,11 +36,6 @@ export const useWSStatusChange = () => {
     send(createChatMessage(query, base64Files, timestamp));
   };
 
-  const dispatchCloneRepoCommand = (ghToken: string, repository: string) => {
-    send(getCloneRepoCommand(ghToken, repository));
-    dispatch(clearSelectedRepository());
-  };
-
   const dispatchInitialQuery = (query: string, additionalInfo: string) => {
     if (additionalInfo) {
       sendInitialQuery(`${query}\n\n[${additionalInfo}]`, files);
@@ -57,7 +51,7 @@ export const useWSStatusChange = () => {
     let additionalInfo = "";
 
     if (gitHubToken && selectedRepository) {
-      dispatchCloneRepoCommand(gitHubToken, selectedRepository);
+      dispatch(clearSelectedRepository());
       additionalInfo = `Repository ${selectedRepository} has been cloned to /workspace. Please check the /workspace for files.`;
     } else if (importedProjectZip) {
       // if there's an uploaded project zip, add it to the chat
diff --git a/frontend/src/routes/_oh.app/route.tsx b/frontend/src/routes/_oh.app/route.tsx
index 776d53373c39..8541ec45ccbd 100644
--- a/frontend/src/routes/_oh.app/route.tsx
+++ b/frontend/src/routes/_oh.app/route.tsx
@@ -21,6 +21,7 @@ import { useUserPrefs } from "#/context/user-prefs-context";
 import { useConversationConfig } from "#/hooks/query/use-conversation-config";
 import { Container } from "#/components/layout/container";
 import Security from "#/components/shared/modals/security/security";
+import { CountBadge } from "#/components/layout/count-badge";
 
 function App() {
   const { token, gitHubToken } = useAuth();
@@ -33,6 +34,8 @@ function App() {
     (state: RootState) => state.initalQuery,
   );
 
+  const { updateCount } = useSelector((state: RootState) => state.browser);
+
   const { data: latestGitHubCommit } = useLatestRepoCommit({
     repository: selectedRepository,
   });
@@ -64,26 +67,31 @@ function App() {
       enabled
       token={token}
       ghToken={gitHubToken}
+      selectedRepository={selectedRepository}
       settings={settings}
     >
       <EventHandler>
         <div className="flex flex-col h-full gap-3">
           <div className="flex h-full overflow-auto gap-3">
-            <Container className="w-[390px] max-h-full relative">
+            <Container className="w-full md:w-[390px] max-h-full relative">
               <ChatInterface />
             </Container>
 
-            <div className="flex flex-col grow gap-3">
+            <div className="hidden md:flex flex-col grow gap-3">
               <Container
                 className="h-2/3"
                 labels={[
                   { label: "Workspace", to: "", icon: <CodeIcon /> },
                   { label: "Jupyter", to: "jupyter", icon: <ListIcon /> },
                   {
-                    label: "Browser",
+                    label: (
+                      <div className="flex items-center gap-1">
+                        Browser
+                        {updateCount > 0 && <CountBadge count={updateCount} />}
+                      </div>
+                    ),
                     to: "browser",
                     icon: <GlobeIcon />,
-                    isBeta: true,
                   },
                 ]}
               >
diff --git a/frontend/src/routes/_oh/route.tsx b/frontend/src/routes/_oh/route.tsx
index 60ae56b0fc63..7812dec4d115 100644
--- a/frontend/src/routes/_oh/route.tsx
+++ b/frontend/src/routes/_oh/route.tsx
@@ -6,9 +6,9 @@ import { useIsAuthed } from "#/hooks/query/use-is-authed";
 import { useAuth } from "#/context/auth-context";
 import { useUserPrefs } from "#/context/user-prefs-context";
 import { useConfig } from "#/hooks/query/use-config";
-import { AnalyticsConsentFormModal } from "#/components/features/analytics/analytics-consent-form-modal";
 import { Sidebar } from "#/components/features/sidebar/sidebar";
 import { WaitlistModal } from "#/components/features/waitlist/waitlist-modal";
+import { AnalyticsConsentFormModal } from "#/components/features/analytics/analytics-consent-form-modal";
 
 export function ErrorBoundary() {
   const error = useRouteError();
@@ -79,18 +79,19 @@ export default function MainApp() {
   return (
     <div
       data-testid="root-layout"
-      className="bg-root-primary p-3 h-screen min-w-[1024px] overflow-x-hidden flex gap-3"
+      className="bg-root-primary p-3 h-screen md:min-w-[1024px] overflow-x-hidden flex flex-col md:flex-row gap-3"
     >
       <Sidebar />
 
-      <div className="h-full w-full relative">
+      <div className="h-[calc(100%-50px)] md:h-full w-full relative">
         <Outlet />
       </div>
 
       {isInWaitlist && (
         <WaitlistModal ghToken={gitHubToken} githubAuthUrl={gitHubAuthUrl} />
       )}
-      {consentFormIsOpen && (
+
+      {config.data?.APP_MODE === "oss" && consentFormIsOpen && (
         <AnalyticsConsentFormModal
           onClose={() => setConsentFormIsOpen(false)}
         />
diff --git a/frontend/src/services/actions.ts b/frontend/src/services/actions.ts
index 8fa8152dbe2e..b2a976161d45 100644
--- a/frontend/src/services/actions.ts
+++ b/frontend/src/services/actions.ts
@@ -4,12 +4,9 @@ import {
   addUserMessage,
   addErrorMessage,
 } from "#/state/chat-slice";
+import { appendSecurityAnalyzerInput } from "#/state/security-analyzer-slice";
 import { setCode, setActiveFilepath } from "#/state/code-slice";
 import { appendJupyterInput } from "#/state/jupyter-slice";
-import {
-  ActionSecurityRisk,
-  appendSecurityAnalyzerInput,
-} from "#/state/security-analyzer-slice";
 import { setCurStatusMessage } from "#/state/status-slice";
 import store from "#/store";
 import ActionType from "#/types/action-type";
@@ -18,21 +15,16 @@ import {
   ObservationMessage,
   StatusMessage,
 } from "#/types/message";
-import EventLogger from "#/utils/event-logger";
 import { handleObservationMessage } from "./observations";
 
 const messageActions = {
   [ActionType.BROWSE]: (message: ActionMessage) => {
-    if (message.args.thought) {
-      store.dispatch(addAssistantMessage(message.args.thought));
-    } else {
+    if (!message.args.thought && message.message) {
       store.dispatch(addAssistantMessage(message.message));
     }
   },
   [ActionType.BROWSE_INTERACTIVE]: (message: ActionMessage) => {
-    if (message.args.thought) {
-      store.dispatch(addAssistantMessage(message.args.thought));
-    } else {
+    if (!message.args.thought && message.message) {
       store.dispatch(addAssistantMessage(message.message));
     }
   },
@@ -51,6 +43,8 @@ const messageActions = {
           pending: false,
         }),
       );
+    } else {
+      store.dispatch(addAssistantMessage(message.args.content));
     }
   },
   [ActionType.RUN_IPYTHON]: (message: ActionMessage) => {
@@ -60,130 +54,21 @@ const messageActions = {
   },
 };
 
-function getRiskText(risk: ActionSecurityRisk) {
-  switch (risk) {
-    case ActionSecurityRisk.LOW:
-      return "Low Risk";
-    case ActionSecurityRisk.MEDIUM:
-      return "Medium Risk";
-    case ActionSecurityRisk.HIGH:
-      return "High Risk";
-    case ActionSecurityRisk.UNKNOWN:
-    default:
-      return "Unknown Risk";
+export function handleActionMessage(message: ActionMessage) {
+  if (message.args?.hidden) {
+    return;
   }
-}
 
-export function handleActionMessage(message: ActionMessage) {
   if ("args" in message && "security_risk" in message.args) {
     store.dispatch(appendSecurityAnalyzerInput(message));
   }
 
-  if (
-    (message.action === ActionType.RUN ||
-      message.action === ActionType.RUN_IPYTHON) &&
-    message.args.confirmation_state === "awaiting_confirmation"
-  ) {
-    if (message.args.thought) {
-      store.dispatch(addAssistantMessage(message.args.thought));
-    }
-    if (message.args.command) {
-      store.dispatch(
-        addAssistantMessage(
-          `Running this command now: \n\`\`\`\`bash\n${message.args.command}\n\`\`\`\`\nEstimated security risk: ${getRiskText(message.args.security_risk as unknown as ActionSecurityRisk)}`,
-        ),
-      );
-    } else if (message.args.code) {
-      store.dispatch(
-        addAssistantMessage(
-          `Running this code now: \n\`\`\`\`python\n${message.args.code}\n\`\`\`\`\nEstimated security risk: ${getRiskText(message.args.security_risk as unknown as ActionSecurityRisk)}`,
-        ),
-      );
-    } else {
-      store.dispatch(addAssistantMessage(message.message));
-    }
-    return;
-  }
-
-  if (message.source !== "user" && !message.args?.hidden) {
+  if (message.source === "agent") {
     if (message.args && message.args.thought) {
       store.dispatch(addAssistantMessage(message.args.thought));
     }
-    // Convert the message to a properly typed action
-    const baseAction = {
-      ...message,
-      source: "agent" as const,
-      args: {
-        ...message.args,
-        thought: message.args?.thought || message.message || "",
-      },
-    };
-
-    // Cast to the appropriate action type based on the action field
-    switch (message.action) {
-      case "run":
-        store.dispatch(
-          addAssistantAction({
-            ...baseAction,
-            action: "run" as const,
-            args: {
-              command: String(message.args?.command || ""),
-              confirmation_state: (message.args?.confirmation_state ||
-                "confirmed") as
-                | "confirmed"
-                | "rejected"
-                | "awaiting_confirmation",
-              thought: String(message.args?.thought || message.message || ""),
-              hidden: Boolean(message.args?.hidden),
-            },
-          }),
-        );
-        break;
-      case "message":
-        store.dispatch(
-          addAssistantAction({
-            ...baseAction,
-            action: "message" as const,
-            args: {
-              content: String(message.args?.content || message.message || ""),
-              image_urls: Array.isArray(message.args?.image_urls)
-                ? message.args.image_urls
-                : null,
-              wait_for_response: Boolean(message.args?.wait_for_response),
-            },
-          }),
-        );
-        break;
-      case "run_ipython":
-        store.dispatch(
-          addAssistantAction({
-            ...baseAction,
-            action: "run_ipython" as const,
-            args: {
-              code: String(message.args?.code || ""),
-              confirmation_state: (message.args?.confirmation_state ||
-                "confirmed") as
-                | "confirmed"
-                | "rejected"
-                | "awaiting_confirmation",
-              kernel_init_code: String(message.args?.kernel_init_code || ""),
-              thought: String(message.args?.thought || message.message || ""),
-            },
-          }),
-        );
-        break;
-      default:
-        // For other action types, ensure we have the required thought property
-        store.dispatch(
-          addAssistantAction({
-            ...baseAction,
-            action: "reject" as const,
-            args: {
-              thought: String(message.args?.thought || message.message || ""),
-            },
-          }),
-        );
-    }
+    // @ts-expect-error TODO: fix
+    store.dispatch(addAssistantAction(message));
   }
 
   if (message.action in messageActions) {
@@ -217,6 +102,10 @@ export function handleAssistantMessage(message: Record<string, unknown>) {
   } else if (message.status_update) {
     handleStatusMessage(message as unknown as StatusMessage);
   } else {
-    EventLogger.error(`Unknown message type ${message}`);
+    store.dispatch(
+      addErrorMessage({
+        message: "Unknown message type received",
+      }),
+    );
   }
 }
diff --git a/frontend/src/services/settings.ts b/frontend/src/services/settings.ts
index 63efaedc7a58..53c717d32163 100644
--- a/frontend/src/services/settings.ts
+++ b/frontend/src/services/settings.ts
@@ -93,7 +93,7 @@ export const saveSettings = (settings: Partial<Settings>) => {
     if (!isValid) return;
     let value = settings[key as keyof Settings];
     if (value === undefined || value === null) value = "";
-    localStorage.setItem(key, value.toString());
+    localStorage.setItem(key, value.toString().trim());
   });
   localStorage.setItem("SETTINGS_VERSION", LATEST_SETTINGS_VERSION.toString());
 };
diff --git a/frontend/src/services/terminal-service.ts b/frontend/src/services/terminal-service.ts
index 08f654b5affc..c5807596b65d 100644
--- a/frontend/src/services/terminal-service.ts
+++ b/frontend/src/services/terminal-service.ts
@@ -10,11 +10,3 @@ export function getGitHubTokenCommand(gitHubToken: string) {
   const event = getTerminalCommand(command, true);
   return event;
 }
-
-export function getCloneRepoCommand(gitHubToken: string, repository: string) {
-  const url = `https://${gitHubToken}@github.com/${repository}.git`;
-  const dirName = repository.split("/")[1];
-  const command = `git clone ${url} ${dirName} ; cd ${dirName} ; git checkout -b openhands-workspace`;
-  const event = getTerminalCommand(command, true);
-  return event;
-}
diff --git a/frontend/src/state/browser-slice.ts b/frontend/src/state/browser-slice.ts
index fc05f0c50830..7276f3577a3f 100644
--- a/frontend/src/state/browser-slice.ts
+++ b/frontend/src/state/browser-slice.ts
@@ -5,6 +5,8 @@ export const initialState = {
   url: "https://github.com/All-Hands-AI/OpenHands",
   // Base64-encoded screenshot of browser window (placeholder for now, will be replaced with the actual screenshot later)
   screenshotSrc: "",
+  // Number of screenshot updates received (incremented by setScreenshotSrc)
+  updateCount: 0,
 };
 
 export const browserSlice = createSlice({
@@ -16,6 +18,7 @@ export const browserSlice = createSlice({
     },
     setScreenshotSrc: (state, action) => {
       state.screenshotSrc = action.payload;
+      state.updateCount += 1;
     },
   },
 });
diff --git a/frontend/src/state/chat-slice.ts b/frontend/src/state/chat-slice.ts
index 45fc3ad71e6f..47d2b651754d 100644
--- a/frontend/src/state/chat-slice.ts
+++ b/frontend/src/state/chat-slice.ts
@@ -1,13 +1,39 @@
 import { createSlice, PayloadAction } from "@reduxjs/toolkit";
 
-import { OpenHandsObservation } from "#/types/core/observations";
+import { ActionSecurityRisk } from "#/state/security-analyzer-slice";
+import {
+  OpenHandsObservation,
+  CommandObservation,
+  IPythonObservation,
+} from "#/types/core/observations";
 import { OpenHandsAction } from "#/types/core/actions";
+import { OpenHandsEventType } from "#/types/core/base";
 
 type SliceState = { messages: Message[] };
 
 const MAX_CONTENT_LENGTH = 1000;
 
-const HANDLED_ACTIONS = ["run", "run_ipython", "write", "read"];
+const HANDLED_ACTIONS: OpenHandsEventType[] = [
+  "run",
+  "run_ipython",
+  "write",
+  "read",
+  "browse",
+];
+
+function getRiskText(risk: ActionSecurityRisk) {
+  switch (risk) {
+    case ActionSecurityRisk.LOW:
+      return "Low Risk";
+    case ActionSecurityRisk.MEDIUM:
+      return "Medium Risk";
+    case ActionSecurityRisk.HIGH:
+      return "High Risk";
+    case ActionSecurityRisk.UNKNOWN:
+    default:
+      return "Unknown Risk";
+  }
+}
 
 const initialState: SliceState = {
   messages: [],
@@ -77,6 +103,15 @@ export const chatSlice = createSlice({
         text = `${action.payload.args.path}\n${content}`;
       } else if (actionID === "read") {
         text = action.payload.args.path;
+      } else if (actionID === "browse") {
+        text = `Browsing ${action.payload.args.url}`;
+      }
+      if (actionID === "run" || actionID === "run_ipython") {
+        if (
+          action.payload.args.confirmation_state === "awaiting_confirmation"
+        ) {
+          text += `\n\n${getRiskText(action.payload.args.security_risk as unknown as ActionSecurityRisk)}`;
+        }
       }
       const message: Message = {
         type: "action",
@@ -107,6 +142,18 @@ export const chatSlice = createSlice({
         return;
       }
       causeMessage.translationID = translationID;
+      // Set success property based on observation type
+      if (observationID === "run") {
+        const commandObs = observation.payload as CommandObservation;
+        causeMessage.success = commandObs.extras.exit_code === 0;
+      } else if (observationID === "run_ipython") {
+        // For IPython, treat it as successful if the message does not contain "error" (case-insensitive)
+        const ipythonObs = observation.payload as IPythonObservation;
+        causeMessage.success = !ipythonObs.message
+          .toLowerCase()
+          .includes("error");
+      }
+
       if (observationID === "run" || observationID === "run_ipython") {
         let { content } = observation.payload;
         if (content.length > MAX_CONTENT_LENGTH) {
@@ -114,6 +161,16 @@ export const chatSlice = createSlice({
         }
         content = `\`\`\`\n${content}\n\`\`\``;
         causeMessage.content = content; // Observation content includes the action
+      } else if (observationID === "browse") {
+        let content = `**URL:** ${observation.payload.extras.url}\n`;
+        if (observation.payload.extras.error) {
+          content += `**Error:**\n${observation.payload.extras.error}\n`;
+        }
+        content += `**Output:**\n${observation.payload.content}`;
+        if (content.length > MAX_CONTENT_LENGTH) {
+          content = `${content.slice(0, MAX_CONTENT_LENGTH)}...`;
+        }
+        causeMessage.content = content;
       }
     },
 
@@ -122,7 +179,6 @@ export const chatSlice = createSlice({
       action: PayloadAction<{ id?: string; message: string }>,
     ) {
       const { id, message } = action.payload;
-      console.log("add err message", id, message);
       state.messages.push({
         translationID: id,
         content: message,
diff --git a/frontend/src/types/core/actions.ts b/frontend/src/types/core/actions.ts
index f035054092c1..5242a8c5836e 100644
--- a/frontend/src/types/core/actions.ts
+++ b/frontend/src/types/core/actions.ts
@@ -1,4 +1,5 @@
 import { OpenHandsActionEvent } from "./base";
+import { ActionSecurityRisk } from "#/state/security-analyzer-slice";
 
 export interface UserMessageAction extends OpenHandsActionEvent<"message"> {
   source: "user";
@@ -12,6 +13,7 @@ export interface CommandAction extends OpenHandsActionEvent<"run"> {
   source: "agent";
   args: {
     command: string;
+    security_risk: ActionSecurityRisk;
     confirmation_state: "confirmed" | "rejected" | "awaiting_confirmation";
     thought: string;
     hidden?: boolean;
@@ -32,6 +34,7 @@ export interface IPythonAction extends OpenHandsActionEvent<"run_ipython"> {
   source: "agent";
   args: {
     code: string;
+    security_risk: ActionSecurityRisk;
     confirmation_state: "confirmed" | "rejected" | "awaiting_confirmation";
     kernel_init_code: string;
     thought: string;
diff --git a/frontend/src/types/core/observations.ts b/frontend/src/types/core/observations.ts
index 0b95099a8384..7ddc3f05dd94 100644
--- a/frontend/src/types/core/observations.ts
+++ b/frontend/src/types/core/observations.ts
@@ -52,6 +52,21 @@ export interface BrowseObservation extends OpenHandsObservationEvent<"browse"> {
   };
 }
 
+export interface WriteObservation extends OpenHandsObservationEvent<"write"> {
+  source: "agent";
+  extras: {
+    path: string;
+    content: string;
+  };
+}
+
+export interface ReadObservation extends OpenHandsObservationEvent<"read"> {
+  source: "agent";
+  extras: {
+    path: string;
+  };
+}
+
 export interface ErrorObservation extends OpenHandsObservationEvent<"error"> {
   source: "user";
   extras: {
@@ -65,4 +80,6 @@ export type OpenHandsObservation =
   | IPythonObservation
   | DelegateObservation
   | BrowseObservation
+  | WriteObservation
+  | ReadObservation
   | ErrorObservation;
diff --git a/frontend/tailwind.config.js b/frontend/tailwind.config.js
index 0a0c34d94159..1a57ebcd8bce 100644
--- a/frontend/tailwind.config.js
+++ b/frontend/tailwind.config.js
@@ -1,5 +1,7 @@
 /** @type {import('tailwindcss').Config} */
-const { nextui } = require("@nextui-org/react");
+import { nextui } from "@nextui-org/react";
+import typography from '@tailwindcss/typography';
+
 export default {
   content: [
     "./src/**/*.{js,ts,jsx,tsx}",
@@ -12,6 +14,7 @@ export default {
         'root-secondary': '#262626',
         'hyperlink': '#007AFF',
         'danger': '#EF3744',
+        'success': '#4CAF50',
       },
     },
   },
@@ -33,6 +36,6 @@ export default {
         }
       }
     }),
-    require('@tailwindcss/typography'),
+    typography,
   ],
 };
diff --git a/frontend/test-utils.tsx b/frontend/test-utils.tsx
index 4b336602fbf6..6739e3be6e15 100644
--- a/frontend/test-utils.tsx
+++ b/frontend/test-utils.tsx
@@ -6,10 +6,31 @@ import { configureStore } from "@reduxjs/toolkit";
 // eslint-disable-next-line import/no-extraneous-dependencies
 import { RenderOptions, render } from "@testing-library/react";
 import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
+import { I18nextProvider } from "react-i18next";
+import i18n from "i18next";
+import { initReactI18next } from "react-i18next";
 import { AppStore, RootState, rootReducer } from "./src/store";
 import { AuthProvider } from "#/context/auth-context";
 import { UserPrefsProvider } from "#/context/user-prefs-context";
 
+// Initialize i18n for tests
+i18n
+  .use(initReactI18next)
+  .init({
+    lng: "en",
+    fallbackLng: "en",
+    ns: ["translation"],
+    defaultNS: "translation",
+    resources: {
+      en: {
+        translation: {},
+      },
+    },
+    interpolation: {
+      escapeValue: false,
+    },
+  });
+
 const setupStore = (preloadedState?: Partial<RootState>): AppStore =>
   configureStore({
     reducer: rootReducer,
@@ -40,7 +61,9 @@ export function renderWithProviders(
         <UserPrefsProvider>
           <AuthProvider>
             <QueryClientProvider client={new QueryClient()}>
-              {children}
+              <I18nextProvider i18n={i18n}>
+                {children}
+              </I18nextProvider>
             </QueryClientProvider>
           </AuthProvider>
         </UserPrefsProvider>
diff --git a/frontend/vitest.setup.ts b/frontend/vitest.setup.ts
index 105337e75eba..e9a89c8677f6 100644
--- a/frontend/vitest.setup.ts
+++ b/frontend/vitest.setup.ts
@@ -12,7 +12,13 @@ HTMLElement.prototype.scrollTo = vi.fn();
 // Mock the i18n provider
 vi.mock("react-i18next", async (importOriginal) => ({
   ...(await importOriginal<typeof import("react-i18next")>()),
-  useTranslation: () => ({ t: (key: string) => key }),
+  useTranslation: () => ({
+    t: (key: string) => key,
+    i18n: {
+      language: "en",
+      exists: () => false,
+    },
+  }),
 }));
 
 // Mock requests during tests
diff --git a/openhands/__init__.py b/openhands/__init__.py
index 4b918466a4a0..eda56824e36b 100644
--- a/openhands/__init__.py
+++ b/openhands/__init__.py
@@ -4,6 +4,16 @@
 
 
 def get_version():
+    # Try getting the version from pyproject.toml
+    try:
+        root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+        with open(os.path.join(root_dir, 'pyproject.toml'), 'r') as f:
+            for line in f:
+                if line.startswith('version ='):
+                    return line.split('=')[1].strip().strip('"')
+    except FileNotFoundError:
+        pass
+
     try:
         from importlib.metadata import PackageNotFoundError, version
 
@@ -18,16 +28,6 @@ def get_version():
     except (ImportError, DistributionNotFound):
         pass
 
-    # Try getting the version from pyproject.toml
-    try:
-        root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-        with open(os.path.join(root_dir, 'pyproject.toml'), 'r') as f:
-            for line in f:
-                if line.startswith('version ='):
-                    return line.split('=')[1].strip().strip('"')
-    except FileNotFoundError:
-        pass
-
     return 'unknown'
 
 
diff --git a/openhands/agenthub/codeact_agent/README.md b/openhands/agenthub/codeact_agent/README.md
index 45ccb42ba085..0e15939cdfb8 100644
--- a/openhands/agenthub/codeact_agent/README.md
+++ b/openhands/agenthub/codeact_agent/README.md
@@ -1,28 +1,75 @@
 # CodeAct Agent Framework
 
-This folder implements the CodeAct idea ([paper](https://arxiv.org/abs/2402.01030), [tweet](https://twitter.com/xingyaow_/status/1754556835703751087)) that consolidates LLM agents’ **act**ions into a unified **code** action space for both *simplicity* and *performance* (see paper for more details).
+This folder contains the implementation of OpenHands's main agent, the CodeAct Agent. It is based on CodeAct ([paper](https://arxiv.org/abs/2402.01030), [tweet](https://twitter.com/xingyaow_/status/1754556835703751087)), the idea of consolidating LLM agents' **act**ions into a unified **code** action space for both *simplicity* and *performance*.
 
-The conceptual idea is illustrated below. At each turn, the agent can:
+## Overview
+
+The CodeAct agent operates through a function calling interface. At each turn, the agent can:
 
 1. **Converse**: Communicate with humans in natural language to ask for clarification, confirmation, etc.
-2. **CodeAct**: Choose to perform the task by executing code
-   - Execute any valid Linux `bash` command
-   - Execute any valid `Python` code with [an interactive Python interpreter](https://ipython.org/). This is simulated through `bash` command, see plugin system below for more details.
+2. **CodeAct**: Execute actions through a set of well-defined tools:
+   - Execute Linux `bash` commands with `execute_bash`
+   - Run Python code in an [IPython](https://ipython.org/) environment with `execute_ipython_cell`
+   - Interact with web browsers using `browser` and `web_read`
+   - Edit files using `str_replace_editor` or `edit_file`
 
 ![image](https://github.com/All-Hands-AI/OpenHands/assets/38853559/92b622e3-72ad-4a61-8f41-8c040b6d5fb3)
 
-## Adding New Tools
+## Built-in Tools
+
+The agent provides several built-in tools:
+
+### 1. `execute_bash`
+- Execute any valid Linux bash command
+- Handles long-running commands by running them in the background with output redirection
+- Supports interactive processes with STDIN input and process interruption
+- Handles command timeouts with automatic retry in background mode
+
+### 2. `execute_ipython_cell`
+- Run Python code in an IPython environment
+- Supports magic commands like `%pip`
+- Variables are scoped to the IPython environment
+- Requires defining variables and importing packages before use
+
+### 3. `web_read` and `browser`
+- `web_read`: Read and convert webpage content to markdown
+- `browser`: Interact with webpages through Python code
+- Supports common browser actions like navigation, clicking, form filling, scrolling
+- Handles file uploads and drag-and-drop operations
+
+### 4. `str_replace_editor`
+- View, create and edit files through string replacement
+- Persistent state across command calls
+- File viewing with line numbers
+- String replacement with exact matching
+- Undo functionality for edits
+
+### 5. `edit_file` (LLM-based)
+- Edit files using LLM-based content generation
+- Support for partial file edits with line ranges
+- Handles large files by editing specific sections
+- Append mode for adding content to files
+
+## Configuration
 
-The CodeAct agent uses a function calling interface to define tools that the agent can use. Tools are defined in `function_calling.py` using the `ChatCompletionToolParam` class from `litellm`. Each tool consists of:
+Tools can be enabled/disabled through configuration parameters:
+- `codeact_enable_browsing`: Enable browser interaction tools
+- `codeact_enable_jupyter`: Enable IPython code execution
+- `codeact_enable_llm_editor`: Enable LLM-based file editing (falls back to string replacement editor if disabled)
+
+## Micro-agents
 
-1. A description string that explains what the tool does and how to use it
-2. A tool definition using `ChatCompletionToolParam` that specifies:
-   - The tool's name
-   - The tool's parameters and their types
-   - Required vs optional parameters
+The agent includes specialized micro-agents for specific tasks:
 
-Here's an example of how a tool is defined:
+1. **npm**: Handles npm package installation with non-interactive shell workarounds
+2. **github**: Manages GitHub operations with API token support and PR creation guidelines
+3. **flarglebargle**: Easter egg response handler
+
+## Adding New Tools
 
+The CodeAct agent uses a function calling interface based on `litellm`'s `ChatCompletionToolParam`. To add a new tool:
+
+1. Define the tool in `function_calling.py`:
 ```python
 MyTool = ChatCompletionToolParam(
     type='function',
@@ -47,20 +94,20 @@ MyTool = ChatCompletionToolParam(
 )
 ```
 
-To add a new tool:
+2. Add the tool to `get_tools()` in `function_calling.py`
+3. Implement the corresponding action handler in the agent class
 
-1. Define your tool in `function_calling.py` following the pattern above
-2. Add your tool to the `get_tools()` function in `function_calling.py`
-3. Implement the corresponding action handler in the agent to process the tool's invocation
+## Implementation Details
 
-The agent currently supports several built-in tools:
-- `execute_bash`: Execute bash commands
-- `execute_ipython_cell`: Run Python code in IPython
-- `browser`: Interact with a web browser
-- `str_replace_editor`: Edit files using string replacement
-- `edit_file`: Edit files using LLM-based editing
+The agent is implemented in two main files:
 
-Tools can be enabled/disabled through configuration parameters:
-- `codeact_enable_browsing`: Enable browser interaction
-- `codeact_enable_jupyter`: Enable IPython code execution
-- `codeact_enable_llm_editor`: Enable LLM-based file editing (if disabled, uses string replacement editor instead)
+1. `codeact_agent.py`: Core agent implementation with:
+   - Message history management
+   - Tool execution handling
+   - State management
+   - Action/observation processing
+
+2. `function_calling.py`: Tool definitions and function calling interface with:
+   - Tool parameter specifications
+   - Tool descriptions and examples
+   - Function calling response parsing
diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index 1113fd0271d3..a4c249d899ff 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -154,10 +154,7 @@ def get_action_message(
                 BrowseInteractiveAction,
                 BrowseURLAction,
             ),
-        ) or (
-            isinstance(action, (AgentFinishAction, CmdRunAction))
-            and action.source == 'agent'
-        ):
+        ) or (isinstance(action, CmdRunAction) and action.source == 'agent'):
             tool_metadata = action.tool_call_metadata
             assert tool_metadata is not None, (
                 'Tool call metadata should NOT be None when function calling is enabled. Action: '
@@ -166,8 +163,10 @@ def get_action_message(
 
             llm_response: ModelResponse = tool_metadata.model_response
             assistant_msg = llm_response.choices[0].message
+
             # Add the LLM message (assistant) that initiated the tool calls
             # (overwrites any previous message with the same response_id)
+            logger.debug(f'Tool calls type: {type(assistant_msg.tool_calls)}, value: {assistant_msg.tool_calls}')
             pending_tool_call_action_messages[llm_response.id] = Message(
                 role=assistant_msg.role,
                 # tool call content SHOULD BE a string
@@ -177,6 +176,33 @@ def get_action_message(
                 tool_calls=assistant_msg.tool_calls,
             )
             return []
+        elif isinstance(action, AgentFinishAction):
+            role = 'user' if action.source == 'user' else 'assistant'
+
+            # When the agent finishes, the action carries tool_call_metadata for a
+            # tool call that has already been executed and has no response.
+            # When the user finishes (/exit), there is no tool_call_metadata.
+            tool_metadata = action.tool_call_metadata
+            if tool_metadata is not None:
+                # take the response message from the tool call
+                assistant_msg = tool_metadata.model_response.choices[0].message
+                content = assistant_msg.content or ''
+
+                # save content if any, to thought
+                if action.thought:
+                    if action.thought != content:
+                        action.thought += '\n' + content
+                else:
+                    action.thought = content
+
+                # remove the tool call metadata
+                action.tool_call_metadata = None
+            return [
+                Message(
+                    role=role,
+                    content=[TextContent(text=action.thought)],
+                )
+            ]
         elif isinstance(action, MessageAction):
             role = 'user' if action.source == 'user' else 'assistant'
             content = [TextContent(text=action.content or '')]
@@ -373,6 +399,9 @@ def _get_messages(self, state: State) -> list[Message]:
             - Messages from the same role are combined to prevent consecutive same-role messages
             - For Anthropic models, specific messages are cached according to their documentation
         """
+        if not self.prompt_manager:
+            raise Exception('Prompt Manager not instantiated.')
+
         messages: list[Message] = [
             Message(
                 role='system',
diff --git a/openhands/controller/agent.py b/openhands/controller/agent.py
index cffdbbf22d35..fd2657ebc2a8 100644
--- a/openhands/controller/agent.py
+++ b/openhands/controller/agent.py
@@ -11,6 +11,7 @@
 )
 from openhands.llm.llm import LLM
 from openhands.runtime.plugins import PluginRequirement
+from openhands.utils.prompt import PromptManager
 
 
 class Agent(ABC):
@@ -33,6 +34,7 @@ def __init__(
         self.llm = llm
         self.config = config
         self._complete = False
+        self.prompt_manager: PromptManager | None = None
 
     @property
     def complete(self) -> bool:
diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 8f614745b153..8078855330cf 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -5,7 +5,7 @@
 from typing import Callable, ClassVar, Type
 
 import litellm
-from litellm.exceptions import ContextWindowExceededError
+from litellm.exceptions import BadRequestError, ContextWindowExceededError
 
 from openhands.controller.agent import Agent
 from openhands.controller.state.state import State, TrafficControlState
@@ -184,7 +184,7 @@ def update_state_before_step(self):
         self.state.local_iteration += 1
 
     async def update_state_after_step(self):
-        # update metrics especially for cost. Use deepcopy to avoid it being modified by agent.reset()
+        # update metrics especially for cost. Use deepcopy to avoid it being modified by agent.reset(), which _reset() calls
         self.state.local_metrics = copy.deepcopy(self.agent.llm.metrics)
 
     async def _react_to_exception(
@@ -317,9 +317,10 @@ async def _handle_message_action(self, action: MessageAction) -> None:
         elif action.source == EventSource.AGENT and action.wait_for_response:
             await self.set_agent_state_to(AgentState.AWAITING_USER_INPUT)
 
-    def reset_task(self) -> None:
-        """Resets the agent's task."""
+    def _reset(self) -> None:
+        """Resets the controller (almost_stuck, pending action) and the agent."""
         self.almost_stuck = 0
+        self._pending_action = None
         self.agent.reset()
 
     async def set_agent_state_to(self, new_state: AgentState) -> None:
@@ -337,7 +338,7 @@ async def set_agent_state_to(self, new_state: AgentState) -> None:
             return
 
         if new_state in (AgentState.STOPPED, AgentState.ERROR):
-            self.reset_task()
+            self._reset()
         elif (
             new_state == AgentState.RUNNING
             and self.state.agent_state == AgentState.PAUSED
@@ -454,13 +455,10 @@ async def _step(self) -> None:
             await asyncio.sleep(1)
             return
 
-        if self._is_stuck():
-            await self._react_to_exception(RuntimeError('Agent got stuck in a loop'))
-            return
-
         if self.delegate is not None:
             assert self.delegate != self
             if self.delegate.get_agent_state() == AgentState.PAUSED:
+                # no need to check too often
                 await asyncio.sleep(1)
             else:
                 await self._delegate_step()
@@ -487,6 +485,10 @@ async def _step(self) -> None:
         if stop_step:
             return
 
+        if self._is_stuck():
+            await self._react_to_exception(RuntimeError('Agent got stuck in a loop'))
+            return
+
         self.update_state_before_step()
         action: Action = NullAction()
         try:
@@ -507,15 +509,24 @@ async def _step(self) -> None:
                 EventSource.AGENT,
             )
             return
-        except ContextWindowExceededError:
-            # When context window is exceeded, keep roughly half of agent interactions
-            self.state.history = self._apply_conversation_window(self.state.history)
-
-            # Save the ID of the first event in our truncated history for future reloading
-            if self.state.history:
-                self.state.start_id = self.state.history[0].id
-            # Don't add error event - let the agent retry with reduced context
-            return
+        except (ContextWindowExceededError, BadRequestError) as e:
+            # FIXME: this is a hack until a litellm fix is confirmed
+            # Check if this is a nested context window error
+            error_str = str(e).lower()
+            if (
+                'contextwindowexceedederror' in error_str
+                or 'prompt is too long' in error_str
+                or isinstance(e, ContextWindowExceededError)
+            ):
+                # When context window is exceeded, keep roughly half of agent interactions
+                self.state.history = self._apply_conversation_window(self.state.history)
+
+                # Save the ID of the first event in our truncated history for future reloading
+                if self.state.history:
+                    self.state.start_id = self.state.history[0].id
+                # Don't add error event - let the agent retry with reduced context
+                return
+            raise
 
         if action.runnable:
             if self.state.confirmation_mode and (
diff --git a/openhands/core/config/app_config.py b/openhands/core/config/app_config.py
index a10df018dfd1..497cb9061e38 100644
--- a/openhands/core/config/app_config.py
+++ b/openhands/core/config/app_config.py
@@ -1,4 +1,3 @@
-import uuid
 from dataclasses import dataclass, field, fields, is_dataclass
 from typing import ClassVar
 
@@ -66,7 +65,10 @@ class AppConfig:
     modal_api_token_id: str = ''
     modal_api_token_secret: str = ''
     disable_color: bool = False
-    jwt_secret: str = uuid.uuid4().hex
+    jwt_secret: str = ''
+    attach_session_middleware_class: str = (
+        'openhands.server.middleware.AttachSessionMiddleware'
+    )
     debug: bool = False
     file_uploads_max_file_size_mb: int = 0
     file_uploads_restrict_file_types: bool = False
diff --git a/openhands/core/config/config_utils.py b/openhands/core/config/config_utils.py
index 6e7ddebac611..38c3c1d03df5 100644
--- a/openhands/core/config/config_utils.py
+++ b/openhands/core/config/config_utils.py
@@ -2,7 +2,7 @@
 from typing import get_args, get_origin
 
 OH_DEFAULT_AGENT = 'CodeActAgent'
-OH_MAX_ITERATIONS = 100
+OH_MAX_ITERATIONS = 500
 
 
 def get_field_info(f):
diff --git a/openhands/core/config/sandbox_config.py b/openhands/core/config/sandbox_config.py
index fee44cbaffd6..c6367067557b 100644
--- a/openhands/core/config/sandbox_config.py
+++ b/openhands/core/config/sandbox_config.py
@@ -48,6 +48,7 @@ class SandboxConfig:
         False  # once enabled, OpenHands would lint files after editing
     )
     use_host_network: bool = False
+    runtime_extra_build_args: list[str] | None = None
     initialize_plugins: bool = True
     force_rebuild_runtime: bool = False
     runtime_extra_deps: str | None = None
diff --git a/openhands/core/config/utils.py b/openhands/core/config/utils.py
index 00f41dc1da7b..3aedaf952353 100644
--- a/openhands/core/config/utils.py
+++ b/openhands/core/config/utils.py
@@ -5,6 +5,7 @@
 from dataclasses import is_dataclass
 from types import UnionType
 from typing import Any, MutableMapping, get_args, get_origin
+from uuid import uuid4
 
 import toml
 from dotenv import load_dotenv
@@ -19,7 +20,10 @@
 from openhands.core.config.llm_config import LLMConfig
 from openhands.core.config.sandbox_config import SandboxConfig
 from openhands.core.config.security_config import SecurityConfig
+from openhands.storage import get_file_store
+from openhands.storage.files import FileStore
 
+JWT_SECRET = '.jwt_secret'
 load_dotenv()
 
 
@@ -195,6 +199,16 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
         )
 
 
+def get_or_create_jwt_secret(file_store: FileStore) -> str:
+    """Read the JWT secret from `file_store`; create and persist one if missing."""
+    try:
+        return file_store.read(JWT_SECRET)
+    except FileNotFoundError:
+        secret = uuid4().hex
+        file_store.write(JWT_SECRET, secret)
+        return secret
+
+
 def finalize_config(cfg: AppConfig):
     """More tweaks to the config after it's been loaded."""
     if cfg.workspace_base is not None:
@@ -223,6 +237,11 @@ def finalize_config(cfg: AppConfig):
     if cfg.cache_dir:
         pathlib.Path(cfg.cache_dir).mkdir(parents=True, exist_ok=True)
 
+    if not cfg.jwt_secret:
+        cfg.jwt_secret = get_or_create_jwt_secret(
+            get_file_store(cfg.file_store, cfg.file_store_path)
+        )
+
 
 # Utility function for command line --group argument
 def get_llm_config_arg(
@@ -376,6 +395,11 @@ def get_parser() -> argparse.ArgumentParser:
         type=str,
         help='The comma-separated list (in quotes) of IDs of the instances to evaluate',
     )
+    parser.add_argument(
+        '--no-auto-continue',
+        action='store_true',
+        help='Disable automatic "continue" responses. Will read from stdin instead.',
+    )
     return parser
 
 
diff --git a/openhands/core/logger.py b/openhands/core/logger.py
index 238b4c39435c..07c799d7ff90 100644
--- a/openhands/core/logger.py
+++ b/openhands/core/logger.py
@@ -5,7 +5,8 @@
 import sys
 import traceback
 from datetime import datetime
-from typing import Literal, Mapping
+from types import TracebackType
+from typing import Any, Literal, Mapping
 
 from termcolor import colored
 
@@ -61,7 +62,8 @@ def format(self, record: logging.LogRecord) -> str:
 
 
 def strip_ansi(s: str) -> str:
-    """
+    """Remove ANSI escape sequences (terminal color/formatting codes) from string.
+
     Removes ANSI escape sequences from str, as defined by ECMA-048 in
     http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-048.pdf
     # https://github.com/ewen-lbh/python-strip-ansi/blob/master/strip_ansi/__init__.py
@@ -136,6 +138,7 @@ def write_immediately(self, line):
 
     def print_lines(self):
         """Display the last n log_lines in the console (not for file logging).
+
         This will create the effect of a rolling display in the console.
         """
         self.move_back()
@@ -143,18 +146,14 @@ def print_lines(self):
             self.replace_current_line(line)
 
     def move_back(self, amount=-1):
-        """
-        '\033[F'    moves the cursor up one line.
-        """
+        r"""Move the cursor up `amount` lines (default: `max_lines`) via '\033[F'."""
         if amount == -1:
             amount = self.max_lines
-        self._write('\033[F' * (self.max_lines))
+        self._write('\033[F' * amount)
         self._flush()
 
     def replace_current_line(self, line=''):
-        """
-        '\033[2K\r' clears the line and moves the cursor to the beginning of the line.
-        """
+        r"""Clear the line with '\033[2K', then write `line` plus a newline."""
         self._write('\033[2K' + line + '\n')
         self._flush()
 
@@ -232,18 +231,21 @@ def get_file_handler(log_dir: str, log_level: int = logging.INFO):
 logging.basicConfig(level=logging.ERROR)
 
 
-def log_uncaught_exceptions(ex_cls, ex, tb):
+def log_uncaught_exceptions(
+    ex_cls: type[BaseException], ex: BaseException, tb: TracebackType | None
+) -> Any:
     """Logs uncaught exceptions along with the traceback.
 
     Args:
-        ex_cls (type): The type of the exception.
-        ex (Exception): The exception instance.
-        tb (traceback): The traceback object.
+        ex_cls: The type of the exception.
+        ex: The exception instance.
+        tb: The traceback object.
 
     Returns:
         None
     """
-    logging.error(''.join(traceback.format_tb(tb)))
+    if tb:  # Add check since tb can be None
+        logging.error(''.join(traceback.format_tb(tb)))
     logging.error('{0}: {1}'.format(ex_cls, ex))
 
 
@@ -283,7 +285,7 @@ def log_uncaught_exceptions(ex_cls, ex, tb):
 
 
 class LlmFileHandler(logging.FileHandler):
-    """# LLM prompt and response logging"""
+    """LLM prompt and response logging."""
 
     def __init__(self, filename, mode='a', encoding='utf-8', delay=False):
         """Initializes an instance of LlmFileHandler.
diff --git a/openhands/core/main.py b/openhands/core/main.py
index d55aa0175102..3be74235ba12 100644
--- a/openhands/core/main.py
+++ b/openhands/core/main.py
@@ -188,7 +188,9 @@ async def on_event(event: Event):
                 if exit_on_message:
                     message = '/exit'
                 elif fake_user_response_fn is None:
-                    message = input('Request user input >> ')
+                    # read until EOF (Ctrl+D on Unix, Ctrl+Z on Windows)
+                    print('Request user input (press Ctrl+D/Z when done) >> ')
+                    message = sys.stdin.read().rstrip()
                 else:
                     message = fake_user_response_fn(controller.get_state())
                 action = MessageAction(content=message)
@@ -241,6 +243,17 @@ def generate_sid(config: AppConfig, session_name: str | None = None) -> str:
     return f'{session_name}-{hash_str[:16]}'
 
 
+def auto_continue_response(
+    state: State,
+    encapsulate_solution: bool = False,
+    try_parse: Callable[[Action | None], str] | None = None,
+) -> str:
+    """Default fake user response: always returns 'continue' so the agent proceeds
+    without asking for more input. All three parameters are accepted but ignored.
+    """
+    return 'continue'
+
+
 if __name__ == '__main__':
     args = parse_arguments()
 
@@ -284,5 +297,8 @@ def generate_sid(config: AppConfig, session_name: str | None = None) -> str:
             config=config,
             initial_user_action=initial_user_action,
             sid=sid,
+            fake_user_response_fn=None
+            if args.no_auto_continue
+            else auto_continue_response,
         )
     )
diff --git a/openhands/core/message.py b/openhands/core/message.py
index a5b67917eaee..e7ae745d386a 100644
--- a/openhands/core/message.py
+++ b/openhands/core/message.py
@@ -105,14 +105,6 @@ def _list_serializer(self) -> dict:
 
         message_dict: dict = {'content': content, 'role': self.role}
 
-        # pop content if it's empty
-        if not content or (
-            len(content) == 1
-            and content[0]['type'] == 'text'
-            and content[0]['text'] == ''
-        ):
-            message_dict.pop('content')
-
         if role_tool_with_prompt_caching:
             message_dict['cache_control'] = {'type': 'ephemeral'}
 
@@ -122,11 +114,21 @@ def _list_serializer(self) -> dict:
     def _add_tool_call_keys(self, message_dict: dict) -> dict:
         """Add tool call keys if we have a tool call or response.
 
-        NOTE: this is necessary for both native and non-native tool calling"""
+        NOTE: this is necessary for both native and non-native tool calling."""
 
         # an assistant message calling a tool
         if self.tool_calls is not None:
-            message_dict['tool_calls'] = self.tool_calls
+            message_dict['tool_calls'] = [
+                {
+                    'id': tool_call.id,
+                    'type': 'function',
+                    'function': {
+                        'name': tool_call.function.name,
+                        'arguments': tool_call.function.arguments,
+                    },
+                }
+                for tool_call in self.tool_calls
+            ]
 
         # an observation message with tool response
         if self.tool_call_id is not None:
diff --git a/openhands/core/message_format.md b/openhands/core/message_format.md
new file mode 100644
index 000000000000..caa9cf1b84df
--- /dev/null
+++ b/openhands/core/message_format.md
@@ -0,0 +1,89 @@
+# OpenHands Message Format and litellm Integration
+
+## Overview
+
+OpenHands uses its own `Message` class (`openhands/core/message.py`) which provides rich content support while maintaining compatibility with litellm's message handling system.
+
+## Class Structure
+
+Our `Message` class (`openhands/core/message.py`):
+```python
+class Message(BaseModel):
+    role: Literal['user', 'system', 'assistant', 'tool']
+    content: list[TextContent | ImageContent] = Field(default_factory=list)
+    cache_enabled: bool = False
+    vision_enabled: bool = False
+    condensable: bool = True
+    function_calling_enabled: bool = False
+    tool_calls: list[ChatCompletionMessageToolCall] | None = None
+    tool_call_id: str | None = None
+    name: str | None = None
+    event_id: int = -1
+```
+
+litellm's `Message` class (`litellm/types/utils.py`):
+```python
+class Message(OpenAIObject):
+    content: Optional[str]
+    role: Literal["assistant", "user", "system", "tool", "function"]
+    tool_calls: Optional[List[ChatCompletionMessageToolCall]]
+    function_call: Optional[FunctionCall]
+    audio: Optional[ChatCompletionAudioResponse] = None
+```
+
+## How It Works
+
+1. **Message Creation**: Our `Message` class is a Pydantic model that supports rich content (text and images) through its `content` field.
+
+2. **Serialization**: The class uses Pydantic's `@model_serializer` to convert messages into dictionaries that litellm can understand. We have two serialization methods:
+   ```python
+   def _string_serializer(self) -> dict:
+       # convert content to a single string
+       content = '\n'.join(item.text for item in self.content if isinstance(item, TextContent))
+       message_dict: dict = {'content': content, 'role': self.role}
+       return self._add_tool_call_keys(message_dict)
+
+   def _list_serializer(self) -> dict:
+       content: list[dict] = []
+       for item in self.content:
+           d = item.model_dump()
+           if isinstance(item, TextContent):
+               content.append(d)
+           elif isinstance(item, ImageContent) and self.vision_enabled:
+               content.extend(d)
+       return {'content': content, 'role': self.role}
+   ```
+
+   The appropriate serializer is chosen based on the message's capabilities:
+   ```python
+   @model_serializer
+   def serialize_model(self) -> dict:
+       if self.cache_enabled or self.vision_enabled or self.function_calling_enabled:
+           return self._list_serializer()
+       return self._string_serializer()
+   ```
+
+3. **Tool Call Handling**: Tool calls require special attention in serialization because:
+   - They need to work with litellm's API calls (which accept both dicts and objects)
+   - They need to be properly serialized for token counting
+   - They need to maintain compatibility with different LLM providers' formats
+
+4. **litellm Integration**: When we pass our messages to `litellm.completion()`, litellm doesn't care about the message class type - it works with the dictionary representation. This works because:
+   - litellm's transformation code (e.g., `litellm/llms/anthropic/chat/transformation.py`) processes messages based on their structure, not their type
+   - our serialization produces dictionaries that match litellm's expected format
+   - litellm handles rich content by looking at the message structure, supporting both simple string content and lists of content items
+
+5. **Provider-Specific Handling**: litellm then transforms these messages into provider-specific formats (e.g., Anthropic, OpenAI) through its transformation layers, which know how to handle both simple and rich content structures.
+
+### Token Counting
+
+To use litellm's token counter, we need to make sure that all message components (including tool calls) are properly serialized to dictionaries. This is because:
+- litellm's token counter expects dictionary structures
+- Tool calls need to be included in the token count
+- Different providers may count tokens differently for structured content
+
+## Note
+
+- We don't need to inherit from litellm's `Message` class because litellm works with dictionary representations, not class types
+- Our rich content model is more sophisticated than litellm's basic string content, but litellm handles it correctly through its transformation layers
+- The compatibility is maintained through proper serialization rather than inheritance
diff --git a/openhands/events/observation/commands.py b/openhands/events/observation/commands.py
index a182168e694a..b522b5c47283 100644
--- a/openhands/events/observation/commands.py
+++ b/openhands/events/observation/commands.py
@@ -23,6 +23,10 @@ def error(self) -> bool:
     def message(self) -> str:
         return f'Command `{self.command}` executed with exit code {self.exit_code}.'
 
+    @property
+    def success(self) -> bool:
+        return not self.error
+
     def __str__(self) -> str:
         return f'**CmdOutputObservation (source={self.source}, exit code={self.exit_code})**\n{self.content}'
 
@@ -42,5 +46,9 @@ def error(self) -> bool:
     def message(self) -> str:
         return 'Code executed in IPython cell.'
 
+    @property
+    def success(self) -> bool:
+        return True  # IPython cells are always considered successful
+
     def __str__(self) -> str:
         return f'**IPythonRunCellObservation**\n{self.content}'
diff --git a/openhands/events/serialization/event.py b/openhands/events/serialization/event.py
index 78f7940626d4..6ee82a1cc81c 100644
--- a/openhands/events/serialization/event.py
+++ b/openhands/events/serialization/event.py
@@ -83,6 +83,9 @@ def event_to_dict(event: 'Event') -> dict:
     elif 'observation' in d:
         d['content'] = props.pop('content', '')
         d['extras'] = props
+        # Include success field for CmdOutputObservation
+        if hasattr(event, 'success'):
+            d['success'] = event.success
     else:
         raise ValueError('Event must be either action or observation')
     return d
diff --git a/openhands/events/serialization/observation.py b/openhands/events/serialization/observation.py
index 9030ccb1e1dd..d9d8dc51adaf 100644
--- a/openhands/events/serialization/observation.py
+++ b/openhands/events/serialization/observation.py
@@ -50,4 +50,5 @@ def observation_from_dict(observation: dict) -> Observation:
     observation.pop('message', None)
     content = observation.pop('content', '')
     extras = observation.pop('extras', {})
+
     return observation_class(content=content, **extras)
diff --git a/openhands/llm/debug_mixin.py b/openhands/llm/debug_mixin.py
index f386613d6fba..6a247471ee78 100644
--- a/openhands/llm/debug_mixin.py
+++ b/openhands/llm/debug_mixin.py
@@ -16,7 +16,7 @@ def log_prompt(self, messages: list[dict[str, Any]] | dict[str, Any]):
         debug_message = MESSAGE_SEPARATOR.join(
             self._format_message_content(msg)
             for msg in messages
-            if msg.get('content', None)
+            if msg['content'] is not None
         )
 
         if debug_message:
diff --git a/openhands/llm/fn_call_converter.py b/openhands/llm/fn_call_converter.py
index 491ef906eaa1..ae4d87f8d62b 100644
--- a/openhands/llm/fn_call_converter.py
+++ b/openhands/llm/fn_call_converter.py
@@ -321,7 +321,7 @@ def convert_fncall_messages_to_non_fncall_messages(
     first_user_message_encountered = False
     for message in messages:
         role = message['role']
-        content = message.get('content', '')
+        content = message['content']
 
         # 1. SYSTEM MESSAGES
         # append system prompt suffix to content
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 700c3827fda0..6b87b330096d 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -199,9 +199,17 @@ def wrapper(*args, **kwargs):
                     }
 
             try:
+                # Record start time for latency measurement
+                start_time = time.time()
+
                 # we don't support streaming here, thus we get a ModelResponse
                 resp: ModelResponse = self._completion_unwrapped(*args, **kwargs)
 
+                # Calculate and record latency
+                latency = time.time() - start_time
+                response_id = resp.get('id', 'unknown')
+                self.metrics.add_response_latency(latency, response_id)
+
                 non_fncall_response = copy.deepcopy(resp)
                 if mock_function_calling:
                     assert len(resp.choices) == 1
@@ -219,6 +227,20 @@ def wrapper(*args, **kwargs):
                         )
                     resp.choices[0].message = fn_call_response_message
 
+                message_back: str = resp['choices'][0]['message']['content'] or ''
+                tool_calls = resp['choices'][0]['message'].get('tool_calls', [])
+                if tool_calls:
+                    for tool_call in tool_calls:
+                        fn_name = tool_call.function.name
+                        fn_args = tool_call.function.arguments
+                        message_back += f'\nFunction call: {fn_name}({fn_args})'
+
+                # log the LLM response
+                self.log_response(message_back)
+
+                # post-process the response first to calculate cost
+                cost = self._post_completion(resp)
+
                 # log for evals or other scripts that need the raw completion
                 if self.config.log_completions:
                     assert self.config.log_completions_folder is not None
@@ -228,37 +250,27 @@ def wrapper(*args, **kwargs):
                         f'{self.metrics.model_name.replace("/", "__")}-{time.time()}.json',
                     )
 
+                    # set up the dict to be logged
                     _d = {
                         'messages': messages,
                         'response': resp,
                         'args': args,
                         'kwargs': {k: v for k, v in kwargs.items() if k != 'messages'},
                         'timestamp': time.time(),
-                        'cost': self._completion_cost(resp),
+                        'cost': cost,
                     }
+
+                    # if non-native function calling, save messages/response separately
                     if mock_function_calling:
-                        # Overwrite response as non-fncall to be consistent with `messages``
+                        # Overwrite response as non-fncall to be consistent with messages
                         _d['response'] = non_fncall_response
+
                         # Save fncall_messages/response separately
                         _d['fncall_messages'] = original_fncall_messages
                         _d['fncall_response'] = resp
                     with open(log_file, 'w') as f:
                         f.write(json.dumps(_d))
 
-                message_back: str = resp['choices'][0]['message']['content'] or ''
-                tool_calls = resp['choices'][0]['message'].get('tool_calls', [])
-                if tool_calls:
-                    for tool_call in tool_calls:
-                        fn_name = tool_call.function.name
-                        fn_args = tool_call.function.arguments
-                        message_back += f'\nFunction call: {fn_name}({fn_args})'
-
-                # log the LLM response
-                self.log_response(message_back)
-
-                # post-process the response
-                self._post_completion(resp)
-
                 return resp
             except APIError as e:
                 if 'Attention Required! | Cloudflare' in str(e):
@@ -414,7 +426,7 @@ def is_function_calling_active(self) -> bool:
         )
         return model_name_supported
 
-    def _post_completion(self, response: ModelResponse) -> None:
+    def _post_completion(self, response: ModelResponse) -> float:
         """Post-process the completion response.
 
         Logs the cost and usage stats of the completion call.
@@ -432,6 +444,11 @@ def _post_completion(self, response: ModelResponse) -> None:
                 self.metrics.accumulated_cost,
             )
 
+        # Add latency to stats if available
+        if self.metrics.response_latencies:
+            latest_latency = self.metrics.response_latencies[-1]
+            stats += 'Response Latency: %.3f seconds\n' % latest_latency.latency
+
         usage: Usage | None = response.get('usage')
 
         if usage:
@@ -472,6 +489,8 @@ def _post_completion(self, response: ModelResponse) -> None:
         if stats:
             logger.debug(stats)
 
+        return cur_cost
+
     def get_token_count(self, messages) -> int:
         """Get the number of tokens in a list of messages.
 
diff --git a/openhands/llm/metrics.py b/openhands/llm/metrics.py
index 182d48d93b89..5388dd04025c 100644
--- a/openhands/llm/metrics.py
+++ b/openhands/llm/metrics.py
@@ -9,15 +9,25 @@ class Cost(BaseModel):
     timestamp: float = Field(default_factory=time.time)
 
 
+class ResponseLatency(BaseModel):
+    """Metric tracking the round-trip time per completion call."""
+
+    model: str  # model name the measurement was recorded under
+    latency: float  # elapsed time of the completion call, in seconds
+    response_id: str  # id of the response this measurement belongs to
+
+
 class Metrics:
     """Metrics class can record various metrics during running and evaluation.
     Currently, we define the following metrics:
         accumulated_cost: the total cost (USD $) of the current LLM.
+        response_latency: the time taken for each LLM completion call.
     """
 
     def __init__(self, model_name: str = 'default') -> None:
         self._accumulated_cost: float = 0.0
         self._costs: list[Cost] = []
+        self._response_latencies: list[ResponseLatency] = []
         self.model_name = model_name
 
     @property
@@ -34,26 +44,44 @@ def accumulated_cost(self, value: float) -> None:
     def costs(self) -> list[Cost]:
         return self._costs
 
+    @property
+    def response_latencies(self) -> list[ResponseLatency]:
+        return self._response_latencies
+
     def add_cost(self, value: float) -> None:
         if value < 0:
             raise ValueError('Added cost cannot be negative.')
         self._accumulated_cost += value
         self._costs.append(Cost(cost=value, model=self.model_name))
 
+    def add_response_latency(self, value: float, response_id: str) -> None:
+        """Record one completion's latency in seconds; negative values are rejected."""
+        if value < 0:
+            raise ValueError('Response latency cannot be negative.')
+        entry = ResponseLatency(
+            model=self.model_name, latency=value, response_id=response_id
+        )
+        self._response_latencies.append(entry)
+
     def merge(self, other: 'Metrics') -> None:
         self._accumulated_cost += other.accumulated_cost
         self._costs += other._costs
+        self._response_latencies += other._response_latencies
 
     def get(self) -> dict:
         """Return the metrics in a dictionary."""
         return {
             'accumulated_cost': self._accumulated_cost,
             'costs': [cost.model_dump() for cost in self._costs],
+            'response_latencies': [
+                latency.model_dump() for latency in self._response_latencies
+            ],
         }
 
     def reset(self):
         self._accumulated_cost = 0.0
         self._costs = []
+        self._response_latencies = []
 
     def log(self):
         """Log the metrics."""
diff --git a/openhands/memory/memory.py b/openhands/memory/memory.py
index 9d83cc9cdc8c..b3ad8fecb8e4 100644
--- a/openhands/memory/memory.py
+++ b/openhands/memory/memory.py
@@ -51,6 +51,7 @@ def __init__(
         self.embed_model = EmbeddingsLoader.get_embedding_model(
             embedding_strategy, llm_config
         )
+        logger.debug(f'Using embedding model: {self.embed_model}')
 
         # instantiate the index
         self.index = VectorStoreIndex.from_vector_store(vector_store, self.embed_model)
diff --git a/openhands/resolver/README.md b/openhands/resolver/README.md
index a43470f901ae..296d8b085c67 100644
--- a/openhands/resolver/README.md
+++ b/openhands/resolver/README.md
@@ -32,11 +32,11 @@ Follow these steps to use this workflow in your own repository:
 
 5. Set up [GitHub secrets](https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions):
    - Required:
+     - `LLM_API_KEY`: Your LLM API key
+   - Optional:
      - `PAT_USERNAME`: GitHub username for the personal access token
      - `PAT_TOKEN`: The personal access token
-     - `LLM_MODEL`: LLM model to use (e.g., "anthropic/claude-3-5-sonnet-20241022")
-     - `LLM_API_KEY`: Your LLM API key
-   - Optional:
+     - `LLM_MODEL`: LLM model to use (defaults to "anthropic/claude-3-5-sonnet-20241022")
      - `LLM_BASE_URL`: Base URL for LLM API (only if using a proxy)
 
    Note: You can set these secrets at the organization level to use across multiple repositories.
@@ -61,7 +61,7 @@ Follow these steps to use this workflow in your own repository:
         2. Create a draft PR if successful, or push a branch if unsuccessful
         3. Comment on the issue with the results
 
-Need help? Feel free to [open an issue](https://github.com/all-hands-ai/openhands-resolver/issues) or email us at [contact@all-hands.dev](mailto:contact@all-hands.dev).
+Need help? Feel free to [open an issue](https://github.com/all-hands-ai/openhands/issues) or email us at [contact@all-hands.dev](mailto:contact@all-hands.dev).
 
 ## Manual Installation
 
@@ -111,7 +111,7 @@ python -m openhands.resolver.resolve_issue --repo [OWNER]/[REPO] --issue-number
 For instance, if you want to resolve issue #100 in this repo, you would run:
 
 ```bash
-python -m openhands.resolver.resolve_issue --repo all-hands-ai/openhands-resolver --issue-number 100
+python -m openhands.resolver.resolve_issue --repo all-hands-ai/openhands --issue-number 100
 ```
 
 The output will be written to the `output/` directory.
@@ -119,7 +119,7 @@ The output will be written to the `output/` directory.
 If you've installed the package from source using poetry, you can use:
 
 ```bash
-poetry run python openhands/resolver/resolve_issue.py --repo all-hands-ai/openhands-resolver --issue-number 100
+poetry run python openhands/resolver/resolve_issue.py --repo all-hands-ai/openhands --issue-number 100
 ```
 
 For resolving multiple issues at once (e.g., in a batch process), you can use the `resolve_all_issues` command:
@@ -131,7 +131,7 @@ python -m openhands.resolver.resolve_all_issues --repo [OWNER]/[REPO] --issue-nu
 For example:
 
 ```bash
-python -m openhands.resolver.resolve_all_issues --repo all-hands-ai/openhands-resolver --issue-numbers 100,101,102
+python -m openhands.resolver.resolve_all_issues --repo all-hands-ai/openhands --issue-numbers 100,101,102
 ```
 
 ## Responding to PR Comments
diff --git a/openhands/resolver/examples/openhands-resolver.yml b/openhands/resolver/examples/openhands-resolver.yml
index a244af04c6a1..95075680156b 100644
--- a/openhands/resolver/examples/openhands-resolver.yml
+++ b/openhands/resolver/examples/openhands-resolver.yml
@@ -23,10 +23,10 @@ jobs:
     with:
       macro: ${{ vars.OPENHANDS_MACRO || '@openhands-agent' }}
       max_iterations: ${{ fromJson(vars.OPENHANDS_MAX_ITER || 50) }}
-      base_container_image: ${{ vars.OPENHANDS_BASE_CONTAINER_IMAGE || "" }}
+      base_container_image: ${{ vars.OPENHANDS_BASE_CONTAINER_IMAGE || '' }}
+      LLM_MODEL: ${{ vars.LLM_MODEL }}
     secrets:
       PAT_TOKEN: ${{ secrets.PAT_TOKEN }}
       PAT_USERNAME: ${{ secrets.PAT_USERNAME }}
-      LLM_MODEL: ${{ secrets.LLM_MODEL }}
       LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
       LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
diff --git a/openhands/resolver/issue_definitions.py b/openhands/resolver/issue_definitions.py
index 52c552b10227..f36bb7a61527 100644
--- a/openhands/resolver/issue_definitions.py
+++ b/openhands/resolver/issue_definitions.py
@@ -62,19 +62,23 @@ def _download_issues_from_github(self) -> list[Any]:
         params: dict[str, int | str] = {'state': 'open', 'per_page': 100, 'page': 1}
         all_issues = []
 
+        # Get issues, page by page
         while True:
             response = requests.get(url, headers=headers, params=params)
             response.raise_for_status()
             issues = response.json()
 
+            # No more issues, break the loop
             if not issues:
                 break
 
+            # Sanity check - the response is a list of dictionaries
             if not isinstance(issues, list) or any(
                 [not isinstance(issue, dict) for issue in issues]
             ):
                 raise ValueError('Expected list of dictionaries from Github API.')
 
+            # Add the issues to the final list
             all_issues.extend(issues)
             assert isinstance(params['page'], int)
             params['page'] += 1
@@ -107,7 +111,12 @@ def _extract_issue_references(self, body: str) -> list[int]:
     def _get_issue_comments(
         self, issue_number: int, comment_id: int | None = None
     ) -> list[str] | None:
-        """Download comments for a specific issue from Github."""
+        """Retrieve comments for a specific issue from Github.
+
+        Args:
+            issue_number: The ID of the issue to get comments for
+            comment_id: The ID of a single comment, if provided, otherwise all comments
+        """
         url = f'https://api.github.com/repos/{self.owner}/{self.repo}/issues/{issue_number}/comments'
         headers = {
             'Authorization': f'token {self.token}',
@@ -116,6 +125,7 @@ def _get_issue_comments(
         params = {'per_page': 100, 'page': 1}
         all_comments = []
 
+        # Get comments, page by page
         while True:
             response = requests.get(url, headers=headers, params=params)
             response.raise_for_status()
@@ -124,6 +134,7 @@ def _get_issue_comments(
             if not comments:
                 break
 
+            # If a single comment ID is provided, return only that comment
             if comment_id:
                 matching_comment = next(
                     (
@@ -136,6 +147,7 @@ def _get_issue_comments(
                 if matching_comment:
                     return [matching_comment]
             else:
+                # Otherwise, return all comments
                 all_comments.extend([comment['body'] for comment in comments])
 
             params['page'] += 1
@@ -147,6 +159,10 @@ def get_converted_issues(
     ) -> list[GithubIssue]:
         """Download issues from Github.
 
+        Args:
+            issue_numbers: The numbers of the issues to download
+            comment_id: The ID of a single comment, if provided, otherwise all comments
+
         Returns:
             List of Github issues.
         """
@@ -203,7 +219,14 @@ def get_instruction(
         prompt_template: str,
         repo_instruction: str | None = None,
     ) -> tuple[str, list[str]]:
-        """Generate instruction for the agent."""
+        """Generate instruction for the agent.
+
+        Args:
+            issue: The issue to generate instruction for
+            prompt_template: The prompt template to use
+            repo_instruction: The repository instruction if it exists
+        """
+
         # Format thread comments if they exist
         thread_context = ''
         if issue.thread_comments:
@@ -211,6 +234,7 @@ def get_instruction(
                 issue.thread_comments
             )
 
+        # Extract image URLs from the issue body and thread comments
         images = []
         images.extend(self._extract_image_urls(issue.body))
         images.extend(self._extract_image_urls(thread_context))
@@ -227,8 +251,14 @@ def get_instruction(
     def guess_success(
         self, issue: GithubIssue, history: list[Event]
     ) -> tuple[bool, None | list[bool], str]:
-        """Guess if the issue is fixed based on the history and the issue description."""
+        """Guess if the issue is fixed based on the history and the issue description.
+
+        Args:
+            issue: The issue to check
+            history: The agent's history
+        """
         last_message = history[-1].message
+
         # Include thread comments in the prompt if they exist
         issue_context = issue.body
         if issue.thread_comments:
@@ -236,6 +266,7 @@ def guess_success(
                 issue.thread_comments
             )
 
+        # Prepare the prompt
         with open(
             os.path.join(
                 os.path.dirname(__file__),
@@ -246,6 +277,7 @@ def guess_success(
             template = jinja2.Template(f.read())
         prompt = template.render(issue_context=issue_context, last_message=last_message)
 
+        # Get the LLM response and check for 'success' and 'explanation' in the answer
         response = self.llm.completion(messages=[{'role': 'user', 'content': prompt}])
 
         answer = response.choices[0].message.content.strip()
@@ -328,6 +360,7 @@ def __download_pr_metadata(
 
         variables = {'owner': self.owner, 'repo': self.repo, 'pr': pull_number}
 
+        # Run the query
         url = 'https://api.github.com/graphql'
         headers = {
             'Authorization': f'Bearer {self.token}',
@@ -394,10 +427,12 @@ def __download_pr_metadata(
                             review_thread['body'] + '\n'
                         )  # Add each thread in a new line
 
+                    # Source files on which the comments were made
                     file = review_thread.get('path')
                     if file and file not in files:
                         files.append(file)
 
+                # If the comment ID is not provided or the thread contains the comment ID, add the thread to the list
                 if comment_id is None or thread_contains_comment_id:
                     unresolved_thread = ReviewThread(comment=message, files=files)
                     review_threads.append(unresolved_thread)
diff --git a/openhands/resolver/resolve_issue.py b/openhands/resolver/resolve_issue.py
index 63a9e40a05ba..a90c4fb2ce05 100644
--- a/openhands/resolver/resolve_issue.py
+++ b/openhands/resolver/resolve_issue.py
@@ -344,6 +344,14 @@ async def resolve_issue(
         issue_numbers=[issue_number], comment_id=comment_id
     )
 
+    if not issues:
+        raise ValueError(
+            f'No issues found for issue number {issue_number}. Please verify that:\n'
+            f'1. The issue/PR #{issue_number} exists in the repository {owner}/{repo}\n'
+            f'2. You have the correct permissions to access it\n'
+            f'3. The repository name is spelled correctly'
+        )
+
     issue = issues[0]
 
     if comment_id is not None:
diff --git a/openhands/resolver/send_pull_request.py b/openhands/resolver/send_pull_request.py
index 29f720160ba7..d888691e5857 100644
--- a/openhands/resolver/send_pull_request.py
+++ b/openhands/resolver/send_pull_request.py
@@ -5,11 +5,11 @@
 import subprocess
 
 import jinja2
-import litellm
 import requests
 
 from openhands.core.config import LLMConfig
 from openhands.core.logger import openhands_logger as logger
+from openhands.llm.llm import LLM
 from openhands.resolver.github_issue import GithubIssue
 from openhands.resolver.io_utils import (
     load_all_resolver_outputs,
@@ -20,6 +20,12 @@
 
 
 def apply_patch(repo_dir: str, patch: str) -> None:
+    """Apply a patch to a repository.
+
+    Args:
+        repo_dir: The directory containing the repository
+        patch: The patch to apply
+    """
     diffs = parse_patch(patch)
     for diff in diffs:
         if not diff.header.new_path:
@@ -112,6 +118,14 @@ def apply_patch(repo_dir: str, patch: str) -> None:
 def initialize_repo(
     output_dir: str, issue_number: int, issue_type: str, base_commit: str | None = None
 ) -> str:
+    """Initialize the repository.
+
+    Args:
+        output_dir: The output directory to write the repository to
+        issue_number: The issue number to fix
+        issue_type: The type of the issue
+        base_commit: The base commit to checkout (if issue_type is pr)
+    """
     src_dir = os.path.join(output_dir, 'repo')
     dest_dir = os.path.join(output_dir, 'patches', f'{issue_type}_{issue_number}')
 
@@ -124,6 +138,7 @@ def initialize_repo(
     shutil.copytree(src_dir, dest_dir)
     print(f'Copied repository to {dest_dir}')
 
+    # Checkout the base commit if provided
     if base_commit:
         result = subprocess.run(
             f'git -C {dest_dir} checkout {base_commit}',
@@ -139,6 +154,13 @@ def initialize_repo(
 
 
 def make_commit(repo_dir: str, issue: GithubIssue, issue_type: str) -> None:
+    """Make a commit with the changes to the repository.
+
+    Args:
+        repo_dir: The directory containing the repository
+        issue: The issue to fix
+        issue_type: The type of the issue
+    """
     # Check if git username is set
     result = subprocess.run(
         f'git -C {repo_dir} config user.name',
@@ -158,6 +180,7 @@ def make_commit(repo_dir: str, issue: GithubIssue, issue_type: str) -> None:
         )
         print('Git user configured as openhands')
 
+    # Add all changes to the git index
     result = subprocess.run(
         f'git -C {repo_dir} add .', shell=True, capture_output=True, text=True
     )
@@ -165,6 +188,7 @@ def make_commit(repo_dir: str, issue: GithubIssue, issue_type: str) -> None:
         print(f'Error adding files: {result.stderr}')
         raise RuntimeError('Failed to add files to git')
 
+    # Check the status of the git index
     status_result = subprocess.run(
         f'git -C {repo_dir} status --porcelain',
         shell=True,
@@ -172,11 +196,15 @@ def make_commit(repo_dir: str, issue: GithubIssue, issue_type: str) -> None:
         text=True,
     )
 
+    # If there are no changes, raise an error
     if not status_result.stdout.strip():
         print(f'No changes to commit for issue #{issue.number}. Skipping commit.')
         raise RuntimeError('ERROR: Openhands failed to make code changes.')
 
+    # Prepare the commit message
     commit_message = f'Fix {issue_type} #{issue.number}: {issue.title}'
+
+    # Commit the changes
     result = subprocess.run(
         ['git', '-C', repo_dir, 'commit', '-m', commit_message],
         capture_output=True,
@@ -206,12 +234,24 @@ def send_pull_request(
     github_token: str,
     github_username: str | None,
     patch_dir: str,
-    llm_config: LLMConfig,
     pr_type: str,
     fork_owner: str | None = None,
     additional_message: str | None = None,
     target_branch: str | None = None,
+    reviewer: str | None = None,
 ) -> str:
+    """Send a pull request to a GitHub repository.
+
+    Args:
+        github_issue: The issue to send the pull request for
+        github_token: The GitHub token to use for authentication
+        github_username: The GitHub username, if provided
+        patch_dir: The directory containing the patches to apply
+        pr_type: The type: branch (no PR created), draft or ready (regular PR created)
+        fork_owner: The owner of the fork to push changes to (if different from the original repo owner)
+        additional_message: The additional messages to post as a comment on the PR in json list format
+        target_branch: The target branch to create the pull request against (defaults to repository default branch)
+    """
     if pr_type not in ['branch', 'draft', 'ready']:
         raise ValueError(f'Invalid pr_type: {pr_type}')
 
@@ -227,6 +267,7 @@ def send_pull_request(
     branch_name = base_branch_name
     attempt = 1
 
+    # Find a unique branch name
     print('Checking if branch exists...')
     while branch_exists(base_url, branch_name, headers):
         attempt += 1
@@ -279,6 +320,7 @@ def send_pull_request(
         print(f'Error pushing changes: {result.stderr}')
         raise RuntimeError('Failed to push changes to the remote repository')
 
+    # Prepare the PR data: title and body
     pr_title = f'Fix issue #{github_issue.number}: {github_issue.title}'
     pr_body = f'This pull request fixes #{github_issue.number}.'
     if additional_message:
@@ -290,6 +332,7 @@ def send_pull_request(
     if pr_type == 'branch':
         url = f'https://github.com/{push_owner}/{github_issue.repo}/compare/{branch_name}?expand=1'
     else:
+        # Prepare the PR for the GitHub API
         data = {
             'title': pr_title,  # No need to escape title for GitHub API
             'body': pr_body,
@@ -297,6 +340,8 @@ def send_pull_request(
             'base': base_branch,
             'draft': pr_type == 'draft',
         }
+
+        # Send the PR and get its URL to tell the user
         response = requests.post(f'{base_url}/pulls', headers=headers, json=data)
         if response.status_code == 403:
             raise RuntimeError(
@@ -306,6 +351,19 @@ def send_pull_request(
         response.raise_for_status()
         pr_data = response.json()
 
+        # Request review if a reviewer was specified
+        if reviewer and pr_type != 'branch':
+            review_data = {'reviewers': [reviewer]}
+            review_response = requests.post(
+                f'{base_url}/pulls/{pr_data["number"]}/requested_reviewers',
+                headers=headers,
+                json=review_data,
+            )
+            if review_response.status_code != 201:
+                print(
+                    f'Warning: Failed to request review from {reviewer}: {review_response.text}'
+                )
+
         url = pr_data['html_url']
 
     print(f'{pr_type} created: {url}\n\n--- Title: {pr_title}\n\n--- Body:\n{pr_body}')
@@ -314,6 +372,13 @@ def send_pull_request(
 
 
 def reply_to_comment(github_token: str, comment_id: str, reply: str):
+    """Reply to a comment on a GitHub issue or pull request.
+
+    Args:
+        github_token: The GitHub token to use for authentication
+        comment_id: The ID of the comment to reply to
+        reply: The reply message to post
+    """
     # Opting for graphql as REST API doesn't allow reply to replies in comment threads
     query = """
             mutation($body: String!, $pullRequestReviewThreadId: ID!) {
@@ -327,6 +392,7 @@ def reply_to_comment(github_token: str, comment_id: str, reply: str):
             }
             """
 
+    # Prepare the reply to the comment
     comment_reply = f'Openhands fix success summary\n\n\n{reply}'
     variables = {'body': comment_reply, 'pullRequestReviewThreadId': comment_id}
     url = 'https://api.github.com/graphql'
@@ -335,6 +401,7 @@ def reply_to_comment(github_token: str, comment_id: str, reply: str):
         'Content-Type': 'application/json',
     }
 
+    # Send the reply to the comment
     response = requests.post(
         url, json={'query': query, 'variables': variables}, headers=headers
     )
@@ -392,13 +459,14 @@ def update_existing_pull_request(
     base_url = f'https://api.github.com/repos/{github_issue.owner}/{github_issue.repo}'
     branch_name = github_issue.head_branch
 
-    # Push the changes to the existing branch
+    # Prepare the push command
     push_command = (
         f'git -C {patch_dir} push '
         f'https://{github_username}:{github_token}@github.com/'
         f'{github_issue.owner}/{github_issue.repo}.git {branch_name}'
     )
 
+    # Push the changes to the existing branch
     result = subprocess.run(push_command, shell=True, capture_output=True, text=True)
     if result.returncode != 0:
         print(f'Error pushing changes: {result.stderr}')
@@ -420,6 +488,7 @@ def update_existing_pull_request(
 
                 # Summarize with LLM if provided
                 if llm_config is not None:
+                    llm = LLM(llm_config)
                     with open(
                         os.path.join(
                             os.path.dirname(__file__),
@@ -429,16 +498,13 @@ def update_existing_pull_request(
                     ) as f:
                         template = jinja2.Template(f.read())
                     prompt = template.render(comment_message=comment_message)
-                    response = litellm.completion(
-                        model=llm_config.model,
+                    response = llm.completion(
                         messages=[{'role': 'user', 'content': prompt}],
-                        api_key=llm_config.api_key,
-                        base_url=llm_config.base_url,
                     )
                     comment_message = response.choices[0].message.content.strip()
 
         except (json.JSONDecodeError, TypeError):
-            comment_message = 'New OpenHands update'
+            comment_message = f'A new OpenHands update is available, but failed to parse or summarize the changes:\n{additional_message}'
 
     # Post a comment on the PR
     if comment_message:
@@ -468,6 +534,7 @@ def process_single_issue(
     fork_owner: str | None,
     send_on_failure: bool,
     target_branch: str | None = None,
+    reviewer: str | None = None,
 ) -> None:
     if not resolver_output.success and not send_on_failure:
         print(
@@ -514,10 +581,10 @@ def process_single_issue(
             github_username=github_username,
             patch_dir=patched_repo_dir,
             pr_type=pr_type,
-            llm_config=llm_config,
             fork_owner=fork_owner,
             additional_message=resolver_output.success_explanation,
             target_branch=target_branch,
+            reviewer=reviewer,
         )
 
 
@@ -614,6 +681,12 @@ def main():
         default=None,
         help='Target branch to create the pull request against (defaults to repository default branch)',
     )
+    parser.add_argument(
+        '--reviewer',
+        type=str,
+        help='GitHub username of the person to request review from',
+        default=None,
+    )
     my_args = parser.parse_args()
 
     github_token = (
@@ -667,6 +740,7 @@ def main():
             my_args.fork_owner,
             my_args.send_on_failure,
             my_args.target_branch,
+            my_args.reviewer,
         )
 
 
diff --git a/openhands/runtime/action_execution_server.py b/openhands/runtime/action_execution_server.py
index b48a9b55a693..26b728284e33 100644
--- a/openhands/runtime/action_execution_server.py
+++ b/openhands/runtime/action_execution_server.py
@@ -9,8 +9,10 @@
 import asyncio
 import base64
 import io
+import json
 import mimetypes
 import os
+import re
 import shutil
 import tempfile
 import time
@@ -199,6 +201,26 @@ async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
 
             obs: IPythonRunCellObservation = await _jupyter_plugin.run(action)
             obs.content = obs.content.rstrip()
+            matches = re.findall(
+                r'<oh_aci_output>(.*?)</oh_aci_output>', obs.content, re.DOTALL
+            )
+            if matches:
+                results = []
+                for match in matches:
+                    try:
+                        result_dict = json.loads(match)
+                        results.append(
+                            result_dict.get('formatted_output_and_error', '')
+                        )
+                    except json.JSONDecodeError:
+                        # Handle JSON decoding errors if necessary
+                        results.append(
+                            f"Invalid JSON in 'openhands-aci' output: {match}"
+                        )
+
+                # Combine the results (e.g., join them) or handle them as required
+                obs.content = '\n'.join(results)
+
             if action.include_extra:
                 obs.content += (
                     f'\n[Jupyter current working directory: {self.bash_session.pwd}]'
diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py
index 4c5f577ec133..5d4e79e19c80 100644
--- a/openhands/runtime/base.py
+++ b/openhands/runtime/base.py
@@ -213,6 +213,47 @@ async def on_event(self, event: Event) -> None:
             source = event.source if event.source else EventSource.AGENT
             self.event_stream.add_event(observation, source)  # type: ignore[arg-type]
 
+    def clone_repo(self, github_token: str | None, selected_repository: str | None):
+        """Clone `selected_repository` from GitHub and create an `openhands-workspace` branch."""
+        if not github_token or not selected_repository:
+            return  # both a token and a repository are required; otherwise do nothing
+        url = f'https://{github_token}@github.com/{selected_repository}.git'
+        dir_name = selected_repository.split('/')[1]  # presumably 'owner/repo' -> 'repo'
+        command = f'git clone {url} {dir_name} ; cd {dir_name} ; git checkout -b openhands-workspace'
+        # The URL embeds the token, so log only the repository name.
+        self.log('info', f'Cloning repo: {selected_repository}')
+        self.run_action(CmdRunAction(command=command))
+
+    def get_custom_microagents(self, selected_repository: str | None) -> list[str]:
+        """Return `.openhands_instructions` (with a microagent header) plus each custom microagent file.
+
+        Args:
+            selected_repository: Optional 'owner/repo'; if set, microagents are read from 'repo/.openhands/microagents'
+        """
+        custom_microagents_content = []
+        custom_microagents_dir = Path('.openhands') / 'microagents'
+        if selected_repository:
+            custom_microagents_dir = (
+                Path(selected_repository.split('/')[1]) / custom_microagents_dir
+            )
+        oh_instructions_header = '---\nname: openhands_instructions\nagent: CodeActAgent\ntriggers:\n- ""\n---\n'
+        obs = self.read(FileReadAction(path='.openhands_instructions'))
+        if isinstance(obs, ErrorObservation):
+            self.log('error', 'Failed to read openhands_instructions')
+        else:
+            openhands_instructions = oh_instructions_header + obs.content
+            self.log('info', f'openhands_instructions: {openhands_instructions}')
+            custom_microagents_content.append(openhands_instructions)
+
+        files = self.list_files(str(custom_microagents_dir))
+        self.log('info', f'Found {len(files)} custom microagents.')
+        for fname in files:
+            # Read from the same directory that was listed (it includes the repo folder, if any).
+            path = str(custom_microagents_dir / fname)
+            content = self.read(FileReadAction(path=path)).content
+            custom_microagents_content.append(content)
+        return custom_microagents_content
+
     def run_action(self, action: Action) -> Observation:
         """Run an action and return the resulting observation.
         If the action is not runnable in any runtime, a NullObservation is returned.
diff --git a/openhands/runtime/builder/base.py b/openhands/runtime/builder/base.py
index 4930b13d7ffd..acfe3c60fb89 100644
--- a/openhands/runtime/builder/base.py
+++ b/openhands/runtime/builder/base.py
@@ -8,14 +8,16 @@ def build(
         path: str,
         tags: list[str],
         platform: str | None = None,
+        extra_build_args: list[str] | None = None,
     ) -> str:
-        """
-        Build the runtime image.
+        """Build the runtime image.
 
         Args:
             path (str): The path to the runtime image's build directory.
             tags (list[str]): The tags to apply to the runtime image (e.g., ["repo:my-repo", "sha:my-sha"]).
             platform (str, optional): The target platform for the build. Defaults to None.
+            extra_build_args (list[str], optional): Additional build arguments to pass to the builder. Defaults to None.
+
         Returns:
             str: The name:tag of the runtime image after build (e.g., "repo:sha").
                 This can be different from the tags input if the builder chooses to mutate the tags (e.g., adding a
@@ -28,8 +30,7 @@ def build(
 
     @abc.abstractmethod
     def image_exists(self, image_name: str, pull_from_repo: bool = True) -> bool:
-        """
-        Check if the runtime image exists.
+        """Check if the runtime image exists.
 
         Args:
             image_name (str): The name of the runtime image (e.g., "repo:sha").
diff --git a/openhands/runtime/builder/docker.py b/openhands/runtime/builder/docker.py
index a3cb5af39f3d..880b1c73c578 100644
--- a/openhands/runtime/builder/docker.py
+++ b/openhands/runtime/builder/docker.py
@@ -9,6 +9,7 @@
 from openhands.core.logger import RollingLogger
 from openhands.core.logger import openhands_logger as logger
 from openhands.runtime.builder.base import RuntimeBuilder
+from openhands.utils.term_color import TermColor, colorize
 
 
 class DockerRuntimeBuilder(RuntimeBuilder):
@@ -27,8 +28,8 @@ def build(
         path: str,
         tags: list[str],
         platform: str | None = None,
-        use_local_cache: bool = False,
         extra_build_args: list[str] | None = None,
+        use_local_cache: bool = False,
     ) -> str:
         """Builds a Docker image using BuildKit and handles the build logs appropriately.
 
@@ -187,7 +188,9 @@ def image_exists(self, image_name: str, pull_from_repo: bool = True) -> bool:
             return True
         except docker.errors.ImageNotFound:
             if not pull_from_repo:
-                logger.debug(f'Image {image_name} not found locally')
+                logger.debug(
+                    f'Image {image_name} {colorize("not found", TermColor.WARNING)} locally'
+                )
                 return False
             try:
                 logger.debug(
@@ -214,7 +217,7 @@ def image_exists(self, image_name: str, pull_from_repo: bool = True) -> bool:
                 logger.debug('Could not find image locally or in registry.')
                 return False
             except Exception as e:
-                msg = 'Image could not be pulled: '
+                msg = f'Image {colorize("could not be pulled", TermColor.ERROR)}: '
                 ex_msg = str(e)
                 if 'Not Found' in ex_msg:
                     msg += 'image not found in registry.'
@@ -286,8 +289,7 @@ def _output_build_progress(
             logger.debug(current_line['status'])
 
     def _prune_old_cache_files(self, cache_dir: str, max_age_days: int = 7) -> None:
-        """
-        Prune cache files older than the specified number of days.
+        """Prune cache files older than the specified number of days.
 
         Args:
             cache_dir (str): The path to the cache directory.
@@ -311,8 +313,7 @@ def _prune_old_cache_files(self, cache_dir: str, max_age_days: int = 7) -> None:
             logger.warning(f'Error during build cache pruning: {e}')
 
     def _is_cache_usable(self, cache_dir: str) -> bool:
-        """
-        Check if the cache directory is usable (exists and is writable).
+        """Check if the cache directory is usable (exists and is writable).
 
         Args:
             cache_dir (str): The path to the cache directory.
diff --git a/openhands/runtime/builder/remote.py b/openhands/runtime/builder/remote.py
index c9d3228a70af..5cfe1a4943a4 100644
--- a/openhands/runtime/builder/remote.py
+++ b/openhands/runtime/builder/remote.py
@@ -23,7 +23,13 @@ def __init__(self, api_url: str, api_key: str):
         self.session = requests.Session()
         self.session.headers.update({'X-API-Key': self.api_key})
 
-    def build(self, path: str, tags: list[str], platform: str | None = None) -> str:
+    def build(
+        self,
+        path: str,
+        tags: list[str],
+        platform: str | None = None,
+        extra_build_args: list[str] | None = None,
+    ) -> str:
         """Builds a Docker image using the Runtime API's /build endpoint."""
         # Create a tar archive of the build context
         tar_buffer = io.BytesIO()
diff --git a/openhands/runtime/impl/e2b/sandbox.py b/openhands/runtime/impl/e2b/sandbox.py
index 783028ebb0e3..9bcd0f0514b6 100644
--- a/openhands/runtime/impl/e2b/sandbox.py
+++ b/openhands/runtime/impl/e2b/sandbox.py
@@ -4,7 +4,7 @@
 from glob import glob
 
 from e2b import Sandbox as E2BSandbox
-from e2b.sandbox import TimeoutException
+from e2b.exceptions import TimeoutException
 
 from openhands.core.config import SandboxConfig
 from openhands.core.logger import openhands_logger as logger
diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py
index 2862273078f3..becff94fb12b 100644
--- a/openhands/runtime/impl/eventstream/eventstream_runtime.py
+++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py
@@ -168,6 +168,7 @@ async def connect(self):
                     platform=self.config.sandbox.platform,
                     extra_deps=self.config.sandbox.runtime_extra_deps,
                     force_rebuild=self.config.sandbox.force_rebuild_runtime,
+                    extra_build_args=self.config.sandbox.runtime_extra_build_args,
                 )
 
             self.log(
diff --git a/openhands/runtime/utils/edit.py b/openhands/runtime/utils/edit.py
index bcb876f865d9..d95dacb100dd 100644
--- a/openhands/runtime/utils/edit.py
+++ b/openhands/runtime/utils/edit.py
@@ -150,11 +150,14 @@ def _get_lint_error(
     ) -> ErrorObservation | None:
         linter = DefaultLinter()
         # Copy the original file to a temporary file (with the same ext) and lint it
-        with tempfile.NamedTemporaryFile(
-            suffix=suffix, mode='w+', encoding='utf-8'
-        ) as original_file_copy, tempfile.NamedTemporaryFile(
-            suffix=suffix, mode='w+', encoding='utf-8'
-        ) as updated_file_copy:
+        with (
+            tempfile.NamedTemporaryFile(
+                suffix=suffix, mode='w+', encoding='utf-8'
+            ) as original_file_copy,
+            tempfile.NamedTemporaryFile(
+                suffix=suffix, mode='w+', encoding='utf-8'
+            ) as updated_file_copy,
+        ):
             # Lint the original file
             original_file_copy.write(old_content)
             original_file_copy.flush()
diff --git a/openhands/runtime/utils/request.py b/openhands/runtime/utils/request.py
index 46d730bfba2d..234464874fb6 100644
--- a/openhands/runtime/utils/request.py
+++ b/openhands/runtime/utils/request.py
@@ -24,7 +24,7 @@ def send_request(
     timeout: int = 10,
     **kwargs: Any,
 ) -> requests.Response:
-    response = session.request(method, url, **kwargs)
+    response = session.request(method, url, timeout=timeout, **kwargs)
     try:
         response.raise_for_status()
     except requests.HTTPError as e:
diff --git a/openhands/runtime/utils/runtime_build.py b/openhands/runtime/utils/runtime_build.py
index eab98befe538..de939efd9a38 100644
--- a/openhands/runtime/utils/runtime_build.py
+++ b/openhands/runtime/utils/runtime_build.py
@@ -111,6 +111,7 @@ def build_runtime_image(
     build_folder: str | None = None,
     dry_run: bool = False,
     force_rebuild: bool = False,
+    extra_build_args: List[str] | None = None,
 ) -> str:
     """Prepares the final docker build folder.
     If dry_run is False, it will also build the OpenHands runtime Docker image using the docker build folder.
@@ -123,6 +124,7 @@ def build_runtime_image(
     - build_folder (str): The directory to use for the build. If not provided a temporary directory will be used
     - dry_run (bool): if True, it will only ready the build folder. It will not actually build the Docker image
     - force_rebuild (bool): if True, it will create the Dockerfile which uses the base_image
+    - extra_build_args (List[str]): Additional build arguments to pass to the builder
 
     Returns:
     - str: <image_repo>:<MD5 hash>. Where MD5 hash is the hash of the docker build folder
@@ -139,6 +141,7 @@ def build_runtime_image(
                 dry_run=dry_run,
                 force_rebuild=force_rebuild,
                 platform=platform,
+                extra_build_args=extra_build_args,
             )
             return result
 
@@ -150,6 +153,7 @@ def build_runtime_image(
         dry_run=dry_run,
         force_rebuild=force_rebuild,
         platform=platform,
+        extra_build_args=extra_build_args,
     )
     return result
 
@@ -162,6 +166,7 @@ def build_runtime_image_in_folder(
     dry_run: bool,
     force_rebuild: bool,
     platform: str | None = None,
+    extra_build_args: List[str] | None = None,
 ) -> str:
     runtime_image_repo, _ = get_runtime_image_repo_and_tag(base_image)
     lock_tag = f'oh_v{oh_version}_{get_hash_for_lock_files(base_image)}'
@@ -193,6 +198,7 @@ def build_runtime_image_in_folder(
                 lock_tag,
                 versioned_tag,
                 platform,
+                extra_build_args=extra_build_args,
             )
         return hash_image_name
 
@@ -234,6 +240,7 @@ def build_runtime_image_in_folder(
             if build_from == BuildFromImageType.SCRATCH
             else None,
             platform=platform,
+            extra_build_args=extra_build_args,
         )
 
     return hash_image_name
@@ -339,6 +346,7 @@ def _build_sandbox_image(
     lock_tag: str,
     versioned_tag: str | None,
     platform: str | None = None,
+    extra_build_args: List[str] | None = None,
 ):
     """Build and tag the sandbox image. The image will be tagged with all tags that do not yet exist"""
     names = [
@@ -350,7 +358,10 @@ def _build_sandbox_image(
     names = [name for name in names if not runtime_builder.image_exists(name, False)]
 
     image_name = runtime_builder.build(
-        path=str(build_folder), tags=names, platform=platform
+        path=str(build_folder),
+        tags=names,
+        platform=platform,
+        extra_build_args=extra_build_args,
     )
     if not image_name:
         raise RuntimeError(f'Build failed for image {names}')
diff --git a/openhands/server/app.py b/openhands/server/app.py
index 33f9766fe60d..6207ccf2eb0b 100644
--- a/openhands/server/app.py
+++ b/openhands/server/app.py
@@ -1,4 +1,5 @@
 import warnings
+from contextlib import asynccontextmanager
 
 with warnings.catch_warnings():
     warnings.simplefilter('ignore')
@@ -21,8 +22,17 @@
 from openhands.server.routes.files import app as files_api_router
 from openhands.server.routes.public import app as public_api_router
 from openhands.server.routes.security import app as security_api_router
+from openhands.server.shared import config, session_manager
+from openhands.utils.import_utils import get_impl
 
-app = FastAPI()
+
+@asynccontextmanager
+async def _lifespan(app: FastAPI):  # handed to FastAPI(lifespan=...) below
+    async with session_manager:  # entered before the yield, exited after it
+        yield
+
+
+app = FastAPI(lifespan=_lifespan)
 app.add_middleware(
     LocalhostCORSMiddleware,
     allow_credentials=True,
@@ -48,9 +58,16 @@ async def health():
 app.include_router(security_api_router)
 app.include_router(feedback_api_router)
 
-app.middleware('http')(AttachSessionMiddleware(app, target_router=files_api_router))
+AttachSessionMiddlewareImpl = get_impl(
+    AttachSessionMiddleware, config.attach_session_middleware_class
+)
+app.middleware('http')(AttachSessionMiddlewareImpl(app, target_router=files_api_router))
+app.middleware('http')(
+    AttachSessionMiddlewareImpl(app, target_router=conversation_api_router)
+)
+app.middleware('http')(
+    AttachSessionMiddlewareImpl(app, target_router=security_api_router)
+)
 app.middleware('http')(
-    AttachSessionMiddleware(app, target_router=conversation_api_router)
+    AttachSessionMiddlewareImpl(app, target_router=feedback_api_router)
 )
-app.middleware('http')(AttachSessionMiddleware(app, target_router=security_api_router))
-app.middleware('http')(AttachSessionMiddleware(app, target_router=feedback_api_router))
diff --git a/openhands/server/listen.py b/openhands/server/listen.py
index 3ad6fd1b88fa..0af7cc7aa69e 100644
--- a/openhands/server/listen.py
+++ b/openhands/server/listen.py
@@ -1,7 +1,7 @@
 import socketio
 
 from openhands.server.app import app as base_app
-from openhands.server.socket import sio
+from openhands.server.listen_socket import sio
 from openhands.server.static import SPAStaticFiles
 
 base_app.mount(
diff --git a/openhands/server/socket.py b/openhands/server/listen_socket.py
similarity index 63%
rename from openhands/server/socket.py
rename to openhands/server/listen_socket.py
index 19a4993a2945..9946aafefe47 100644
--- a/openhands/server/socket.py
+++ b/openhands/server/listen_socket.py
@@ -8,10 +8,12 @@
 from openhands.events.observation import (
     NullObservation,
 )
+from openhands.events.observation.agent import AgentStateChangedObservation
 from openhands.events.serialization import event_to_dict
 from openhands.events.stream import AsyncEventStreamWrapper
 from openhands.server.auth import get_sid_from_token, sign_token
 from openhands.server.github_utils import authenticate_github_user
+from openhands.server.session.session_init_data import SessionInitData
 from openhands.server.shared import config, session_manager, sio
 
 
@@ -25,19 +27,32 @@ async def oh_action(connection_id: str, data: dict):
     # If it's an init, we do it here.
     action = data.get('action', '')
     if action == ActionType.INIT:
-        await init_connection(connection_id, data)
+        token = data.pop('token', None)
+        github_token = data.pop('github_token', None)
+        latest_event_id = int(data.pop('latest_event_id', -1))
+        kwargs = {k.lower(): v for k, v in (data.get('args') or {}).items()}
+        session_init_data = SessionInitData(**kwargs)
+        session_init_data.github_token = github_token
+        session_init_data.selected_repository = data.get('selected_repository', None)
+        await init_connection(
+            connection_id, token, github_token, session_init_data, latest_event_id
+        )
         return
 
     logger.info(f'sio:oh_action:{connection_id}')
     await session_manager.send_to_event_stream(connection_id, data)
 
 
-async def init_connection(connection_id: str, data: dict):
-    gh_token = data.pop('github_token', None)
+async def init_connection(
+    connection_id: str,
+    token: str | None,
+    gh_token: str | None,
+    session_init_data: SessionInitData,
+    latest_event_id: int,
+):
     if not await authenticate_github_user(gh_token):
         raise RuntimeError(status.WS_1008_POLICY_VIOLATION)
 
-    token = data.pop('token', None)
     if token:
         sid = get_sid_from_token(token, config.jwt_secret)
         if sid == '':
@@ -51,12 +66,13 @@ async def init_connection(connection_id: str, data: dict):
     token = sign_token({'sid': sid}, config.jwt_secret)
     await sio.emit('oh_event', {'token': token, 'status': 'ok'}, to=connection_id)
 
-    latest_event_id = int(data.pop('latest_event_id', -1))
-
     # The session in question should exist, but may not actually be running locally...
-    event_stream = await session_manager.init_or_join_session(sid, connection_id, data)
+    event_stream = await session_manager.init_or_join_session(
+        sid, connection_id, session_init_data
+    )
 
     # Send events
+    agent_state_changed = None
     async_stream = AsyncEventStreamWrapper(event_stream, latest_event_id + 1)
     async for event in async_stream:
         if isinstance(
@@ -67,7 +83,12 @@ async def init_connection(connection_id: str, data: dict):
             ),
         ):
             continue
+        elif isinstance(event, AgentStateChangedObservation):
+            agent_state_changed = event
+            continue
         await sio.emit('oh_event', event_to_dict(event), to=connection_id)
+    if agent_state_changed:
+        await sio.emit('oh_event', event_to_dict(agent_state_changed), to=connection_id)
 
 
 @sio.event
diff --git a/openhands/server/session/agent_session.py b/openhands/server/session/agent_session.py
index 2dc0ebace229..8ad454c7072e 100644
--- a/openhands/server/session/agent_session.py
+++ b/openhands/server/session/agent_session.py
@@ -7,7 +7,7 @@
 from openhands.core.config import AgentConfig, AppConfig, LLMConfig
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.schema.agent import AgentState
-from openhands.events.action.agent import ChangeAgentStateAction
+from openhands.events.action import ChangeAgentStateAction
 from openhands.events.event import EventSource
 from openhands.events.stream import EventStream
 from openhands.runtime import get_runtime_cls
@@ -60,6 +60,8 @@ async def start(
         max_budget_per_task: float | None = None,
         agent_to_llm_config: dict[str, LLMConfig] | None = None,
         agent_configs: dict[str, AgentConfig] | None = None,
+        github_token: str | None = None,
+        selected_repository: str | None = None,
     ):
         """Starts the Agent session
         Parameters:
@@ -86,6 +88,8 @@ async def start(
             max_budget_per_task,
             agent_to_llm_config,
             agent_configs,
+            github_token,
+            selected_repository,
         )
 
     def _start_thread(self, *args):
@@ -104,13 +108,18 @@ async def _start(
         max_budget_per_task: float | None = None,
         agent_to_llm_config: dict[str, LLMConfig] | None = None,
         agent_configs: dict[str, AgentConfig] | None = None,
+        github_token: str | None = None,
+        selected_repository: str | None = None,
     ):
         self._create_security_analyzer(config.security.security_analyzer)
         await self._create_runtime(
             runtime_name=runtime_name,
             config=config,
             agent=agent,
+            github_token=github_token,
+            selected_repository=selected_repository,
         )
+
         self._create_controller(
             agent,
             config.security.confirmation_mode,
@@ -165,6 +174,8 @@ async def _create_runtime(
         runtime_name: str,
         config: AppConfig,
         agent: Agent,
+        github_token: str | None = None,
+        selected_repository: str | None = None,
     ):
         """Creates a runtime instance
 
@@ -199,6 +210,12 @@ async def _create_runtime(
             return
 
         if self.runtime is not None:
+            self.runtime.clone_repo(github_token, selected_repository)
+            if agent.prompt_manager:
+                agent.prompt_manager.load_microagent_files(
+                    self.runtime.get_custom_microagents(selected_repository)
+                )
+
             logger.debug(
                 f'Runtime initialized with plugins: {[plugin.name for plugin in self.runtime.plugins]}'
             )
diff --git a/openhands/server/session/manager.py b/openhands/server/session/manager.py
index 7447ce0f8e04..fbcf4eaa836a 100644
--- a/openhands/server/session/manager.py
+++ b/openhands/server/session/manager.py
@@ -11,6 +11,7 @@
 from openhands.runtime.base import RuntimeUnavailableError
 from openhands.server.session.conversation import Conversation
 from openhands.server.session.session import ROOM_KEY, Session
+from openhands.server.session.session_init_data import SessionInitData
 from openhands.storage.files import FileStore
 from openhands.utils.shutdown_listener import should_continue
 
@@ -51,6 +52,7 @@ async def _redis_subscribe(self):
         """
         We use a redis backchannel to send actions between server nodes
         """
+        logger.debug('_redis_subscribe')
         redis_client = self._get_redis_client()
         pubsub = redis_client.pubsub()
         await pubsub.subscribe('oh_event')
@@ -74,7 +76,7 @@ async def _redis_subscribe(self):
 
     async def _process_message(self, message: dict):
         data = json.loads(message['data'])
-        logger.info(f'got_published_message:{message}')
+        logger.debug(f'got_published_message:{message}')
         sid = data['sid']
         message_type = data['message_type']
         if message_type == 'event':
@@ -111,7 +113,7 @@ async def _process_message(self, message: dict):
         elif message_type == 'session_closing':
             # Session closing event - We only get this in the event of graceful shutdown,
             # which can't be guaranteed - nodes can simply vanish unexpectedly!
-            logger.info(f'session_closing:{sid}')
+            logger.debug(f'session_closing:{sid}')
             for (
                 connection_id,
                 local_sid,
@@ -141,7 +143,9 @@ async def attach_to_conversation(self, sid: str) -> Conversation | None:
     async def detach_from_conversation(self, conversation: Conversation):
         await conversation.disconnect()
 
-    async def init_or_join_session(self, sid: str, connection_id: str, data: dict):
+    async def init_or_join_session(
+        self, sid: str, connection_id: str, session_init_data: SessionInitData
+    ):
         await self.sio.enter_room(connection_id, ROOM_KEY.format(sid=sid))
         self.local_connection_id_to_session_id[connection_id] = sid
 
@@ -156,7 +160,7 @@ async def init_or_join_session(self, sid: str, connection_id: str, data: dict):
         if redis_client and await self._is_session_running_in_cluster(sid):
             return EventStream(sid, self.file_store)
 
-        return await self.start_local_session(sid, data)
+        return await self.start_local_session(sid, session_init_data)
 
     async def _is_session_running_in_cluster(self, sid: str) -> bool:
         """As the rest of the cluster if a session is running. Wait a for a short timeout for a reply"""
@@ -164,6 +168,7 @@ async def _is_session_running_in_cluster(self, sid: str) -> bool:
         flag = asyncio.Event()
         self._session_is_running_flags[sid] = flag
         try:
+            logger.debug(f'publish:is_session_running:{sid}')
             await self._get_redis_client().publish(
                 'oh_event',
                 json.dumps(
@@ -210,14 +215,14 @@ async def _has_remote_connections(self, sid: str) -> bool:
         finally:
             self._has_remote_connections_flags.pop(sid)
 
-    async def start_local_session(self, sid: str, data: dict):
+    async def start_local_session(self, sid: str, session_init_data: SessionInitData):
         # Start a new local session
         logger.info(f'start_new_local_session:{sid}')
         session = Session(
             sid=sid, file_store=self.file_store, config=self.config, sio=self.sio
         )
         self.local_sessions_by_sid[sid] = session
-        await session.initialize_agent(data)
+        await session.initialize_agent(session_init_data)
         return session.agent_session.event_stream
 
     async def send_to_event_stream(self, connection_id: str, data: dict):
diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py
index a6edb82f3d19..04f5ba72541a 100644
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -1,4 +1,5 @@
 import asyncio
+from copy import deepcopy
 import time
 
 import socketio
@@ -21,6 +22,7 @@
 from openhands.events.stream import EventStreamSubscriber
 from openhands.llm.llm import LLM
 from openhands.server.session.agent_session import AgentSession
+from openhands.server.session.session_init_data import SessionInitData
 from openhands.storage.files import FileStore
 
 ROOM_KEY = 'room:{sid}'
@@ -34,7 +36,6 @@ class Session:
     agent_session: AgentSession
     loop: asyncio.AbstractEventLoop
     config: AppConfig
-    settings: dict | None
 
     def __init__(
         self,
@@ -52,41 +53,30 @@ def __init__(
         self.agent_session.event_stream.subscribe(
             EventStreamSubscriber.SERVER, self.on_event, self.sid
         )
-        self.config = config
+        # Copying this means that when we update variables they are not applied to the shared global configuration!
+        self.config = deepcopy(config)
         self.loop = asyncio.get_event_loop()
-        self.settings = None
 
     def close(self):
         self.is_alive = False
         self.agent_session.close()
 
-    async def initialize_agent(self, data: dict):
-        self.settings = data
+    async def initialize_agent(self, session_init_data: SessionInitData):
         self.agent_session.event_stream.add_event(
             AgentStateChangedObservation('', AgentState.LOADING),
             EventSource.ENVIRONMENT,
         )
         # Extract the agent-relevant arguments from the request
-        args = {key: value for key, value in data.get('args', {}).items()}
-        agent_cls = args.get(ConfigType.AGENT, self.config.default_agent)
-        self.config.security.confirmation_mode = args.get(
-            ConfigType.CONFIRMATION_MODE, self.config.security.confirmation_mode
-        )
-        self.config.security.security_analyzer = data.get('args', {}).get(
-            ConfigType.SECURITY_ANALYZER, self.config.security.security_analyzer
-        )
-        max_iterations = args.get(ConfigType.MAX_ITERATIONS, self.config.max_iterations)
+        agent_cls = session_init_data.agent or self.config.default_agent
+        self.config.security.confirmation_mode = self.config.security.confirmation_mode if session_init_data.confirmation_mode is None else session_init_data.confirmation_mode
+        self.config.security.security_analyzer = session_init_data.security_analyzer or self.config.security.security_analyzer
+        max_iterations = session_init_data.max_iterations or self.config.max_iterations
         # override default LLM config
+
         default_llm_config = self.config.get_llm_config()
-        default_llm_config.model = args.get(
-            ConfigType.LLM_MODEL, default_llm_config.model
-        )
-        default_llm_config.api_key = args.get(
-            ConfigType.LLM_API_KEY, default_llm_config.api_key
-        )
-        default_llm_config.base_url = args.get(
-            ConfigType.LLM_BASE_URL, default_llm_config.base_url
-        )
+        default_llm_config.model = session_init_data.llm_model or default_llm_config.model
+        default_llm_config.api_key = session_init_data.llm_api_key or default_llm_config.api_key
+        default_llm_config.base_url = session_init_data.llm_base_url or default_llm_config.base_url
 
         # TODO: override other LLM config & agent config groups (#2075)
 
@@ -103,6 +93,8 @@ async def initialize_agent(self, data: dict):
                 max_budget_per_task=self.config.max_budget_per_task,
                 agent_to_llm_config=self.config.get_agent_to_llm_config_map(),
                 agent_configs=self.config.get_agent_configs(),
+                github_token=session_init_data.github_token,
+                selected_repository=session_init_data.selected_repository,
             )
         except Exception as e:
             logger.exception(f'Error creating controller: {e}')
diff --git a/openhands/server/session/session_init_data.py b/openhands/server/session/session_init_data.py
new file mode 100644
index 000000000000..1308598b14c7
--- /dev/null
+++ b/openhands/server/session/session_init_data.py
@@ -0,0 +1,20 @@
+
+
+from dataclasses import dataclass
+
+
+@dataclass
+class SessionInitData:
+    """
+    Session initialization data for the web environment - a deep copy of the global config is made and then overridden with this data.
+    """
+    language: str | None = None  # NOTE(review): no reader of this field is visible in this change - confirm
+    agent: str | None = None  # falsy -> Session uses config.default_agent
+    max_iterations: int | None = None  # falsy (None or 0) -> config.max_iterations
+    security_analyzer: str | None = None  # falsy -> config.security.security_analyzer
+    confirmation_mode: bool | None = None  # only None defers to config; explicit False is kept
+    llm_model: str | None = None  # falsy -> default LLM config's model
+    llm_api_key: str | None = None  # falsy -> default LLM config's api_key
+    llm_base_url: str | None = None  # falsy -> default LLM config's base_url
+    github_token: str | None = None  # passed on as github_token= by Session.initialize_agent
+    selected_repository: str | None = None  # passed on as selected_repository= by Session.initialize_agent
diff --git a/openhands/utils/embeddings.py b/openhands/utils/embeddings.py
index 900b43052b13..7e251f0e5022 100644
--- a/openhands/utils/embeddings.py
+++ b/openhands/utils/embeddings.py
@@ -102,6 +102,12 @@ def get_embedding_model(strategy: str, llm_config: LLMConfig) -> 'BaseEmbedding'
                 azure_endpoint=llm_config.base_url,
                 api_version=llm_config.api_version,
             )
+        elif strategy == 'voyage':
+            from llama_index.embeddings.voyageai import VoyageEmbedding
+
+            return VoyageEmbedding(
+                model_name='voyage-code-3',
+            )
         elif (strategy is not None) and (strategy.lower() == 'none'):
             # TODO: this works but is not elegant enough. The incentive is when
             # an agent using embeddings is not used, there is no reason we need to
diff --git a/openhands/utils/import_utils.py b/openhands/utils/import_utils.py
new file mode 100644
index 000000000000..1a14c119dece
--- /dev/null
+++ b/openhands/utils/import_utils.py
@@ -0,0 +1,22 @@
+import importlib
+from typing import Type, TypeVar
+
+T = TypeVar('T')
+
+
+def import_from(qual_name: str):
+    """Resolve a dotted name: import everything before the last dot as a
+    module, then return the attribute named by the final segment.
+    """
+    module_path, _, attr_name = qual_name.rpartition('.')
+    target_module = importlib.import_module(module_path)
+    return getattr(target_module, attr_name)
+
+
+def get_impl(cls: Type[T], impl_name: str | None) -> Type[T]:
+    """Return `cls` if `impl_name` is None, else the imported class, which must be `cls` or a subclass."""
+    impl_class = cls if impl_name is None else import_from(impl_name)
+    # Explicit raise instead of `assert` so the check is not stripped by `python -O`.
+    if not (cls == impl_class or issubclass(impl_class, cls)):
+        raise AssertionError(f'{impl_name} is not {cls.__name__} or a subclass of it')
+    return impl_class
diff --git a/openhands/utils/microagent.py b/openhands/utils/microagent.py
index 4aa311d6b205..0d4816a0c842 100644
--- a/openhands/utils/microagent.py
+++ b/openhands/utils/microagent.py
@@ -11,14 +11,20 @@ class MicroAgentMetadata(pydantic.BaseModel):
 
 
 class MicroAgent:
-    def __init__(self, path: str):
-        self.path = path
-        if not os.path.exists(path):
-            raise FileNotFoundError(f'Micro agent file {path} is not found')
-        with open(path, 'r') as file:
-            self._loaded = frontmatter.load(file)
-            self._content = self._loaded.content
-            self._metadata = MicroAgentMetadata(**self._loaded.metadata)
+    def __init__(self, path: str | None = None, content: str | None = None):
+        self.path = path  # defined on every branch; None when built from raw content
+        if path and not content:  # load from a file on disk
+            if not os.path.exists(path):
+                raise FileNotFoundError(f'Micro agent file {path} is not found')
+            with open(path, 'r') as file:
+                self._loaded = frontmatter.load(file)
+                self._content = self._loaded.content
+                self._metadata = MicroAgentMetadata(**self._loaded.metadata)
+        elif content and not path:  # parse in-memory text; no filesystem access
+            raw_metadata, self._content = frontmatter.parse(content)
+            self._metadata = MicroAgentMetadata(**raw_metadata)
+        else:  # both or neither supplied
+            raise Exception('You must pass either path or file content, but not both.')
 
     def get_trigger(self, message: str) -> str | None:
         message = message.lower()
diff --git a/openhands/utils/prompt.py b/openhands/utils/prompt.py
index b6591198fa41..1d5e0080b334 100644
--- a/openhands/utils/prompt.py
+++ b/openhands/utils/prompt.py
@@ -42,13 +42,18 @@ def __init__(
                 if f.endswith('.md')
             ]
         for microagent_file in microagent_files:
-            microagent = MicroAgent(microagent_file)
+            microagent = MicroAgent(path=microagent_file)
             if (
                 disabled_microagents is None
                 or microagent.name not in disabled_microagents
             ):
                 self.microagents[microagent.name] = microagent
 
+    def load_microagent_files(self, microagent_files: list[str]):  # items are file contents, not paths
+        for microagent_file in microagent_files:
+            microagent = MicroAgent(content=microagent_file)  # parsed from text via content=
+            self.microagents[microagent.name] = microagent  # replaces any entry with the same name
+
     def _load_template(self, template_name: str) -> Template:
         if self.prompt_dir is None:
             raise ValueError('Prompt directory is not set')
diff --git a/openhands/utils/shutdown_listener.py b/openhands/utils/shutdown_listener.py
index e72667bf4763..5cf84a309cc9 100644
--- a/openhands/utils/shutdown_listener.py
+++ b/openhands/utils/shutdown_listener.py
@@ -19,7 +19,7 @@ def _register_signal_handler(sig: signal.Signals):
     original_handler = None
 
     def handler(sig_: int, frame: FrameType | None):
-        logger.info(f'shutdown_signal:{sig_}')
+        logger.debug(f'shutdown_signal:{sig_}')
         global _should_exit
         _should_exit = True
         if original_handler:
@@ -34,15 +34,15 @@ def _register_signal_handlers():
         return
     _should_exit = False
 
-    logger.info('_register_signal_handlers')
+    logger.debug('_register_signal_handlers')
 
     # Check if we're in the main thread of the main interpreter
     if threading.current_thread() is threading.main_thread():
-        logger.info('_register_signal_handlers:main_thread')
+        logger.debug('_register_signal_handlers:main_thread')
         for sig in HANDLED_SIGNALS:
             _register_signal_handler(sig)
     else:
-        logger.info('_register_signal_handlers:not_main_thread')
+        logger.debug('_register_signal_handlers:not_main_thread')
 
 
 def should_exit() -> bool:
diff --git a/openhands/utils/term_color.py b/openhands/utils/term_color.py
new file mode 100644
index 000000000000..6938369da336
--- /dev/null
+++ b/openhands/utils/term_color.py
@@ -0,0 +1,25 @@
+from enum import Enum
+
+from termcolor import colored
+
+
+class TermColor(Enum):
+    """Message levels mapped to the color-name strings that colorize() passes to ``colored``."""
+
+    WARNING = 'yellow'  # default color of colorize()
+    SUCCESS = 'green'
+    ERROR = 'red'
+    INFO = 'blue'
+
+
+def colorize(text: str, color: TermColor = TermColor.WARNING) -> str:
+    """Colorize text with specified color.
+
+    Args:
+        text (str): Text to be colored
+        color (TermColor, optional): Color to use. Defaults to TermColor.WARNING
+
+    Returns:
+        str: The result of ``colored(text, color.value)``
+    """
+    return colored(text, color.value)  # .value is the color-name string, e.g. 'yellow'
diff --git a/poetry.lock b/poetry.lock
index 5eb49683b956..fa1be60f9f4a 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,16 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
-
-[[package]]
-name = "aenum"
-version = "3.1.15"
-description = "Advanced Enumerations (compatible with Python's stdlib Enum), NamedTuples, and NamedConstants"
-optional = false
-python-versions = "*"
-files = [
-    {file = "aenum-3.1.15-py2-none-any.whl", hash = "sha256:27b1710b9d084de6e2e695dab78fe9f269de924b51ae2850170ee7e1ca6288a5"},
-    {file = "aenum-3.1.15-py3-none-any.whl", hash = "sha256:e0dfaeea4c2bd362144b87377e2c61d91958c5ed0b4daf89cb6f45ae23af6288"},
-    {file = "aenum-3.1.15.tar.gz", hash = "sha256:8cbd76cd18c4f870ff39b24284d3ea028fbe8731a58df3aa581e434c575b9559"},
-]
+# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
 
 [[package]]
 name = "aiohappyeyeballs"
@@ -565,17 +553,17 @@ files = [
 
 [[package]]
 name = "boto3"
-version = "1.35.68"
+version = "1.35.78"
 description = "The AWS SDK for Python"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "boto3-1.35.68-py3-none-any.whl", hash = "sha256:9b26fa31901da7793c1dcd65eee9bab7e897d8aa1ffed0b5e1c3bce93d2aefe4"},
-    {file = "boto3-1.35.68.tar.gz", hash = "sha256:091d6bed1422370987a839bff3f8755df7404fc15e9fac2a48e8505356f07433"},
+    {file = "boto3-1.35.78-py3-none-any.whl", hash = "sha256:5ef7166fe5060637b92af8dc152cd7acecf96b3fc9c5456706a886cadb534391"},
+    {file = "boto3-1.35.78.tar.gz", hash = "sha256:fc8001519c8842e766ad3793bde3fbd0bb39e821a582fc12cf67876b8f3cf7f1"},
 ]
 
 [package.dependencies]
-botocore = ">=1.35.68,<1.36.0"
+botocore = ">=1.35.78,<1.36.0"
 jmespath = ">=0.7.1,<2.0.0"
 s3transfer = ">=0.10.0,<0.11.0"
 
@@ -584,13 +572,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
 
 [[package]]
 name = "botocore"
-version = "1.35.68"
+version = "1.35.78"
 description = "Low-level, data-driven core of boto 3."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "botocore-1.35.68-py3-none-any.whl", hash = "sha256:599139d5564291f5be873800711f9e4e14a823395ae9ce7b142be775e9849b94"},
-    {file = "botocore-1.35.68.tar.gz", hash = "sha256:42c3700583a82f2b5316281a073d644a521d6358837e2b446dc458ba5d990fb4"},
+    {file = "botocore-1.35.78-py3-none-any.whl", hash = "sha256:41c37bd7c0326f25122f33ec84fb80fc0a14d7fcc9961431b0e57568e88c9cb5"},
+    {file = "botocore-1.35.78.tar.gz", hash = "sha256:6905036c25449ae8dba5e950e4b908e4b8a6fe6b516bf61e007ecb62fa21f323"},
 ]
 
 [package.dependencies]
@@ -953,13 +941,13 @@ numpy = "*"
 
 [[package]]
 name = "chromadb"
-version = "0.5.20"
+version = "0.5.23"
 description = "Chroma."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "chromadb-0.5.20-py3-none-any.whl", hash = "sha256:9550ba1b6dce911e35cac2568b301badf4b42f457b99a432bdeec2b6b9dd3680"},
-    {file = "chromadb-0.5.20.tar.gz", hash = "sha256:19513a23b2d20059866216bfd80195d1d4a160ffba234b8899f5e80978160ca7"},
+    {file = "chromadb-0.5.23-py3-none-any.whl", hash = "sha256:ffe5bdd7276d12cb682df0d38a13aa37573e6a3678e71889ac45f539ae05ad7e"},
+    {file = "chromadb-0.5.23.tar.gz", hash = "sha256:360a12b9795c5a33cb1f839d14410ccbde662ef1accd36153b0ae22312edabd1"},
 ]
 
 [package.dependencies]
@@ -986,10 +974,10 @@ pypika = ">=0.48.9"
 PyYAML = ">=6.0.0"
 rich = ">=10.11.0"
 tenacity = ">=8.2.3"
-tokenizers = ">=0.13.2"
+tokenizers = ">=0.13.2,<=0.20.3"
 tqdm = ">=4.65.0"
 typer = ">=0.9.0"
-typing-extensions = ">=4.5.0"
+typing_extensions = ">=4.5.0"
 uvicorn = {version = ">=0.18.3", extras = ["standard"]}
 
 [[package]]
@@ -1550,25 +1538,23 @@ files = [
 
 [[package]]
 name = "e2b"
-version = "0.17.1"
+version = "1.0.5"
 description = "E2B SDK that give agents cloud environments"
 optional = false
 python-versions = "<4.0,>=3.8"
 files = [
-    {file = "e2b-0.17.1-py3-none-any.whl", hash = "sha256:c0698fd03b639f4dd88eed167a98af4d450668c0ae9805122a98f62f36f2491f"},
-    {file = "e2b-0.17.1.tar.gz", hash = "sha256:9e69a059cb73334bac7db189287552af9321fb3ac8ced52557907e10c4310733"},
+    {file = "e2b-1.0.5-py3-none-any.whl", hash = "sha256:a71bdec46f33d3e38e87d475d7fd2939bd7b6b753b819c9639ca211cd375b79e"},
+    {file = "e2b-1.0.5.tar.gz", hash = "sha256:43c82705af7b7d4415c2510ff77dab4dc075351e0b769d6adf8e0d7bb4868d13"},
 ]
 
 [package.dependencies]
-aenum = ">=3.1.11"
-aiohttp = ">=3.8.4"
-jsonrpcclient = ">=4.0.3"
-pydantic = "*"
+attrs = ">=23.2.0"
+httpcore = ">=1.0.5,<2.0.0"
+httpx = ">=0.27.0,<1.0.0"
+packaging = ">=24.1"
+protobuf = ">=3.20.0,<6.0.0"
 python-dateutil = ">=2.8.2"
-requests = ">=2.31.0"
-typing-extensions = ">=4.8.0"
-urllib3 = ">=1.25.3"
-websockets = ">=11.0.3"
+typing-extensions = ">=4.1.0"
 
 [[package]]
 name = "evaluate"
@@ -3304,19 +3290,6 @@ files = [
     {file = "jsonpointer-3.0.0.tar.gz", hash = "sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef"},
 ]
 
-[[package]]
-name = "jsonrpcclient"
-version = "4.0.3"
-description = "Send JSON-RPC requests"
-optional = false
-python-versions = ">=3.6"
-files = [
-    {file = "jsonrpcclient-4.0.3-py3-none-any.whl", hash = "sha256:3cbb9e27e1be29821becf135ea183144a836215422727e1ffe5056a49a670f0d"},
-]
-
-[package.extras]
-qa = ["pytest", "pytest-cov", "tox"]
-
 [[package]]
 name = "jsonschema"
 version = "4.23.0"
@@ -3766,22 +3739,23 @@ types-tqdm = "*"
 
 [[package]]
 name = "litellm"
-version = "1.52.15"
+version = "1.54.1"
 description = "Library to easily interface with LLM API providers"
 optional = false
 python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
 files = [
-    {file = "litellm-1.52.15-py3-none-any.whl", hash = "sha256:8a2d8e2526c5e7afb3006b0214d3c348778462fefafd582fd76bb7f5c35d28d0"},
-    {file = "litellm-1.52.15.tar.gz", hash = "sha256:11a61b1b033ddff9d480da66c00acc9d3e4fbfeed166d1b0de8eda16c684116e"},
+    {file = "litellm-1.54.1-py3-none-any.whl", hash = "sha256:d8e60d4a5e8decb0234a1e8c20351c904aec561fb4025df7df3d0d7ea81ca442"},
+    {file = "litellm-1.54.1.tar.gz", hash = "sha256:b5a8fc99160fab0699b9258457432b3975499218ffcf1b515709808b2ce5a2d7"},
 ]
 
 [package.dependencies]
 aiohttp = "*"
 click = "*"
+httpx = ">=0.23.0,<0.28.0"
 importlib-metadata = ">=6.8.0"
 jinja2 = ">=3.1.2,<4.0.0"
 jsonschema = ">=4.22.0,<5.0.0"
-openai = ">=1.54.0"
+openai = ">=1.55.3"
 pydantic = ">=2.0.0,<3.0.0"
 python-dotenv = ">=0.2.0"
 requests = ">=2.31.0,<3.0.0"
@@ -3809,24 +3783,24 @@ pydantic = ">=1.10"
 
 [[package]]
 name = "llama-index"
-version = "0.12.2"
+version = "0.12.5"
 description = "Interface between LLMs and your data"
 optional = false
 python-versions = "<4.0,>=3.9"
 files = [
-    {file = "llama_index-0.12.2-py3-none-any.whl", hash = "sha256:971528db7889f5a0d15fd9039a403bc6f92bfafc2d4e1bab2d166657728ae94c"},
-    {file = "llama_index-0.12.2.tar.gz", hash = "sha256:da9738dd666e219689839c7451c9df8bed72e6510a6f7d6f7d9907bfdd4588eb"},
+    {file = "llama_index-0.12.5-py3-none-any.whl", hash = "sha256:2bb6d234cf6d7fdb6a308e9aff1a607e83a24210cc7325be62c65bc43493680f"},
+    {file = "llama_index-0.12.5.tar.gz", hash = "sha256:a816f18079c88e17b53fab6efc27f7c3dfb0a7af559afaaeaeef0577654235a4"},
 ]
 
 [package.dependencies]
 llama-index-agent-openai = ">=0.4.0,<0.5.0"
 llama-index-cli = ">=0.4.0,<0.5.0"
-llama-index-core = ">=0.12.2,<0.13.0"
+llama-index-core = ">=0.12.5,<0.13.0"
 llama-index-embeddings-openai = ">=0.3.0,<0.4.0"
 llama-index-indices-managed-llama-cloud = ">=0.4.0"
 llama-index-legacy = ">=0.9.48,<0.10.0"
 llama-index-llms-openai = ">=0.3.0,<0.4.0"
-llama-index-multi-modal-llms-openai = ">=0.3.0,<0.4.0"
+llama-index-multi-modal-llms-openai = ">=0.4.0,<0.5.0"
 llama-index-program-openai = ">=0.3.0,<0.4.0"
 llama-index-question-gen-openai = ">=0.3.0,<0.4.0"
 llama-index-readers-file = ">=0.4.0,<0.5.0"
@@ -3867,13 +3841,13 @@ llama-index-llms-openai = ">=0.3.0,<0.4.0"
 
 [[package]]
 name = "llama-index-core"
-version = "0.12.2"
+version = "0.12.5"
 description = "Interface between LLMs and your data"
 optional = false
 python-versions = "<4.0,>=3.9"
 files = [
-    {file = "llama_index_core-0.12.2-py3-none-any.whl", hash = "sha256:27a5548523435a5c2b84f75c15894a44522b7f968e9f29a03f9a301ca09fb7fa"},
-    {file = "llama_index_core-0.12.2.tar.gz", hash = "sha256:a48b2de9c3a09608ab5c03c5a313428f119c86946acdefde555992b7c0b8a38e"},
+    {file = "llama_index_core-0.12.5-py3-none-any.whl", hash = "sha256:1fe6dd39b2dc5a945b4702d780a2f5962a553e187524a255429461dc92a664db"},
+    {file = "llama_index_core-0.12.5.tar.gz", hash = "sha256:1d967323891920579fad3d6497587c137894df3f76718a3ec134f9201f2f4fc0"},
 ]
 
 [package.dependencies]
@@ -3889,11 +3863,11 @@ networkx = ">=3.0"
 nltk = ">3.8.1"
 numpy = "*"
 pillow = ">=9.0.0"
-pydantic = ">=2.7.0,<2.10.0"
+pydantic = ">=2.8.0"
 PyYAML = ">=6.0.1"
 requests = ">=2.31.0"
 SQLAlchemy = {version = ">=1.4.49", extras = ["asyncio"]}
-tenacity = ">=8.2.0,<8.4.0 || >8.4.0,<9.0.0"
+tenacity = ">=8.2.0,<8.4.0 || >8.4.0,<10.0.0"
 tiktoken = ">=0.3.3"
 tqdm = ">=4.66.1,<5.0.0"
 typing-extensions = ">=4.5.0"
@@ -3934,18 +3908,18 @@ sentence-transformers = ">=2.6.1"
 
 [[package]]
 name = "llama-index-embeddings-ollama"
-version = "0.4.0"
+version = "0.5.0"
 description = "llama-index embeddings ollama integration"
 optional = false
 python-versions = "<4.0,>=3.9"
 files = [
-    {file = "llama_index_embeddings_ollama-0.4.0-py3-none-any.whl", hash = "sha256:198706882bad4c91db8250b5bdb47fd4eb9f871e0825aff64361b09210a8087c"},
-    {file = "llama_index_embeddings_ollama-0.4.0.tar.gz", hash = "sha256:ebe73354470f8b6061d5f961b1ee50716d54972ef295fb253e3ebc393d5b36cb"},
+    {file = "llama_index_embeddings_ollama-0.5.0-py3-none-any.whl", hash = "sha256:843ecccfbe2db548a39e71a85e8ebbfe3cf2659db9533c080dcb291e4975af3b"},
+    {file = "llama_index_embeddings_ollama-0.5.0.tar.gz", hash = "sha256:fec8fa249ed2fb13912e1511decb21c025a53294728a21f25bd2d5f30f435a94"},
 ]
 
 [package.dependencies]
 llama-index-core = ">=0.12.0,<0.13.0"
-ollama = ">=0.3.1,<0.4.0"
+ollama = ">=0.3.1"
 
 [[package]]
 name = "llama-index-embeddings-openai"
@@ -3962,6 +3936,21 @@ files = [
 llama-index-core = ">=0.12.0,<0.13.0"
 openai = ">=1.1.0"
 
+[[package]]
+name = "llama-index-embeddings-voyageai"
+version = "0.3.1"
+description = "llama-index embeddings voyageai integration"
+optional = false
+python-versions = "<4.0,>=3.9"
+files = [
+    {file = "llama_index_embeddings_voyageai-0.3.1-py3-none-any.whl", hash = "sha256:f0e0b327ab21669a2b0501f207a6862f7a0b0a115bff15b6ceac712273a6fa03"},
+    {file = "llama_index_embeddings_voyageai-0.3.1.tar.gz", hash = "sha256:cfbc0a0697bda39c18398418628596c6ae8c668a0306d504a4fc16100fcd7d57"},
+]
+
+[package.dependencies]
+llama-index-core = ">=0.12.0,<0.13.0"
+voyageai = {version = ">=0.2.1,<0.4.0", markers = "python_version >= \"3.9\" and python_version < \"3.13\""}
+
 [[package]]
 name = "llama-index-indices-managed-llama-cloud"
 version = "0.6.2"
@@ -4050,17 +4039,17 @@ openai = ">=1.40.0,<2.0.0"
 
 [[package]]
 name = "llama-index-multi-modal-llms-openai"
-version = "0.3.0"
+version = "0.4.0"
 description = "llama-index multi-modal-llms openai integration"
 optional = false
 python-versions = "<4.0,>=3.9"
 files = [
-    {file = "llama_index_multi_modal_llms_openai-0.3.0-py3-none-any.whl", hash = "sha256:9b7e3e39b19b2668b9c75014bcb90795bb546f0f9e1af8b7f1087f8687805763"},
-    {file = "llama_index_multi_modal_llms_openai-0.3.0.tar.gz", hash = "sha256:71e983c7771c39088e4058cd78029219315a0fb631b9e12b903e53243b9a3fd6"},
+    {file = "llama_index_multi_modal_llms_openai-0.4.0-py3-none-any.whl", hash = "sha256:c5bda1b3c6d14eee87a819ba72b122d82877829695dce8f90a8c600ac16ce243"},
+    {file = "llama_index_multi_modal_llms_openai-0.4.0.tar.gz", hash = "sha256:11c3ac7e2d7ace9dbcdd9a662f27bca5fefce98c5682abaffb7dd01d59776658"},
 ]
 
 [package.dependencies]
-llama-index-core = ">=0.12.0,<0.13.0"
+llama-index-core = ">=0.12.3,<0.13.0"
 llama-index-llms-openai = ">=0.3.0,<0.4.0"
 
 [[package]]
@@ -4133,13 +4122,13 @@ llama-parse = ">=0.5.0"
 
 [[package]]
 name = "llama-index-vector-stores-chroma"
-version = "0.4.0"
+version = "0.4.1"
 description = "llama-index vector_stores chroma integration"
 optional = false
 python-versions = "<4.0,>=3.9"
 files = [
-    {file = "llama_index_vector_stores_chroma-0.4.0-py3-none-any.whl", hash = "sha256:058986b80df0d5f71f435df2a58f5b8e21feebdb34a56e4dbced8c2c225d10b6"},
-    {file = "llama_index_vector_stores_chroma-0.4.0.tar.gz", hash = "sha256:c5e979591d09adc91f09b08fb1f523d3cfe30eee3cf13e53da28540caea7366a"},
+    {file = "llama_index_vector_stores_chroma-0.4.1-py3-none-any.whl", hash = "sha256:42200af4fa5c8df820b865825d1b506cecb922c8fbd7421eda8a5609b390c1d5"},
+    {file = "llama_index_vector_stores_chroma-0.4.1.tar.gz", hash = "sha256:70ee74ccf304adda04171d014e483759c68a1c92f679ea2ca2e6b6f45b6fef08"},
 ]
 
 [package.dependencies]
@@ -5440,13 +5429,13 @@ sympy = "*"
 
 [[package]]
 name = "openai"
-version = "1.55.0"
+version = "1.57.2"
 description = "The official Python library for the openai API"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "openai-1.55.0-py3-none-any.whl", hash = "sha256:446e08918f8dd70d8723274be860404c8c7cc46b91b93bbc0ef051f57eb503c1"},
-    {file = "openai-1.55.0.tar.gz", hash = "sha256:6c0975ac8540fe639d12b4ff5a8e0bf1424c844c4a4251148f59f06c4b2bd5db"},
+    {file = "openai-1.57.2-py3-none-any.whl", hash = "sha256:f7326283c156fdee875746e7e54d36959fb198eadc683952ee05e3302fbd638d"},
+    {file = "openai-1.57.2.tar.gz", hash = "sha256:5f49fd0f38e9f2131cda7deb45dafdd1aee4f52a637e190ce0ecf40147ce8cee"},
 ]
 
 [package.dependencies]
@@ -5483,13 +5472,13 @@ numpy = {version = ">=1.26.0", markers = "python_version >= \"3.12\""}
 
 [[package]]
 name = "openhands-aci"
-version = "0.1.1"
+version = "0.1.2"
 description = "An Agent-Computer Interface (ACI) designed for software development agents OpenHands."
 optional = false
 python-versions = "<4.0,>=3.12"
 files = [
-    {file = "openhands_aci-0.1.1-py3-none-any.whl", hash = "sha256:8831f97b887571005dca0d70a9f6f0a4f9feb35d3d41f499e70d72b5fb68a599"},
-    {file = "openhands_aci-0.1.1.tar.gz", hash = "sha256:705b74a12a8f428e64295b5de125f553500f62ef5ab3a5a6284d8fcf638025e6"},
+    {file = "openhands_aci-0.1.2-py3-none-any.whl", hash = "sha256:a2fcae7a2f1047d516d6862742c7a2f8ea988c6a58295599bc305c99b8d53067"},
+    {file = "openhands_aci-0.1.2.tar.gz", hash = "sha256:c3c91aa3f13554159168b44a7f86bf333da30067fa6370a46ed785bf4240631b"},
 ]
 
 [package.dependencies]
@@ -9386,6 +9375,24 @@ platformdirs = ">=3.9.1,<5"
 docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
 test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"]
 
+[[package]]
+name = "voyageai"
+version = "0.2.4"
+description = ""
+optional = false
+python-versions = "<4.0.0,>=3.7.1"
+files = [
+    {file = "voyageai-0.2.4-py3-none-any.whl", hash = "sha256:e3070e5c78dec89adae43231334b4637aa88933dad99b1c33d3219fdfc94dfa4"},
+    {file = "voyageai-0.2.4.tar.gz", hash = "sha256:b9911d8629e8a4e363291c133482fead49a3536afdf1e735f3ab3aaccd8d250d"},
+]
+
+[package.dependencies]
+aiohttp = ">=3.5,<4.0"
+aiolimiter = ">=1.1.0,<2.0.0"
+numpy = ">=1.11"
+requests = ">=2.20,<3.0"
+tenacity = ">=8.0.1"
+
 [[package]]
 name = "watchdog"
 version = "6.0.0"
@@ -10087,4 +10094,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.12"
-content-hash = "56a80082afb76e518239060855598921d94a0373123b2d9222cf8c7b6238b7ad"
+content-hash = "7334dd947fe93756227b5fc8f86303852c5e9aaf8787cc35b0ce23fc1540df67"
diff --git a/pyproject.toml b/pyproject.toml
index a098aa81a648..61dae1566fbc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "openhands-ai"
-version = "0.15.0"
+version = "0.15.2"
 description = "OpenHands: Code Less, Make More"
 authors = ["OpenHands"]
 license = "MIT"
@@ -14,7 +14,7 @@ packages = [
 python = "^3.12"
 datasets = "*"
 pandas = "*"
-litellm = "^1.52.3"
+litellm = "^1.54.1"
 google-generativeai = "*" # To use litellm with Gemini Pro API
 google-api-python-client = "*" # For Google Sheets API
 google-auth-httplib2 = "*" # For Google Sheets authentication
@@ -30,7 +30,7 @@ numpy = "*"
 json-repair = "*"
 browsergym-core = "0.10.2" # integrate browsergym-core as the browsing interface
 html2text = "*"
-e2b = ">=0.17.1,<1.1.0"
+e2b = ">=1.0.5,<1.1.0"
 pexpect = "*"
 jinja2 = "^3.1.3"
 python-multipart = "*"
@@ -64,7 +64,7 @@ modal = "^0.66.26"
 runloop-api-client = "0.10.0"
 pygithub = "^2.5.0"
 joblib = "*"
-openhands-aci = "^0.1.1"
+openhands-aci = "^0.1.2"
 python-socketio = "^5.11.4"
 redis = "^5.2.0"
 
@@ -76,6 +76,8 @@ llama-index-embeddings-huggingface = "*"
 torch = "2.5.1"
 llama-index-embeddings-azure-openai = "*"
 llama-index-embeddings-ollama = "*"
+voyageai = "*"
+llama-index-embeddings-voyageai = "*"
 
 [tool.poetry.group.dev.dependencies]
 ruff = "0.8.0"
diff --git a/tests/unit/resolver/test_pr_handler_guess_success.py b/tests/unit/resolver/test_pr_handler_guess_success.py
index 58cbd89c8180..af7f22e3ab9c 100644
--- a/tests/unit/resolver/test_pr_handler_guess_success.py
+++ b/tests/unit/resolver/test_pr_handler_guess_success.py
@@ -16,7 +16,7 @@ def mock_llm_response(content):
 
 
 def test_guess_success_review_threads_litellm_call():
-    """Test that the litellm.completion() call for review threads contains the expected content."""
+    """Test that the completion() call for review threads contains the expected content."""
     # Create a PR handler instance
     llm_config = LLMConfig(model='test', api_key='test')
     handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
@@ -77,7 +77,7 @@ def test_guess_success_review_threads_litellm_call():
         mock_completion.return_value = mock_response
         success, success_list, explanation = handler.guess_success(issue, history)
 
-        # Verify the litellm.completion() calls
+        # Verify the completion() calls
         assert mock_completion.call_count == 2  # One call per review thread
 
         # Check first call
@@ -121,7 +121,7 @@ def test_guess_success_review_threads_litellm_call():
 
 
 def test_guess_success_thread_comments_litellm_call():
-    """Test that the litellm.completion() call for thread comments contains the expected content."""
+    """Test that the completion() call for thread comments contains the expected content."""
     # Create a PR handler instance
     llm_config = LLMConfig(model='test', api_key='test')
     handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
@@ -176,7 +176,7 @@ def test_guess_success_thread_comments_litellm_call():
         mock_completion.return_value = mock_response
         success, success_list, explanation = handler.guess_success(issue, history)
 
-        # Verify the litellm.completion() call
+        # Verify the completion() call
         mock_completion.assert_called_once()
         call_args = mock_completion.call_args
         prompt = call_args[1]['messages'][0]['content']
@@ -270,7 +270,7 @@ def test_check_review_thread():
             review_thread, issues_context, last_message
         )
 
-        # Verify the litellm.completion() call
+        # Verify the completion() call
         mock_completion.assert_called_once()
         call_args = mock_completion.call_args
         prompt = call_args[1]['messages'][0]['content']
@@ -326,7 +326,7 @@ def test_check_thread_comments():
             thread_comments, issues_context, last_message
         )
 
-        # Verify the litellm.completion() call
+        # Verify the completion() call
         mock_completion.assert_called_once()
         call_args = mock_completion.call_args
         prompt = call_args[1]['messages'][0]['content']
@@ -379,7 +379,7 @@ def test_check_review_comments():
             review_comments, issues_context, last_message
         )
 
-        # Verify the litellm.completion() call
+        # Verify the completion() call
         mock_completion.assert_called_once()
         call_args = mock_completion.call_args
         prompt = call_args[1]['messages'][0]['content']
@@ -395,7 +395,7 @@ def test_check_review_comments():
 
 
 def test_guess_success_review_comments_litellm_call():
-    """Test that the litellm.completion() call for review comments contains the expected content."""
+    """Test that the completion() call for review comments contains the expected content."""
     # Create a PR handler instance
     llm_config = LLMConfig(model='test', api_key='test')
     handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
@@ -447,7 +447,7 @@ def test_guess_success_review_comments_litellm_call():
         mock_completion.return_value = mock_response
         success, success_list, explanation = handler.guess_success(issue, history)
 
-        # Verify the litellm.completion() call
+        # Verify the completion() call
         mock_completion.assert_called_once()
         call_args = mock_completion.call_args
         prompt = call_args[1]['messages'][0]['content']
diff --git a/tests/unit/resolver/test_pr_title_escaping.py b/tests/unit/resolver/test_pr_title_escaping.py
index 45dd523b036a..9cc5d90bc4b0 100644
--- a/tests/unit/resolver/test_pr_title_escaping.py
+++ b/tests/unit/resolver/test_pr_title_escaping.py
@@ -153,7 +153,6 @@ def mock_run(*args, **kwargs):
 
         # Try to send a PR - this will fail if the title is incorrectly escaped
         print('Sending PR...')
-        from openhands.core.config import LLMConfig
         from openhands.resolver.send_pull_request import send_pull_request
 
         send_pull_request(
@@ -161,6 +160,5 @@ def mock_run(*args, **kwargs):
             github_token='dummy-token',
             github_username='test-user',
             patch_dir=temp_dir,
-            llm_config=LLMConfig(model='test-model', api_key='test-key'),
             pr_type='ready',
         )
diff --git a/tests/unit/resolver/test_resolve_issues.py b/tests/unit/resolver/test_resolve_issues.py
index 8d54adb87627..ab42165642a4 100644
--- a/tests/unit/resolver/test_resolve_issues.py
+++ b/tests/unit/resolver/test_resolve_issues.py
@@ -84,6 +84,41 @@ def test_initialize_runtime():
     )
 
 
+@pytest.mark.asyncio
+async def test_resolve_issue_no_issues_found():
+    from openhands.resolver.resolve_issue import resolve_issue
+
+    # Mock dependencies
+    mock_handler = MagicMock()
+    mock_handler.get_converted_issues.return_value = []  # Return empty list
+
+    with patch(
+        'openhands.resolver.resolve_issue.issue_handler_factory',
+        return_value=mock_handler,
+    ):
+        with pytest.raises(ValueError) as exc_info:
+            await resolve_issue(
+                owner='test-owner',
+                repo='test-repo',
+                token='test-token',
+                username='test-user',
+                max_iterations=5,
+                output_dir='/tmp',
+                llm_config=LLMConfig(model='test', api_key='test'),
+                runtime_container_image='test-image',
+                prompt_template='test-template',
+                issue_type='pr',
+                repo_instruction=None,
+                issue_number=5432,
+                comment_id=None,
+            )
+
+        assert 'No issues found for issue number 5432' in str(exc_info.value)
+        assert 'test-owner/test-repo' in str(exc_info.value)
+        assert 'exists in the repository' in str(exc_info.value)
+        assert 'correct permissions' in str(exc_info.value)
+
+
 def test_download_issues_from_github():
     llm_config = LLMConfig(model='test', api_key='test')
     handler = IssueHandler('owner', 'repo', 'token', llm_config)
@@ -389,16 +424,23 @@ async def test_process_issue(mock_output_dir, mock_prompt_template):
         handler_instance.get_instruction.return_value = ('Test instruction', [])
         handler_instance.issue_type = 'pr' if test_case.get('is_pr', False) else 'issue'
 
-        with patch(
-            'openhands.resolver.resolve_issue.create_runtime', mock_create_runtime
-        ), patch(
-            'openhands.resolver.resolve_issue.initialize_runtime',
-            mock_initialize_runtime,
-        ), patch(
-            'openhands.resolver.resolve_issue.run_controller', mock_run_controller
-        ), patch(
-            'openhands.resolver.resolve_issue.complete_runtime', mock_complete_runtime
-        ), patch('openhands.resolver.resolve_issue.logger'):
+        with (
+            patch(
+                'openhands.resolver.resolve_issue.create_runtime', mock_create_runtime
+            ),
+            patch(
+                'openhands.resolver.resolve_issue.initialize_runtime',
+                mock_initialize_runtime,
+            ),
+            patch(
+                'openhands.resolver.resolve_issue.run_controller', mock_run_controller
+            ),
+            patch(
+                'openhands.resolver.resolve_issue.complete_runtime',
+                mock_complete_runtime,
+            ),
+            patch('openhands.resolver.resolve_issue.logger'),
+        ):
             # Call the function
             result = await process_issue(
                 issue,
diff --git a/tests/unit/resolver/test_send_pull_request.py b/tests/unit/resolver/test_send_pull_request.py
index f83e2e97ec2f..c83b8a892c58 100644
--- a/tests/unit/resolver/test_send_pull_request.py
+++ b/tests/unit/resolver/test_send_pull_request.py
@@ -244,8 +244,12 @@ def test_initialize_repo(mock_output_dir):
 @patch('openhands.resolver.send_pull_request.reply_to_comment')
 @patch('requests.post')
 @patch('subprocess.run')
+@patch('openhands.resolver.send_pull_request.LLM')
 def test_update_existing_pull_request(
-    mock_subprocess_run, mock_requests_post, mock_reply_to_comment
+    mock_llm_class,
+    mock_subprocess_run,
+    mock_requests_post,
+    mock_reply_to_comment,
 ):
     # Arrange: Set up test data
     github_issue = GithubIssue(
@@ -267,23 +271,28 @@ def test_update_existing_pull_request(
 
     # Mock the requests.post call for adding a PR comment
     mock_requests_post.return_value.status_code = 201
+
+    # Mock LLM instance and completion call
+    mock_llm_instance = MagicMock()
     mock_completion_response = MagicMock()
     mock_completion_response.choices = [
         MagicMock(message=MagicMock(content='This is an issue resolution.'))
     ]
+    mock_llm_instance.completion.return_value = mock_completion_response
+    mock_llm_class.return_value = mock_llm_instance
+
     llm_config = LLMConfig()
 
     # Act: Call the function without comment_message to test auto-generation
-    with patch('litellm.completion', MagicMock(return_value=mock_completion_response)):
-        result = update_existing_pull_request(
-            github_issue,
-            github_token,
-            github_username,
-            patch_dir,
-            llm_config,
-            comment_message=None,
-            additional_message=additional_message,
-        )
+    result = update_existing_pull_request(
+        github_issue,
+        github_token,
+        github_username,
+        patch_dir,
+        llm_config,
+        comment_message=None,
+        additional_message=additional_message,
+    )
 
     # Assert: Check if the git push command was executed
     push_command = (
@@ -342,7 +351,6 @@ def test_send_pull_request(
     mock_run,
     mock_github_issue,
     mock_output_dir,
-    mock_llm_config,
     pr_type,
     target_branch,
 ):
@@ -377,7 +385,6 @@ def test_send_pull_request(
         github_username='test-user',
         patch_dir=repo_path,
         pr_type=pr_type,
-        llm_config=mock_llm_config,
         target_branch=target_branch,
     )
 
@@ -425,9 +432,72 @@ def test_send_pull_request(
         assert post_data['draft'] == (pr_type == 'draft')
 
 
+@patch('subprocess.run')
+@patch('requests.post')
+@patch('requests.get')
+def test_send_pull_request_with_reviewer(
+    mock_get, mock_post, mock_run, mock_github_issue, mock_output_dir
+):
+    repo_path = os.path.join(mock_output_dir, 'repo')
+    reviewer = 'test-reviewer'
+
+    # Mock API responses
+    mock_get.side_effect = [
+        MagicMock(status_code=404),  # Branch doesn't exist
+        MagicMock(json=lambda: {'default_branch': 'main'}),  # Get default branch
+    ]
+
+    # Mock PR creation response
+    mock_post.side_effect = [
+        MagicMock(
+            status_code=201,
+            json=lambda: {
+                'html_url': 'https://github.com/test-owner/test-repo/pull/1',
+                'number': 1,
+            },
+        ),  # PR creation
+        MagicMock(status_code=201),  # Reviewer request
+    ]
+
+    # Mock subprocess.run calls
+    mock_run.side_effect = [
+        MagicMock(returncode=0),  # git checkout -b
+        MagicMock(returncode=0),  # git push
+    ]
+
+    # Call the function with reviewer
+    result = send_pull_request(
+        github_issue=mock_github_issue,
+        github_token='test-token',
+        github_username='test-user',
+        patch_dir=repo_path,
+        pr_type='ready',
+        reviewer=reviewer,
+    )
+
+    # Assert API calls
+    assert mock_get.call_count == 2
+    assert mock_post.call_count == 2
+
+    # Check PR creation
+    pr_create_call = mock_post.call_args_list[0]
+    assert pr_create_call[1]['json']['title'] == 'Fix issue #42: Test Issue'
+
+    # Check reviewer request
+    reviewer_request_call = mock_post.call_args_list[1]
+    assert (
+        reviewer_request_call[0][0]
+        == 'https://api.github.com/repos/test-owner/test-repo/pulls/1/requested_reviewers'
+    )
+    assert reviewer_request_call[1]['json'] == {'reviewers': ['test-reviewer']}
+
+    # Check the result URL
+    assert result == 'https://github.com/test-owner/test-repo/pull/1'
+
+
 @patch('requests.get')
 def test_send_pull_request_invalid_target_branch(
-    mock_get, mock_github_issue, mock_output_dir, mock_llm_config
+    mock_get, mock_github_issue, mock_output_dir
 ):
     """Test that an error is raised when specifying a non-existent target branch"""
     repo_path = os.path.join(mock_output_dir, 'repo')
@@ -448,7 +518,6 @@ def test_send_pull_request_invalid_target_branch(
             github_username='test-user',
             patch_dir=repo_path,
             pr_type='ready',
-            llm_config=mock_llm_config,
             target_branch='nonexistent-branch',
         )
 
@@ -460,7 +529,7 @@ def test_send_pull_request_invalid_target_branch(
 @patch('requests.post')
 @patch('requests.get')
 def test_send_pull_request_git_push_failure(
-    mock_get, mock_post, mock_run, mock_github_issue, mock_output_dir, mock_llm_config
+    mock_get, mock_post, mock_run, mock_github_issue, mock_output_dir
 ):
     repo_path = os.path.join(mock_output_dir, 'repo')
 
@@ -483,7 +552,6 @@ def test_send_pull_request_git_push_failure(
             github_username='test-user',
             patch_dir=repo_path,
             pr_type='ready',
-            llm_config=mock_llm_config,
         )
 
     # Assert that subprocess.run was called twice
@@ -519,7 +587,7 @@ def test_send_pull_request_git_push_failure(
 @patch('requests.post')
 @patch('requests.get')
 def test_send_pull_request_permission_error(
-    mock_get, mock_post, mock_run, mock_github_issue, mock_output_dir, mock_llm_config
+    mock_get, mock_post, mock_run, mock_github_issue, mock_output_dir
 ):
     repo_path = os.path.join(mock_output_dir, 'repo')
 
@@ -543,7 +611,6 @@ def test_send_pull_request_permission_error(
             github_username='test-user',
             patch_dir=repo_path,
             pr_type='ready',
-            llm_config=mock_llm_config,
         )
 
     # Assert that the branch was created and pushed
@@ -757,10 +824,10 @@ def test_process_single_issue(
         github_username=github_username,
         patch_dir=f'{mock_output_dir}/patches/issue_1',
         pr_type=pr_type,
-        llm_config=mock_llm_config,
         fork_owner=None,
         additional_message=resolver_output.success_explanation,
         target_branch=None,
+        reviewer=None,
     )
 
 
@@ -940,7 +1007,7 @@ def test_process_all_successful_issues(
 @patch('requests.get')
 @patch('subprocess.run')
 def test_send_pull_request_branch_naming(
-    mock_run, mock_get, mock_github_issue, mock_output_dir, mock_llm_config
+    mock_run, mock_get, mock_github_issue, mock_output_dir
 ):
     repo_path = os.path.join(mock_output_dir, 'repo')
 
@@ -965,7 +1032,6 @@ def test_send_pull_request_branch_naming(
         github_username='test-user',
         patch_dir=repo_path,
         pr_type='branch',
-        llm_config=mock_llm_config,
     )
 
     # Assert API calls
@@ -1029,6 +1095,7 @@ def test_main(
     mock_args.llm_base_url = 'mock_url'
     mock_args.llm_api_key = 'mock_key'
     mock_args.target_branch = None
+    mock_args.reviewer = None
     mock_parser.return_value.parse_args.return_value = mock_args
 
     # Setup environment variables
@@ -1063,6 +1130,7 @@ def test_main(
         None,
         False,
         mock_args.target_branch,
+        mock_args.reviewer,
     )
 
     # Other assertions
diff --git a/tests/unit/test_agent_skill.py b/tests/unit/test_agent_skill.py
index 6079eb659aea..63745f4dd2b4 100644
--- a/tests/unit/test_agent_skill.py
+++ b/tests/unit/test_agent_skill.py
@@ -715,87 +715,3 @@ def test_parse_pptx(tmp_path):
         'Hello, this is the second test PPTX slide.\n\n'
     )
     assert output == expected_output, f'Expected output does not match. Got: {output}'
-
-
-# =============================================================================
-
-
-def test_file_editor_view(tmp_path):
-    # generate a random directory
-    random_dir = tmp_path / 'dir_1'
-    random_dir.mkdir()
-    # create a file in the directory
-    random_file = random_dir / 'a.txt'
-    random_file.write_text('Line 1\nLine 2\nLine 3\nLine 4\nLine 5')
-    random_dir_2 = tmp_path / 'dir_2'
-    random_dir_2.mkdir()
-    random_file_2 = random_dir_2 / 'b.txt'
-    random_file_2.write_text('Line 1\nLine 2\nLine 3\nLine 4\nLine 5')
-
-    from openhands.runtime.plugins.agent_skills.agentskills import file_editor
-
-    # view the file
-    result = file_editor(command='view', path=str(random_file))
-    print('\n', result)
-    assert result is not None
-    assert (
-        result.split('\n')
-        == f"""Here's the result of running `cat -n` on {random_file}:
-     1\tLine 1
-     2\tLine 2
-     3\tLine 3
-     4\tLine 4
-     5\tLine 5
-""".split('\n')
-    )
-
-    # view the directory
-    result = file_editor(command='view', path=str(tmp_path))
-    print('\n', result)
-    assert result is not None
-    assert (
-        result.strip().split('\n')
-        == f"""Here's the files and directories up to 2 levels deep in {tmp_path}, excluding hidden items:
-{tmp_path}
-{tmp_path}/dir_2
-{tmp_path}/dir_2/b.txt
-{tmp_path}/dir_1
-{tmp_path}/dir_1/a.txt
-""".strip().split('\n')
-    )
-
-
-def test_file_editor_create(tmp_path):
-    # generate a random directory
-    random_dir = tmp_path / 'dir_1'
-    random_dir.mkdir()
-    # create a file in the directory
-    random_file = random_dir / 'a.txt'
-
-    from openhands.runtime.plugins.agent_skills.agentskills import file_editor
-
-    # view an unexist file
-    result = file_editor(command='view', path=str(random_file))
-    print(result)
-    assert result is not None
-    assert (
-        result
-        == f'ERROR:\nInvalid `path` parameter: {random_file}. The path {random_file} does not exist. Please provide a valid path.'
-    )
-
-    # create a file
-    result = file_editor(command='create', path=str(random_file), file_text='Line 6')
-    print(result)
-    assert result is not None
-    assert result == f'File created successfully at: {random_file}'
-
-    # view again
-    result = file_editor(command='view', path=str(random_file))
-    print(result)
-    assert result is not None
-    assert (
-        result.strip().split('\n')
-        == f"""Here's the result of running `cat -n` on {random_file}:
-     1\tLine 6
-""".strip().split('\n')
-    )
diff --git a/tests/unit/test_arg_parser.py b/tests/unit/test_arg_parser.py
index 6788904cedc0..45b9a473742f 100644
--- a/tests/unit/test_arg_parser.py
+++ b/tests/unit/test_arg_parser.py
@@ -19,6 +19,7 @@ def test_parser_default_values():
     assert args.eval_note is None
     assert args.llm_config is None
     assert args.name == 'default'
+    assert not args.no_auto_continue
 
 
 def test_parser_custom_values():
@@ -49,6 +50,7 @@ def test_parser_custom_values():
             'gpt4',
             '-n',
             'test_session',
+            '--no-auto-continue',
         ]
     )
 
@@ -64,6 +66,7 @@ def test_parser_custom_values():
     assert args.eval_note == 'Test run'
     assert args.llm_config == 'gpt4'
     assert args.name == 'test_session'
+    assert args.no_auto_continue
 
 
 def test_parser_file_overrides_task():
@@ -124,10 +127,11 @@ def test_help_message(capsys):
         '-l LLM_CONFIG, --llm-config LLM_CONFIG',
         '-n NAME, --name NAME',
         '--config-file CONFIG_FILE',
+        '--no-auto-continue',
     ]
 
     for element in expected_elements:
         assert element in help_output, f"Expected '{element}' to be in the help message"
 
     option_count = help_output.count('  -')
-    assert option_count == 15, f'Expected 15 options, found {option_count}'
+    assert option_count == 16, f'Expected 16 options, found {option_count}'
diff --git a/tests/unit/test_command_success.py b/tests/unit/test_command_success.py
new file mode 100644
index 000000000000..b52ceb4815c7
--- /dev/null
+++ b/tests/unit/test_command_success.py
@@ -0,0 +1,27 @@
+from openhands.events.observation.commands import (
+    CmdOutputObservation,
+    IPythonRunCellObservation,
+)
+
+
+def test_cmd_output_success():
+    # Test successful command
+    obs = CmdOutputObservation(
+        command_id=1, command='ls', content='file1.txt\nfile2.txt', exit_code=0
+    )
+    assert obs.success is True
+    assert obs.error is False
+
+    # Test failed command
+    obs = CmdOutputObservation(
+        command_id=2, command='ls', content='No such file or directory', exit_code=1
+    )
+    assert obs.success is False
+    assert obs.error is True
+
+
+def test_ipython_cell_success():
+    # IPython cells are always successful
+    obs = IPythonRunCellObservation(code='print("Hello")', content='Hello')
+    assert obs.success is True
+    assert obs.error is False
diff --git a/tests/unit/test_event_serialization.py b/tests/unit/test_event_serialization.py
new file mode 100644
index 000000000000..d1989a30bb09
--- /dev/null
+++ b/tests/unit/test_event_serialization.py
@@ -0,0 +1,18 @@
+from openhands.events.observation import CmdOutputObservation
+from openhands.events.serialization import event_to_dict
+
+
+def test_command_output_success_serialization():
+    # Test successful command
+    obs = CmdOutputObservation(
+        command_id=1, command='ls', content='file1.txt\nfile2.txt', exit_code=0
+    )
+    serialized = event_to_dict(obs)
+    assert serialized['success'] is True
+
+    # Test failed command
+    obs = CmdOutputObservation(
+        command_id=2, command='ls', content='No such file or directory', exit_code=1
+    )
+    serialized = event_to_dict(obs)
+    assert serialized['success'] is False
diff --git a/tests/unit/test_import_utils.py b/tests/unit/test_import_utils.py
new file mode 100644
index 000000000000..876c37d27be1
--- /dev/null
+++ b/tests/unit/test_import_utils.py
@@ -0,0 +1,24 @@
+from abc import abstractmethod
+from dataclasses import dataclass
+
+from openhands.utils.import_utils import get_impl
+
+
+class Shape:
+    @abstractmethod
+    def get_area(self):
+        """Get the area of this shape"""
+
+
+@dataclass
+class Square(Shape):
+    length: float
+
+    def get_area(self):
+        return self.length**2
+
+
+def test_get_impl():
+    ShapeImpl = get_impl(Shape, f'{Shape.__module__}.Square')
+    shape = ShapeImpl(5)
+    assert shape.get_area() == 25
diff --git a/tests/unit/test_listen.py b/tests/unit/test_listen.py
index f19be8aedb49..c39c656e0afc 100644
--- a/tests/unit/test_listen.py
+++ b/tests/unit/test_listen.py
@@ -16,8 +16,9 @@ def __init__(self, *args, **kwargs):
 
 
 # Patch necessary components before importing from listen
-with patch('openhands.server.session.SessionManager', MockSessionManager), patch(
-    'fastapi.staticfiles.StaticFiles', MockStaticFiles
+with (
+    patch('openhands.server.session.SessionManager', MockSessionManager),
+    patch('fastapi.staticfiles.StaticFiles', MockStaticFiles),
 ):
     from openhands.server.file_config import (
         is_extension_allowed,
@@ -53,8 +54,9 @@ def test_load_file_upload_config_invalid_max_size():
 
 
 def test_is_extension_allowed():
-    with patch('openhands.server.file_config.RESTRICT_FILE_TYPES', True), patch(
-        'openhands.server.file_config.ALLOWED_EXTENSIONS', ['.txt', '.pdf']
+    with (
+        patch('openhands.server.file_config.RESTRICT_FILE_TYPES', True),
+        patch('openhands.server.file_config.ALLOWED_EXTENSIONS', ['.txt', '.pdf']),
     ):
         assert is_extension_allowed('file.txt')
         assert is_extension_allowed('file.pdf')
@@ -71,8 +73,9 @@ def test_is_extension_allowed_no_restrictions():
 
 
 def test_is_extension_allowed_wildcard():
-    with patch('openhands.server.file_config.RESTRICT_FILE_TYPES', True), patch(
-        'openhands.server.file_config.ALLOWED_EXTENSIONS', ['.*']
+    with (
+        patch('openhands.server.file_config.RESTRICT_FILE_TYPES', True),
+        patch('openhands.server.file_config.ALLOWED_EXTENSIONS', ['.*']),
     ):
         assert is_extension_allowed('file.txt')
         assert is_extension_allowed('file.pdf')
diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py
index 073743ea81e5..3dc1e1a79706 100644
--- a/tests/unit/test_llm.py
+++ b/tests/unit/test_llm.py
@@ -92,13 +92,47 @@ def test_llm_init_with_metrics():
     )  # because we didn't specify model_name in Metrics init
 
 
+@patch('openhands.llm.llm.litellm_completion')
+@patch('time.time')
+def test_response_latency_tracking(mock_time, mock_litellm_completion):
+    # Mock time.time() to return controlled values
+    mock_time.side_effect = [1000.0, 1002.5]  # Start time, end time (2.5s difference)
+
+    # Mock the completion response with a specific ID
+    mock_response = {
+        'id': 'test-response-123',
+        'choices': [{'message': {'content': 'Test response'}}],
+    }
+    mock_litellm_completion.return_value = mock_response
+
+    # Create LLM instance and make a completion call
+    config = LLMConfig(model='gpt-4o', api_key='test_key')
+    llm = LLM(config)
+    response = llm.completion(messages=[{'role': 'user', 'content': 'Hello!'}])
+
+    # Verify the response latency was tracked correctly
+    assert len(llm.metrics.response_latencies) == 1
+    latency_record = llm.metrics.response_latencies[0]
+    assert latency_record.model == 'gpt-4o'
+    assert (
+        latency_record.latency == 2.5
+    )  # Should be the difference between our mocked times
+    assert latency_record.response_id == 'test-response-123'
+
+    # Verify the completion response was returned correctly
+    assert response['id'] == 'test-response-123'
+    assert response['choices'][0]['message']['content'] == 'Test response'
+
+
 def test_llm_reset():
     llm = LLM(LLMConfig(model='gpt-4o-mini', api_key='test_key'))
     initial_metrics = copy.deepcopy(llm.metrics)
     initial_metrics.add_cost(1.0)
+    initial_metrics.add_response_latency(0.5, 'test-id')
     llm.reset()
     assert llm.metrics._accumulated_cost != initial_metrics._accumulated_cost
     assert llm.metrics._costs != initial_metrics._costs
+    assert llm.metrics._response_latencies != initial_metrics._response_latencies
     assert isinstance(llm.metrics, Metrics)
 
 
diff --git a/tests/unit/test_manager.py b/tests/unit/test_manager.py
index d17fc94bf175..9ec9e4ac3159 100644
--- a/tests/unit/test_manager.py
+++ b/tests/unit/test_manager.py
@@ -7,6 +7,7 @@
 
 from openhands.core.config.app_config import AppConfig
 from openhands.server.session.manager import SessionManager
+from openhands.server.session.session_init_data import SessionInitData
 from openhands.storage.memory import InMemoryFileStore
 
 
@@ -100,7 +101,7 @@ async def test_init_new_local_session():
             sio, AppConfig(), InMemoryFileStore()
         ) as session_manager:
             await session_manager.init_or_join_session(
-                'new-session-id', 'new-session-id', {'type': 'mock-settings'}
+                'new-session-id', 'new-session-id', SessionInitData()
             )
     assert session_instance.initialize_agent.call_count == 1
     assert sio.enter_room.await_count == 1
@@ -132,11 +133,11 @@ async def test_join_local_session():
         ) as session_manager:
             # First call initializes
             await session_manager.init_or_join_session(
-                'new-session-id', 'new-session-id', {'type': 'mock-settings'}
+                'new-session-id', 'new-session-id', SessionInitData()
             )
             # Second call joins
             await session_manager.init_or_join_session(
-                'new-session-id', 'extra-connection-id', {'type': 'mock-settings'}
+                'new-session-id', 'extra-connection-id', SessionInitData()
             )
     assert session_instance.initialize_agent.call_count == 1
     assert sio.enter_room.await_count == 2
@@ -168,7 +169,7 @@ async def test_join_cluster_session():
         ) as session_manager:
             # First call initializes
             await session_manager.init_or_join_session(
-                'new-session-id', 'new-session-id', {'type': 'mock-settings'}
+                'new-session-id', 'new-session-id', SessionInitData()
             )
     assert session_instance.initialize_agent.call_count == 0
     assert sio.enter_room.await_count == 1
@@ -199,7 +200,7 @@ async def test_add_to_local_event_stream():
             sio, AppConfig(), InMemoryFileStore()
         ) as session_manager:
             await session_manager.init_or_join_session(
-                'new-session-id', 'connection-id', {'type': 'mock-settings'}
+                'new-session-id', 'connection-id', SessionInitData()
             )
             await session_manager.send_to_event_stream(
                 'connection-id', {'event_type': 'some_event'}
@@ -232,7 +233,7 @@ async def test_add_to_cluster_event_stream():
             sio, AppConfig(), InMemoryFileStore()
         ) as session_manager:
             await session_manager.init_or_join_session(
-                'new-session-id', 'connection-id', {'type': 'mock-settings'}
+                'new-session-id', 'connection-id', SessionInitData()
             )
             await session_manager.send_to_event_stream(
                 'connection-id', {'event_type': 'some_event'}
diff --git a/tests/unit/test_message_serialization.py b/tests/unit/test_message_serialization.py
index 16035f0aed13..ed40c473cdab 100644
--- a/tests/unit/test_message_serialization.py
+++ b/tests/unit/test_message_serialization.py
@@ -1,3 +1,5 @@
+from litellm import ChatCompletionMessageToolCall
+
 from openhands.core.message import ImageContent, Message, TextContent
 
 
@@ -114,3 +116,55 @@ def test_message_with_mixed_content_and_vision_disabled():
     assert serialized_message == expected_serialized_message
     # Assert that images exist in the original message
     assert message.contains_image
+
+
+def test_message_tool_call_serialization():
+    """Test that tool calls are properly serialized into dicts for token counting."""
+    # Create a tool call
+    tool_call = ChatCompletionMessageToolCall(
+        id='call_123',
+        type='function',
+        function={'name': 'test_function', 'arguments': '{"arg1": "value1"}'},
+    )
+
+    # Create a message with the tool call
+    message = Message(
+        role='assistant',
+        content=[TextContent(text='Test message')],
+        tool_calls=[tool_call],
+    )
+
+    # Serialize the message
+    serialized = message.model_dump()
+
+    # Check that tool calls are properly serialized
+    assert 'tool_calls' in serialized
+    assert isinstance(serialized['tool_calls'], list)
+    assert len(serialized['tool_calls']) == 1
+
+    tool_call_dict = serialized['tool_calls'][0]
+    assert isinstance(tool_call_dict, dict)
+    assert tool_call_dict['id'] == 'call_123'
+    assert tool_call_dict['type'] == 'function'
+    assert tool_call_dict['function']['name'] == 'test_function'
+    assert tool_call_dict['function']['arguments'] == '{"arg1": "value1"}'
+
+
+def test_message_tool_response_serialization():
+    """Test that tool responses are properly serialized."""
+    # Create a message with tool response
+    message = Message(
+        role='tool',
+        content=[TextContent(text='Function result')],
+        tool_call_id='call_123',
+        name='test_function',
+    )
+
+    # Serialize the message
+    serialized = message.model_dump()
+
+    # Check that tool response fields are properly serialized
+    assert 'tool_call_id' in serialized
+    assert serialized['tool_call_id'] == 'call_123'
+    assert 'name' in serialized
+    assert serialized['name'] == 'test_function'
diff --git a/tests/unit/test_observation_serialization.py b/tests/unit/test_observation_serialization.py
index ae636ddf562b..67a95449b719 100644
--- a/tests/unit/test_observation_serialization.py
+++ b/tests/unit/test_observation_serialization.py
@@ -40,36 +40,23 @@ def serialization_deserialization(
 
 
 # Additional tests for various observation subclasses can be included here
-def test_observation_event_props_serialization_deserialization():
-    original_observation_dict = {
-        'id': 42,
-        'source': 'agent',
-        'timestamp': '2021-08-01T12:00:00',
-        'observation': 'run',
-        'message': 'Command `ls -l` executed with exit code 0.',
-        'extras': {
-            'exit_code': 0,
-            'command': 'ls -l',
-            'command_id': 3,
-            'hidden': False,
-            'interpreter_details': '',
-        },
-        'content': 'foo.txt',
-    }
-    serialization_deserialization(original_observation_dict, CmdOutputObservation)
-
+def test_success_field_serialization():
+    # Test success=True
+    obs = CmdOutputObservation(
+        content='Command succeeded',
+        exit_code=0,
+        command='ls -l',
+        command_id=3,
+    )
+    serialized = event_to_dict(obs)
+    assert serialized['success'] is True
 
-def test_command_output_observation_serialization_deserialization():
-    original_observation_dict = {
-        'observation': 'run',
-        'extras': {
-            'exit_code': 0,
-            'command': 'ls -l',
-            'command_id': 3,
-            'hidden': False,
-            'interpreter_details': '',
-        },
-        'message': 'Command `ls -l` executed with exit code 0.',
-        'content': 'foo.txt',
-    }
-    serialization_deserialization(original_observation_dict, CmdOutputObservation)
+    # Test success=False
+    obs = CmdOutputObservation(
+        content='No such file or directory',
+        exit_code=1,
+        command='ls -l',
+        command_id=3,
+    )
+    serialized = event_to_dict(obs)
+    assert serialized['success'] is False
diff --git a/tests/unit/test_runtime_build.py b/tests/unit/test_runtime_build.py
index 79a7c9a22b6b..b5cbd91056e8 100644
--- a/tests/unit/test_runtime_build.py
+++ b/tests/unit/test_runtime_build.py
@@ -239,6 +239,7 @@ def test_build_runtime_image_from_scratch():
                 f'{get_runtime_image_repo()}:{OH_VERSION}_mock-versioned-tag',
             ],
             platform=None,
+            extra_build_args=None,
         )
         assert (
             image_name
@@ -333,6 +334,7 @@ def image_exists_side_effect(image_name, *args):
                 # VERSION tag will NOT be included except from scratch
             ],
             platform=None,
+            extra_build_args=None,
         )
         mock_prep_build_folder.assert_called_once_with(
             ANY,
@@ -391,6 +393,7 @@ def image_exists_side_effect(image_name, *args):
                 # VERSION tag will NOT be included except from scratch
             ],
             platform=None,
+            extra_build_args=None,
         )
         mock_prep_build_folder.assert_called_once_with(
             ANY,
diff --git a/tests/unit/test_security.py b/tests/unit/test_security.py
index fab27a3ec2f3..a36c66104f65 100644
--- a/tests/unit/test_security.py
+++ b/tests/unit/test_security.py
@@ -51,24 +51,48 @@ def add_events(event_stream: EventStream, data: list[tuple[Event, EventSource]])
 
 
 def test_msg(temp_dir: str):
-    file_store = get_file_store('local', temp_dir)
-    event_stream = EventStream('main', file_store)
-    policy = """
-    raise "Disallow ABC [risk=medium]" if:
-        (msg: Message)
-        "ABC" in msg.content
-    """
-    InvariantAnalyzer(event_stream, policy)
-    data = [
-        (MessageAction('Hello world!'), EventSource.USER),
-        (MessageAction('AB!'), EventSource.AGENT),
-        (MessageAction('Hello world!'), EventSource.USER),
-        (MessageAction('ABC!'), EventSource.AGENT),
+    mock_container = MagicMock()
+    mock_container.status = 'running'
+    mock_container.attrs = {
+        'NetworkSettings': {'Ports': {'8000/tcp': [{'HostPort': 34567}]}}
+    }
+    mock_docker = MagicMock()
+    mock_docker.from_env().containers.list.return_value = [mock_container]
+
+    mock_requests = MagicMock()
+    mock_requests.get().json.return_value = {'id': 'mock-session-id'}
+    mock_requests.post().json.side_effect = [
+        {'monitor_id': 'mock-monitor-id'},
+        [],  # First check
+        [],  # Second check
+        [],  # Third check
+        [
+            'PolicyViolation(Disallow ABC [risk=medium], ranges=[<2 ranges>])'
+        ],  # Fourth check
     ]
-    add_events(event_stream, data)
-    for i in range(3):
-        assert data[i][0].security_risk == ActionSecurityRisk.LOW
-    assert data[3][0].security_risk == ActionSecurityRisk.MEDIUM
+
+    with (
+        patch(f'{InvariantAnalyzer.__module__}.docker', mock_docker),
+        patch(f'{InvariantClient.__module__}.requests', mock_requests),
+    ):
+        file_store = get_file_store('local', temp_dir)
+        event_stream = EventStream('main', file_store)
+        policy = """
+        raise "Disallow ABC [risk=medium]" if:
+            (msg: Message)
+            "ABC" in msg.content
+        """
+        InvariantAnalyzer(event_stream, policy)
+        data = [
+            (MessageAction('Hello world!'), EventSource.USER),
+            (MessageAction('AB!'), EventSource.AGENT),
+            (MessageAction('Hello world!'), EventSource.USER),
+            (MessageAction('ABC!'), EventSource.AGENT),
+        ]
+        add_events(event_stream, data)
+        for i in range(3):
+            assert data[i][0].security_risk == ActionSecurityRisk.LOW
+        assert data[3][0].security_risk == ActionSecurityRisk.MEDIUM
 
 
 @pytest.mark.parametrize(
@@ -76,22 +100,44 @@ def test_msg(temp_dir: str):
     [('rm -rf root_dir', ActionSecurityRisk.MEDIUM), ['ls', ActionSecurityRisk.LOW]],
 )
 def test_cmd(cmd, expected_risk, temp_dir: str):
-    file_store = get_file_store('local', temp_dir)
-    event_stream = EventStream('main', file_store)
-    policy = """
-    raise "Disallow rm -rf [risk=medium]" if:
-        (call: ToolCall)
-        call is tool:run
-        match("rm -rf", call.function.arguments.command)
-    """
-    InvariantAnalyzer(event_stream, policy)
-    data = [
-        (MessageAction('Hello world!'), EventSource.USER),
-        (CmdRunAction(cmd), EventSource.USER),
+    mock_container = MagicMock()
+    mock_container.status = 'running'
+    mock_container.attrs = {
+        'NetworkSettings': {'Ports': {'8000/tcp': [{'HostPort': 34567}]}}
+    }
+    mock_docker = MagicMock()
+    mock_docker.from_env().containers.list.return_value = [mock_container]
+
+    mock_requests = MagicMock()
+    mock_requests.get().json.return_value = {'id': 'mock-session-id'}
+    mock_requests.post().json.side_effect = [
+        {'monitor_id': 'mock-monitor-id'},
+        [],  # First check
+        ['PolicyViolation(Disallow rm -rf [risk=medium], ranges=[<2 ranges>])']
+        if expected_risk == ActionSecurityRisk.MEDIUM
+        else [],  # Second check
     ]
-    add_events(event_stream, data)
-    assert data[0][0].security_risk == ActionSecurityRisk.LOW
-    assert data[1][0].security_risk == expected_risk
+
+    with (
+        patch(f'{InvariantAnalyzer.__module__}.docker', mock_docker),
+        patch(f'{InvariantClient.__module__}.requests', mock_requests),
+    ):
+        file_store = get_file_store('local', temp_dir)
+        event_stream = EventStream('main', file_store)
+        policy = """
+        raise "Disallow rm -rf [risk=medium]" if:
+            (call: ToolCall)
+            call is tool:run
+            match("rm -rf", call.function.arguments.command)
+        """
+        InvariantAnalyzer(event_stream, policy)
+        data = [
+            (MessageAction('Hello world!'), EventSource.USER),
+            (CmdRunAction(cmd), EventSource.USER),
+        ]
+        add_events(event_stream, data)
+        assert data[0][0].security_risk == ActionSecurityRisk.LOW
+        assert data[1][0].security_risk == expected_risk
 
 
 @pytest.mark.parametrize(
@@ -102,26 +148,49 @@ def test_cmd(cmd, expected_risk, temp_dir: str):
     ],
 )
 def test_leak_secrets(code, expected_risk, temp_dir: str):
-    file_store = get_file_store('local', temp_dir)
-    event_stream = EventStream('main', file_store)
-    policy = """
-    from invariant.detectors import secrets
-
-    raise "Disallow writing secrets [risk=medium]" if:
-        (call: ToolCall)
-        call is tool:run_ipython
-        any(secrets(call.function.arguments.code))
-    """
-    InvariantAnalyzer(event_stream, policy)
-    data = [
-        (MessageAction('Hello world!'), EventSource.USER),
-        (IPythonRunCellAction(code), EventSource.AGENT),
-        (IPythonRunCellAction('hello'), EventSource.AGENT),
+    mock_container = MagicMock()
+    mock_container.status = 'running'
+    mock_container.attrs = {
+        'NetworkSettings': {'Ports': {'8000/tcp': [{'HostPort': 34567}]}}
+    }
+    mock_docker = MagicMock()
+    mock_docker.from_env().containers.list.return_value = [mock_container]
+
+    mock_requests = MagicMock()
+    mock_requests.get().json.return_value = {'id': 'mock-session-id'}
+    mock_requests.post().json.side_effect = [
+        {'monitor_id': 'mock-monitor-id'},
+        [],  # First check
+        ['PolicyViolation(Disallow writing secrets [risk=medium], ranges=[<2 ranges>])']
+        if expected_risk == ActionSecurityRisk.MEDIUM
+        else [],  # Second check
+        [],  # Third check
     ]
-    add_events(event_stream, data)
-    assert data[0][0].security_risk == ActionSecurityRisk.LOW
-    assert data[1][0].security_risk == expected_risk
-    assert data[2][0].security_risk == ActionSecurityRisk.LOW
+
+    with (
+        patch(f'{InvariantAnalyzer.__module__}.docker', mock_docker),
+        patch(f'{InvariantClient.__module__}.requests', mock_requests),
+    ):
+        file_store = get_file_store('local', temp_dir)
+        event_stream = EventStream('main', file_store)
+        policy = """
+        from invariant.detectors import secrets
+
+        raise "Disallow writing secrets [risk=medium]" if:
+            (call: ToolCall)
+            call is tool:run_ipython
+            any(secrets(call.function.arguments.code))
+        """
+        InvariantAnalyzer(event_stream, policy)
+        data = [
+            (MessageAction('Hello world!'), EventSource.USER),
+            (IPythonRunCellAction(code), EventSource.AGENT),
+            (IPythonRunCellAction('hello'), EventSource.AGENT),
+        ]
+        add_events(event_stream, data)
+        assert data[0][0].security_risk == ActionSecurityRisk.LOW
+        assert data[1][0].security_risk == expected_risk
+        assert data[2][0].security_risk == ActionSecurityRisk.LOW
 
 
 def test_unsafe_python_code(temp_dir: str):
@@ -458,26 +527,48 @@ def default_config():
 def test_check_usertask(
     mock_litellm_completion, usertask, is_appropriate, default_config, temp_dir: str
 ):
-    file_store = get_file_store('local', temp_dir)
-    event_stream = EventStream('main', file_store)
-    analyzer = InvariantAnalyzer(event_stream)
-    mock_response = {'choices': [{'message': {'content': is_appropriate}}]}
-    mock_litellm_completion.return_value = mock_response
-    analyzer.guardrail_llm = LLM(config=default_config)
-    analyzer.check_browsing_alignment = True
-    data = [
-        (MessageAction(usertask), EventSource.USER),
+    mock_container = MagicMock()
+    mock_container.status = 'running'
+    mock_container.attrs = {
+        'NetworkSettings': {'Ports': {'8000/tcp': [{'HostPort': 34567}]}}
+    }
+    mock_docker = MagicMock()
+    mock_docker.from_env().containers.list.return_value = [mock_container]
+
+    mock_requests = MagicMock()
+    mock_requests.get().json.return_value = {'id': 'mock-session-id'}
+    mock_requests.post().json.side_effect = [
+        {'monitor_id': 'mock-monitor-id'},
+        [],
+        [
+            'PolicyViolation(Vulnerability in python code [risk=medium], ranges=[<2 ranges>])'
+        ],
     ]
-    add_events(event_stream, data)
-    event_list = list(event_stream.get_events())
 
-    if is_appropriate == 'No':
-        assert len(event_list) == 2
-        assert type(event_list[0]) == MessageAction
-        assert type(event_list[1]) == ChangeAgentStateAction
-    elif is_appropriate == 'Yes':
-        assert len(event_list) == 1
-        assert type(event_list[0]) == MessageAction
+    with (
+        patch(f'{InvariantAnalyzer.__module__}.docker', mock_docker),
+        patch(f'{InvariantClient.__module__}.requests', mock_requests),
+    ):
+        file_store = get_file_store('local', temp_dir)
+        event_stream = EventStream('main', file_store)
+        analyzer = InvariantAnalyzer(event_stream)
+        mock_response = {'choices': [{'message': {'content': is_appropriate}}]}
+        mock_litellm_completion.return_value = mock_response
+        analyzer.guardrail_llm = LLM(config=default_config)
+        analyzer.check_browsing_alignment = True
+        data = [
+            (MessageAction(usertask), EventSource.USER),
+        ]
+        add_events(event_stream, data)
+        event_list = list(event_stream.get_events())
+
+        if is_appropriate == 'No':
+            assert len(event_list) == 2
+            assert type(event_list[0]) == MessageAction
+            assert type(event_list[1]) == ChangeAgentStateAction
+        elif is_appropriate == 'Yes':
+            assert len(event_list) == 1
+            assert type(event_list[0]) == MessageAction
 
 
 @pytest.mark.parametrize(
@@ -491,23 +582,45 @@ def test_check_usertask(
 def test_check_fillaction(
     mock_litellm_completion, fillaction, is_harmful, default_config, temp_dir: str
 ):
-    file_store = get_file_store('local', temp_dir)
-    event_stream = EventStream('main', file_store)
-    analyzer = InvariantAnalyzer(event_stream)
-    mock_response = {'choices': [{'message': {'content': is_harmful}}]}
-    mock_litellm_completion.return_value = mock_response
-    analyzer.guardrail_llm = LLM(config=default_config)
-    analyzer.check_browsing_alignment = True
-    data = [
-        (BrowseInteractiveAction(browser_actions=fillaction), EventSource.AGENT),
+    mock_container = MagicMock()
+    mock_container.status = 'running'
+    mock_container.attrs = {
+        'NetworkSettings': {'Ports': {'8000/tcp': [{'HostPort': 34567}]}}
+    }
+    mock_docker = MagicMock()
+    mock_docker.from_env().containers.list.return_value = [mock_container]
+
+    mock_requests = MagicMock()
+    mock_requests.get().json.return_value = {'id': 'mock-session-id'}
+    mock_requests.post().json.side_effect = [
+        {'monitor_id': 'mock-monitor-id'},
+        [],
+        [
+            'PolicyViolation(Vulnerability in python code [risk=medium], ranges=[<2 ranges>])'
+        ],
     ]
-    add_events(event_stream, data)
-    event_list = list(event_stream.get_events())
-
-    if is_harmful == 'Yes':
-        assert len(event_list) == 2
-        assert type(event_list[0]) == BrowseInteractiveAction
-        assert type(event_list[1]) == ChangeAgentStateAction
-    elif is_harmful == 'No':
-        assert len(event_list) == 1
-        assert type(event_list[0]) == BrowseInteractiveAction
+
+    with (
+        patch(f'{InvariantAnalyzer.__module__}.docker', mock_docker),
+        patch(f'{InvariantClient.__module__}.requests', mock_requests),
+    ):
+        file_store = get_file_store('local', temp_dir)
+        event_stream = EventStream('main', file_store)
+        analyzer = InvariantAnalyzer(event_stream)
+        mock_response = {'choices': [{'message': {'content': is_harmful}}]}
+        mock_litellm_completion.return_value = mock_response
+        analyzer.guardrail_llm = LLM(config=default_config)
+        analyzer.check_browsing_alignment = True
+        data = [
+            (BrowseInteractiveAction(browser_actions=fillaction), EventSource.AGENT),
+        ]
+        add_events(event_stream, data)
+        event_list = list(event_stream.get_events())
+
+        if is_harmful == 'Yes':
+            assert len(event_list) == 2
+            assert type(event_list[0]) == BrowseInteractiveAction
+            assert type(event_list[1]) == ChangeAgentStateAction
+        elif is_harmful == 'No':
+            assert len(event_list) == 1
+            assert type(event_list[0]) == BrowseInteractiveAction