Merge branch 'main' into kevin
SmartManoj committed Sep 13, 2024 · 2 parents 7c475d8 + bff9296 · commit 8edc79a
Showing 18 changed files with 132 additions and 68 deletions.
13 changes: 9 additions & 4 deletions .github/workflows/pypi-release.yml
@@ -1,9 +1,14 @@
name: Release workflow
# Publishes the OpenHands PyPi package
name: Publish PyPi Package

# Triggered manually
on:
push:
tags:
- "[0-9]+.[0-9]+.[0-9]+*"
workflow_dispatch:
inputs:
reason:
description: 'Reason for manual trigger'
required: true
default: ''

jobs:
release:
2 changes: 1 addition & 1 deletion README.md
@@ -65,7 +65,7 @@ The vision is to leverage SLMs effectively and work towards solving most of the
<br/>
<a href="https://docs.all-hands.dev/modules/usage/getting-started"><img src="https://img.shields.io/badge/Documentation-000?logo=googledocs&logoColor=FFE165&style=for-the-badge" alt="Check out the documentation"></a>
<a href="https://arxiv.org/abs/2407.16741"><img src="https://img.shields.io/badge/Paper%20on%20Arxiv-000?logoColor=FFE165&logo=arxiv&style=for-the-badge" alt="Paper on Arxiv"></a>
<a href="https://huggingface.co/spaces/OpenDevin/evaluation"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score"></a>
<a href="https://huggingface.co/spaces/OpenHands/evaluation"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score"></a>
<hr>
</div>

2 changes: 1 addition & 1 deletion docs/src/components/HomepageHeader/HomepageHeader.tsx
@@ -29,7 +29,7 @@ export function HomepageHeader() {
<br/>
<a href="https://docs.all-hands.dev/modules/usage/getting-started"><img src="https://img.shields.io/badge/Documentation-000?logo=googledocs&logoColor=FFE165&style=for-the-badge" alt="Check out the documentation" /></a>
<a href="https://arxiv.org/abs/2407.16741"><img src="https://img.shields.io/badge/Paper%20on%20Arxiv-000?logoColor=FFE165&logo=arxiv&style=for-the-badge" alt="Paper on Arxiv" /></a>
<a href="https://huggingface.co/spaces/OpenDevin/evaluation"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score" /></a>
<a href="https://huggingface.co/spaces/OpenHands/evaluation"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score" /></a>
</div>

<Demo />
6 changes: 3 additions & 3 deletions evaluation/README.md
@@ -9,7 +9,7 @@ To better organize the evaluation folder, we should follow the rules below:
- Each subfolder contains a specific benchmark or experiment. For example, `evaluation/swe_bench` should contain
all the preprocessing/evaluation/analysis scripts.
- Raw data and experimental records should not be stored within this repo.
- For model outputs, they should be stored at [this huggingface space](https://huggingface.co/spaces/OpenDevin/evaluation) for visualization.
- For model outputs, they should be stored at [this huggingface space](https://huggingface.co/spaces/OpenHands/evaluation) for visualization.
- Important data files of manageable size and analysis scripts (e.g., jupyter notebooks) can be directly uploaded to this repo.

## Supported Benchmarks
@@ -69,8 +69,8 @@ temperature = 0.0

### Result Visualization

Check [this huggingface space](https://huggingface.co/spaces/OpenDevin/evaluation) for visualization of existing experimental results.
Check [this huggingface space](https://huggingface.co/spaces/OpenHands/evaluation) for visualization of existing experimental results.

### Upload your results

You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenDevin/evaluation) and submit a PR of your evaluation results to our hosted huggingface repo via PR following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).
You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenHands/evaluation) and submit a PR of your evaluation results to our hosted huggingface repo via PR following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).
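
The guide linked above describes the web flow for submitting results. As a rough sketch of doing the same programmatically (assuming the `huggingface_hub` Python client, a token with write access, and a hypothetical local results folder), an upload can open the pull request directly:

```python
from huggingface_hub import HfApi

# Hypothetical layout: your run outputs live under evaluation/evaluation_outputs/outputs.
api = HfApi()
api.upload_folder(
    folder_path="evaluation/evaluation_outputs/outputs",
    path_in_repo="outputs",
    repo_id="OpenHands/evaluation",
    repo_type="space",
    create_pr=True,  # open a pull request instead of committing to main
    commit_message="Add my evaluation results",
)
```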
2 changes: 1 addition & 1 deletion evaluation/miniwob/README.md
@@ -26,7 +26,7 @@ poetry run python evaluation/miniwob/get_success_rate.py evaluation/evaluation_o

## Submit your evaluation results

You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenDevin/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).
You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenHands/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).


## BrowsingAgent V1.0 result
8 changes: 4 additions & 4 deletions evaluation/swe_bench/README.md
@@ -125,7 +125,7 @@ With `output.jsonl` file, you can run `eval_infer.sh` to evaluate generated patc

> If you want to evaluate existing results, you should first run this to clone existing outputs
>```bash
>git clone https://huggingface.co/spaces/OpenDevin/evaluation evaluation/evaluation_outputs
>git clone https://huggingface.co/spaces/OpenHands/evaluation evaluation/evaluation_outputs
>```
NOTE, you should have already pulled the instance-level OR env-level docker images following [this section](#openhands-swe-bench-instance-level-docker-support).
@@ -159,10 +159,10 @@ The final results will be saved to `evaluation/evaluation_outputs/outputs/swe_be
## Visualize Results
First you need to clone `https://huggingface.co/spaces/OpenDevin/evaluation` and add your own running results from openhands into the `outputs` of the cloned repo.
First you need to clone `https://huggingface.co/spaces/OpenHands/evaluation` and add your own running results from openhands into the `outputs` of the cloned repo.
```bash
git clone https://huggingface.co/spaces/OpenDevin/evaluation
git clone https://huggingface.co/spaces/OpenHands/evaluation
```
**(optional) setup streamlit environment with conda**:
@@ -186,4 +186,4 @@ Then you can access the SWE-Bench trajectory visualizer at `localhost:8501`.
## Submit your evaluation results
You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenDevin/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).
You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenHands/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).
4 changes: 2 additions & 2 deletions evaluation/swe_bench/run_infer.py
@@ -501,11 +501,11 @@ def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:
output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
instances = prepare_dataset(swe_bench_tests, output_file, args.eval_n_limit)

if not isinstance(
if len(instances) > 0 and not isinstance(
instances['PASS_TO_PASS'][instances['PASS_TO_PASS'].index[0]], str
):
for col in ['PASS_TO_PASS', 'FAIL_TO_PASS']:
instances[col] = instances[col].apply(lambda x: str(list(x)))
instances[col] = instances[col].apply(lambda x: str(x))

run_evaluation(
instances, metadata, output_file, args.eval_num_workers, process_instance
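
In the hunk above, `PASS_TO_PASS`/`FAIL_TO_PASS` are serialized to strings only when the dataset slice is non-empty and the columns are not already strings; the `len(instances) > 0` guard avoids indexing row 0 of an empty selection. A minimal sketch of that logic on a made-up two-row frame (the real data comes from `prepare_dataset`):

```python
import pandas as pd

# Made-up slice standing in for the SWE-bench instances loaded by prepare_dataset().
instances = pd.DataFrame(
    {
        "PASS_TO_PASS": [["test_a", "test_b"], ["test_c"]],
        "FAIL_TO_PASS": [["test_d"], []],
    }
)

# Only serialize when there is at least one row and the column is not already a string.
if len(instances) > 0 and not isinstance(
    instances["PASS_TO_PASS"][instances["PASS_TO_PASS"].index[0]], str
):
    for col in ["PASS_TO_PASS", "FAIL_TO_PASS"]:
        instances[col] = instances[col].apply(lambda x: str(x))

print(instances["PASS_TO_PASS"].tolist())
# ["['test_a', 'test_b']", "['test_c']"]
```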
2 changes: 1 addition & 1 deletion evaluation/webarena/README.md
@@ -37,7 +37,7 @@ poetry run python evaluation/webarena/get_success_rate.py evaluation/evaluation_

## Submit your evaluation results

You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenDevin/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).
You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenHands/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).

## BrowsingAgent V1.0 result

4 changes: 2 additions & 2 deletions frontend/package-lock.json

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion frontend/package.json
@@ -1,6 +1,6 @@
{
"name": "openhands-frontend",
"version": "0.9.2",
"version": "0.9.3",
"private": true,
"type": "module",
"engines": {
6 changes: 5 additions & 1 deletion frontend/src/components/modals/settings/ModelSelector.tsx
@@ -48,7 +48,7 @@ export function ModelSelector({

const handleChangeModel = (model: string) => {
const separator = models[selectedProvider || ""]?.separator || "";
const fullModel = selectedProvider + separator + model;
let fullModel = selectedProvider + separator + model;
if (selectedProvider === "openai") {
// LiteLLM lists OpenAI models without the openai/ prefix
fullModel = model;
}
setLitellmId(fullModel);
onModelChange(fullModel);
setSelectedModel(model);
57 changes: 54 additions & 3 deletions frontend/src/components/modals/settings/SettingsForm.test.tsx
@@ -20,10 +20,10 @@ const renderSettingsForm = (settings?: Settings) => {
settings={
settings || {
LLM_MODEL: "gpt-4o",
LLM_BASE_URL: "base_url",
AGENT: "agent1",
LANGUAGE: "en",
LLM_API_KEY: "sk-...",
LLM_BASE_URL: "",
CONFIRMATION_MODE: false,
SECURITY_ANALYZER: "",
}
@@ -62,10 +62,10 @@ describe("SettingsForm", () => {
it("should display the existing values if they are present", () => {
renderSettingsForm({
LLM_MODEL: "gpt-3.5-turbo",
LLM_BASE_URL: "base_url",
AGENT: "agent2",
LANGUAGE: "es",
LLM_API_KEY: "sk-...",
LLM_BASE_URL: "",
CONFIRMATION_MODE: false,
SECURITY_ANALYZER: "",
});
@@ -81,15 +81,66 @@
expect(languageInput).toHaveValue("Español");
});

it("should show advanced settings by default if advanced settings are in use", () => {
renderSettingsForm({
LLM_MODEL: "gpt-3.5-turbo",
AGENT: "agent2",
LANGUAGE: "es",
LLM_API_KEY: "sk-...",
LLM_BASE_URL: "",
CONFIRMATION_MODE: true,
SECURITY_ANALYZER: "",
});

const customModelInput = screen.getByTestId("custom-model-input");
expect(customModelInput).toBeInTheDocument();
});

it("should show advanced settings if using a custom model", () => {
renderSettingsForm({
LLM_MODEL: "bagel",
AGENT: "agent2",
LANGUAGE: "es",
LLM_API_KEY: "sk-...",
LLM_BASE_URL: "",
CONFIRMATION_MODE: false,
SECURITY_ANALYZER: "",
});

const customModelInput = screen.getByTestId("custom-model-input");
expect(customModelInput).toBeInTheDocument();
});

it("should show advanced settings if button is clicked", async () => {
renderSettingsForm({
LLM_MODEL: "gpt-3.5-turbo",
AGENT: "agent2",
LANGUAGE: "es",
LLM_API_KEY: "sk-...",
LLM_BASE_URL: "",
CONFIRMATION_MODE: false,
SECURITY_ANALYZER: "",
});

let customModelInput = screen.queryByTestId("custom-model-input");
expect(customModelInput).not.toBeInTheDocument();

const advancedToggle = screen.getByTestId("advanced-options-toggle");
await userEvent.click(advancedToggle);

customModelInput = screen.getByTestId("custom-model-input");
expect(customModelInput).toBeInTheDocument();
});

it("should disable settings when disabled is true", () => {
renderWithProviders(
<SettingsForm
settings={{
LLM_MODEL: "gpt-4o",
LLM_BASE_URL: "base_url",
AGENT: "agent1",
LANGUAGE: "en",
LLM_API_KEY: "sk-...",
LLM_BASE_URL: "",
CONFIRMATION_MODE: false,
SECURITY_ANALYZER: "",
}}
Expand Down
11 changes: 8 additions & 3 deletions frontend/src/components/modals/settings/SettingsForm.tsx
@@ -41,9 +41,14 @@ function SettingsForm({
}: SettingsFormProps) {
const { t } = useTranslation();
const { isOpen: isVisible, onOpenChange: onVisibleChange } = useDisclosure();
const advancedAlreadyInUse =
!!settings.SECURITY_ANALYZER || !!settings.CONFIRMATION_MODE;
// TODO: || model is not in the list
const advancedAlreadyInUse = React.useMemo(
() =>
!!settings.SECURITY_ANALYZER ||
!!settings.CONFIRMATION_MODE ||
!!settings.LLM_BASE_URL ||
(!!settings.LLM_MODEL && !models.includes(settings.LLM_MODEL)),
[],
);
const [enableAdvanced, setEnableAdvanced] =
React.useState(advancedAlreadyInUse);

frontend/src/components/modals/settings/SettingsModal.test.tsx
@@ -79,7 +79,9 @@ describe("SettingsModal", () => {
it("should close the modal when the close button is clicked", async () => {
const user = userEvent.setup();
const onOpenChange = vi.fn();
renderWithProviders(<SettingsModal isOpen onOpenChange={onOpenChange} />);
await act(async () =>
renderWithProviders(<SettingsModal isOpen onOpenChange={onOpenChange} />),
);

const cancelButton = screen.getByRole("button", {
name: /MODAL_CLOSE_BUTTON_LABEL/i, // i18n key
52 changes: 28 additions & 24 deletions frontend/src/components/modals/settings/SettingsModal.tsx
@@ -152,30 +152,34 @@ function SettingsModal({ isOpen, onOpenChange }: SettingsProps) {
title={t(I18nKey.CONFIGURATION$MODAL_TITLE)}
isDismissable={settingsAreUpToDate()}
subtitle={subtitle}
actions={[
{
label: t(I18nKey.CONFIGURATION$MODAL_SAVE_BUTTON_LABEL),
action: handleSaveSettings,
isDisabled: saveIsDisabled,
closeAfterAction: true,
className: "bg-primary rounded-lg",
},
{
label: t(I18nKey.CONFIGURATION$MODAL_RESET_BUTTON_LABEL),
action: handleResetSettings,
closeAfterAction: false,
className: "bg-neutral-500 rounded-lg",
},
{
label: t(I18nKey.CONFIGURATION$MODAL_CLOSE_BUTTON_LABEL),
action: () => {
setSettings(getSettings()); // reset settings from any changes
},
isDisabled: !settingsAreUpToDate(),
closeAfterAction: true,
className: "bg-rose-600 rounded-lg",
},
]}
actions={
loading
? []
: [
{
label: t(I18nKey.CONFIGURATION$MODAL_SAVE_BUTTON_LABEL),
action: handleSaveSettings,
isDisabled: saveIsDisabled,
closeAfterAction: true,
className: "bg-primary rounded-lg",
},
{
label: t(I18nKey.CONFIGURATION$MODAL_RESET_BUTTON_LABEL),
action: handleResetSettings,
closeAfterAction: false,
className: "bg-neutral-500 rounded-lg",
},
{
label: t(I18nKey.CONFIGURATION$MODAL_CLOSE_BUTTON_LABEL),
action: () => {
setSettings(getSettings()); // reset settings from any changes
},
isDisabled: !settingsAreUpToDate(),
closeAfterAction: true,
className: "bg-rose-600 rounded-lg",
},
]
}
>
{loading && <Spinner />}
{!loading && (
2 changes: 1 addition & 1 deletion frontend/src/services/settings.ts
@@ -11,7 +11,7 @@ export type Settings = {
};

export const DEFAULT_SETTINGS: Settings = {
LLM_MODEL: "openai/gpt-4o",
LLM_MODEL: "gpt-4o",
LLM_BASE_URL: "",
AGENT: "CodeActAgent",
LANGUAGE: "en",
21 changes: 7 additions & 14 deletions openhands/server/listen.py
@@ -417,6 +417,9 @@ async def list_files(request: Request, path: str | None = None):
)
runtime: Runtime = request.state.session.agent_session.runtime
file_list = runtime.list_files(path)
if path:
file_list = [os.path.join(path, f) for f in file_list]

file_list = [f for f in file_list if f not in FILES_TO_IGNORE]

def filter_for_gitignore(file_list, base_path):
@@ -459,13 +462,7 @@ async def select_file(file: str, request: Request):
"""
runtime: Runtime = request.state.session.agent_session.runtime

# convert file to an absolute path inside the runtime
if not os.path.isabs(file):
return JSONResponse(
status_code=status.HTTP_400_BAD_REQUEST,
content={'error': 'File path must be absolute'},
)

file = os.path.join(runtime.config.workspace_mount_path_in_sandbox, file)
read_action = FileReadAction(file)
observation = runtime.run_action(read_action)

@@ -703,15 +700,11 @@ async def save_file(request: Request):
if not file_path or content is None:
raise HTTPException(status_code=400, detail='Missing filePath or content')

# Make sure file_path is abs
if not os.path.isabs(file_path):
return JSONResponse(
status_code=status.HTTP_400_BAD_REQUEST,
content={'error': 'File path must be absolute'},
)

# Save the file to the agent's runtime file store
runtime: Runtime = request.state.session.agent_session.runtime
file_path = os.path.join(
runtime.config.workspace_mount_path_in_sandbox, file_path
)
write_action = FileWriteAction(file_path, content)
observation = runtime.run_action(write_action)

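The hunks above stop requiring absolute paths from the client: `list_files` prefixes returned entries with the requested subdirectory, and `select_file`/`save_file` join the supplied path onto the sandbox workspace mount. A small sketch of that joining behavior, assuming a `/workspace` mount (the real value is `runtime.config.workspace_mount_path_in_sandbox`):

```python
import os

# Assumed mount point; in listen.py this comes from runtime.config.
WORKSPACE_MOUNT_PATH_IN_SANDBOX = "/workspace"

def resolve_in_sandbox(client_path: str) -> str:
    """Map a path sent by the frontend onto the sandbox workspace root."""
    # os.path.join drops the left-hand side when the right-hand side is
    # absolute, so absolute client paths pass through unchanged.
    return os.path.join(WORKSPACE_MOUNT_PATH_IN_SANDBOX, client_path)

print(resolve_in_sandbox("README.md"))      # /workspace/README.md
print(resolve_in_sandbox("docs/usage.md"))  # /workspace/docs/usage.md
print(resolve_in_sandbox("/etc/hosts"))     # /etc/hosts
```
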
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "openhands-ai"
version = "0.9.2"
version = "0.9.3"
description = "OpenHands: Code Less, Make More"
authors = ["OpenHands"]
license = "MIT"