
Commit

Merge branch 'main' into kevin
SmartManoj committed Jan 13, 2025
2 parents 067860d + 23f40a1 commit e86cb4e
Showing 26 changed files with 826 additions and 477 deletions.
8 changes: 6 additions & 2 deletions .github/workflows/openhands-resolver.yml
@@ -184,6 +184,7 @@ jobs:
});
- name: Install OpenHands
id: install_openhands
uses: actions/github-script@v7
env:
COMMENT_BODY: ${{ github.event.comment.body || '' }}
@@ -196,7 +197,6 @@ jobs:
const reviewBody = process.env.REVIEW_BODY.trim();
const labelName = process.env.LABEL_NAME.trim();
const eventName = process.env.EVENT_NAME.trim();
// Check conditions
const isExperimentalLabel = labelName === "fix-me-experimental";
const isIssueCommentExperimental =
@@ -205,6 +205,9 @@
const isReviewCommentExperimental =
eventName === "pull_request_review" && reviewBody.includes("@openhands-agent-exp");
// Set output variable
core.setOutput('isExperimental', isExperimentalLabel || isIssueCommentExperimental || isReviewCommentExperimental);
// Perform package installation
if (isExperimentalLabel || isIssueCommentExperimental || isReviewCommentExperimental) {
console.log("Installing experimental OpenHands...");
@@ -230,7 +233,8 @@ jobs:
--issue-number ${{ env.ISSUE_NUMBER }} \
--issue-type ${{ env.ISSUE_TYPE }} \
--max-iterations ${{ env.MAX_ITERATIONS }} \
--comment-id ${{ env.COMMENT_ID }}
--comment-id ${{ env.COMMENT_ID }} \
--is-experimental ${{ steps.install_openhands.outputs.isExperimental }}
- name: Check resolution result
id: check_result
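
The new `isExperimental` step output is forwarded to the resolver as `--is-experimental`. The resolver's argument parsing is not part of this diff; the following is a hypothetical Python sketch of how such a flag could be accepted, keeping in mind that GitHub Actions step outputs arrive as the strings 'true'/'false' (all names here are illustrative, not the real resolver CLI):

import argparse

def str_to_bool(value: str) -> bool:
    # Step outputs are plain strings, so map 'true'/'false' to a bool explicitly.
    return value.strip().lower() in ('true', '1', 'yes')

parser = argparse.ArgumentParser()
parser.add_argument('--issue-number', type=int, required=True)
parser.add_argument('--max-iterations', type=int, default=50)
parser.add_argument('--comment-id', type=int, default=None)
parser.add_argument('--is-experimental', type=str_to_bool, default=False)

args = parser.parse_args(['--issue-number', '42', '--is-experimental', 'true'])
print(args.is_experimental)  # True
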
4 changes: 2 additions & 2 deletions evaluation/benchmarks/the_agent_company/run_infer.py
@@ -80,7 +80,7 @@ def load_dependencies(runtime: Runtime) -> List[str]:
def init_task_env(runtime: Runtime, hostname: str, env_llm_config: LLMConfig):
command = (
f'SERVER_HOSTNAME={hostname} '
f'LITELLM_API_KEY={env_llm_config.api_key} '
f'LITELLM_API_KEY={env_llm_config.api_key.get_secret_value() if env_llm_config.api_key else None} '
f'LITELLM_BASE_URL={env_llm_config.base_url} '
f'LITELLM_MODEL={env_llm_config.model} '
'bash /utils/init.sh'
@@ -165,7 +165,7 @@ def run_evaluator(
runtime: Runtime, env_llm_config: LLMConfig, trajectory_path: str, result_path: str
):
command = (
f'LITELLM_API_KEY={env_llm_config.api_key} '
f'LITELLM_API_KEY={env_llm_config.api_key.get_secret_value() if env_llm_config.api_key else None} '
f'LITELLM_BASE_URL={env_llm_config.base_url} '
f'LITELLM_MODEL={env_llm_config.model} '
f"DECRYPTION_KEY='theagentcompany is all you need' " # Hardcoded Key
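
The added `get_secret_value()` calls are needed because `LLMConfig.api_key` appears to be a pydantic `SecretStr` (see the `app_config.py` changes below), which masks itself when interpolated into a string. A minimal sketch with a stand-in model, assuming pydantic v2:

from pydantic import BaseModel, SecretStr

class DemoLLMConfig(BaseModel):  # stand-in, not the real LLMConfig
    api_key: SecretStr | None = None

cfg = DemoLLMConfig(api_key='sk-test')
print(f'LITELLM_API_KEY={cfg.api_key}')  # LITELLM_API_KEY=**********  (masked)
print(f'LITELLM_API_KEY={cfg.api_key.get_secret_value() if cfg.api_key else None}')  # real value
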
51 changes: 1 addition & 50 deletions evaluation/utils/shared.py
@@ -53,30 +53,6 @@ class EvalMetadata(BaseModel):
details: dict[str, Any] | None = None
condenser_config: CondenserConfig | None = None

def model_dump(self, *args, **kwargs):
dumped_dict = super().model_dump(*args, **kwargs)
# avoid leaking sensitive information
dumped_dict['llm_config'] = self.llm_config.to_safe_dict()
if hasattr(self.condenser_config, 'llm_config'):
dumped_dict['condenser_config']['llm_config'] = (
self.condenser_config.llm_config.to_safe_dict()
)

return dumped_dict

def model_dump_json(self, *args, **kwargs):
dumped = super().model_dump_json(*args, **kwargs)
dumped_dict = json.loads(dumped)
# avoid leaking sensitive information
dumped_dict['llm_config'] = self.llm_config.to_safe_dict()
if hasattr(self.condenser_config, 'llm_config'):
dumped_dict['condenser_config']['llm_config'] = (
self.condenser_config.llm_config.to_safe_dict()
)

logger.debug(f'Dumped metadata: {dumped_dict}')
return json.dumps(dumped_dict)


class EvalOutput(BaseModel):
# NOTE: User-specified
@@ -99,23 +75,6 @@ class EvalOutput(BaseModel):
# Optionally save the input test instance
instance: dict[str, Any] | None = None

def model_dump(self, *args, **kwargs):
dumped_dict = super().model_dump(*args, **kwargs)
# Remove None values
dumped_dict = {k: v for k, v in dumped_dict.items() if v is not None}
# Apply custom serialization for metadata (to avoid leaking sensitive information)
if self.metadata is not None:
dumped_dict['metadata'] = self.metadata.model_dump()
return dumped_dict

def model_dump_json(self, *args, **kwargs):
dumped = super().model_dump_json(*args, **kwargs)
dumped_dict = json.loads(dumped)
# Apply custom serialization for metadata (to avoid leaking sensitive information)
if 'metadata' in dumped_dict:
dumped_dict['metadata'] = json.loads(self.metadata.model_dump_json())
return json.dumps(dumped_dict)


class EvalException(Exception):
pass
@@ -315,15 +274,7 @@ def update_progress(
logger.info(
f'Finished evaluation for instance {result.instance_id}: {str(result.test_result)[:300]}...\n'
)

# Custom JSON encoder
class NumpyEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, np.ndarray):
return obj.tolist() # Convert ndarray to list
return super().default(obj)

output_fp.write(json.dumps(result.model_dump(), cls=NumpyEncoder) + '\n')
output_fp.write(result.model_dump_json() + '\n')
output_fp.flush()


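
The hand-written `model_dump`/`model_dump_json` overrides and the custom NumPy encoder are dropped in favor of pydantic's built-in serialization, which masks `SecretStr` fields by default. An illustrative sketch with stand-in models, assuming pydantic v2:

from pydantic import BaseModel, SecretStr

class DemoMetadata(BaseModel):  # stand-in, not the real EvalMetadata
    model: str
    api_key: SecretStr | None = None

class DemoOutput(BaseModel):  # stand-in, not the real EvalOutput
    instance_id: str
    metadata: DemoMetadata | None = None

out = DemoOutput(instance_id='1', metadata=DemoMetadata(model='gpt-4o', api_key='sk-test'))
print(out.model_dump_json())
# {"instance_id":"1","metadata":{"model":"gpt-4o","api_key":"**********"}}
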
10 changes: 3 additions & 7 deletions openhands/core/config/README.md
@@ -37,21 +37,17 @@ export SANDBOX_TIMEOUT='300'

## Type Handling

The `load_from_env` function attempts to cast environment variable values to the types specified in the dataclasses. It handles:
The `load_from_env` function attempts to cast environment variable values to the types specified in the models. It handles:

- Basic types (str, int, bool)
- Optional types (e.g., `str | None`)
- Nested dataclasses
- Nested models

If type casting fails, an error is logged, and the default value is retained.
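
A rough sketch of the kind of per-field casting described here, with a stand-in model (not the actual `load_from_env` implementation), assuming pydantic v2:

import os
from pydantic import BaseModel

class DemoSandboxConfig(BaseModel):  # stand-in, not the real SandboxConfig
    timeout: int = 120
    enable_auto_lint: bool = False

os.environ['SANDBOX_TIMEOUT'] = '300'

raw = os.environ['SANDBOX_TIMEOUT']
field_type = DemoSandboxConfig.model_fields['timeout'].annotation  # <class 'int'>
try:
    value = field_type(raw)                 # cast '300' -> 300
except (TypeError, ValueError):
    value = DemoSandboxConfig().timeout     # on failure, log an error and keep the default
print(DemoSandboxConfig(timeout=value))     # timeout=300 enable_auto_lint=False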

## Default Values

If an environment variable is not set, the default value specified in the dataclass is used.

## Nested Configurations

The `AppConfig` class contains nested configurations like `LLMConfig` and `AgentConfig`. The `load_from_env` function handles these by recursively processing nested dataclasses with updated prefixes.
If an environment variable is not set, the default value specified in the model is used.

## Security Considerations

48 changes: 17 additions & 31 deletions openhands/core/config/agent_config.py
@@ -1,11 +1,9 @@
from dataclasses import dataclass, field, fields
import inspect
from pydantic import BaseModel, Field

from openhands.core.config.condenser_config import CondenserConfig, NoOpCondenserConfig
from openhands.core.config.config_utils import get_field_info


@dataclass
class AgentConfig:
class AgentConfig(BaseModel):
"""Configuration for the agent.
Attributes:
@@ -24,30 +22,18 @@ class AgentConfig:
condenser: Configuration for the memory condenser. Default is NoOpCondenserConfig.
"""

function_calling: bool = False
codeact_enable_browsing: bool = True
codeact_enable_llm_editor: bool = False
codeact_enable_jupyter: bool = True
micro_agent_name: str | None = None
memory_enabled: bool = False
memory_max_threads: int = 3
llm_config: str | None = None
use_microagents: bool = True
disabled_microagents: list[str] | None = None
mind_voice: str | None = None
mind_voice_language: str = 'English'
condenser: CondenserConfig = field(default_factory=NoOpCondenserConfig) # type: ignore

def defaults_to_dict(self) -> dict:
"""Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
result = {}
for f in fields(self):
result[f.name] = get_field_info(f)
return result
codeact_enable_browsing: bool = Field(default=True)
codeact_enable_llm_editor: bool = Field(default=False)
codeact_enable_jupyter: bool = Field(default=True)
micro_agent_name: str | None = Field(default=None)
memory_enabled: bool = Field(default=False)
memory_max_threads: int = Field(default=3)
llm_config: str | None = Field(default=None)
use_microagents: bool = Field(default=True)
disabled_microagents: list[str] | None = Field(default=None)
condenser: CondenserConfig = Field(default_factory=NoOpCondenserConfig)

function_calling: bool = Field(default=True)
mind_voice: str | None = Field(default=None)
mind_voice_language: str = Field(default='English')

@classmethod
def from_dict(cls, env):
return cls(**{
k: v for k, v in env.items()
if k in inspect.signature(cls).parameters
})
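
With `AgentConfig` now a pydantic `BaseModel`, the removed `from_dict` filter is no longer needed: a dict can be validated directly. A minimal sketch with a stand-in model (the real `AgentConfig` may handle unknown keys differently):

from pydantic import BaseModel, Field

class DemoAgentConfig(BaseModel):  # stand-in, not the real AgentConfig
    codeact_enable_browsing: bool = Field(default=True)
    memory_max_threads: int = Field(default=3)

cfg = DemoAgentConfig.model_validate({'memory_max_threads': 5})
print(cfg.codeact_enable_browsing, cfg.memory_max_threads)  # True 5
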
128 changes: 44 additions & 84 deletions openhands/core/config/app_config.py
@@ -1,20 +1,20 @@
from dataclasses import dataclass, field, fields, is_dataclass
from typing import ClassVar
import inspect

from pydantic import BaseModel, Field, SecretStr

from openhands.core import logger
from openhands.core.config.agent_config import AgentConfig
from openhands.core.config.config_utils import (
OH_DEFAULT_AGENT,
OH_MAX_ITERATIONS,
get_field_info,
model_defaults_to_dict,
)
from openhands.core.config.llm_config import LLMConfig
from openhands.core.config.sandbox_config import SandboxConfig
from openhands.core.config.security_config import SecurityConfig


@dataclass
class AppConfig:
class AppConfig(BaseModel):
"""Configuration for the app.
Attributes:
@@ -51,43 +51,45 @@ class AppConfig:
input is read line by line. When enabled, input continues until /exit command.
"""

llms: dict[str, LLMConfig] = field(default_factory=dict)
agents: dict = field(default_factory=dict)
default_agent: str = OH_DEFAULT_AGENT
sandbox: SandboxConfig = field(default_factory=SandboxConfig)
security: SecurityConfig = field(default_factory=SecurityConfig)
runtime: str = 'docker'
file_store: str = 'local'
file_store_path: str = '/tmp/openhands_file_store'
trajectories_path: str | None = None
workspace_base: str = './workspace'
workspace_mount_path: str | None = None
workspace_mount_path_in_sandbox: str = '/workspace'
workspace_mount_rewrite: str | None = None
cache_dir: str = '/tmp/cache'
run_as_openhands: bool = True
show_workspace_contents: bool = True
max_iterations: int = OH_MAX_ITERATIONS
max_budget_per_task: float | None = None
e2b_api_key: str = ''
modal_api_token_id: str = ''
modal_api_token_secret: str = ''
disable_color: bool = False
jwt_secret: str = 'secretpass'
debug: bool = False
file_uploads_max_file_size_mb: int = 0
file_uploads_restrict_file_types: bool = False
file_uploads_allowed_extensions: list[str] = field(default_factory=lambda: ['.*'])
override_UI_settings: bool = False
runloop_api_key: str | None = None
custom_instructions: str = ''
use_selenium: bool = False
dont_restore_state: bool = False

cli_multiline_input: bool = False
llms: dict[str, LLMConfig] = Field(default_factory=dict)
agents: dict = Field(default_factory=dict)
default_agent: str = Field(default=OH_DEFAULT_AGENT)
sandbox: SandboxConfig = Field(default_factory=SandboxConfig)
security: SecurityConfig = Field(default_factory=SecurityConfig)
runtime: str = Field(default='docker')
file_store: str = Field(default='local')
file_store_path: str = Field(default='/tmp/openhands_file_store')
trajectories_path: str | None = Field(default=None)
workspace_base: str | None = Field(default='./workspace')
workspace_mount_path: str | None = Field(default=None)
workspace_mount_path_in_sandbox: str = Field(default='/workspace')
workspace_mount_rewrite: str | None = Field(default=None)
cache_dir: str = Field(default='/tmp/cache')
run_as_openhands: bool = Field(default=True)
max_iterations: int = Field(default=OH_MAX_ITERATIONS)
max_budget_per_task: float | None = Field(default=None)
e2b_api_key: SecretStr | None = Field(default=None)
modal_api_token_id: SecretStr | None = Field(default=None)
modal_api_token_secret: SecretStr | None = Field(default=None)
disable_color: bool = Field(default=False)
jwt_secret: SecretStr | None = Field(default=None)
debug: bool = Field(default=False)
file_uploads_max_file_size_mb: int = Field(default=0)
file_uploads_restrict_file_types: bool = Field(default=False)
file_uploads_allowed_extensions: list[str] = Field(default_factory=lambda: ['.*'])
runloop_api_key: SecretStr | None = Field(default=None)
cli_multiline_input: bool = Field(default=False)

show_workspace_contents: bool = Field(default=True)
override_UI_settings: bool = Field(default=False)
custom_instructions: str = Field(default='')
use_selenium: bool = Field(default=False)
dont_restore_state: bool = Field(default=False)

defaults_dict: ClassVar[dict] = {}

model_config = {'extra': 'forbid'}

def get_llm_config(self, name='llm') -> LLMConfig:
"""'llm' is the name for default config (for backward compatibility prior to 0.8)."""
if name in self.llms:
@@ -126,49 +128,7 @@ def get_llm_config_from_agent(self, name='agent') -> LLMConfig:
def get_agent_configs(self) -> dict[str, AgentConfig]:
return self.agents

def __post_init__(self):
def model_post_init(self, __context):
"""Post-initialization hook, called when the instance is created with only default values."""
AppConfig.defaults_dict = self.defaults_to_dict()

def defaults_to_dict(self) -> dict:
"""Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
result = {}
for f in fields(self):
field_value = getattr(self, f.name)

# dataclasses compute their defaults themselves
if is_dataclass(type(field_value)):
result[f.name] = field_value.defaults_to_dict()
else:
result[f.name] = get_field_info(f)
return result

def __str__(self):
attr_str = []
for f in fields(self):
attr_name = f.name
attr_value = getattr(self, f.name)

if attr_name in [
'e2b_api_key',
'github_token',
'jwt_secret',
'modal_api_token_id',
'modal_api_token_secret',
'runloop_api_key',
]:
attr_value = '******' if attr_value else None

attr_str.append(f'{attr_name}={repr(attr_value)}')

return f"AppConfig({', '.join(attr_str)}"

def __repr__(self):
return self.__str__()

@classmethod
def from_dict(cls, env):
return cls(**{
k: v for k, v in env.items()
if k in inspect.signature(cls).parameters
})
super().model_post_init(__context)
AppConfig.defaults_dict = model_defaults_to_dict(self)
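
Here `__post_init__` becomes pydantic's `model_post_init` hook, and the hand-written `__str__` masking of key fields is superseded by `SecretStr` fields, which mask themselves in `str`/`repr`. An illustrative sketch with a stand-in model, assuming pydantic v2:

from pydantic import BaseModel, SecretStr

class DemoAppConfig(BaseModel):  # stand-in, not the real AppConfig
    jwt_secret: SecretStr | None = None
    debug: bool = False

    model_config = {'extra': 'forbid'}

    def model_post_init(self, __context) -> None:
        # Runs after validation, much like a dataclass __post_init__.
        super().model_post_init(__context)

cfg = DemoAppConfig(jwt_secret='secretpass')
print(cfg)  # jwt_secret=SecretStr('**********') debug=False
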
(Diffs for the remaining changed files did not load and are not shown.)
