All-Hands-AI · csmith49 · Jan 17, 2025 · Jan 8, 2025 · Jan 8, 2025 · Jan 8, 2025
diff --git a/evaluation/benchmarks/the_agent_company/run_infer.py b/evaluation/benchmarks/the_agent_company/run_infer.py
@@ -80,7 +80,7 @@ def load_dependencies(runtime: Runtime) -> List[str]:
 def init_task_env(runtime: Runtime, hostname: str, env_llm_config: LLMConfig):
     command = (
         f'SERVER_HOSTNAME={hostname} '
-        f'LITELLM_API_KEY={env_llm_config.api_key} '
+        f'LITELLM_API_KEY={env_llm_config.api_key.get_secret_value() if env_llm_config.api_key else None} '
         f'LITELLM_BASE_URL={env_llm_config.base_url} '
         f'LITELLM_MODEL={env_llm_config.model} '
         'bash /utils/init.sh'
@@ -165,7 +165,7 @@ def run_evaluator(
     runtime: Runtime, env_llm_config: LLMConfig, trajectory_path: str, result_path: str
 ):
     command = (
-        f'LITELLM_API_KEY={env_llm_config.api_key} '
+        f'LITELLM_API_KEY={env_llm_config.api_key.get_secret_value() if env_llm_config.api_key else None} '
         f'LITELLM_BASE_URL={env_llm_config.base_url} '
         f'LITELLM_MODEL={env_llm_config.model} '
         f"DECRYPTION_KEY='theagentcompany is all you need' "  # Hardcoded Key

diff --git a/evaluation/utils/shared.py b/evaluation/utils/shared.py
@@ -52,30 +52,6 @@ class EvalMetadata(BaseModel):
     details: dict[str, Any] | None = None
     condenser_config: CondenserConfig | None = None
 
-    def model_dump(self, *args, **kwargs):
-        dumped_dict = super().model_dump(*args, **kwargs)
-        # avoid leaking sensitive information
-        dumped_dict['llm_config'] = self.llm_config.to_safe_dict()
-        if hasattr(self.condenser_config, 'llm_config'):
-            dumped_dict['condenser_config']['llm_config'] = (
-                self.condenser_config.llm_config.to_safe_dict()
-            )
-
-        return dumped_dict
-
-    def model_dump_json(self, *args, **kwargs):
-        dumped = super().model_dump_json(*args, **kwargs)
-        dumped_dict = json.loads(dumped)
-        # avoid leaking sensitive information
-        dumped_dict['llm_config'] = self.llm_config.to_safe_dict()
-        if hasattr(self.condenser_config, 'llm_config'):
-            dumped_dict['condenser_config']['llm_config'] = (
-                self.condenser_config.llm_config.to_safe_dict()
-            )
-
-        logger.debug(f'Dumped metadata: {dumped_dict}')
-        return json.dumps(dumped_dict)
-
 
 class EvalOutput(BaseModel):
     # NOTE: User-specified
@@ -98,23 +74,6 @@ class EvalOutput(BaseModel):
     # Optionally save the input test instance
     instance: dict[str, Any] | None = None
 
-    def model_dump(self, *args, **kwargs):
-        dumped_dict = super().model_dump(*args, **kwargs)
-        # Remove None values
-        dumped_dict = {k: v for k, v in dumped_dict.items() if v is not None}
-        # Apply custom serialization for metadata (to avoid leaking sensitive information)
-        if self.metadata is not None:
-            dumped_dict['metadata'] = self.metadata.model_dump()
-        return dumped_dict
-
-    def model_dump_json(self, *args, **kwargs):
-        dumped = super().model_dump_json(*args, **kwargs)
-        dumped_dict = json.loads(dumped)
-        # Apply custom serialization for metadata (to avoid leaking sensitive information)
-        if 'metadata' in dumped_dict:
-            dumped_dict['metadata'] = json.loads(self.metadata.model_dump_json())
-        return json.dumps(dumped_dict)
-
 
 class EvalException(Exception):
     pass
@@ -314,7 +273,7 @@ def update_progress(
     logger.info(
         f'Finished evaluation for instance {result.instance_id}: {str(result.test_result)[:300]}...\n'
     )
-    output_fp.write(json.dumps(result.model_dump()) + '\n')
+    output_fp.write(result.model_dump_json() + '\n')
     output_fp.flush()
 
 

diff --git a/openhands/core/config/README.md b/openhands/core/config/README.md
@@ -37,21 +37,17 @@ export SANDBOX_TIMEOUT='300'
 
 ## Type Handling
 
-The `load_from_env` function attempts to cast environment variable values to the types specified in the dataclasses. It handles:
+The `load_from_env` function attempts to cast environment variable values to the types specified in the models. It handles:
 
 - Basic types (str, int, bool)
 - Optional types (e.g., `str | None`)
-- Nested dataclasses
+- Nested models
 
 If type casting fails, an error is logged, and the default value is retained.
 
 ## Default Values
 
-If an environment variable is not set, the default value specified in the dataclass is used.
-
-## Nested Configurations
-
-The `AppConfig` class contains nested configurations like `LLMConfig` and `AgentConfig`. The `load_from_env` function handles these by recursively processing nested dataclasses with updated prefixes.
+If an environment variable is not set, the default value specified in the model is used.
 
 ## Security Considerations
 

diff --git a/openhands/core/config/agent_config.py b/openhands/core/config/agent_config.py
@@ -1,11 +1,9 @@
-from dataclasses import dataclass, field, fields
+from pydantic import BaseModel, Field
 
 from openhands.core.config.condenser_config import CondenserConfig, NoOpCondenserConfig
-from openhands.core.config.config_utils import get_field_info
 
 
-@dataclass
-class AgentConfig:
+class AgentConfig(BaseModel):
     """Configuration for the agent.
 
     Attributes:
@@ -22,20 +20,13 @@ class AgentConfig:
         condenser: Configuration for the memory condenser. Default is NoOpCondenserConfig.
     """
 
-    codeact_enable_browsing: bool = True
-    codeact_enable_llm_editor: bool = False
-    codeact_enable_jupyter: bool = True
-    micro_agent_name: str | None = None
-    memory_enabled: bool = False
-    memory_max_threads: int = 3
-    llm_config: str | None = None
-    enable_prompt_extensions: bool = True
-    disabled_microagents: list[str] | None = None
-    condenser: CondenserConfig = field(default_factory=NoOpCondenserConfig)  # type: ignore
-
-    def defaults_to_dict(self) -> dict:
-        """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
-        result = {}
-        for f in fields(self):
-            result[f.name] = get_field_info(f)
-        return result
+    codeact_enable_browsing: bool = Field(default=True)
+    codeact_enable_llm_editor: bool = Field(default=False)
+    codeact_enable_jupyter: bool = Field(default=True)
+    micro_agent_name: str | None = Field(default=None)
+    memory_enabled: bool = Field(default=False)
+    memory_max_threads: int = Field(default=3)
+    llm_config: str | None = Field(default=None)
+    enable_prompt_extensions: bool = Field(default=True)
+    disabled_microagents: list[str] | None = Field(default=None)
+    condenser: CondenserConfig = Field(default_factory=NoOpCondenserConfig)
diff --git a/openhands/core/config/app_config.py b/openhands/core/config/app_config.py
@@ -1,20 +1,20 @@
-from dataclasses import dataclass, field, fields, is_dataclass
 from typing import ClassVar
 
+from pydantic import BaseModel, Field, SecretStr
+
 from openhands.core import logger
 from openhands.core.config.agent_config import AgentConfig
 from openhands.core.config.config_utils import (
     OH_DEFAULT_AGENT,
     OH_MAX_ITERATIONS,
-    get_field_info,
+    model_defaults_to_dict,
 )
 from openhands.core.config.llm_config import LLMConfig
 from openhands.core.config.sandbox_config import SandboxConfig
 from openhands.core.config.security_config import SecurityConfig
 
 
-@dataclass
-class AppConfig:
+class AppConfig(BaseModel):
     """Configuration for the app.
 
     Attributes:
@@ -46,37 +46,39 @@ class AppConfig:
             input is read line by line. When enabled, input continues until /exit command.
     """
 
-    llms: dict[str, LLMConfig] = field(default_factory=dict)
-    agents: dict = field(default_factory=dict)
-    default_agent: str = OH_DEFAULT_AGENT
-    sandbox: SandboxConfig = field(default_factory=SandboxConfig)
-    security: SecurityConfig = field(default_factory=SecurityConfig)
-    runtime: str = 'docker'
-    file_store: str = 'local'
-    file_store_path: str = '/tmp/openhands_file_store'
-    save_trajectory_path: str | None = None
-    workspace_base: str | None = None
-    workspace_mount_path: str | None = None
-    workspace_mount_path_in_sandbox: str = '/workspace'
-    workspace_mount_rewrite: str | None = None
-    cache_dir: str = '/tmp/cache'
-    run_as_openhands: bool = True
-    max_iterations: int = OH_MAX_ITERATIONS
-    max_budget_per_task: float | None = None
-    e2b_api_key: str = ''
-    modal_api_token_id: str = ''
-    modal_api_token_secret: str = ''
-    disable_color: bool = False
-    jwt_secret: str = ''
-    debug: bool = False
-    file_uploads_max_file_size_mb: int = 0
-    file_uploads_restrict_file_types: bool = False
-    file_uploads_allowed_extensions: list[str] = field(default_factory=lambda: ['.*'])
-    runloop_api_key: str | None = None
-    cli_multiline_input: bool = False
+    llms: dict[str, LLMConfig] = Field(default_factory=dict)
+    agents: dict = Field(default_factory=dict)
+    default_agent: str = Field(default=OH_DEFAULT_AGENT)
+    sandbox: SandboxConfig = Field(default_factory=SandboxConfig)
+    security: SecurityConfig = Field(default_factory=SecurityConfig)
+    runtime: str = Field(default='docker')
+    file_store: str = Field(default='local')
+    file_store_path: str = Field(default='/tmp/openhands_file_store')
+    save_trajectory_path: str | None = Field(default=None)
+    workspace_base: str | None = Field(default=None)
+    workspace_mount_path: str | None = Field(default=None)
+    workspace_mount_path_in_sandbox: str = Field(default='/workspace')
+    workspace_mount_rewrite: str | None = Field(default=None)
+    cache_dir: str = Field(default='/tmp/cache')
+    run_as_openhands: bool = Field(default=True)
+    max_iterations: int = Field(default=OH_MAX_ITERATIONS)
+    max_budget_per_task: float | None = Field(default=None)
+    e2b_api_key: SecretStr | None = Field(default=None)
+    modal_api_token_id: SecretStr | None = Field(default=None)
+    modal_api_token_secret: SecretStr | None = Field(default=None)
+    disable_color: bool = Field(default=False)
+    jwt_secret: SecretStr | None = Field(default=None)
+    debug: bool = Field(default=False)
+    file_uploads_max_file_size_mb: int = Field(default=0)
+    file_uploads_restrict_file_types: bool = Field(default=False)
+    file_uploads_allowed_extensions: list[str] = Field(default_factory=lambda: ['.*'])
+    runloop_api_key: SecretStr | None = Field(default=None)
+    cli_multiline_input: bool = Field(default=False)
 
     defaults_dict: ClassVar[dict] = {}
 
+    model_config = {'extra': 'forbid'}
+
     def get_llm_config(self, name='llm') -> LLMConfig:
         """'llm' is the name for default config (for backward compatibility prior to 0.8)."""
         if name in self.llms:
@@ -115,42 +117,7 @@ def get_llm_config_from_agent(self, name='agent') -> LLMConfig:
     def get_agent_configs(self) -> dict[str, AgentConfig]:
         return self.agents
 
-    def __post_init__(self):
+    def model_post_init(self, __context):
         """Post-initialization hook, called when the instance is created with only default values."""
-        AppConfig.defaults_dict = self.defaults_to_dict()
-
-    def defaults_to_dict(self) -> dict:
-        """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
-        result = {}
-        for f in fields(self):
-            field_value = getattr(self, f.name)
-
-            # dataclasses compute their defaults themselves
-            if is_dataclass(type(field_value)):
-                result[f.name] = field_value.defaults_to_dict()
-            else:
-                result[f.name] = get_field_info(f)
-        return result
-
-    def __str__(self):
-        attr_str = []
-        for f in fields(self):
-            attr_name = f.name
-            attr_value = getattr(self, f.name)
-
-            if attr_name in [
-                'e2b_api_key',
-                'github_token',
-                'jwt_secret',
-                'modal_api_token_id',
-                'modal_api_token_secret',
-                'runloop_api_key',
-            ]:
-                attr_value = '******' if attr_value else None
-
-            attr_str.append(f'{attr_name}={repr(attr_value)}')
-
-        return f"AppConfig({', '.join(attr_str)}"
-
-    def __repr__(self):
-        return self.__str__()
+        super().model_post_init(__context)
+        AppConfig.defaults_dict = model_defaults_to_dict(self)
diff --git a/openhands/core/config/config_utils.py b/openhands/core/config/config_utils.py
@@ -1,19 +1,22 @@
 from types import UnionType
-from typing import get_args, get_origin
+from typing import Any, get_args, get_origin
+
+from pydantic import BaseModel
+from pydantic.fields import FieldInfo
 
 OH_DEFAULT_AGENT = 'CodeActAgent'
 OH_MAX_ITERATIONS = 500
 
 
-def get_field_info(f):
+def get_field_info(field: FieldInfo) -> dict[str, Any]:
-    """Extract information about a dataclass field: type, optional, and default.
+    """Extract information about a Pydantic model field: type, optional, and default.
 
     Args:
-        f: The field to extract information from.
+        field: The field to extract information from.
 
     Returns: A dict with the field's type, whether it's optional, and its default value.
     """
-    field_type = f.type
+    field_type = field.annotation
     optional = False
 
     # for types like str | None, find the non-None type and set optional to True
@@ -33,7 +36,21 @@ def get_field_info(f):
     )
 
     # default is always present
-    default = f.default
+    default = field.default
 
     # return a schema with the useful info for frontend
     return {'type': type_name.lower(), 'optional': optional, 'default': default}
+
+
+def model_defaults_to_dict(model: BaseModel) -> dict[str, Any]:
+    """Serialize field information in a dict for the frontend, including type hints, defaults, and whether it's optional."""
+    result: dict[str, Any] = {}
+    # Read fields from the class: instance access to `model_fields` is deprecated in newer Pydantic.
+    for name, field in type(model).model_fields.items():
+        field_value = getattr(model, name)
+        if isinstance(field_value, BaseModel):
+            # nested models describe their own fields recursively
+            result[name] = model_defaults_to_dict(field_value)
+        else:
+            result[name] = get_field_info(field)
+    return result