diff --git a/.github/workflows/openhands-resolver.yml b/.github/workflows/openhands-resolver.yml index 77b3c49b785c..45718be9ef3d 100644 --- a/.github/workflows/openhands-resolver.yml +++ b/.github/workflows/openhands-resolver.yml @@ -184,6 +184,7 @@ jobs: }); - name: Install OpenHands + id: install_openhands uses: actions/github-script@v7 env: COMMENT_BODY: ${{ github.event.comment.body || '' }} @@ -196,7 +197,6 @@ jobs: const reviewBody = process.env.REVIEW_BODY.trim(); const labelName = process.env.LABEL_NAME.trim(); const eventName = process.env.EVENT_NAME.trim(); - // Check conditions const isExperimentalLabel = labelName === "fix-me-experimental"; const isIssueCommentExperimental = @@ -205,6 +205,9 @@ jobs: const isReviewCommentExperimental = eventName === "pull_request_review" && reviewBody.includes("@openhands-agent-exp"); + // Set output variable + core.setOutput('isExperimental', isExperimentalLabel || isIssueCommentExperimental || isReviewCommentExperimental); + // Perform package installation if (isExperimentalLabel || isIssueCommentExperimental || isReviewCommentExperimental) { console.log("Installing experimental OpenHands..."); @@ -230,7 +233,8 @@ jobs: --issue-number ${{ env.ISSUE_NUMBER }} \ --issue-type ${{ env.ISSUE_TYPE }} \ --max-iterations ${{ env.MAX_ITERATIONS }} \ - --comment-id ${{ env.COMMENT_ID }} + --comment-id ${{ env.COMMENT_ID }} \ + --is-experimental ${{ steps.install_openhands.outputs.isExperimental }} - name: Check resolution result id: check_result diff --git a/evaluation/benchmarks/the_agent_company/run_infer.py b/evaluation/benchmarks/the_agent_company/run_infer.py index 03561913087c..6f0cda2efe40 100644 --- a/evaluation/benchmarks/the_agent_company/run_infer.py +++ b/evaluation/benchmarks/the_agent_company/run_infer.py @@ -80,7 +80,7 @@ def load_dependencies(runtime: Runtime) -> List[str]: def init_task_env(runtime: Runtime, hostname: str, env_llm_config: LLMConfig): command = ( f'SERVER_HOSTNAME={hostname} ' - f'LITELLM_API_KEY={env_llm_config.api_key} ' + f'LITELLM_API_KEY={env_llm_config.api_key.get_secret_value() if env_llm_config.api_key else None} ' f'LITELLM_BASE_URL={env_llm_config.base_url} ' f'LITELLM_MODEL={env_llm_config.model} ' 'bash /utils/init.sh' @@ -165,7 +165,7 @@ def run_evaluator( runtime: Runtime, env_llm_config: LLMConfig, trajectory_path: str, result_path: str ): command = ( - f'LITELLM_API_KEY={env_llm_config.api_key} ' + f'LITELLM_API_KEY={env_llm_config.api_key.get_secret_value() if env_llm_config.api_key else None} ' f'LITELLM_BASE_URL={env_llm_config.base_url} ' f'LITELLM_MODEL={env_llm_config.model} ' f"DECRYPTION_KEY='theagentcompany is all you need' " # Hardcoded Key diff --git a/evaluation/utils/shared.py b/evaluation/utils/shared.py index 5a5cdd8b86e8..4f165831eca5 100644 --- a/evaluation/utils/shared.py +++ b/evaluation/utils/shared.py @@ -53,30 +53,6 @@ class EvalMetadata(BaseModel): details: dict[str, Any] | None = None condenser_config: CondenserConfig | None = None - def model_dump(self, *args, **kwargs): - dumped_dict = super().model_dump(*args, **kwargs) - # avoid leaking sensitive information - dumped_dict['llm_config'] = self.llm_config.to_safe_dict() - if hasattr(self.condenser_config, 'llm_config'): - dumped_dict['condenser_config']['llm_config'] = ( - self.condenser_config.llm_config.to_safe_dict() - ) - - return dumped_dict - - def model_dump_json(self, *args, **kwargs): - dumped = super().model_dump_json(*args, **kwargs) - dumped_dict = json.loads(dumped) - # avoid leaking sensitive information - 
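A quick aside on the `get_secret_value()` calls added in `init_task_env` and `run_evaluator` above: the config fields are being migrated to Pydantic's `SecretStr`, which masks itself in `str()`/`repr()` (and therefore in f-strings), so the raw value has to be unwrapped explicitly before it is interpolated into the shell command. A minimal sketch, not project code (`DemoLLMConfig` is illustrative):

```python
# Sketch only: illustrates SecretStr behavior, not project code.
from pydantic import BaseModel, SecretStr


class DemoLLMConfig(BaseModel):
    api_key: SecretStr | None = None


cfg = DemoLLMConfig(api_key='sk-test-123')

# SecretStr masks itself when formatted, so naive interpolation would pass a
# useless placeholder to the command:
print(f'LITELLM_API_KEY={cfg.api_key}')  # LITELLM_API_KEY=**********

# The explicit unwrap used in init_task_env/run_evaluator above:
print(f'LITELLM_API_KEY={cfg.api_key.get_secret_value() if cfg.api_key else None}')
# LITELLM_API_KEY=sk-test-123
```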
dumped_dict['llm_config'] = self.llm_config.to_safe_dict() - if hasattr(self.condenser_config, 'llm_config'): - dumped_dict['condenser_config']['llm_config'] = ( - self.condenser_config.llm_config.to_safe_dict() - ) - - logger.debug(f'Dumped metadata: {dumped_dict}') - return json.dumps(dumped_dict) - class EvalOutput(BaseModel): # NOTE: User-specified @@ -99,23 +75,6 @@ class EvalOutput(BaseModel): # Optionally save the input test instance instance: dict[str, Any] | None = None - def model_dump(self, *args, **kwargs): - dumped_dict = super().model_dump(*args, **kwargs) - # Remove None values - dumped_dict = {k: v for k, v in dumped_dict.items() if v is not None} - # Apply custom serialization for metadata (to avoid leaking sensitive information) - if self.metadata is not None: - dumped_dict['metadata'] = self.metadata.model_dump() - return dumped_dict - - def model_dump_json(self, *args, **kwargs): - dumped = super().model_dump_json(*args, **kwargs) - dumped_dict = json.loads(dumped) - # Apply custom serialization for metadata (to avoid leaking sensitive information) - if 'metadata' in dumped_dict: - dumped_dict['metadata'] = json.loads(self.metadata.model_dump_json()) - return json.dumps(dumped_dict) - class EvalException(Exception): pass @@ -315,15 +274,7 @@ def update_progress( logger.info( f'Finished evaluation for instance {result.instance_id}: {str(result.test_result)[:300]}...\n' ) - - # Custom JSON encoder - class NumpyEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, np.ndarray): - return obj.tolist() # Convert ndarray to list - return super().default(obj) - - output_fp.write(json.dumps(result.model_dump(), cls=NumpyEncoder) + '\n') + output_fp.write(result.model_dump_json() + '\n') output_fp.flush() diff --git a/openhands/core/config/README.md b/openhands/core/config/README.md index 5e3abae5b13a..c612a0824403 100644 --- a/openhands/core/config/README.md +++ b/openhands/core/config/README.md @@ -37,21 +37,17 @@ export SANDBOX_TIMEOUT='300' ## Type Handling -The `load_from_env` function attempts to cast environment variable values to the types specified in the dataclasses. It handles: +The `load_from_env` function attempts to cast environment variable values to the types specified in the models. It handles: - Basic types (str, int, bool) - Optional types (e.g., `str | None`) -- Nested dataclasses +- Nested models If type casting fails, an error is logged, and the default value is retained. ## Default Values -If an environment variable is not set, the default value specified in the dataclass is used. - -## Nested Configurations - -The `AppConfig` class contains nested configurations like `LLMConfig` and `AgentConfig`. The `load_from_env` function handles these by recursively processing nested dataclasses with updated prefixes. +If an environment variable is not set, the default value specified in the model is used. ## Security Considerations diff --git a/openhands/core/config/agent_config.py b/openhands/core/config/agent_config.py index c57a16210027..de29aed9744f 100644 --- a/openhands/core/config/agent_config.py +++ b/openhands/core/config/agent_config.py @@ -1,11 +1,9 @@ -from dataclasses import dataclass, field, fields -import inspect +from pydantic import BaseModel, Field + from openhands.core.config.condenser_config import CondenserConfig, NoOpCondenserConfig -from openhands.core.config.config_utils import get_field_info -@dataclass -class AgentConfig: +class AgentConfig(BaseModel): """Configuration for the agent. 
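The custom `model_dump`/`model_dump_json` overrides deleted above existed mainly to keep API keys out of saved eval output. With `api_key` and similar fields typed as `SecretStr` elsewhere in this diff, Pydantic v2's stock JSON serialization already masks those values, nested models included, which is why `output_fp.write(result.model_dump_json() + '\n')` suffices. A small sketch under that assumption (class names are illustrative, not the real `EvalMetadata`/`LLMConfig`):

```python
# Sketch only: class names are illustrative, not the real EvalMetadata/LLMConfig.
from pydantic import BaseModel, Field, SecretStr


class DemoLLMConfig(BaseModel):
    model: str = 'gpt-4o'
    api_key: SecretStr | None = None


class DemoEvalMetadata(BaseModel):
    agent_class: str = 'CodeActAgent'
    llm_config: DemoLLMConfig = Field(default_factory=DemoLLMConfig)


meta = DemoEvalMetadata(llm_config=DemoLLMConfig(api_key='sk-secret'))

# SecretStr fields serialize masked by default, nested models included,
# so no hand-written "safe dict" step is needed before writing output files.
print(meta.model_dump_json())
# {"agent_class":"CodeActAgent","llm_config":{"model":"gpt-4o","api_key":"**********"}}
```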
Attributes: @@ -24,30 +22,18 @@ class AgentConfig: condenser: Configuration for the memory condenser. Default is NoOpCondenserConfig. """ - function_calling: bool = False - codeact_enable_browsing: bool = True - codeact_enable_llm_editor: bool = False - codeact_enable_jupyter: bool = True - micro_agent_name: str | None = None - memory_enabled: bool = False - memory_max_threads: int = 3 - llm_config: str | None = None - use_microagents: bool = True - disabled_microagents: list[str] | None = None - mind_voice: str | None = None - mind_voice_language: str = 'English' - condenser: CondenserConfig = field(default_factory=NoOpCondenserConfig) # type: ignore - - def defaults_to_dict(self) -> dict: - """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional.""" - result = {} - for f in fields(self): - result[f.name] = get_field_info(f) - return result + codeact_enable_browsing: bool = Field(default=True) + codeact_enable_llm_editor: bool = Field(default=False) + codeact_enable_jupyter: bool = Field(default=True) + micro_agent_name: str | None = Field(default=None) + memory_enabled: bool = Field(default=False) + memory_max_threads: int = Field(default=3) + llm_config: str | None = Field(default=None) + use_microagents: bool = Field(default=True) + disabled_microagents: list[str] | None = Field(default=None) + condenser: CondenserConfig = Field(default_factory=NoOpCondenserConfig) + + function_calling: bool = Field(default=True) + mind_voice: str | None = Field(default=None) + mind_voice_language: str = Field(default='English') - @classmethod - def from_dict(cls, env): - return cls(**{ - k: v for k, v in env.items() - if k in inspect.signature(cls).parameters - }) diff --git a/openhands/core/config/app_config.py b/openhands/core/config/app_config.py index 4d9d5749a9fb..8c3e495fd0d9 100644 --- a/openhands/core/config/app_config.py +++ b/openhands/core/config/app_config.py @@ -1,20 +1,20 @@ -from dataclasses import dataclass, field, fields, is_dataclass from typing import ClassVar -import inspect + +from pydantic import BaseModel, Field, SecretStr + from openhands.core import logger from openhands.core.config.agent_config import AgentConfig from openhands.core.config.config_utils import ( OH_DEFAULT_AGENT, OH_MAX_ITERATIONS, - get_field_info, + model_defaults_to_dict, ) from openhands.core.config.llm_config import LLMConfig from openhands.core.config.sandbox_config import SandboxConfig from openhands.core.config.security_config import SecurityConfig -@dataclass -class AppConfig: +class AppConfig(BaseModel): """Configuration for the app. Attributes: @@ -51,43 +51,45 @@ class AppConfig: input is read line by line. When enabled, input continues until /exit command. 
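For the field conversions in `AgentConfig` above (and the ones that follow): plain dataclass defaults map to `Field(default=...)`, while `field(default_factory=...)` maps to `Field(default_factory=...)`, which keeps mutable defaults per-instance. An illustrative sketch, not the real `AgentConfig`:

```python
# Sketch only: not the real AgentConfig, just the default-handling pattern.
from pydantic import BaseModel, Field


class DemoAgentConfig(BaseModel):
    memory_max_threads: int = Field(default=3)
    disabled_microagents: list[str] = Field(default_factory=list)


a, b = DemoAgentConfig(), DemoAgentConfig()
a.disabled_microagents.append('github')

# default_factory gives each instance its own list, mirroring
# dataclasses.field(default_factory=...) in the code being replaced.
print(b.disabled_microagents)  # []
```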
""" - llms: dict[str, LLMConfig] = field(default_factory=dict) - agents: dict = field(default_factory=dict) - default_agent: str = OH_DEFAULT_AGENT - sandbox: SandboxConfig = field(default_factory=SandboxConfig) - security: SecurityConfig = field(default_factory=SecurityConfig) - runtime: str = 'docker' - file_store: str = 'local' - file_store_path: str = '/tmp/openhands_file_store' - trajectories_path: str | None = None - workspace_base: str = './workspace' - workspace_mount_path: str | None = None - workspace_mount_path_in_sandbox: str = '/workspace' - workspace_mount_rewrite: str | None = None - cache_dir: str = '/tmp/cache' - run_as_openhands: bool = True - show_workspace_contents: bool = True - max_iterations: int = OH_MAX_ITERATIONS - max_budget_per_task: float | None = None - e2b_api_key: str = '' - modal_api_token_id: str = '' - modal_api_token_secret: str = '' - disable_color: bool = False - jwt_secret: str = 'secretpass' - debug: bool = False - file_uploads_max_file_size_mb: int = 0 - file_uploads_restrict_file_types: bool = False - file_uploads_allowed_extensions: list[str] = field(default_factory=lambda: ['.*']) - override_UI_settings: bool = False - runloop_api_key: str | None = None - custom_instructions: str = '' - use_selenium: bool = False - dont_restore_state: bool = False - - cli_multiline_input: bool = False + llms: dict[str, LLMConfig] = Field(default_factory=dict) + agents: dict = Field(default_factory=dict) + default_agent: str = Field(default=OH_DEFAULT_AGENT) + sandbox: SandboxConfig = Field(default_factory=SandboxConfig) + security: SecurityConfig = Field(default_factory=SecurityConfig) + runtime: str = Field(default='docker') + file_store: str = Field(default='local') + file_store_path: str = Field(default='/tmp/openhands_file_store') + trajectories_path: str | None = Field(default=None) + workspace_base: str | None = Field(default='./workspace') + workspace_mount_path: str | None = Field(default=None) + workspace_mount_path_in_sandbox: str = Field(default='/workspace') + workspace_mount_rewrite: str | None = Field(default=None) + cache_dir: str = Field(default='/tmp/cache') + run_as_openhands: bool = Field(default=True) + max_iterations: int = Field(default=OH_MAX_ITERATIONS) + max_budget_per_task: float | None = Field(default=None) + e2b_api_key: SecretStr | None = Field(default=None) + modal_api_token_id: SecretStr | None = Field(default=None) + modal_api_token_secret: SecretStr | None = Field(default=None) + disable_color: bool = Field(default=False) + jwt_secret: SecretStr | None = Field(default=None) + debug: bool = Field(default=False) + file_uploads_max_file_size_mb: int = Field(default=0) + file_uploads_restrict_file_types: bool = Field(default=False) + file_uploads_allowed_extensions: list[str] = Field(default_factory=lambda: ['.*']) + runloop_api_key: SecretStr | None = Field(default=None) + cli_multiline_input: bool = Field(default=False) + + show_workspace_contents: bool = Field(default=True) + override_UI_settings: bool = Field(default=False) + custom_instructions: str = Field(default='') + use_selenium: bool = Field(default=False) + dont_restore_state: bool = Field(default=False) defaults_dict: ClassVar[dict] = {} + model_config = {'extra': 'forbid'} + def get_llm_config(self, name='llm') -> LLMConfig: """'llm' is the name for default config (for backward compatibility prior to 0.8).""" if name in self.llms: @@ -126,49 +128,7 @@ def get_llm_config_from_agent(self, name='agent') -> LLMConfig: def get_agent_configs(self) -> dict[str, AgentConfig]: 
return self.agents - def __post_init__(self): + def model_post_init(self, __context): """Post-initialization hook, called when the instance is created with only default values.""" - AppConfig.defaults_dict = self.defaults_to_dict() - - def defaults_to_dict(self) -> dict: - """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional.""" - result = {} - for f in fields(self): - field_value = getattr(self, f.name) - - # dataclasses compute their defaults themselves - if is_dataclass(type(field_value)): - result[f.name] = field_value.defaults_to_dict() - else: - result[f.name] = get_field_info(f) - return result - - def __str__(self): - attr_str = [] - for f in fields(self): - attr_name = f.name - attr_value = getattr(self, f.name) - - if attr_name in [ - 'e2b_api_key', - 'github_token', - 'jwt_secret', - 'modal_api_token_id', - 'modal_api_token_secret', - 'runloop_api_key', - ]: - attr_value = '******' if attr_value else None - - attr_str.append(f'{attr_name}={repr(attr_value)}') - - return f"AppConfig({', '.join(attr_str)}" - - def __repr__(self): - return self.__str__() - - @classmethod - def from_dict(cls, env): - return cls(**{ - k: v for k, v in env.items() - if k in inspect.signature(cls).parameters - }) + super().model_post_init(__context) + AppConfig.defaults_dict = model_defaults_to_dict(self) diff --git a/openhands/core/config/config_utils.py b/openhands/core/config/config_utils.py index 38c3c1d03df5..44893e119b5a 100644 --- a/openhands/core/config/config_utils.py +++ b/openhands/core/config/config_utils.py @@ -1,19 +1,22 @@ from types import UnionType -from typing import get_args, get_origin +from typing import Any, get_args, get_origin + +from pydantic import BaseModel +from pydantic.fields import FieldInfo OH_DEFAULT_AGENT = 'CodeActAgent' OH_MAX_ITERATIONS = 500 -def get_field_info(f): +def get_field_info(field: FieldInfo) -> dict[str, Any]: """Extract information about a dataclass field: type, optional, and default. Args: - f: The field to extract information from. + field: The field to extract information from. Returns: A dict with the field's type, whether it's optional, and its default value. 
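The `get_field_info` rewrite here (and the `model_defaults_to_dict` helper added just below it) walks Pydantic's `model_fields` mapping, where each `FieldInfo` carries the annotation and default that previously came from `dataclasses.fields()`. A minimal sketch of that introspection, with a made-up model:

```python
# Sketch only: DemoConfig stands in for the real config models.
from pydantic import BaseModel, Field


class DemoConfig(BaseModel):
    timeout: int | None = Field(default=120)
    runtime: str = Field(default='docker')


for name, field in DemoConfig.model_fields.items():
    # FieldInfo exposes the annotation and default that get_field_info() reads.
    print(name, field.annotation, field.default)
# timeout int | None 120
# runtime <class 'str'> docker
```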
""" - field_type = f.type + field_type = field.annotation optional = False # for types like str | None, find the non-None type and set optional to True @@ -33,7 +36,21 @@ def get_field_info(f): ) # default is always present - default = f.default + default = field.default # return a schema with the useful info for frontend return {'type': type_name.lower(), 'optional': optional, 'default': default} + + +def model_defaults_to_dict(model: BaseModel) -> dict[str, Any]: + """Serialize field information in a dict for the frontend, including type hints, defaults, and whether it's optional.""" + result = {} + for name, field in model.model_fields.items(): + field_value = getattr(model, name) + + if isinstance(field_value, BaseModel): + result[name] = model_defaults_to_dict(field_value) + else: + result[name] = get_field_info(field) + + return result diff --git a/openhands/core/config/llm_config.py b/openhands/core/config/llm_config.py index bef074860fb0..f7be683c4c57 100644 --- a/openhands/core/config/llm_config.py +++ b/openhands/core/config/llm_config.py @@ -1,16 +1,14 @@ +from __future__ import annotations + import os -import inspect -from dataclasses import dataclass, fields -from typing import Optional +from typing import Any -from openhands.core.config.config_utils import get_field_info -from openhands.core.logger import LOG_DIR +from pydantic import BaseModel, Field, SecretStr -LLM_SENSITIVE_FIELDS = ['api_key', 'aws_access_key_id', 'aws_secret_access_key'] +from openhands.core.logger import LOG_DIR -@dataclass -class LLMConfig: +class LLMConfig(BaseModel): """Configuration for the LLM model. Attributes: @@ -52,101 +50,61 @@ class LLMConfig: native_tool_calling: Whether to use native tool calling if supported by the model. Can be True, False, or not set. 
""" - model: str = 'claude-3-5-sonnet-20241022' - api_key: str | None = None - base_url: str | None = None - api_version: str | None = None - embedding_model: str = 'local' - embedding_base_url: str | None = None - embedding_deployment_name: str | None = None - aws_access_key_id: str | None = None - aws_secret_access_key: str | None = None - aws_region_name: str | None = None - openrouter_site_url: str = 'https://docs.all-hands.dev/' - openrouter_app_name: str = 'OpenHands' - num_retries: int = 8 - retry_multiplier: float = 1.25 - retry_min_wait: int = 1 - retry_max_wait: int = 120 - timeout: int | None = None - max_message_chars: int = 30_000 # maximum number of characters in an observation's content when sent to the llm - temperature: float = 0.0 - top_p: float = 1.0 - custom_llm_provider: str | None = None - max_input_tokens: int | None = None - max_output_tokens: int | None = None - input_cost_per_token: float | None = None - output_cost_per_token: float | None = None - ollama_base_url: str | None = None - message_summary_trunc_tokens_frac: float = 0.75 - enable_cache: bool = True + model: str = Field(default='claude-3-5-sonnet-20241022') + api_key: SecretStr | None = Field(default=None) + base_url: str | None = Field(default=None) + api_version: str | None = Field(default=None) + embedding_model: str = Field(default='local') + embedding_base_url: str | None = Field(default=None) + embedding_deployment_name: str | None = Field(default=None) + aws_access_key_id: SecretStr | None = Field(default=None) + aws_secret_access_key: SecretStr | None = Field(default=None) + aws_region_name: str | None = Field(default=None) + openrouter_site_url: str = Field(default='https://docs.all-hands.dev/') + openrouter_app_name: str = Field(default='OpenHands') + num_retries: int = Field(default=8) + retry_multiplier: float = Field(default=2) + retry_min_wait: int = Field(default=15) + retry_max_wait: int = Field(default=120) + timeout: int | None = Field(default=None) + max_message_chars: int = Field( + default=30_000 + ) # maximum number of characters in an observation's content when sent to the llm + temperature: float = Field(default=0.0) + top_p: float = Field(default=1.0) + custom_llm_provider: str | None = Field(default=None) + max_input_tokens: int | None = Field(default=None) + max_output_tokens: int | None = Field(default=None) + input_cost_per_token: float | None = Field(default=None) + output_cost_per_token: float | None = Field(default=None) + ollama_base_url: str | None = Field(default=None) + message_summary_trunc_tokens_frac: float = Field(default=0.75) + enable_cache: bool = Field(default=True) # This setting can be sent in each call to litellm - drop_params: bool = True + drop_params: bool = Field(default=True) # Note: this setting is actually global, unlike drop_params - modify_params: bool = True - disable_vision: bool | None = None - caching_prompt: bool = True - log_completions: bool = False - log_completions_folder: str = os.path.join(LOG_DIR, 'completions') - draft_editor: Optional['LLMConfig'] = None - custom_tokenizer: str | None = None - use_group: str | None = None - native_tool_calling: bool | None = None - - def defaults_to_dict(self) -> dict: - """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional.""" - result = {} - for f in fields(self): - result[f.name] = get_field_info(f) - return result + modify_params: bool = Field(default=True) + disable_vision: bool | None = Field(default=None) + caching_prompt: bool = 
Field(default=True) + log_completions: bool = Field(default=False) + log_completions_folder: str = Field(default=os.path.join(LOG_DIR, 'completions')) + draft_editor: LLMConfig | None = Field(default=None) + custom_tokenizer: str | None = Field(default=None) + native_tool_calling: bool | None = Field(default=None) + + use_group: str | None = Field(default=None) + + model_config = {'extra': 'forbid'} + + def model_post_init(self, __context: Any): + """Post-initialization hook to assign OpenRouter-related variables to environment variables. - def __post_init__(self): - """ - Post-initialization hook to assign OpenRouter-related variables to environment variables. This ensures that these values are accessible to litellm at runtime. """ + super().model_post_init(__context) # Assign OpenRouter-specific variables to environment variables if self.openrouter_site_url: os.environ['OR_SITE_URL'] = self.openrouter_site_url if self.openrouter_app_name: os.environ['OR_APP_NAME'] = self.openrouter_app_name - - def __str__(self): - attr_str = [] - for f in fields(self): - attr_name = f.name - attr_value = getattr(self, f.name) - - if attr_name in LLM_SENSITIVE_FIELDS: - attr_value = '******' if attr_value else None - - attr_str.append(f'{attr_name}={repr(attr_value)}') - - return f"LLMConfig({', '.join(attr_str)})" - - def __repr__(self): - return self.__str__() - - def to_safe_dict(self): - """Return a dict with the sensitive fields replaced with ******.""" - ret = self.__dict__.copy() - for k, v in ret.items(): - if k in LLM_SENSITIVE_FIELDS: - ret[k] = '******' if v else None - elif isinstance(v, LLMConfig): - ret[k] = v.to_safe_dict() - return ret - - @classmethod - def from_dict(cls, llm_config_dict: dict) -> 'LLMConfig': - """Create an LLMConfig object from a dictionary. - - This function is used to create an LLMConfig object from a dictionary, - with the exception of the 'draft_editor' key, which is a nested LLMConfig object. - """ - args = {k: v for k, v in llm_config_dict.items() if not isinstance(v, dict) and k in inspect.signature(cls).parameters} - if 'draft_editor' in llm_config_dict: - draft_editor_config = LLMConfig(**llm_config_dict['draft_editor']) - args['draft_editor'] = draft_editor_config - return cls(**args) diff --git a/openhands/core/config/sandbox_config.py b/openhands/core/config/sandbox_config.py index a00bb2306e41..8c7307cb1276 100644 --- a/openhands/core/config/sandbox_config.py +++ b/openhands/core/config/sandbox_config.py @@ -1,11 +1,9 @@ import os -import inspect -from dataclasses import dataclass, field, fields -from openhands.core.config.config_utils import get_field_info +from pydantic import BaseModel, Field -@dataclass -class SandboxConfig: + +class SandboxConfig(BaseModel): """Configuration for the sandbox. Attributes: @@ -41,57 +39,35 @@ class SandboxConfig: This should be a JSON string that will be parsed into a dictionary. 
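The `model_post_init` hook used in `LLMConfig` above is Pydantic v2's replacement for the dataclass `__post_init__` hook: it runs after validation on every construction. A small sketch of the same pattern (illustrative single-field class):

```python
# Sketch only: illustrative single-field config.
import os

from pydantic import BaseModel, Field


class DemoConfig(BaseModel):
    openrouter_site_url: str = Field(default='https://docs.all-hands.dev/')

    def model_post_init(self, __context) -> None:
        # Runs after validation on every construction, like __post_init__
        # did on the old dataclass.
        if self.openrouter_site_url:
            os.environ['OR_SITE_URL'] = self.openrouter_site_url


DemoConfig()
print(os.environ['OR_SITE_URL'])  # https://docs.all-hands.dev/
```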
""" - remote_runtime_api_url: str = 'http://localhost:8000' - local_runtime_url: str = 'http://localhost' - keep_runtime_alive: bool = True - rm_all_containers: bool = False - api_key: str | None = None - base_container_image: str = 'docker.io/nikolaik/python-nodejs:python3.12-nodejs22' - runtime_container_image: str | None = None - user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000 - timeout: int = 120 - remote_runtime_init_timeout: int = 180 - enable_auto_lint: bool = ( - False # once enabled, OpenHands would lint files after editing + remote_runtime_api_url: str = Field(default='http://localhost:8000') + local_runtime_url: str = Field(default='http://localhost') + keep_runtime_alive: bool = Field(default=True) + rm_all_containers: bool = Field(default=False) + api_key: str | None = Field(default=None) + base_container_image: str = Field( + default='docker.io/nikolaik/python-nodejs:python3.12-nodejs22' + ) + runtime_container_image: str | None = Field(default=None) + user_id: int = Field(default=os.getuid() if hasattr(os, 'getuid') else 1000) + timeout: int = Field(default=120) + remote_runtime_init_timeout: int = Field(default=180) + enable_auto_lint: bool = Field( + default=False # once enabled, OpenHands would lint files after editing ) - use_host_network: bool = True - runtime_extra_build_args: list[str] | None = None - initialize_plugins: bool = True - force_rebuild_runtime: bool = False - runtime_extra_deps: str | None = None - runtime_startup_env_vars: dict[str, str] = field(default_factory=dict) - browsergym_eval_env: str | None = None + use_host_network: bool = Field(default=True) + runtime_extra_build_args: list[str] | None = Field(default=None) + initialize_plugins: bool = Field(default=True) + force_rebuild_runtime: bool = Field(default=False) + runtime_extra_deps: str | None = Field(default=None) + runtime_startup_env_vars: dict[str, str] = Field(default_factory=dict) + browsergym_eval_env: str | None = Field(default=None) + platform: str | None = Field(default=None) + close_delay: int = Field(default=900) + remote_runtime_resource_factor: int = Field(default=1) + enable_gpu: bool = Field(default=False) + docker_runtime_kwargs: str | None = Field(default=None) + persist_sandbox: bool = True port: int = 63710 - platform: str | None = None - close_delay: int = 900 - remote_runtime_resource_factor: int = 1 - enable_gpu: bool = False - docker_runtime_kwargs: str | None = None - - def defaults_to_dict(self) -> dict: - """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional.""" - dict = {} - for f in fields(self): - dict[f.name] = get_field_info(f) - return dict - - def __str__(self): - attr_str = [] - for f in fields(self): - attr_name = f.name - attr_value = getattr(self, f.name) - - attr_str.append(f'{attr_name}={repr(attr_value)}') - - return f"SandboxConfig({', '.join(attr_str)})" - - def __repr__(self): - return self.__str__() - @classmethod - def from_dict(cls, env): - return cls(**{ - k: v for k, v in env.items() - if k in inspect.signature(cls).parameters - }) + model_config = {'extra': 'forbid'} diff --git a/openhands/core/config/security_config.py b/openhands/core/config/security_config.py index 0cc87f39e53b..a4805e3ab85f 100644 --- a/openhands/core/config/security_config.py +++ b/openhands/core/config/security_config.py @@ -1,10 +1,7 @@ -from dataclasses import dataclass, fields -import inspect -from openhands.core.config.config_utils import get_field_info +from pydantic import BaseModel, Field -@dataclass 
-class SecurityConfig: +class SecurityConfig(BaseModel): """Configuration for security related functionalities. Attributes: @@ -12,33 +9,5 @@ class SecurityConfig: security_analyzer: The security analyzer to use. """ - confirmation_mode: bool = False - security_analyzer: str | None = None - - def defaults_to_dict(self) -> dict: - """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional.""" - dict = {} - for f in fields(self): - dict[f.name] = get_field_info(f) - return dict - - def __str__(self): - attr_str = [] - for f in fields(self): - attr_name = f.name - attr_value = getattr(self, f.name) - - attr_str.append(f'{attr_name}={repr(attr_value)}') - - return f"SecurityConfig({', '.join(attr_str)})" - - @classmethod - def from_dict(cls, security_config_dict: dict) -> 'SecurityConfig': - return cls(**{ - k: v for k, v in security_config_dict.items() - if k in inspect.signature(cls).parameters - }) - - def __repr__(self): - return self.__str__() - \ No newline at end of file + confirmation_mode: bool = Field(default=False) + security_analyzer: str | None = Field(default=None) diff --git a/openhands/core/config/utils.py b/openhands/core/config/utils.py index d382480b1816..31938264c972 100644 --- a/openhands/core/config/utils.py +++ b/openhands/core/config/utils.py @@ -3,13 +3,13 @@ import pathlib import platform import sys -from dataclasses import is_dataclass from types import UnionType from typing import Any, MutableMapping, get_args, get_origin from uuid import uuid4 import toml from dotenv import load_dotenv +from pydantic import BaseModel, ValidationError from openhands.core import logger from openhands.core.config.agent_config import AgentConfig @@ -43,17 +43,19 @@ def get_optional_type(union_type: UnionType) -> Any: return next((t for t in types if t is not type(None)), None) # helper function to set attributes based on env vars - def set_attr_from_env(sub_config: Any, prefix=''): - """Set attributes of a config dataclass based on environment variables.""" - for field_name, field_type in sub_config.__annotations__.items(): + def set_attr_from_env(sub_config: BaseModel, prefix=''): + """Set attributes of a config model based on environment variables.""" + for field_name, field_info in sub_config.model_fields.items(): + field_value = getattr(sub_config, field_name) + field_type = field_info.annotation + # compute the expected env var name from the prefix and field name # e.g. 
LLM_BASE_URL env_var_name = (prefix + field_name).upper() - if is_dataclass(field_type): - # nested dataclass - nested_sub_config = getattr(sub_config, field_name) - set_attr_from_env(nested_sub_config, prefix=field_name + '_') + if isinstance(field_value, BaseModel): + set_attr_from_env(field_value, prefix=field_name + '_') + elif env_var_name in env_or_toml_dict: # convert the env var to the correct type and set it value = env_or_toml_dict[env_var_name] @@ -122,45 +124,60 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'): if isinstance(value, dict): try: if key is not None and key.lower() == 'agent': + # Every entry here is either a field for the default `agent` config group, or itself a group + # The best way to tell the difference is to try to parse it as an AgentConfig object + agent_group_ids: set[str] = set() + for nested_key, nested_value in value.items(): + if isinstance(nested_value, dict): + try: + agent_config = AgentConfig(**nested_value) + except ValidationError: + continue + agent_group_ids.add(nested_key) + cfg.set_agent_config(agent_config, nested_key) + logger.openhands_logger.debug( 'Attempt to load default agent config from config toml' ) - non_dict_fields = { - k: v for k, v in value.items() if not isinstance(v, dict) + value_without_groups = { + k: v for k, v in value.items() if k not in agent_group_ids } - agent_config = AgentConfig.from_dict(non_dict_fields) + agent_config = AgentConfig(**value_without_groups) cfg.set_agent_config(agent_config, 'agent') - for nested_key, nested_value in value.items(): - if isinstance(nested_value, dict): - logger.openhands_logger.debug( - f'Attempt to load group {nested_key} from config toml as agent config' - ) - agent_config = AgentConfig.from_dict(nested_value) - cfg.set_agent_config(agent_config, nested_key) + elif key is not None and key.lower() == 'llm': - # logger.openhands_logger.debug( - # 'Attempt to load default LLM config from config toml' - # ) - llm_config = LLMConfig.from_dict(value) - cfg.set_llm_config(llm_config, 'llm') + # Every entry here is either a field for the default `llm` config group, or itself a group + # The best way to tell the difference is to try to parse it as an LLMConfig object + llm_group_ids: set[str] = set() for nested_key, nested_value in value.items(): if isinstance(nested_value, dict): - # logger.openhands_logger.debug( - # f'Attempt to load group {nested_key} from config toml as llm config' - # ) - llm_config = LLMConfig.from_dict(nested_value) + try: + llm_config = LLMConfig(**nested_value) + except ValidationError: + continue + llm_group_ids.add(nested_key) cfg.set_llm_config(llm_config, nested_key) + + logger.openhands_logger.debug( + 'Attempt to load default LLM config from config toml' + ) + value_without_groups = { + k: v for k, v in value.items() if k not in llm_group_ids + } + llm_config = LLMConfig(**value_without_groups) + cfg.set_llm_config(llm_config, 'llm') + elif key is not None and key.lower() == 'security': logger.openhands_logger.debug( 'Attempt to load security config from config toml' ) - security_config = SecurityConfig.from_dict(value) + security_config = SecurityConfig(**value) cfg.security = security_config elif not key.startswith('sandbox') and key.lower() != 'core': logger.openhands_logger.warning( f'Unknown key in {toml_file}: "{key}"' ) - except (TypeError, KeyError) as e: + except (TypeError, KeyError, ValidationError) as e: logger.openhands_logger.warning( f'Cannot parse [{key}] config from toml, values have not been applied.\nError: {e}', 
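The TOML loader above tells a named sub-group (e.g. an `[llm.gpt4]` table) apart from plain keys of the default group by attempting to construct the config object and catching `ValidationError`. That discrimination works because the models set `extra: forbid`, so tables with unknown keys fail fast instead of being silently accepted. A sketch of the idea with an illustrative model (the real loader uses `LLMConfig`/`AgentConfig`):

```python
# Sketch only: DemoLLMConfig is illustrative; the real loader uses LLMConfig/AgentConfig.
from pydantic import BaseModel, ValidationError


class DemoLLMConfig(BaseModel):
    model: str = 'claude-3-5-sonnet-20241022'
    temperature: float = 0.0

    model_config = {'extra': 'forbid'}


def looks_like_config_group(table: dict) -> bool:
    """Mirror the loader's check: does this nested table parse as a config?"""
    try:
        DemoLLMConfig(**table)
        return True
    except ValidationError:
        return False


print(looks_like_config_group({'model': 'gpt-4o', 'temperature': 0.2}))  # True
print(looks_like_config_group({'some_unknown_key': 'value'}))            # False
```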
exc_info=False, @@ -197,7 +214,7 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'): logger.openhands_logger.warning( f'Unknown config key "{key}" in [core] section' ) - except (TypeError, KeyError) as e: + except (TypeError, KeyError, ValidationError) as e: logger.openhands_logger.warning( f'Cannot parse [sandbox] config from toml, values have not been applied.\nError: {e}', exc_info=False, @@ -299,7 +316,7 @@ def get_llm_config_arg( return LLMConfig(**toml_config[llm_config_arg]) # update the llm config with the specified section if 'llm' in toml_config and llm_config_arg in toml_config['llm']: - return LLMConfig.from_dict(toml_config['llm'][llm_config_arg]) + return LLMConfig(**toml_config['llm'][llm_config_arg]) raise ValueError(f'Loading from toml failed for {llm_config_arg}') diff --git a/openhands/llm/async_llm.py b/openhands/llm/async_llm.py index 233def8da8b5..a6cbc7f34cd3 100644 --- a/openhands/llm/async_llm.py +++ b/openhands/llm/async_llm.py @@ -19,7 +19,9 @@ def __init__(self, *args, **kwargs): self._async_completion = partial( self._call_acompletion, model=self.config.model, - api_key=self.config.api_key, + api_key=self.config.api_key.get_secret_value() + if self.config.api_key + else None, base_url=self.config.base_url, api_version=self.config.api_version, custom_llm_provider=self.config.custom_llm_provider, diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py index 61177b4e3726..1ffa250a43e3 100644 --- a/openhands/llm/llm.py +++ b/openhands/llm/llm.py @@ -189,7 +189,9 @@ def __init__( self._completion = partial( litellm_completion, model=self.config.model, - api_key=self.config.api_key, + api_key=self.config.api_key.get_secret_value() + if self.config.api_key + else None, base_url=self.config.base_url, api_version=self.config.api_version, custom_llm_provider=self.config.custom_llm_provider, @@ -459,7 +461,9 @@ def init_model_info(self): # GET {base_url}/v1/model/info with litellm_model_id as path param response = requests.get( f'{self.config.base_url}/v1/model/info', - headers={'Authorization': f'Bearer {self.config.api_key}'}, + headers={ + 'Authorization': f'Bearer {self.config.api_key.get_secret_value() if self.config.api_key else None}' + }, ) resp_json = response.json() if 'data' not in resp_json: diff --git a/openhands/llm/streaming_llm.py b/openhands/llm/streaming_llm.py index a45beba985ab..b7dacefd7660 100644 --- a/openhands/llm/streaming_llm.py +++ b/openhands/llm/streaming_llm.py @@ -16,7 +16,9 @@ def __init__(self, *args, **kwargs): self._async_streaming_completion = partial( self._call_acompletion, model=self.config.model, - api_key=self.config.api_key, + api_key=self.config.api_key.get_secret_value() + if self.config.api_key + else None, base_url=self.config.base_url, api_version=self.config.api_version, custom_llm_provider=self.config.custom_llm_provider, diff --git a/openhands/resolver/resolve_issue.py b/openhands/resolver/resolve_issue.py index 21036dbc29b8..f50b37d79447 100644 --- a/openhands/resolver/resolve_issue.py +++ b/openhands/resolver/resolve_issue.py @@ -14,12 +14,7 @@ import openhands from openhands.controller.state.state import State -from openhands.core.config import ( - AgentConfig, - AppConfig, - LLMConfig, - SandboxConfig, -) +from openhands.core.config import AgentConfig, AppConfig, LLMConfig, SandboxConfig from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller from openhands.events.action import CmdRunAction, MessageAction @@ -153,7 +148,7 @@ async 
def process_issue( max_iterations: int, llm_config: LLMConfig, output_dir: str, - runtime_container_image: str, + runtime_container_image: str | None, prompt_template: str, issue_handler: IssueHandlerInterface, repo_instruction: str | None = None, @@ -306,7 +301,7 @@ async def resolve_issue( max_iterations: int, output_dir: str, llm_config: LLMConfig, - runtime_container_image: str, + runtime_container_image: str | None, prompt_template: str, issue_type: str, repo_instruction: str | None, @@ -583,11 +578,16 @@ def int_or_none(value): default=None, help="Target branch to pull and create PR against (for PRs). If not specified, uses the PR's base branch.", ) + parser.add_argument( + '--is-experimental', + type=lambda x: x.lower() == 'true', + help='Whether to run in experimental mode.', + ) my_args = parser.parse_args() runtime_container_image = my_args.runtime_container_image - if runtime_container_image is None: + if runtime_container_image is None and not my_args.is_experimental: runtime_container_image = ( f'ghcr.io/all-hands-ai/runtime:{openhands.__version__}-nikolaik' ) diff --git a/openhands/runtime/impl/modal/modal_runtime.py b/openhands/runtime/impl/modal/modal_runtime.py index 4995f07503ff..8d5aa57e7d10 100644 --- a/openhands/runtime/impl/modal/modal_runtime.py +++ b/openhands/runtime/impl/modal/modal_runtime.py @@ -60,7 +60,8 @@ def __init__( self.sid = sid self.modal_client = modal.Client.from_credentials( - config.modal_api_token_id, config.modal_api_token_secret + config.modal_api_token_id.get_secret_value(), + config.modal_api_token_secret.get_secret_value(), ) self.app = modal.App.lookup( 'openhands', create_if_missing=True, client=self.modal_client diff --git a/openhands/runtime/impl/runloop/runloop_runtime.py b/openhands/runtime/impl/runloop/runloop_runtime.py index 51628f54056d..add4619aea81 100644 --- a/openhands/runtime/impl/runloop/runloop_runtime.py +++ b/openhands/runtime/impl/runloop/runloop_runtime.py @@ -40,7 +40,7 @@ def __init__( self.devbox: DevboxView | None = None self.config = config self.runloop_api_client = Runloop( - bearer_token=config.runloop_api_key, + bearer_token=config.runloop_api_key.get_secret_value(), ) self.container_name = CONTAINER_NAME_PREFIX + sid super().__init__( diff --git a/openhands/server/routes/public.py b/openhands/server/routes/public.py index 5a8925b741b4..fcdb1e52cef7 100644 --- a/openhands/server/routes/public.py +++ b/openhands/server/routes/public.py @@ -51,8 +51,8 @@ async def get_litellm_models() -> list[str]: ): bedrock_model_list = bedrock.list_foundation_models( llm_config.aws_region_name, - llm_config.aws_access_key_id, - llm_config.aws_secret_access_key, + llm_config.aws_access_key_id.get_secret_value(), + llm_config.aws_secret_access_key.get_secret_value(), ) model_list = litellm_model_list_without_bedrock + bedrock_model_list for llm_config in config.llms.values(): diff --git a/openhands/utils/embeddings.py b/openhands/utils/embeddings.py index 7e251f0e5022..6791787d3204 100644 --- a/openhands/utils/embeddings.py +++ b/openhands/utils/embeddings.py @@ -90,7 +90,9 @@ def get_embedding_model(strategy: str, llm_config: LLMConfig) -> 'BaseEmbedding' return OpenAIEmbedding( model='text-embedding-ada-002', - api_key=llm_config.api_key, + api_key=llm_config.api_key.get_secret_value() + if llm_config.api_key + else None, ) elif strategy == 'azureopenai': from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding diff --git a/poetry.lock b/poetry.lock index 6df255f5826c..51f9746cac06 100644 --- a/poetry.lock +++ 
b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -5389,17 +5389,15 @@ realtime = ["websockets (>=13,<15)"] [[package]] name = "openhands-aci" -version = "0.1.6" +version = "0.1.8" description = "An Agent-Computer Interface (ACI) designed for software development agents OpenHands." optional = false -python-versions = "<4.0,>=3.12" -files = [ - {file = "openhands_aci-0.1.6-py3-none-any.whl", hash = "sha256:e9589d959a146fad3e6935be1f80b7a4368dd7aa2ba38ad267862c4f8a246e72"}, - {file = "openhands_aci-0.1.6.tar.gz", hash = "sha256:6edf4d6478a349140a324c4a0c4be6d1e9a7acce1739a37d02eecbb9006a2ce7"}, -] +python-versions = "^3.12" +files = [] +develop = false [package.dependencies] -diskcache = ">=5.6.3,<6.0.0" +diskcache = "^5.6.3" flake8 = "*" gitpython = "*" grep-ast = "0.3.3" @@ -5409,7 +5407,13 @@ numpy = "*" pandas = "*" scipy = "*" tree-sitter = "0.21.3" -whatthepatch = ">=1.0.6,<2.0.0" +whatthepatch = "^1.0.6" + +[package.source] +type = "git" +url = "https://github.com/All-Hands-AI/openhands-aci.git" +reference = "fix-find-show-only-hidden-subpaths" +resolved_reference = "910e8c470aff0e496bf262bc673c7ee7b4531159" [[package]] name = "opentelemetry-api" @@ -9853,4 +9857,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "239681e32cbe17b32855c0bccaf636cc05c55a5411fdb79d180ab3ad833284ea" +content-hash = "8320b6c6bb05538516a965589ce03fec4d30df38fb7b47fc934258f1d8d47e30" diff --git a/pyproject.toml b/pyproject.toml index 3753ae45d200..af177c2a6fd9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,7 @@ runloop-api-client = "0.12.0" libtmux = ">=0.37,<0.40" pygithub = "^2.5.0" joblib = "*" -openhands-aci = "0.1.6" +openhands-aci = "0.1.8" python-socketio = "^5.11.4" redis = "^5.2.0" sse-starlette = "^2.1.3" diff --git a/tests/unit/test_acompletion.py b/tests/unit/test_acompletion.py index b6753759be3d..cca18bbb5b29 100644 --- a/tests/unit/test_acompletion.py +++ b/tests/unit/test_acompletion.py @@ -109,9 +109,6 @@ async def mock_on_cancel_requested(): print(f'Cancel requested: {is_set}') return is_set - config = load_app_config() - config.on_cancel_requested_fn = mock_on_cancel_requested - async def mock_acompletion(*args, **kwargs): print('Starting mock_acompletion') for i in range(20): # Increased iterations for longer running task @@ -153,13 +150,6 @@ async def cancel_after_delay(): async def test_async_streaming_completion_with_user_cancellation(cancel_after_chunks): cancel_requested = False - async def mock_on_cancel_requested(): - nonlocal cancel_requested - return cancel_requested - - config = load_app_config() - config.on_cancel_requested_fn = mock_on_cancel_requested - test_messages = [ 'This is ', 'a test ', diff --git a/tests/unit/test_codeact_agent.py b/tests/unit/test_codeact_agent.py index c17607b193af..b4074e63b694 100644 --- a/tests/unit/test_codeact_agent.py +++ b/tests/unit/test_codeact_agent.py @@ -60,7 +60,6 @@ def mock_state() -> State: def test_cmd_output_observation_message(agent: CodeActAgent): - agent.config.function_calling = False obs = CmdOutputObservation( command='echo hello', content='Command output', @@ -82,7 +81,6 @@ def test_cmd_output_observation_message(agent: CodeActAgent): def test_ipython_run_cell_observation_message(agent: CodeActAgent): - 
agent.config.function_calling = False obs = IPythonRunCellObservation( code='plt.plot()', content='IPython output\n![image](data:image/png;base64,ABC123)', @@ -105,7 +103,6 @@ def test_ipython_run_cell_observation_message(agent: CodeActAgent): def test_agent_delegate_observation_message(agent: CodeActAgent): - agent.config.function_calling = False obs = AgentDelegateObservation( content='Content', outputs={'content': 'Delegated agent output'} ) @@ -122,7 +119,6 @@ def test_agent_delegate_observation_message(agent: CodeActAgent): def test_error_observation_message(agent: CodeActAgent): - agent.config.function_calling = False obs = ErrorObservation('Error message') results = agent.get_observation_message(obs, tool_call_id_to_message={}) @@ -145,7 +141,6 @@ def test_unknown_observation_message(agent: CodeActAgent): def test_file_edit_observation_message(agent: CodeActAgent): - agent.config.function_calling = False obs = FileEditObservation( path='/test/file.txt', prev_exist=True, @@ -167,7 +162,6 @@ def test_file_edit_observation_message(agent: CodeActAgent): def test_file_read_observation_message(agent: CodeActAgent): - agent.config.function_calling = False obs = FileReadObservation( path='/test/file.txt', content='File content', @@ -186,7 +180,6 @@ def test_file_read_observation_message(agent: CodeActAgent): def test_browser_output_observation_message(agent: CodeActAgent): - agent.config.function_calling = False obs = BrowserOutputObservation( url='http://example.com', trigger_by_action='browse', @@ -207,7 +200,6 @@ def test_browser_output_observation_message(agent: CodeActAgent): def test_user_reject_observation_message(agent: CodeActAgent): - agent.config.function_calling = False obs = UserRejectObservation('Action rejected') results = agent.get_observation_message(obs, tool_call_id_to_message={}) @@ -223,7 +215,6 @@ def test_user_reject_observation_message(agent: CodeActAgent): def test_function_calling_observation_message(agent: CodeActAgent): - agent.config.function_calling = True mock_response = { 'id': 'mock_id', 'total_calls_in_response': 1, diff --git a/tests/unit/test_condenser.py b/tests/unit/test_condenser.py new file mode 100644 index 000000000000..91878c86baa1 --- /dev/null +++ b/tests/unit/test_condenser.py @@ -0,0 +1,520 @@ +from datetime import datetime +from typing import Any +from unittest.mock import MagicMock + +import pytest + +from openhands.controller.state.state import State +from openhands.core.config.condenser_config import ( + AmortizedForgettingCondenserConfig, + LLMAttentionCondenserConfig, + LLMSummarizingCondenserConfig, + NoOpCondenserConfig, + ObservationMaskingCondenserConfig, + RecentEventsCondenserConfig, +) +from openhands.core.config.llm_config import LLMConfig +from openhands.events.event import Event, EventSource +from openhands.events.observation.observation import Observation +from openhands.llm import LLM +from openhands.memory.condenser import ( + AmortizedForgettingCondenser, + Condenser, + ImportantEventSelection, + LLMAttentionCondenser, + LLMSummarizingCondenser, + NoOpCondenser, + ObservationMaskingCondenser, + RecentEventsCondenser, +) + + +def create_test_event( + message: str, timestamp: datetime | None = None, id: int | None = None +) -> Event: + """Create a simple test event.""" + event = Event() + event._message = message + event.timestamp = timestamp if timestamp else datetime.now() + if id: + event._id = id + event._source = EventSource.USER + return event + + +@pytest.fixture +def mock_llm() -> LLM: + """Mocks an LLM object with a 
utility function for setting and resetting response contents in unit tests.""" + # Create a MagicMock for the LLM object + mock_llm = MagicMock( + spec=LLM, + config=MagicMock( + spec=LLMConfig, model='gpt-4o', api_key='test_key', custom_llm_provider=None + ), + metrics=MagicMock(), + ) + _mock_content = None + + # Set a mock message with the mocked content + mock_message = MagicMock() + mock_message.content = _mock_content + + def set_mock_response_content(content: Any): + """Set the mock response for the LLM.""" + nonlocal mock_message + mock_message.content = content + + mock_choice = MagicMock() + mock_choice.message = mock_message + + mock_response = MagicMock() + mock_response.choices = [mock_choice] + + mock_llm.completion.return_value = mock_response + + # Attach helper methods to the mock object + mock_llm.set_mock_response_content = set_mock_response_content + + return mock_llm + + +@pytest.fixture +def mock_state() -> State: + """Mocks a State object with the only parameters needed for testing condensers: history and extra_data.""" + mock_state = MagicMock(spec=State) + mock_state.history = [] + mock_state.extra_data = {} + + return mock_state + + +def test_noop_condenser_from_config(): + """Test that the NoOpCondenser objects can be made from config.""" + config = NoOpCondenserConfig() + condenser = Condenser.from_config(config) + + assert isinstance(condenser, NoOpCondenser) + + +def test_noop_condenser(): + """Test that NoOpCondensers preserve their input events.""" + events = [ + create_test_event('Event 1'), + create_test_event('Event 2'), + create_test_event('Event 3'), + ] + + mock_state = MagicMock() + mock_state.history = events + + condenser = NoOpCondenser() + result = condenser.condensed_history(mock_state) + + assert result == events + + +def test_observation_masking_condenser_from_config(): + """Test that ObservationMaskingCondenser objects can be made from config.""" + attention_window = 5 + config = ObservationMaskingCondenserConfig(attention_window=attention_window) + condenser = Condenser.from_config(config) + + assert isinstance(condenser, ObservationMaskingCondenser) + assert condenser.attention_window == attention_window + + +def test_observation_masking_condenser_respects_attention_window(mock_state): + """Test that ObservationMaskingCondenser only masks events outside the attention window.""" + attention_window = 3 + condenser = ObservationMaskingCondenser(attention_window=attention_window) + + events = [ + create_test_event('Event 1'), + Observation('Observation 1'), + create_test_event('Event 3'), + create_test_event('Event 4'), + Observation('Observation 2'), + ] + + mock_state.history = events + result = condenser.condensed_history(mock_state) + + assert len(result) == len(events) + + for index, (event, condensed_event) in enumerate(zip(events, result)): + # If we're outside the attention window, observations should be masked. + if index < len(events) - attention_window: + if isinstance(event, Observation): + assert '' in str(condensed_event) + + # If we're within the attention window, events are unchanged. 
+ else: + assert event == condensed_event + + +def test_recent_events_condenser_from_config(): + """Test that RecentEventsCondenser objects can be made from config.""" + max_events = 5 + keep_first = True + config = RecentEventsCondenserConfig(keep_first=keep_first, max_events=max_events) + condenser = Condenser.from_config(config) + + assert isinstance(condenser, RecentEventsCondenser) + assert condenser.max_events == max_events + assert condenser.keep_first == keep_first + + +def test_recent_events_condenser(): + """Test that RecentEventsCondensers keep just the most recent events.""" + events = [ + create_test_event('Event 1'), + create_test_event('Event 2'), + create_test_event('Event 3'), + create_test_event('Event 4'), + create_test_event('Event 5'), + ] + + mock_state = MagicMock() + mock_state.history = events + + # If the max_events are larger than the number of events, equivalent to a NoOpCondenser. + condenser = RecentEventsCondenser(max_events=len(events)) + result = condenser.condensed_history(mock_state) + + assert result == events + + # If the max_events are smaller than the number of events, only keep the last few. + max_events = 2 + condenser = RecentEventsCondenser(max_events=max_events) + result = condenser.condensed_history(mock_state) + + assert len(result) == max_events + assert result[0]._message == 'Event 4' + assert result[1]._message == 'Event 5' + + # If the keep_first flag is set, the first event will always be present. + keep_first = 1 + max_events = 2 + condenser = RecentEventsCondenser(keep_first=keep_first, max_events=max_events) + result = condenser.condensed_history(mock_state) + + assert len(result) == max_events + assert result[0]._message == 'Event 1' + assert result[1]._message == 'Event 5' + + # We should be able to keep more of the initial events. + keep_first = 2 + max_events = 3 + condenser = RecentEventsCondenser(keep_first=keep_first, max_events=max_events) + result = condenser.condensed_history(mock_state) + + assert len(result) == max_events + assert result[0]._message == 'Event 1' + assert result[1]._message == 'Event 2' + assert result[2]._message == 'Event 5' + + +def test_llm_condenser_from_config(): + """Test that LLMCondensers can be made from config.""" + config = LLMSummarizingCondenserConfig( + llm_config=LLMConfig( + model='gpt-4o', + api_key='test_key', + ) + ) + condenser = Condenser.from_config(config) + + assert isinstance(condenser, LLMSummarizingCondenser) + assert condenser.llm.config.model == 'gpt-4o' + assert condenser.llm.config.api_key.get_secret_value() == 'test_key' + + +def test_llm_condenser(mock_llm, mock_state): + """Test that LLMCondensers use the LLM to generate a summary event.""" + events = [ + create_test_event('Event 1'), + create_test_event('Event 2'), + ] + mock_state.history = events + + mock_llm.metrics = MagicMock() + mock_llm.metrics.get.return_value = {'test_metric': 1.0} + + mock_llm.set_mock_response_content('Summary of events') + + condenser = LLMSummarizingCondenser(llm=mock_llm) + result = condenser.condensed_history(mock_state) + + assert len(result) == 1 + assert result[0].content == 'Summary of events' + + # Verify LLM was called with correct prompt. 
+ mock_llm.completion.assert_called_once() + call_args = mock_llm.completion.call_args[1] + assert 'messages' in call_args + assert len(call_args['messages']) == 1 + assert 'Event 1' in call_args['messages'][0]['content'] + assert 'Event 2' in call_args['messages'][0]['content'] + + # Verify metrics were added to state + assert 'condenser_meta' in mock_state.extra_data + assert len(mock_state.extra_data['condenser_meta']) == 1 + assert mock_state.extra_data['condenser_meta'][0]['metrics'] == {'test_metric': 1.0} + + +def test_llm_condenser_error(): + """Test that LLM errors are propagated during condensation.""" + events = [create_test_event('Event 1', datetime(2024, 1, 1, 10, 0))] + + mock_state = MagicMock() + mock_state.history = events + + mock_llm = MagicMock() + mock_llm.completion.side_effect = Exception('LLM error') + + condenser = LLMSummarizingCondenser(llm=mock_llm) + + try: + condenser.condensed_history(mock_state) + raise AssertionError('Expected exception was not raised.') + except Exception as e: + assert str(e) == 'LLM error' + + +def test_amortized_forgetting_condenser_from_config(): + """Test that AmortizedForgettingCondenser objects can be made from config.""" + max_size = 50 + keep_first = 10 + config = AmortizedForgettingCondenserConfig( + max_size=max_size, keep_first=keep_first + ) + condenser = Condenser.from_config(config) + + assert isinstance(condenser, AmortizedForgettingCondenser) + assert condenser.max_size == max_size + assert condenser.keep_first == keep_first + + +def test_amortized_forgetting_condenser_invalid_config(): + """Test that AmortizedForgettingCondenser raises error when keep_first > max_size.""" + pytest.raises(ValueError, AmortizedForgettingCondenser, max_size=4, keep_first=2) + pytest.raises(ValueError, AmortizedForgettingCondenser, max_size=0) + pytest.raises(ValueError, AmortizedForgettingCondenser, keep_first=-1) + + +def test_amortized_forgetting_condenser_grows_to_max_size(): + """Test that AmortizedForgettingCondenser correctly maintains an event context up to max size.""" + max_size = 15 + condenser = AmortizedForgettingCondenser(max_size=max_size) + + mock_state = MagicMock() + mock_state.extra_data = {} + mock_state.history = [] + + for i in range(max_size): + event = create_test_event(f'Event {i}') + mock_state.history.append(event) + results = condenser.condensed_history(mock_state) + assert len(results) == i + 1 + + +def test_amortized_forgetting_condenser_forgets_when_larger_than_max_size(): + """Test that the AmortizedForgettingCondenser forgets events when the context grows too large.""" + max_size = 2 + condenser = AmortizedForgettingCondenser(max_size=max_size) + + mock_state = MagicMock() + mock_state.extra_data = {} + mock_state.history = [] + + for i in range(max_size * 10): + event = create_test_event(f'Event {i}') + mock_state.history.append(event) + results = condenser.condensed_history(mock_state) + + # The last event in the results is always the event we just added. + assert results[-1] == event + + # The number of results should bounce back and forth between 1, 2, 1, 2, ... 
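As a side note on `test_llm_condenser_error` above: the manual try/except could equivalently be phrased with `pytest.raises`. A sketch that reuses the module's `create_test_event` helper and `mock_state` fixture (the test name is made up, and this is not part of the patch):

```python
# Sketch only: an equivalent phrasing of test_llm_condenser_error using pytest.raises.
from unittest.mock import MagicMock

import pytest

from openhands.memory.condenser import LLMSummarizingCondenser


def test_llm_condenser_error_with_pytest_raises(mock_state):
    mock_state.history = [create_test_event('Event 1')]

    mock_llm = MagicMock()
    mock_llm.completion.side_effect = Exception('LLM error')

    condenser = LLMSummarizingCondenser(llm=mock_llm)
    with pytest.raises(Exception, match='LLM error'):
        condenser.condensed_history(mock_state)
```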
+        assert len(results) == (i % 2) + 1
+
+
+def test_amortized_forgetting_condenser_keeps_first_events():
+    """Test that the AmortizedForgettingCondenser keeps the right number of initial events when forgetting."""
+    max_size = 4
+    keep_first = 1
+    condenser = AmortizedForgettingCondenser(max_size=max_size, keep_first=keep_first)
+
+    first_event = create_test_event('Event 0')
+
+    mock_state = MagicMock()
+    mock_state.extra_data = {}
+    mock_state.history = [first_event]
+
+    for i in range(max_size * 10):
+        event = create_test_event(f'Event {i+1}', datetime(2024, 1, 1, 10, i + 1))
+        mock_state.history.append(event)
+        results = condenser.condensed_history(mock_state)
+
+        # The last event is always the event we just added.
+        assert results[-1] == event
+
+        # The first event is always the first event.
+        assert results[0] == first_event
+
+        # The number of results should bounce back and forth between 2, 3, 4, 2, 3, 4, ...
+        assert len(results) == (i % 3) + 2
+
+
+def test_llm_attention_condenser_from_config():
+    """Test that LLMAttentionCondenser objects can be made from config."""
+    config = LLMAttentionCondenserConfig(
+        max_size=50,
+        keep_first=10,
+        llm_config=LLMConfig(
+            model='gpt-4o',
+            api_key='test_key',
+        ),
+    )
+    condenser = Condenser.from_config(config)
+
+    assert isinstance(condenser, LLMAttentionCondenser)
+    assert condenser.llm.config.model == 'gpt-4o'
+    assert condenser.llm.config.api_key.get_secret_value() == 'test_key'
+    assert condenser.max_size == 50
+    assert condenser.keep_first == 10
+
+
+def test_llm_attention_condenser_invalid_config():
+    """Test that LLMAttentionCondenser raises an error if the configured LLM doesn't support response schema."""
+    config = LLMAttentionCondenserConfig(
+        max_size=50,
+        keep_first=10,
+        llm_config=LLMConfig(
+            model='claude-2',  # Older model that doesn't support response schema
+            api_key='test_key',
+        ),
+    )
+
+    pytest.raises(ValueError, LLMAttentionCondenser.from_config, config)
+
+
+def test_llm_attention_condenser_keeps_first_events(mock_llm, mock_state):
+    """Test that the LLMAttentionCondenser keeps the right number of initial events when forgetting."""
+    max_size = 4
+    condenser = LLMAttentionCondenser(max_size=max_size, keep_first=1, llm=mock_llm)
+
+    first_event = create_test_event('Event 0', id=0)
+    mock_state.history.append(first_event)
+
+    for i in range(max_size * 10):
+        event = create_test_event(f'Event {i+1}', id=i + 1)
+        mock_state.history.append(event)
+
+        mock_llm.set_mock_response_content(
+            ImportantEventSelection(
+                ids=[event.id for event in mock_state.history]
+            ).model_dump_json()
+        )
+        results = condenser.condensed_history(mock_state)
+
+        # The first event is always the first event.
+ assert results[0] == first_event + + +def test_llm_attention_condenser_grows_to_max_size(mock_llm, mock_state): + """Test that LLMAttentionCondenser correctly maintains an event context up to max size.""" + max_size = 15 + condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm) + + for i in range(max_size): + event = create_test_event(f'Event {i}') + mock_state.history.append(event) + mock_llm.set_mock_response_content( + ImportantEventSelection(ids=[event.id for event in mock_state.history]) + ) + results = condenser.condensed_history(mock_state) + assert len(results) == i + 1 + + +def test_llm_attention_condenser_forgets_when_larger_than_max_size( + mock_llm, mock_state +): + """Test that the LLMAttentionCondenser forgets events when the context grows too large.""" + max_size = 2 + condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm) + + for i in range(max_size * 10): + event = create_test_event(f'Event {i}', id=i) + mock_state.history.append(event) + + mock_llm.set_mock_response_content( + ImportantEventSelection( + ids=[event.id for event in mock_state.history] + ).model_dump_json() + ) + + results = condenser.condensed_history(mock_state) + + # The number of results should bounce back and forth between 1, 2, 1, 2, ... + assert len(results) == (i % 2) + 1 + + +def test_llm_attention_condenser_handles_events_outside_history(mock_llm, mock_state): + """Test that the LLMAttentionCondenser handles event IDs that aren't from the event history.""" + max_size = 2 + condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm) + + for i in range(max_size * 10): + event = create_test_event(f'Event {i}', id=i) + mock_state.history.append(event) + + mock_llm.set_mock_response_content( + ImportantEventSelection( + ids=[event.id for event in mock_state.history] + [-1, -2, -3, -4] + ).model_dump_json() + ) + results = condenser.condensed_history(mock_state) + + # The number of results should bounce back and forth between 1, 2, 1, 2, ... + assert len(results) == (i % 2) + 1 + + +def test_llm_attention_condenser_handles_too_many_events(mock_llm, mock_state): + """Test that the LLMAttentionCondenser handles when the response contains too many event IDs.""" + max_size = 2 + condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm) + + for i in range(max_size * 10): + event = create_test_event(f'Event {i}', id=i) + mock_state.history.append(event) + mock_llm.set_mock_response_content( + ImportantEventSelection( + ids=[event.id for event in mock_state.history] + + [event.id for event in mock_state.history] + ).model_dump_json() + ) + results = condenser.condensed_history(mock_state) + + # The number of results should bounce back and forth between 1, 2, 1, 2, ... + assert len(results) == (i % 2) + 1 + + +def test_llm_attention_condenser_handles_too_few_events(mock_llm, mock_state): + """Test that the LLMAttentionCondenser handles when the response contains too few event IDs.""" + max_size = 2 + condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm) + + for i in range(max_size * 10): + event = create_test_event(f'Event {i}', id=i) + mock_state.history.append(event) + + mock_llm.set_mock_response_content( + ImportantEventSelection(ids=[]).model_dump_json() + ) + + results = condenser.condensed_history(mock_state) + + # The number of results should bounce back and forth between 1, 2, 1, 2, ... 
+ assert len(results) == (i % 2) + 1 diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index b8c08d9dad63..08139848c448 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -63,7 +63,7 @@ def test_compat_env_to_config(monkeypatch, setup_env): assert config.workspace_base == '/repos/openhands/workspace' assert isinstance(config.get_llm_config(), LLMConfig) - assert config.get_llm_config().api_key == 'sk-proj-rgMV0...' + assert config.get_llm_config().api_key.get_secret_value() == 'sk-proj-rgMV0...' assert config.get_llm_config().model == 'gpt-4o' assert isinstance(config.get_agent_config(), AgentConfig) assert isinstance(config.get_agent_config().memory_max_threads, int) @@ -83,7 +83,7 @@ def test_load_from_old_style_env(monkeypatch, default_config): load_from_env(default_config, os.environ) - assert default_config.get_llm_config().api_key == 'test-api-key' + assert default_config.get_llm_config().api_key.get_secret_value() == 'test-api-key' assert default_config.get_agent_config().memory_enabled is True assert default_config.default_agent == 'PlannerAgent' assert default_config.workspace_base == '/opt/files/workspace' @@ -126,7 +126,7 @@ def test_load_from_new_style_toml(default_config, temp_toml_file): # default llm & agent configs assert default_config.default_agent == 'TestAgent' assert default_config.get_llm_config().model == 'test-model' - assert default_config.get_llm_config().api_key == 'toml-api-key' + assert default_config.get_llm_config().api_key.get_secret_value() == 'toml-api-key' assert default_config.get_agent_config().memory_enabled is True # undefined agent config inherits default ones @@ -291,7 +291,7 @@ def test_env_overrides_compat_toml(monkeypatch, default_config, temp_toml_file): assert default_config.get_llm_config().model == 'test-model' assert default_config.get_llm_config('llm').model == 'test-model' assert default_config.get_llm_config_from_agent().model == 'test-model' - assert default_config.get_llm_config().api_key == 'env-api-key' + assert default_config.get_llm_config().api_key.get_secret_value() == 'env-api-key' # after we set workspace_base to 'UNDEFINED' in the environment, # workspace_base should be set to that @@ -336,7 +336,7 @@ def test_env_overrides_sandbox_toml(monkeypatch, default_config, temp_toml_file) assert default_config.workspace_mount_path is None # before load_from_env, values are set to the values from the toml file - assert default_config.get_llm_config().api_key == 'toml-api-key' + assert default_config.get_llm_config().api_key.get_secret_value() == 'toml-api-key' assert default_config.sandbox.timeout == 500 assert default_config.sandbox.user_id == 1001 @@ -345,7 +345,7 @@ def test_env_overrides_sandbox_toml(monkeypatch, default_config, temp_toml_file) # values from env override values from toml assert os.environ.get('LLM_MODEL') is None assert default_config.get_llm_config().model == 'test-model' - assert default_config.get_llm_config().api_key == 'env-api-key' + assert default_config.get_llm_config().api_key.get_secret_value() == 'env-api-key' assert default_config.sandbox.timeout == 1000 assert default_config.sandbox.user_id == 1002 @@ -412,7 +412,7 @@ def test_security_config_from_dict(): # Test with all fields config_dict = {'confirmation_mode': True, 'security_analyzer': 'some_analyzer'} - security_config = SecurityConfig.from_dict(config_dict) + security_config = SecurityConfig(**config_dict) # Verify all fields are correctly set assert security_config.confirmation_mode is True @@ -558,10 
+558,7 @@ def test_load_from_toml_partial_invalid(default_config, temp_toml_file, caplog): assert 'Cannot parse [llm] config from toml' in log_content assert 'values have not been applied' in log_content # Error: LLMConfig.__init__() got an unexpected keyword argume - assert ( - 'Error: LLMConfig.__init__() got an unexpected keyword argume' - in log_content - ) + assert 'Error: 1 validation error for LLMConfig' in log_content assert 'invalid_field' in log_content # invalid [sandbox] config @@ -633,12 +630,14 @@ def test_api_keys_repr_str(): aws_access_key_id='my_access_key', aws_secret_access_key='my_secret_key', ) - assert "api_key='******'" in repr(llm_config) - assert "aws_access_key_id='******'" in repr(llm_config) - assert "aws_secret_access_key='******'" in repr(llm_config) - assert "api_key='******'" in str(llm_config) - assert "aws_access_key_id='******'" in str(llm_config) - assert "aws_secret_access_key='******'" in str(llm_config) + + # Check that no secret keys are emitted in representations of the config object + assert 'my_api_key' not in repr(llm_config) + assert 'my_api_key' not in str(llm_config) + assert 'my_access_key' not in repr(llm_config) + assert 'my_access_key' not in str(llm_config) + assert 'my_secret_key' not in repr(llm_config) + assert 'my_secret_key' not in str(llm_config) # Check that no other attrs in LLMConfig have 'key' or 'token' in their name # This will fail when new attrs are added, and attract attention @@ -650,7 +649,7 @@ def test_api_keys_repr_str(): 'output_cost_per_token', 'custom_tokenizer', ] - for attr_name in dir(LLMConfig): + for attr_name in LLMConfig.model_fields.keys(): if ( not attr_name.startswith('__') and attr_name not in known_key_token_attrs_llm @@ -665,7 +664,7 @@ def test_api_keys_repr_str(): # Test AgentConfig # No attrs in AgentConfig have 'key' or 'token' in their name agent_config = AgentConfig(memory_enabled=True, memory_max_threads=4) - for attr_name in dir(AgentConfig): + for attr_name in AgentConfig.model_fields.keys(): if not attr_name.startswith('__'): assert ( 'key' not in attr_name.lower() @@ -684,16 +683,16 @@ def test_api_keys_repr_str(): modal_api_token_secret='my_modal_api_token_secret', runloop_api_key='my_runloop_api_key', ) - assert "e2b_api_key='******'" in repr(app_config) - assert "e2b_api_key='******'" in str(app_config) - assert "jwt_secret='******'" in repr(app_config) - assert "jwt_secret='******'" in str(app_config) - assert "modal_api_token_id='******'" in repr(app_config) - assert "modal_api_token_id='******'" in str(app_config) - assert "modal_api_token_secret='******'" in repr(app_config) - assert "modal_api_token_secret='******'" in str(app_config) - assert "runloop_api_key='******'" in repr(app_config) - assert "runloop_api_key='******'" in str(app_config) + assert 'my_e2b_api_key' not in repr(app_config) + assert 'my_e2b_api_key' not in str(app_config) + assert 'my_jwt_secret' not in repr(app_config) + assert 'my_jwt_secret' not in str(app_config) + assert 'my_modal_api_token_id' not in repr(app_config) + assert 'my_modal_api_token_id' not in str(app_config) + assert 'my_modal_api_token_secret' not in repr(app_config) + assert 'my_modal_api_token_secret' not in str(app_config) + assert 'my_runloop_api_key' not in repr(app_config) + assert 'my_runloop_api_key' not in str(app_config) # Check that no other attrs in AppConfig have 'key' or 'token' in their name # This will fail when new attrs are added, and attract attention @@ -703,7 +702,7 @@ def test_api_keys_repr_str(): 'modal_api_token_secret', 
'runloop_api_key', ] - for attr_name in dir(AppConfig): + for attr_name in AppConfig.model_fields.keys(): if ( not attr_name.startswith('__') and attr_name not in known_key_token_attrs_app diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py index fc37e41c4dd4..43f55e502f7a 100644 --- a/tests/unit/test_llm.py +++ b/tests/unit/test_llm.py @@ -40,7 +40,7 @@ def default_config(): def test_llm_init_with_default_config(default_config): llm = LLM(default_config) assert llm.config.model == 'gpt-4o' - assert llm.config.api_key == 'test_key' + assert llm.config.api_key.get_secret_value() == 'test_key' assert isinstance(llm.metrics, Metrics) assert llm.metrics.model_name == 'gpt-4o' @@ -77,7 +77,7 @@ def test_llm_init_with_custom_config(): ) llm = LLM(custom_config) assert llm.config.model == 'custom-model' - assert llm.config.api_key == 'custom_key' + assert llm.config.api_key.get_secret_value() == 'custom_key' assert llm.config.max_input_tokens == 5000 assert llm.config.max_output_tokens == 1500 assert llm.config.temperature == 0.8