# Custom LLM Configurations

OpenHands supports defining multiple named LLM configurations in your `config.toml` file. This feature allows you to use different LLM configurations for different purposes, such as using a cheaper model for tasks that don't require high-quality responses, or using different models with different parameters for specific agents.

## How It Works

Named LLM configurations are defined in the `config.toml` file using sections that start with `llm.`. For example:

```toml
# Default LLM configuration
[llm]
model = "gpt-4"
api_key = "your-api-key"
temperature = 0.0

# Custom LLM configuration for a cheaper model
[llm.gpt3]
model = "gpt-3.5-turbo"
api_key = "your-api-key"
temperature = 0.2

# Another custom configuration with different parameters
[llm.high-creativity]
model = "gpt-4"
api_key = "your-api-key"
temperature = 0.8
top_p = 0.9
```

Each named configuration inherits all settings from the default `[llm]` section and can override any of those settings. You can define as many custom configurations as needed.
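
Because of this inheritance, a named section only needs to list the settings it overrides. A minimal sketch (the `mini` section name is just an illustration; the keys are taken from the example above):

```toml
[llm]
model = "gpt-4"
api_key = "your-api-key"
temperature = 0.0

# Inherits api_key and temperature from [llm]; only the model is overridden
[llm.mini]
model = "gpt-3.5-turbo"
```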

## Using Custom Configurations

### With Agents

You can specify which LLM configuration an agent should use by setting the `llm_config` parameter in the agent's configuration section:

```toml
[agent.RepoExplorerAgent]
# Use the cheaper GPT-3 configuration for this agent
llm_config = 'gpt3'

[agent.CodeWriterAgent]
# Use the high creativity configuration for this agent
llm_config = 'high-creativity'
```

### Configuration Options

Each named LLM configuration supports all the same options as the default LLM configuration. These include:

- Model selection (`model`)
- API configuration (`api_key`, `base_url`, etc.)
- Model parameters (`temperature`, `top_p`, etc.)
- Retry settings (`num_retries`, `retry_multiplier`, etc.)
- Token limits (`max_input_tokens`, `max_output_tokens`)
- And all other LLM configuration options
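
As a combined sketch, a single named configuration can mix options from several of these categories. The `careful-writer` name and the specific values below are illustrative placeholders, not recommendations:

```toml
[llm.careful-writer]
# Model selection and API configuration
model = "gpt-4"
api_key = "your-api-key"
base_url = "https://api.openai.com/v1"
# Model parameters
temperature = 0.1
top_p = 0.95
# Retry settings
num_retries = 5
retry_multiplier = 2
# Token limits
max_input_tokens = 8000
max_output_tokens = 1500
```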

For a complete list of available options, see the LLM Configuration section in the [Configuration Options](../configuration-options.md) documentation.

## Use Cases

Custom LLM configurations are particularly useful in several scenarios:

- **Cost Optimization**: Use cheaper models for tasks that don't require high-quality responses, like repository exploration or simple file operations.
- **Task-Specific Tuning**: Configure different `temperature` and `top_p` values for tasks that require different levels of creativity or determinism.
- **Different Providers**: Use different LLM providers or API endpoints for different tasks (see the sketch after this list).
- **Testing and Development**: Easily switch between different model configurations during development and testing.
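
As a sketch of the different-providers case, a named configuration can point at a separate endpoint. Everything here is a hypothetical placeholder: the `local-model` section name, the model string, and the URL all depend on the provider you actually use:

```toml
# Hypothetical: a named configuration for a locally hosted model
# The model name and base_url below are illustrative placeholders
[llm.local-model]
model = "ollama/codellama"
base_url = "http://localhost:11434"
api_key = "unused"
```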

## Example: Cost Optimization

A practical example of using custom LLM configurations to optimize costs:

```toml
# Default configuration using GPT-4 for high-quality responses
[llm]
model = "gpt-4"
api_key = "your-api-key"
temperature = 0.0

# Cheaper configuration for repository exploration
[llm.repo-explorer]
model = "gpt-3.5-turbo"
temperature = 0.2

# Configuration for code generation
[llm.code-gen]
model = "gpt-4"
temperature = 0.0
max_output_tokens = 2000

[agent.RepoExplorerAgent]
llm_config = 'repo-explorer'

[agent.CodeWriterAgent]
llm_config = 'code-gen'
```

In this example:

- Repository exploration uses a cheaper model, since it mainly involves understanding and navigating code
- Code generation uses GPT-4 with a higher token limit for generating larger code blocks
- The default configuration remains available for other tasks

:::note
Custom LLM configurations are only available when using OpenHands in development mode, via `main.py` or `cli.py`. When running via `docker run`, please use the standard configuration options.
:::