All-Hands-AI · ryanhoangt · Jan 6, 2025 · Jan 9, 2025 · Jan 9, 2025 · Jan 9, 2025
diff --git a/config.template.toml b/config.template.toml
@@ -226,6 +226,9 @@ codeact_enable_jupyter = true
 # List of microagents to disable
 #disabled_microagents = []
 
+# Whether to route requests that need a plan to the reasoning model set in [model_routing]
+#enable_plan_routing = false
+
 [agent.RepoExplorerAgent]
 # Example: use a cheaper model for RepoExplorerAgent to reduce cost, especially
 # useful when an agent doesn't demand high quality but uses a lot of tokens
@@ -276,6 +279,14 @@ llm_config = 'gpt3'
 # The security analyzer to use (For Headless / CLI only -  In Web this is overridden by Session Init)
 #security_analyzer = ""
 
+################################ Model Routing ###############################
+# Configuration for model routing features
+##############################################################################
+[model_routing]
+
+# The reasoning model used for plan generation when an agent sets enable_plan_routing = true
+reasoning_model = "o1-preview-2024-09-12"
+
 #################################### Eval ####################################
 # Configuration for the evaluation, please refer to the specific evaluation
 # plugin for the available options

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -38,6 +38,7 @@
 from openhands.events.serialization.event import truncate_content
 from openhands.llm.llm import LLM
 from openhands.memory.condenser import Condenser
+from openhands.router.plan import LLMBasedPlanRouter
 from openhands.runtime.plugins import (
     AgentSkillsRequirement,
     JupyterRequirement,
@@ -120,6 +121,11 @@ def __init__(
         self.condenser = Condenser.from_config(self.config.condenser)
         logger.debug(f'Using condenser: {self.condenser}')
 
+        # self.plan_router = RuleBasedPlanRouter() if config.enable_plan_routing else None
+        self.plan_router = (
+            LLMBasedPlanRouter(self.llm.config) if config.enable_plan_routing else None
+        )
+
     def get_action_message(
         self,
         action: Action,
@@ -378,11 +384,21 @@ def step(self, state: State) -> Action:
         if latest_user_message and latest_user_message.content.strip() == '/exit':
             return AgentFinishAction()
 
+        params: dict = {}
+
+        # check if the user requests a plan
+        if (
+            latest_user_message
+            and self.plan_router
+            and self.plan_router.should_route_to_custom_model(
+                latest_user_message.content
+            )
+        ):
+            params['use_reasoning_model'] = True
+
         # prepare what we want to send to the LLM
         messages = self._get_messages(state)
-        params: dict = {
-            'messages': self.llm.format_messages_for_llm(messages),
-        }
+        params['messages'] = self.llm.format_messages_for_llm(messages)
         params['tools'] = self.tools
         if self.mock_function_calling:
             params['mock_function_calling'] = True

diff --git a/openhands/core/config/__init__.py b/openhands/core/config/__init__.py
@@ -6,6 +6,7 @@
     get_field_info,
 )
 from openhands.core.config.llm_config import LLMConfig
+from openhands.core.config.model_routing_config import ModelRoutingConfig
 from openhands.core.config.sandbox_config import SandboxConfig
 from openhands.core.config.security_config import SecurityConfig
 from openhands.core.config.utils import (
@@ -27,6 +28,7 @@
     'LLMConfig',
     'SandboxConfig',
     'SecurityConfig',
+    'ModelRoutingConfig',
     'load_app_config',
     'load_from_env',
     'load_from_toml',

diff --git a/openhands/core/config/agent_config.py b/openhands/core/config/agent_config.py
@@ -20,6 +20,7 @@ class AgentConfig:
         use_microagents: Whether to use microagents at all. Default is True.
         disabled_microagents: A list of microagents to disable. Default is None.
         condenser: Configuration for the memory condenser. Default is NoOpCondenserConfig.
+        enable_plan_routing: Whether to enable plan routing to reasoning models. Default is False.
     """
 
     codeact_enable_browsing: bool = True
@@ -32,6 +33,7 @@ class AgentConfig:
     use_microagents: bool = True
     disabled_microagents: list[str] | None = None
     condenser: CondenserConfig = field(default_factory=NoOpCondenserConfig)  # type: ignore
+    enable_plan_routing: bool = False
 
     def defaults_to_dict(self) -> dict:
         """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""

diff --git a/openhands/core/config/app_config.py b/openhands/core/config/app_config.py
@@ -9,6 +9,7 @@
     get_field_info,
 )
 from openhands.core.config.llm_config import LLMConfig
+from openhands.core.config.model_routing_config import ModelRoutingConfig
 from openhands.core.config.sandbox_config import SandboxConfig
 from openhands.core.config.security_config import SecurityConfig
 
@@ -51,6 +52,7 @@ class AppConfig:
     default_agent: str = OH_DEFAULT_AGENT
     sandbox: SandboxConfig = field(default_factory=SandboxConfig)
     security: SecurityConfig = field(default_factory=SecurityConfig)
+    model_routing: ModelRoutingConfig = field(default_factory=ModelRoutingConfig)
     runtime: str = 'docker'
     file_store: str = 'local'
     file_store_path: str = '/tmp/openhands_file_store'

diff --git a/openhands/core/config/model_routing_config.py b/openhands/core/config/model_routing_config.py
@@ -0,0 +1,38 @@
+from dataclasses import dataclass, fields
+
+from openhands.core.config.config_utils import get_field_info
+
+
+@dataclass
+class ModelRoutingConfig:
+    """Configuration for model routing.
+
+    Attributes:
+        reasoning_model: The reasoning model to use for plan generation.
+    """
+
+    reasoning_model: str = 'o1-preview-2024-09-12'
+
+    def defaults_to_dict(self) -> dict:
+        """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
+        # Build the mapping directly instead of shadowing the builtin `dict`.
+        return {f.name: get_field_info(f) for f in fields(self)}
+
+    def __str__(self) -> str:
+        """Render as `ModelRoutingConfig(name=value, ...)` using each field's repr."""
+        attr_str = ', '.join(
+            f'{f.name}={getattr(self, f.name)!r}' for f in fields(self)
+        )
+        return f'ModelRoutingConfig({attr_str})'
+
+    @classmethod
+    def from_dict(cls, model_routing_config_dict: dict) -> 'ModelRoutingConfig':
+        """Build a config from a dict whose keys are field names.
+
+        Raises:
+            TypeError: If the dict contains a key that is not a field.
+        """
+        return cls(**model_routing_config_dict)
+
+    def __repr__(self) -> str:
+        return self.__str__()
diff --git a/openhands/core/config/utils.py b/openhands/core/config/utils.py
@@ -14,11 +14,9 @@
 from openhands.core import logger
 from openhands.core.config.agent_config import AgentConfig
 from openhands.core.config.app_config import AppConfig
-from openhands.core.config.config_utils import (
-    OH_DEFAULT_AGENT,
-    OH_MAX_ITERATIONS,
-)
+from openhands.core.config.config_utils import OH_DEFAULT_AGENT, OH_MAX_ITERATIONS
 from openhands.core.config.llm_config import LLMConfig
+from openhands.core.config.model_routing_config import ModelRoutingConfig
 from openhands.core.config.sandbox_config import SandboxConfig
 from openhands.core.config.security_config import SecurityConfig
 from openhands.storage import get_file_store
@@ -141,6 +139,12 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
                             )
                             agent_config = AgentConfig(**nested_value)
                             cfg.set_agent_config(agent_config, nested_key)
+                elif key is not None and key.lower() == 'model_routing':
+                    logger.openhands_logger.debug(
+                        'Attempt to load model routing config from config toml'
+                    )
+                    model_routing_config = ModelRoutingConfig.from_dict(value)
+                    cfg.model_routing = model_routing_config
                 elif key is not None and key.lower() == 'llm':
                     logger.openhands_logger.debug(
                         'Attempt to load default LLM config from config toml'

diff --git a/openhands/core/setup.py b/openhands/core/setup.py
@@ -6,9 +6,7 @@
 from openhands.controller import AgentController
 from openhands.controller.agent import Agent
 from openhands.controller.state.state import State
-from openhands.core.config import (
-    AppConfig,
-)
+from openhands.core.config import AppConfig
 from openhands.core.logger import openhands_logger as logger
 from openhands.events import EventStream
 from openhands.llm.llm import LLM
@@ -61,8 +59,9 @@ def create_agent(runtime: Runtime, config: AppConfig) -> Agent:
     agent_cls: Type[Agent] = Agent.get_cls(config.default_agent)
     agent_config = config.get_agent_config(config.default_agent)
     llm_config = config.get_llm_config_from_agent(config.default_agent)
+    model_routing_config = config.model_routing
     agent = agent_cls(
-        llm=LLM(config=llm_config),
+        llm=LLM(config=llm_config, model_routing_config=model_routing_config),
         config=agent_config,
     )
     if agent.prompt_manager:

diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
@@ -7,7 +7,7 @@
 
 import requests
 
-from openhands.core.config import LLMConfig
+from openhands.core.config import LLMConfig, ModelRoutingConfig
 
 with warnings.catch_warnings():
     warnings.simplefilter('ignore')
@@ -85,6 +85,7 @@ def __init__(
         self,
         config: LLMConfig,
         metrics: Metrics | None = None,
+        model_routing_config: ModelRoutingConfig | None = None,
     ):
         """Initializes the LLM. If LLMConfig is passed, its values will be the fallback.
 
@@ -100,6 +101,7 @@ def __init__(
         )
         self.cost_metric_supported: bool = True
         self.config: LLMConfig = copy.deepcopy(config)
+        self.model_routing_config = model_routing_config
 
         self.model_info: ModelInfo | None = None
 
@@ -158,6 +160,7 @@ def wrapper(*args, **kwargs):
 
             messages: list[dict[str, Any]] | dict[str, Any] = []
             mock_function_calling = kwargs.pop('mock_function_calling', False)
+            use_reasoning_model = kwargs.pop('use_reasoning_model', False)
 
             # some callers might send the model and messages directly
             # litellm allows positional args, like completion(model, messages, **kwargs)
@@ -189,6 +192,15 @@ def wrapper(*args, **kwargs):
                 kwargs['stop'] = STOP_WORDS
                 mock_fncall_tools = kwargs.pop('tools')
 
+            if use_reasoning_model:
+                if self.model_routing_config is None:
+                    raise ValueError(
+                        'Model routing config is required for model routing.'
+                    )
+
+                # Replace the model with the reasoning model
+                kwargs['model'] = self.model_routing_config.reasoning_model
+
             # if we have no messages, something went very wrong
             if not messages:
                 raise ValueError(
@@ -636,7 +648,7 @@ def __str__(self):
             return f'LLM(model={self.config.model}, api_version={self.config.api_version}, base_url={self.config.base_url})'
         elif self.config.base_url:
             return f'LLM(model={self.config.model}, base_url={self.config.base_url})'
-        return f'LLM(model={self.config.model})'
+        return f'LLM(model={self.config.model},reasoning_model={self.model_routing_config.reasoning_model if self.model_routing_config else None})'
 
     def __repr__(self):
         return str(self)

diff --git a/openhands/router/base.py b/openhands/router/base.py
@@ -0,0 +1,9 @@
+from abc import ABC, abstractmethod
+
+
+class BaseRouter(ABC):
+    """Interface for routers that decide whether a prompt goes to a custom model."""
+
+    @abstractmethod
+    def should_route_to_custom_model(self, prompt: str) -> bool:
+        """Return True if `prompt` should be routed to the custom model."""
diff --git a/openhands/router/plan/__init__.py b/openhands/router/plan/__init__.py
@@ -0,0 +1,4 @@
+from openhands.router.plan.llm_based import LLMBasedPlanRouter
+from openhands.router.plan.rule_based import RuleBasedPlanRouter
+
+__all__ = ['RuleBasedPlanRouter', 'LLMBasedPlanRouter']
diff --git a/openhands/router/plan/llm_based.py b/openhands/router/plan/llm_based.py
@@ -0,0 +1,49 @@
+import copy
+
+from openhands.core.config import LLMConfig
+from openhands.llm.llm import LLM
+from openhands.router.base import BaseRouter
+from openhands.router.plan.prompts import ANALYZE_PROMPT
+
+
+class LLMBasedPlanRouter(BaseRouter):
+    """
+    Router that asks a judge LLM whether a prompt is complex enough to require
+    a step-by-step plan.
+    """
+
+    # Model asked to classify each prompt; passed per call to `completion`.
+    JUDGE_MODEL = 'gpt-4o'
+
+    def __init__(self, llm_config: LLMConfig):
+        """Create the judge LLM from a private copy of `llm_config`."""
+        super().__init__()
+
+        # Deep-copy so the judge never shares a config object with the caller.
+        judge_llm_config = copy.deepcopy(llm_config)
+        self.judge_llm = LLM(judge_llm_config)
+
+    def should_route_to_custom_model(self, prompt: str) -> bool:
+        """Return True only when the judge answers exactly `1` for `prompt`.
+
+        An empty or non-numeric reply is treated as "no plan required" so that a
+        malformed judge answer cannot abort the caller with a ValueError.
+        """
+        messages = [
+            {
+                'role': 'user',
+                'content': ANALYZE_PROMPT.format(message=prompt),
+            }
+        ]
+
+        response = self.judge_llm.completion(
+            messages=messages,
+            model=self.JUDGE_MODEL,
+        )
+        content = response['choices'][0]['message']['content']
+        if not content:
+            return False
+        try:
+            return int(content.strip()) == 1
+        except ValueError:
+            return False
diff --git a/openhands/router/plan/prompts.py b/openhands/router/plan/prompts.py
@@ -0,0 +1,44 @@
+# Prompt for the judge LLM: `{message}` is filled with the user message and the
+# reply is expected to be a bare `0` or `1`.
+ANALYZE_PROMPT = """Analyze this prompt to see if it requires a detailed plan generation.
+
+Some example scenarios that require generating a step-by-step plan:
+
+1. Structured Rule-Based Tasks with Well-Defined Constraints
+    * Example: In a synthetic task, adhering to a sequence like loosening nuts before removing wheels is critical
+
+2. Tasks Requiring Step-by-Step Reasoning to plan a structured chain of actions
+	* Example: In a synthetic task, objects must be manipulated in a sequence to achieve a configuration
+
+3. Scenarios with Limited Resources or Strict Constraints
+	* Tasks that require resource-sensitive planning, such as minimizing actions or handling tools efficiently
+	* Example: In a synthetic task, we need to efficiently coordinate robot actions across rooms and minimize energy consumption costs
+
+4. Generalization in Familiar Symbolic Representations
+	* Tasks where the rules remain consistent, and the specific instances change.
+	* Example: When we need to adapt strategies to new but structured instances of tasks.
+
+5. Requests Requiring Self-Evaluation
+	* Self-evaluation mechanism enables the identification and correction of errors mid-process.
+	* Example: When we need to reevaluate actions and adjust plans or actions based on constraints.
+
+In context of software engineering, below are some scenarios where plan generation is required:
+
+1. Dependency and Workflow Management
+    * Automating and optimizing CI/CD pipelines, build processes, and package dependency resolution.
+	* Example: Resolving complex dependency graphs or sequencing multi-step deployments.
+2. Code Refactoring and Debugging
+	* Planning systematic changes for refactoring large codebases and isolating root causes during debugging.
+	* Example: Refactoring monolithic code into modular components while preserving functionality.
+3. Infrastructure and Resource Planning
+	* Designing and optimizing Infrastructure as Code (IaC) changes and dynamic resource allocation.
+	* Example: Planning cloud resource provisioning while adhering to dependency constraints.
+4. High-level Requirements to Low-level Implementation Mapping
+    * Translating high-level requirements into detailed implementation steps and ensuring consistency.
+
+=== BEGIN USER MESSAGE ===
+{message}
+=== END USER MESSAGE ===
+
+Only respond with 0 for no plan generation required or 1 for plan generation required.
+"""
diff --git a/openhands/router/plan/rule_based.py b/openhands/router/plan/rule_based.py
@@ -0,0 +1,17 @@
+import re
+
+from openhands.router.base import BaseRouter
+
+
+class RuleBasedPlanRouter(BaseRouter):
+    """
+    Router that detects if the prompt contains a word starting with "plan".
+    """
+
+    # Case-insensitive, anchored at a word start: "Plan" and "planning" match,
+    # while mid-word hits such as "explanation" do not.
+    _PLAN_WORD = re.compile(r'\bplan', re.IGNORECASE)
+
+    def should_route_to_custom_model(self, prompt: str) -> bool:
+        """Return True if `prompt` contains a word that starts with "plan"."""
+        return self._PLAN_WORD.search(prompt) is not None
diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py
@@ -90,7 +90,10 @@ async def initialize_agent(
 
         # TODO: override other LLM config & agent config groups (#2075)
 
-        llm = LLM(config=self.config.get_llm_config_from_agent(agent_cls))
+        llm = LLM(
+            config=self.config.get_llm_config_from_agent(agent_cls),
+            model_routing_config=self.config.model_routing,
+        )
         agent_config = self.config.get_agent_config(agent_cls)
         agent = Agent.get_cls(agent_cls)(llm, agent_config)