From cec10a1a40fb1a0c5912602c7671d6bb7b4a285a Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Mon, 6 Jan 2025 15:40:41 +0700
Subject: [PATCH 01/21] prototype

---
 openhands/router/base.py           |  7 ++++
 openhands/router/plan/.env.example |  3 ++
 openhands/router/plan/plan.py      | 56 ++++++++++++++++++++++++++++++
 openhands/router/plan/prompts.py   |  8 +++++
 4 files changed, 74 insertions(+)
 create mode 100644 openhands/router/base.py
 create mode 100644 openhands/router/plan/.env.example
 create mode 100644 openhands/router/plan/plan.py
 create mode 100644 openhands/router/plan/prompts.py

diff --git a/openhands/router/base.py b/openhands/router/base.py
new file mode 100644
index 000000000000..0fb3578793ba
--- /dev/null
+++ b/openhands/router/base.py
@@ -0,0 +1,7 @@
+from abc import ABC, abstractmethod
+
+
+class BaseRouter(ABC):
+    @abstractmethod
+    def route(self, prompt: str) -> str:
+        pass
diff --git a/openhands/router/plan/.env.example b/openhands/router/plan/.env.example
new file mode 100644
index 000000000000..0bb7550d1314
--- /dev/null
+++ b/openhands/router/plan/.env.example
@@ -0,0 +1,3 @@
+LITELLM_API_KEY=
+LITELLM_BASE_URL=
+LITELLM_MODEL=
diff --git a/openhands/router/plan/plan.py b/openhands/router/plan/plan.py
new file mode 100644
index 000000000000..860fefac0521
--- /dev/null
+++ b/openhands/router/plan/plan.py
@@ -0,0 +1,56 @@
+import os
+from os import path
+
+from dotenv import load_dotenv
+from litellm import completion
+
+from openhands.router.base import BaseRouter
+from openhands.router.plan.prompts import ANALYZE_PROMPT
+
+# Load the environment variables
+dotenv_path = path.join(path.dirname(__file__), '.env')
+load_dotenv(dotenv_path)
+
+litellm_config = {
+    'model': os.environ['LITELLM_MODEL'],
+    'api_key': os.environ['LITELLM_API_KEY'],
+    'base_url': os.environ['LITELLM_BASE_URL'],
+}
+
+
+class PlanRouter(BaseRouter):
+    """
+    Router that routes the prompt requiring plan generation to specialized reasoning models.
+    """
+
+    def route(self, prompt: str) -> str:
+        """
+        Routes the prompt to the specialized reasoning model.
+
+        Parameters:
+        - prompt (str): the prompt to be routed
+
+        Returns:
+        - str: the response from the specialized reasoning model
+        """
+
+        raise NotImplementedError
+
+    def _requires_plan_generation(self, prompt: str) -> bool:
+        messages = []
+
+        messages.append(
+            {
+                'role': 'user',
+                'content': ANALYZE_PROMPT.format(message=prompt),
+            }
+        )
+
+        response = completion(
+            messages=messages,
+            **litellm_config,
+            temperature=0.0,
+            max_tokens=10,
+            stream=False,
+        )
+        return int(response['choices'][0]['message']['content'].strip()) == 1
diff --git a/openhands/router/plan/prompts.py b/openhands/router/plan/prompts.py
new file mode 100644
index 000000000000..e9af5d75fd03
--- /dev/null
+++ b/openhands/router/plan/prompts.py
@@ -0,0 +1,8 @@
+ANALYZE_PROMPT = """Analyze this prompt to see if it already contains a step-by-step plan or requires more detailed plan generation:
+
+---
+{message}
+---
+
+Only respond with 0 for no plan generation required or 1 for plan generation required.
+"""

From c33ba45f263393f61d00938f3d106af5addf18bd Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Thu, 9 Jan 2025 21:06:44 +0700
Subject: [PATCH 02/21] add routing config

---
 openhands/core/config/__init__.py             |  2 ++
 openhands/core/config/app_config.py           |  2 ++
 openhands/core/config/model_routing_config.py | 32 +++++++++++++++++++
 openhands/core/config/utils.py                | 12 ++++---
 openhands/core/setup.py                       |  7 ++--
 openhands/llm/llm.py                          |  3 +-
 openhands/router/plan/plan.py                 |  5 +--
 7 files changed, 52 insertions(+), 11 deletions(-)
 create mode 100644 openhands/core/config/model_routing_config.py

diff --git a/openhands/core/config/__init__.py b/openhands/core/config/__init__.py
index 2e0f87e32143..d085101681a8 100644
--- a/openhands/core/config/__init__.py
+++ b/openhands/core/config/__init__.py
@@ -6,6 +6,7 @@
     get_field_info,
 )
 from openhands.core.config.llm_config import LLMConfig
+from openhands.core.config.model_routing_config import ModelRoutingConfig
 from openhands.core.config.sandbox_config import SandboxConfig
 from openhands.core.config.security_config import SecurityConfig
 from openhands.core.config.utils import (
@@ -27,6 +28,7 @@
     'LLMConfig',
     'SandboxConfig',
     'SecurityConfig',
+    'ModelRoutingConfig',
     'load_app_config',
     'load_from_env',
     'load_from_toml',
diff --git a/openhands/core/config/app_config.py b/openhands/core/config/app_config.py
index 2dbb4aeaa8c4..6249cc56d9b9 100644
--- a/openhands/core/config/app_config.py
+++ b/openhands/core/config/app_config.py
@@ -9,6 +9,7 @@
     get_field_info,
 )
 from openhands.core.config.llm_config import LLMConfig
+from openhands.core.config.model_routing_config import ModelRoutingConfig
 from openhands.core.config.sandbox_config import SandboxConfig
 from openhands.core.config.security_config import SecurityConfig
 
@@ -51,6 +52,7 @@ class AppConfig:
     default_agent: str = OH_DEFAULT_AGENT
     sandbox: SandboxConfig = field(default_factory=SandboxConfig)
     security: SecurityConfig = field(default_factory=SecurityConfig)
+    model_routing = field(default_factory=ModelRoutingConfig)
     runtime: str = 'docker'
     file_store: str = 'local'
     file_store_path: str = '/tmp/openhands_file_store'
diff --git a/openhands/core/config/model_routing_config.py b/openhands/core/config/model_routing_config.py
new file mode 100644
index 000000000000..902a7fcaa782
--- /dev/null
+++ b/openhands/core/config/model_routing_config.py
@@ -0,0 +1,32 @@
+from dataclasses import dataclass, fields
+
+from openhands.core.config.config_utils import get_field_info
+
+
+@dataclass
+class ModelRoutingConfig:
+    reasoning_model: str = 'o1-preview-2024-09-12'
+
+    def defaults_to_dict(self) -> dict:
+        """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
+        dict = {}
+        for f in fields(self):
+            dict[f.name] = get_field_info(f)
+        return dict
+
+    def __str__(self):
+        attr_str = []
+        for f in fields(self):
+            attr_name = f.name
+            attr_value = getattr(self, f.name)
+
+            attr_str.append(f'{attr_name}={repr(attr_value)}')
+
+        return f"ModelRoutingConfig({', '.join(attr_str)})"
+
+    @classmethod
+    def from_dict(cls, model_routing_config_dict: dict) -> 'ModelRoutingConfig':
+        return cls(**model_routing_config_dict)
+
+    def __repr__(self):
+        return self.__str__()
diff --git a/openhands/core/config/utils.py b/openhands/core/config/utils.py
index 7719ce0d59b1..93f22762a2b1 100644
--- a/openhands/core/config/utils.py
+++ b/openhands/core/config/utils.py
@@ -14,11 +14,9 @@
 from openhands.core import logger
 from openhands.core.config.agent_config import AgentConfig
 from openhands.core.config.app_config import AppConfig
-from openhands.core.config.config_utils import (
-    OH_DEFAULT_AGENT,
-    OH_MAX_ITERATIONS,
-)
+from openhands.core.config.config_utils import OH_DEFAULT_AGENT, OH_MAX_ITERATIONS
 from openhands.core.config.llm_config import LLMConfig
+from openhands.core.config.model_routing_config import ModelRoutingConfig
 from openhands.core.config.sandbox_config import SandboxConfig
 from openhands.core.config.security_config import SecurityConfig
 from openhands.storage import get_file_store
@@ -141,6 +139,12 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
                     )
                     agent_config = AgentConfig(**nested_value)
                     cfg.set_agent_config(agent_config, nested_key)
+            elif key is not None and key.lower() == 'model_routing':
+                logger.openhands_logger.debug(
+                    'Attempt to load model routing config from config toml'
+                )
+                model_routing_config = ModelRoutingConfig.from_dict(value)
+                cfg.model_routing = model_routing_config
             elif key is not None and key.lower() == 'llm':
                 logger.openhands_logger.debug(
                     'Attempt to load default LLM config from config toml'
diff --git a/openhands/core/setup.py b/openhands/core/setup.py
index 28888478017a..4fde6963b76f 100644
--- a/openhands/core/setup.py
+++ b/openhands/core/setup.py
@@ -6,9 +6,7 @@
 from openhands.controller import AgentController
 from openhands.controller.agent import Agent
 from openhands.controller.state.state import State
-from openhands.core.config import (
-    AppConfig,
-)
+from openhands.core.config import AppConfig
 from openhands.core.logger import openhands_logger as logger
 from openhands.events import EventStream
 from openhands.llm.llm import LLM
@@ -61,8 +59,9 @@ def create_agent(runtime: Runtime, config: AppConfig) -> Agent:
     agent_cls: Type[Agent] = Agent.get_cls(config.default_agent)
     agent_config = config.get_agent_config(config.default_agent)
     llm_config = config.get_llm_config_from_agent(config.default_agent)
+    model_routing_config = config.model_routing
     agent = agent_cls(
-        llm=LLM(config=llm_config),
+        llm=LLM(config=llm_config, model_routing_config=model_routing_config),
        config=agent_config,
     )
     if agent.prompt_manager:
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 743d6535ba3b..1f2191717264 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -7,7 +7,7 @@
 
 import requests
 
-from openhands.core.config import LLMConfig
+from openhands.core.config import LLMConfig, ModelRoutingConfig
 
 with warnings.catch_warnings():
     warnings.simplefilter('ignore')
@@ -85,6 +85,7 @@ def __init__(
         self,
         config: LLMConfig,
         metrics: Metrics | None = None,
+        model_routing_config: ModelRoutingConfig | None = None,
     ):
         """Initializes the LLM. If LLMConfig is passed, its values will be the fallback.
 
diff --git a/openhands/router/plan/plan.py b/openhands/router/plan/plan.py
index 860fefac0521..29ca12a531ab 100644
--- a/openhands/router/plan/plan.py
+++ b/openhands/router/plan/plan.py
@@ -23,6 +23,8 @@ class PlanRouter(BaseRouter):
     Router that routes the prompt requiring plan generation to specialized reasoning models.
     """
 
+    REASONING_MODEL: str = 'o1-preview-2024-09-12'
+
     def route(self, prompt: str) -> str:
         """
         Routes the prompt to the specialized reasoning model.
@@ -33,8 +35,7 @@ def route(self, prompt: str) -> str:
         Returns:
         - str: the response from the specialized reasoning model
         """
-
-        raise NotImplementedError
+        return self.REASONING_MODEL
 
     def _requires_plan_generation(self, prompt: str) -> bool:
         messages = []

From 7b0872472aaf66e332799e7366161159dff6f57f Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Thu, 9 Jan 2025 22:06:31 +0700
Subject: [PATCH 03/21] wire up with codeact and llm

---
 .../agenthub/codeact_agent/codeact_agent.py |  3 +++
 openhands/core/config/agent_config.py       |  1 +
 openhands/llm/llm.py                        | 11 ++++++++++
 openhands/router/base.py                    |  2 +-
 openhands/router/plan/__init__.py           |  4 ++++
 .../router/plan/{plan.py => llm_based.py}   | 20 +++----------------
 openhands/router/plan/rule_based.py         | 11 ++++++++++
 7 files changed, 34 insertions(+), 18 deletions(-)
 create mode 100644 openhands/router/plan/__init__.py
 rename openhands/router/plan/{plan.py => llm_based.py} (62%)
 create mode 100644 openhands/router/plan/rule_based.py

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index d8b5702a235d..e9df80cf4ac1 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -38,6 +38,7 @@
 from openhands.events.serialization.event import truncate_content
 from openhands.llm.llm import LLM
 from openhands.memory.condenser import Condenser
+from openhands.router.plan import RuleBasedPlanRouter
 from openhands.runtime.plugins import (
     AgentSkillsRequirement,
     JupyterRequirement,
@@ -120,6 +121,8 @@ def __init__(
         self.condenser = Condenser.from_config(self.config.condenser)
         logger.debug(f'Using condenser: {self.condenser}')
 
+        self.plan_router = None if config.enable_plan_routing else RuleBasedPlanRouter()
+
     def get_action_message(
         self,
         action: Action,
diff --git a/openhands/core/config/agent_config.py b/openhands/core/config/agent_config.py
index 77e9dbc1e32d..5b3a86b33f46 100644
--- a/openhands/core/config/agent_config.py
+++ b/openhands/core/config/agent_config.py
@@ -32,6 +32,7 @@ class AgentConfig:
     use_microagents: bool = True
     disabled_microagents: list[str] | None = None
     condenser: CondenserConfig = field(default_factory=NoOpCondenserConfig)  # type: ignore
+    enable_plan_routing: bool = False
 
     def defaults_to_dict(self) -> dict:
         """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 1f2191717264..fc9c731e299d 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -101,6 +101,7 @@ def __init__(
         )
         self.cost_metric_supported: bool = True
         self.config: LLMConfig = copy.deepcopy(config)
+        self.model_routing_config = model_routing_config
 
         self.model_info: ModelInfo | None = None
 
@@ -159,6 +160,7 @@ def wrapper(*args, **kwargs):
 
             messages: list[dict[str, Any]] | dict[str, Any] = []
             mock_function_calling = kwargs.pop('mock_function_calling', False)
+            use_reasoning_model = kwargs.pop('use_reasoning_model', False)
 
             # some callers might send the model and messages directly
             # litellm allows positional args, like completion(model, messages, **kwargs)
@@ -190,6 +192,15 @@ def wrapper(*args, **kwargs):
                 kwargs['stop'] = STOP_WORDS
                 mock_fncall_tools = kwargs.pop('tools')
 
+            if use_reasoning_model:
+                if self.model_routing_config is None:
+                    raise ValueError(
+                        'Model routing config is required for model routing.'
+                    )
+
+                # Replace the model with the reasoning model
+                kwargs['model'] = self.model_routing_config.reasoning_model
+
             # if we have no messages, something went very wrong
             if not messages:
                 raise ValueError(
diff --git a/openhands/router/base.py b/openhands/router/base.py
index 0fb3578793ba..ccc7ad47f1c5 100644
--- a/openhands/router/base.py
+++ b/openhands/router/base.py
@@ -3,5 +3,5 @@
 
 class BaseRouter(ABC):
     @abstractmethod
-    def route(self, prompt: str) -> str:
+    def should_route_to_custom_model(self, prompt: str) -> bool:
         pass
diff --git a/openhands/router/plan/__init__.py b/openhands/router/plan/__init__.py
new file mode 100644
index 000000000000..845831646df8
--- /dev/null
+++ b/openhands/router/plan/__init__.py
@@ -0,0 +1,4 @@
+from openhands.router.plan.llm_based import LLMBasedPlanRouter
+from openhands.router.plan.rule_based import RuleBasedPlanRouter
+
+__all__ = ['RuleBasedPlanRouter', 'LLMBasedPlanRouter']
diff --git a/openhands/router/plan/plan.py b/openhands/router/plan/llm_based.py
similarity index 62%
rename from openhands/router/plan/plan.py
rename to openhands/router/plan/llm_based.py
index 29ca12a531ab..f2cd8efe88e6 100644
--- a/openhands/router/plan/plan.py
+++ b/openhands/router/plan/llm_based.py
@@ -18,26 +18,12 @@
 }
 
 
-class PlanRouter(BaseRouter):
+class LLMBasedPlanRouter(BaseRouter):
     """
-    Router that routes the prompt requiring plan generation to specialized reasoning models.
+    Router that routes the prompt that is judged by a LLM as complex and requires a step-by-step plan.
     """
 
-    REASONING_MODEL: str = 'o1-preview-2024-09-12'
-
-    def route(self, prompt: str) -> str:
-        """
-        Routes the prompt to the specialized reasoning model.
-
-        Parameters:
-        - prompt (str): the prompt to be routed
-
-        Returns:
-        - str: the response from the specialized reasoning model
-        """
-        return self.REASONING_MODEL
-
-    def _requires_plan_generation(self, prompt: str) -> bool:
+    def should_route_to_custom_model(self, prompt: str) -> bool:
         messages = []
 
         messages.append(
diff --git a/openhands/router/plan/rule_based.py b/openhands/router/plan/rule_based.py
new file mode 100644
index 000000000000..2e45858d88dd
--- /dev/null
+++ b/openhands/router/plan/rule_based.py
@@ -0,0 +1,11 @@
+from openhands.router.base import BaseRouter
+
+
+class RuleBasedPlanRouter(BaseRouter):
+    """
+    Router that detects if the prompt contains the word "plan" or "planning".
+    """
+
+    def should_route_to_custom_model(self, prompt: str) -> bool:
+        # Returns True if the prompt contains the word "plan" or "planning"
+        return 'plan' in prompt or 'planning' in prompt

From 910ba8cb1d94dee538c041756bbf1b2a4a979d97 Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Fri, 10 Jan 2025 15:29:50 +0700
Subject: [PATCH 04/21] fix bug

---
 .../agenthub/codeact_agent/codeact_agent.py | 16 +++++++++++++---
 openhands/core/config/app_config.py         |  2 +-
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index e9df80cf4ac1..0fa8ec30c8ff 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -381,11 +381,21 @@ def step(self, state: State) -> Action:
         if latest_user_message and latest_user_message.content.strip() == '/exit':
             return AgentFinishAction()
 
+        params: dict = {}
+
+        # check if the user requests a plan
+        if (
+            latest_user_message
+            and self.plan_router
+            and self.plan_router.should_route_to_custom_model(
+                latest_user_message.content
+            )
+        ):
+            params['use_reasoning_model'] = True
+
         # prepare what we want to send to the LLM
         messages = self._get_messages(state)
-        params: dict = {
-            'messages': self.llm.format_messages_for_llm(messages),
-        }
+        params['messages'] = (self.llm.format_messages_for_llm(messages),)
         params['tools'] = self.tools
         if self.mock_function_calling:
             params['mock_function_calling'] = True
diff --git a/openhands/core/config/app_config.py b/openhands/core/config/app_config.py
index 6249cc56d9b9..db386c1e4ea9 100644
--- a/openhands/core/config/app_config.py
+++ b/openhands/core/config/app_config.py
@@ -52,7 +52,7 @@ class AppConfig:
     default_agent: str = OH_DEFAULT_AGENT
     sandbox: SandboxConfig = field(default_factory=SandboxConfig)
     security: SecurityConfig = field(default_factory=SecurityConfig)
-    model_routing = field(default_factory=ModelRoutingConfig)
+    model_routing: ModelRoutingConfig = field(default_factory=ModelRoutingConfig)
     runtime: str = 'docker'
     file_store: str = 'local'
     file_store_path: str = '/tmp/openhands_file_store'

From b73f3ecbda5062fe2d5b6eca21e66f59e3a6887e Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Fri, 10 Jan 2025 09:32:27 +0000
Subject: [PATCH 05/21] working cli

---
 openhands/agenthub/codeact_agent/codeact_agent.py | 2 +-
 openhands/llm/llm.py                              | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index 0fa8ec30c8ff..cf642db42af9 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -395,7 +395,7 @@ def step(self, state: State) -> Action:
 
         # prepare what we want to send to the LLM
         messages = self._get_messages(state)
-        params['messages'] = (self.llm.format_messages_for_llm(messages),)
+        params['messages'] = self.llm.format_messages_for_llm(messages)
         params['tools'] = self.tools
         if self.mock_function_calling:
             params['mock_function_calling'] = True
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index fc9c731e299d..b8bc039b00ae 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -12,6 +12,7 @@
 with warnings.catch_warnings():
     warnings.simplefilter('ignore')
     import litellm
+    # litellm.set_verbose = True
     from litellm import ChatCompletionMessageToolCall, ModelInfo, PromptTokensDetails
     from litellm import Message as LiteLLMMessage

From 54d44013dfff6a0b0bcbaf5036ba52ae83439c8e Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Fri, 10 Jan 2025 09:51:04 +0000
Subject: [PATCH 06/21] update config template

---
 config.template.toml                              | 11 +++++++++++
 openhands/agenthub/codeact_agent/codeact_agent.py |  2 +-
 openhands/core/config/agent_config.py             |  1 +
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/config.template.toml b/config.template.toml
index de0ebf3a578f..2acd09ef6a7b 100644
--- a/config.template.toml
+++ b/config.template.toml
@@ -226,6 +226,9 @@ codeact_enable_jupyter = true
 # List of microagents to disable
 #disabled_microagents = []
 
+# Whether to enable plan routing to reasoning models
+#enable_plan_routing = false
+
 [agent.RepoExplorerAgent]
 # Example: use a cheaper model for RepoExplorerAgent to reduce cost, especially
 # useful when an agent doesn't demand high quality but uses a lot of tokens
@@ -276,6 +279,14 @@ llm_config = 'gpt3'
 
 # The security analyzer to use (For Headless / CLI only - In Web this is overridden by Session Init)
 #security_analyzer = ""
 
+################################ Model Routing ###############################
+# Configuration for model routing features
+##############################################################################
+[model_routing]
+
+# The reasoning model to use for plan generation
+reasoning_model = "o1-preview-2024-09-12"
+
 #################################### Eval ####################################
 # Configuration for the evaluation, please refer to the specific evaluation
 # plugin for the available options
diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index cf642db42af9..f67aa83699f7 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -121,7 +121,7 @@ def __init__(
         self.condenser = Condenser.from_config(self.config.condenser)
         logger.debug(f'Using condenser: {self.condenser}')
 
-        self.plan_router = None if config.enable_plan_routing else RuleBasedPlanRouter()
+        self.plan_router = RuleBasedPlanRouter() if config.enable_plan_routing else None
 
     def get_action_message(
         self,
diff --git a/openhands/core/config/agent_config.py b/openhands/core/config/agent_config.py
index 5b3a86b33f46..9bc192f0fed4 100644
--- a/openhands/core/config/agent_config.py
+++ b/openhands/core/config/agent_config.py
@@ -20,6 +20,7 @@ class AgentConfig:
         use_microagents: Whether to use microagents at all. Default is True.
         disabled_microagents: A list of microagents to disable. Default is None.
         condenser: Configuration for the memory condenser. Default is NoOpCondenserConfig.
+        enable_plan_routing: Whether to enable plan routing to reasoning models. Default is False.
     """
 
     codeact_enable_browsing: bool = True

From 06db2d640a24c9fb14dbf75c3ee41ce658116747 Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Fri, 10 Jan 2025 10:16:37 +0000
Subject: [PATCH 07/21] use via ui

---
 openhands/llm/llm.py                | 2 +-
 openhands/server/session/session.py | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index b8bc039b00ae..c684ed17bce7 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -649,7 +649,7 @@ def __str__(self):
             return f'LLM(model={self.config.model}, api_version={self.config.api_version}, base_url={self.config.base_url})'
         elif self.config.base_url:
             return f'LLM(model={self.config.model}, base_url={self.config.base_url})'
-        return f'LLM(model={self.config.model})'
+        return f'LLM(model={self.config.model},reasoning_model={self.model_routing_config.reasoning_model if self.model_routing_config else None})'
 
     def __repr__(self):
         return str(self)
diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py
index a481fbd27078..a8dbf88ea7da 100644
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -90,7 +90,10 @@ async def initialize_agent(
 
         # TODO: override other LLM config & agent config groups (#2075)
 
-        llm = LLM(config=self.config.get_llm_config_from_agent(agent_cls))
+        llm = LLM(
+            config=self.config.get_llm_config_from_agent(agent_cls),
+            model_routing_config=self.config.model_routing,
+        )
         agent_config = self.config.get_agent_config(agent_cls)
         agent = Agent.get_cls(agent_cls)(llm, agent_config)

From b5973cd7452452ecfaca3a3e0f7d8fe82fcc0b56 Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Fri, 10 Jan 2025 10:38:54 +0000
Subject: [PATCH 08/21] remove dotenv

---
 .../agenthub/codeact_agent/codeact_agent.py |  7 +++--
 openhands/router/plan/.env.example          |  3 --
 openhands/router/plan/llm_based.py          | 30 +++++++------------
 3 files changed, 15 insertions(+), 25 deletions(-)
 delete mode 100644 openhands/router/plan/.env.example

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index f67aa83699f7..e25ae1084036 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -38,7 +38,7 @@
 from openhands.events.serialization.event import truncate_content
 from openhands.llm.llm import LLM
 from openhands.memory.condenser import Condenser
-from openhands.router.plan import RuleBasedPlanRouter
+from openhands.router.plan import LLMBasedPlanRouter
 from openhands.runtime.plugins import (
     AgentSkillsRequirement,
     JupyterRequirement,
@@ -121,7 +121,10 @@ def __init__(
         self.condenser = Condenser.from_config(self.config.condenser)
         logger.debug(f'Using condenser: {self.condenser}')
 
-        self.plan_router = RuleBasedPlanRouter() if config.enable_plan_routing else None
+        # self.plan_router = RuleBasedPlanRouter() if config.enable_plan_routing else None
+        self.plan_router = (
+            LLMBasedPlanRouter(self.llm.config) if config.enable_plan_routing else None
+        )
 
     def get_action_message(
         self,
diff --git a/openhands/router/plan/.env.example b/openhands/router/plan/.env.example
deleted file mode 100644
index 0bb7550d1314..000000000000
--- a/openhands/router/plan/.env.example
+++ /dev/null
@@ -1,3 +0,0 @@
-LITELLM_API_KEY=
-LITELLM_BASE_URL=
-LITELLM_MODEL=
diff --git a/openhands/router/plan/llm_based.py b/openhands/router/plan/llm_based.py
index f2cd8efe88e6..a1f2ce41b797 100644
--- a/openhands/router/plan/llm_based.py
+++ b/openhands/router/plan/llm_based.py
@@ -1,28 +1,22 @@
-import os
-from os import path
-
-from dotenv import load_dotenv
-from litellm import completion
+import copy
 
+from openhands.core.config import LLMConfig
+from openhands.llm.llm import LLM
 from openhands.router.base import BaseRouter
 from openhands.router.plan.prompts import ANALYZE_PROMPT
 
-# Load the environment variables
-dotenv_path = path.join(path.dirname(__file__), '.env')
-load_dotenv(dotenv_path)
-
-litellm_config = {
-    'model': os.environ['LITELLM_MODEL'],
-    'api_key': os.environ['LITELLM_API_KEY'],
-    'base_url': os.environ['LITELLM_BASE_URL'],
-}
-
 
 class LLMBasedPlanRouter(BaseRouter):
     """
     Router that routes the prompt that is judged by a LLM as complex and requires a step-by-step plan.
     """
 
+    def __init__(self, llm_config: LLMConfig):
+        super().__init__()
+
+        judge_llm_config = copy.deepcopy(llm_config)
+        self.judge_llm = LLM(judge_llm_config)
+
     def should_route_to_custom_model(self, prompt: str) -> bool:
         messages = []
 
@@ -33,11 +27,7 @@ def should_route_to_custom_model(self, prompt: str) -> bool:
             }
         )
 
-        response = completion(
+        response = self.judge_llm.completion(
             messages=messages,
-            **litellm_config,
-            temperature=0.0,
-            max_tokens=10,
-            stream=False,
         )
         return int(response['choices'][0]['message']['content'].strip()) == 1

From e3c8a9ed28c55697889609613b4bb2490bbb0b89 Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Fri, 10 Jan 2025 12:49:50 +0000
Subject: [PATCH 09/21] update judge prompt

---
 openhands/router/plan/llm_based.py |  3 +++
 openhands/router/plan/prompts.py   | 38 +++++++++++++++++++++++++++---
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/openhands/router/plan/llm_based.py b/openhands/router/plan/llm_based.py
index a1f2ce41b797..365cbee3f985 100644
--- a/openhands/router/plan/llm_based.py
+++ b/openhands/router/plan/llm_based.py
@@ -11,6 +11,8 @@ class LLMBasedPlanRouter(BaseRouter):
     Router that routes the prompt that is judged by a LLM as complex and requires a step-by-step plan.
     """
 
+    JUDGE_MODEL = 'gpt-4o'
+
     def __init__(self, llm_config: LLMConfig):
         super().__init__()
 
@@ -29,5 +31,6 @@ def should_route_to_custom_model(self, prompt: str) -> bool:
 
         response = self.judge_llm.completion(
             messages=messages,
+            model=self.JUDGE_MODEL,
         )
         return int(response['choices'][0]['message']['content'].strip()) == 1
diff --git a/openhands/router/plan/prompts.py b/openhands/router/plan/prompts.py
index e9af5d75fd03..e06d7b61843b 100644
--- a/openhands/router/plan/prompts.py
+++ b/openhands/router/plan/prompts.py
@@ -1,8 +1,40 @@
-ANALYZE_PROMPT = """Analyze this prompt to see if it already contains a step-by-step plan or requires more detailed plan generation:
+ANALYZE_PROMPT = """Analyze this prompt to see if it already contains a step-by-step plan or requires more detailed plan generation.
 
----
+Some example scenarios that require generating a step-by-step plan:
+
+1. Structured Rule-Based Tasks with Well-Defined Constraints
+   * Example: In a synthetic task, adhering to a sequence like loosening nuts before removing wheels is critical
+
+2. Tasks Requiring Step-by-Step Reasoning to plan a structured chain of actions
+   * Example: In a synthetic task, objects must be manipulated in a sequence to achieve a configuration
+
+3. Scenarios with Limited Resources or Strict Constraints
+   * Tasks that require resource-sensitive planning, such as minimizing actions or handling tools efficiently
+   * Example: In a synthetic task, we need to efficiently coordinate robot actions across rooms and minimize energy consumption costs
+
+4. Generalization in Familiar Symbolic Representations
+   * Tasks where the rules remain consistent, and the specific instances change.
+   * Example: When we need to adapt strategies to new but structured instances of tasks.
+
+5. Requests Requiring Self-Evaluation
+   * Self-evaluation mechanism enables the identification and correction of errors mid-process.
+   * Example: When we need to reevaluate actions and adjust plans or actions based on constraints.
+
+In context of software engineering, below are some scenarios where plan generation is required:
+
+1. Dependency and Workflow Management
+    * Automating and optimizing CI/CD pipelines, build processes, and package dependency resolution.
+    * Example: Resolving complex dependency graphs or sequencing multi-step deployments.
+2. Code Refactoring and Debugging
+    * Planning systematic changes for refactoring large codebases and isolating root causes during debugging.
+    * Example: Refactoring monolithic code into modular components while preserving functionality.
+3. Infrastructure and Resource Planning
+    * Designing and optimizing Infrastructure as Code (IaC) changes and dynamic resource allocation.
+    * Example: Planning cloud resource provisioning while adhering to dependency constraints.
+
+=== BEGIN USER MESSAGE ===
 {message}
----
+=== END USER MESSAGE ===
 
 Only respond with 0 for no plan generation required or 1 for plan generation required.
 """

From 27a83dbd3ad879ea6c8674fd9fe9901f31f3f67f Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Fri, 10 Jan 2025 12:53:57 +0000
Subject: [PATCH 10/21] update prompt

---
 openhands/llm/llm.py             | 1 -
 openhands/router/plan/prompts.py | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index c684ed17bce7..2e6ccd9960c7 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -12,7 +12,6 @@
 with warnings.catch_warnings():
     warnings.simplefilter('ignore')
     import litellm
-    # litellm.set_verbose = True
     from litellm import ChatCompletionMessageToolCall, ModelInfo, PromptTokensDetails
     from litellm import Message as LiteLLMMessage
diff --git a/openhands/router/plan/prompts.py b/openhands/router/plan/prompts.py
index e06d7b61843b..154bd1180941 100644
--- a/openhands/router/plan/prompts.py
+++ b/openhands/router/plan/prompts.py
@@ -31,6 +31,8 @@
 3. Infrastructure and Resource Planning
     * Designing and optimizing Infrastructure as Code (IaC) changes and dynamic resource allocation.
     * Example: Planning cloud resource provisioning while adhering to dependency constraints.
+4. High-level Requirements to Low-level Implementation Mapping
+   * Translating high-level requirements into detailed implementation steps and ensuring consistency.
 
 === BEGIN USER MESSAGE ===
 {message}

From 6f86ad9841e71660869ca128d0168d6d1bd17583 Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Fri, 10 Jan 2025 13:08:31 +0000
Subject: [PATCH 11/21] update prompt

---
 openhands/router/plan/prompts.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/router/plan/prompts.py b/openhands/router/plan/prompts.py
index 154bd1180941..9bea73e261af 100644
--- a/openhands/router/plan/prompts.py
+++ b/openhands/router/plan/prompts.py
@@ -1,4 +1,4 @@
-ANALYZE_PROMPT = """Analyze this prompt to see if it already contains a step-by-step plan or requires more detailed plan generation.
+ANALYZE_PROMPT = """Analyze this prompt to see if it requires a detailed plan generation.
 
 Some example scenarios that require generating a step-by-step plan:
 

From ec2d16262cb2d2172084ae71891687350247a350 Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Fri, 10 Jan 2025 13:22:38 +0000
Subject: [PATCH 12/21] adjust rule-based router

---
 openhands/router/plan/rule_based.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/openhands/router/plan/rule_based.py b/openhands/router/plan/rule_based.py
index 2e45858d88dd..cb50dfdd9924 100644
--- a/openhands/router/plan/rule_based.py
+++ b/openhands/router/plan/rule_based.py
@@ -3,9 +3,9 @@
 
 class RuleBasedPlanRouter(BaseRouter):
     """
-    Router that detects if the prompt contains the word "plan" or "planning".
+    Router that detects if the prompt contains the string "plan".
     """
 
     def should_route_to_custom_model(self, prompt: str) -> bool:
-        # Returns True if the prompt contains the word "plan" or "planning"
-        return 'plan' in prompt or 'planning' in prompt
+        # Returns True if the prompt contains the word "plan"
+        return 'plan' in prompt

From 9bf5a7f225db552a0f564f2d79ee755e8925e12f Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Sun, 12 Jan 2025 14:19:45 +0000
Subject: [PATCH 13/21] fix indentation

---
 .../agenthub/codeact_agent/codeact_agent.py |  1 -
 openhands/llm/llm.py                        |  2 ++
 openhands/router/plan/llm_based.py          |  4 +--
 openhands/router/plan/prompts.py            | 34 +++++++++++++++----
 openhands/utils/trajectory.py               |  4 +++
 5 files changed, 36 insertions(+), 9 deletions(-)
 create mode 100644 openhands/utils/trajectory.py

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index e25ae1084036..7d22eb50c51e 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -121,7 +121,6 @@ def __init__(
         self.condenser = Condenser.from_config(self.config.condenser)
         logger.debug(f'Using condenser: {self.condenser}')
 
-        # self.plan_router = RuleBasedPlanRouter() if config.enable_plan_routing else None
         self.plan_router = (
             LLMBasedPlanRouter(self.llm.config) if config.enable_plan_routing else None
         )
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 2e6ccd9960c7..d8fd1b018eb3 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -12,6 +12,7 @@
 with warnings.catch_warnings():
     warnings.simplefilter('ignore')
     import litellm
+    # litellm.set_verbose = True
     from litellm import ChatCompletionMessageToolCall, ModelInfo, PromptTokensDetails
     from litellm import Message as LiteLLMMessage
@@ -94,6 +95,7 @@ def __init__(
         Args:
             config: The LLM configuration.
             metrics: The metrics to use.
+            model_routing_config: The model routing configuration.
         """
         self._tried_model_info = False
         self.metrics: Metrics = (
diff --git a/openhands/router/plan/llm_based.py b/openhands/router/plan/llm_based.py
index 365cbee3f985..ba8a8b4315a5 100644
--- a/openhands/router/plan/llm_based.py
+++ b/openhands/router/plan/llm_based.py
@@ -3,7 +3,7 @@
 from openhands.core.config import LLMConfig
 from openhands.llm.llm import LLM
 from openhands.router.base import BaseRouter
-from openhands.router.plan.prompts import ANALYZE_PROMPT
+from openhands.router.plan.prompts import USER_MESSAGE_PLANNING_ANALYZE_PROMPT
 
 
 class LLMBasedPlanRouter(BaseRouter):
@@ -25,7 +25,7 @@ def should_route_to_custom_model(self, prompt: str) -> bool:
         messages.append(
             {
                 'role': 'user',
-                'content': ANALYZE_PROMPT.format(message=prompt),
+                'content': USER_MESSAGE_PLANNING_ANALYZE_PROMPT.format(message=prompt),
             }
         )
 
diff --git a/openhands/router/plan/prompts.py b/openhands/router/plan/prompts.py
index 9bea73e261af..49ae5fed182f 100644
--- a/openhands/router/plan/prompts.py
+++ b/openhands/router/plan/prompts.py
@@ -1,4 +1,8 @@
-ANALYZE_PROMPT = """Analyze this prompt to see if it requires a detailed plan generation.
+############################################
+########  PLAN GENERATION PROMPTS   ########
+############################################
+
+USER_MESSAGE_PLANNING_ANALYZE_PROMPT = """Analyze this prompt to see if it requires a detailed plan generation.
 
 Some example scenarios that require generating a step-by-step plan:
 
@@ -24,13 +28,13 @@
 
 1. Dependency and Workflow Management
    * Automating and optimizing CI/CD pipelines, build processes, and package dependency resolution.
-    * Example: Resolving complex dependency graphs or sequencing multi-step deployments.
+   * Example: Resolving complex dependency graphs or sequencing multi-step deployments.
 2. Code Refactoring and Debugging
-    * Planning systematic changes for refactoring large codebases and isolating root causes during debugging.
-    * Example: Refactoring monolithic code into modular components while preserving functionality.
+   * Planning systematic changes for refactoring large codebases and isolating root causes during debugging.
+   * Example: Refactoring monolithic code into modular components while preserving functionality.
 3. Infrastructure and Resource Planning
-    * Designing and optimizing Infrastructure as Code (IaC) changes and dynamic resource allocation.
-    * Example: Planning cloud resource provisioning while adhering to dependency constraints.
+   * Designing and optimizing Infrastructure as Code (IaC) changes and dynamic resource allocation.
+   * Example: Planning cloud resource provisioning while adhering to dependency constraints.
 4. High-level Requirements to Low-level Implementation Mapping
    * Translating high-level requirements into detailed implementation steps and ensuring consistency.
 
@@ -40,3 +44,21 @@
 
 Only respond with 0 for no plan generation required or 1 for plan generation required.
 """
+
+############################################
+########  REASONING JUDGE PROMPTS   ########
+############################################
+
+TRAJECTORY_JUDGE_REASONING_SYSTEM_PROMPT = """You are an expert judge evaluating AI assistant interactions. Your task is to determine if:
+- the AI assistant is struggling with some issues when performing the task and needs help from a human expert to guide it
+- the next step is complex and needs to be carefully reasoned to solve e.g. identifying a hard-to-find bug in a codebase
+
+Respond only with 0 if the AI assistant is not struggling or the task is not complex. Otherwise, respond with 1."""
+
+TRAJECTORY_JUDGE_REASONING_USER_PROMPT = """Please evaluate the following interaction (or part of the recent interaction) between an AI assistant and a user:
+
+=== INTERACTION LOG ===
+{interaction}
+=== END INTERACTION ===
+
+Based on the above interaction, do we need to provide additional guidance to the AI assistant or is the task complex and requires careful reasoning to solve? Respond with 0 if no guidance is needed or the task is not complex. Otherwise, respond with 1."""
diff --git a/openhands/utils/trajectory.py b/openhands/utils/trajectory.py
new file mode 100644
index 000000000000..5c83d839c26e
--- /dev/null
+++ b/openhands/utils/trajectory.py
@@ -0,0 +1,4 @@
+"""
+Utilities for formatting OpenHands's raw LLM completion.
+Original code from: https://github.com/SWE-Gym/SWE-Gym/blob/main/scripts/openhands-verifier/aggregate_stats_pass_at_n.ipynb
+"""

From 8e05f3f12c27e3162c79da6eddd1dea0abd625dd Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Sun, 12 Jan 2025 15:31:57 +0000
Subject: [PATCH 14/21] use full trajectory

---
 .../agenthub/codeact_agent/codeact_agent.py |  21 ++--
 openhands/llm/llm.py                        |   1 -
 openhands/router/plan/llm_based.py          |  21 ++--
 openhands/router/plan/prompts.py            |   2 +-
 openhands/utils/trajectory.py               | 114 +++++++++++++-
 5 files changed, 138 insertions(+), 21 deletions(-)

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index 7d22eb50c51e..a9e01dd3b292 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -45,6 +45,7 @@
     PluginRequirement,
 )
 from openhands.utils.prompt import PromptManager
+from openhands.utils.trajectory import format_trajectory
 
 
 class CodeActAgent(Agent):
@@ -385,19 +386,19 @@ def step(self, state: State) -> Action:
 
         params: dict = {}
 
-        # check if the user requests a plan
-        if (
-            latest_user_message
-            and self.plan_router
-            and self.plan_router.should_route_to_custom_model(
-                latest_user_message.content
-            )
-        ):
-            params['use_reasoning_model'] = True
-
         # prepare what we want to send to the LLM
         messages = self._get_messages(state)
-        params['messages'] = self.llm.format_messages_for_llm(messages)
+        messages_dict = self.llm.format_messages_for_llm(messages)
+        params['messages'] = messages_dict
+
+        formatted_trajectory = format_trajectory(messages_dict)
+
+        # check if model routing is needed
+        if self.plan_router and self.plan_router.should_route_to_custom_model(
+            formatted_trajectory
+        ):
+            params['use_reasoning_model'] = True
+
         params['tools'] = self.tools
         if self.mock_function_calling:
             params['mock_function_calling'] = True
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index d8fd1b018eb3..743eb1dd13c5 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -12,7 +12,6 @@
 with warnings.catch_warnings():
     warnings.simplefilter('ignore')
     import litellm
-    # litellm.set_verbose = True
     from litellm import ChatCompletionMessageToolCall, ModelInfo, PromptTokensDetails
     from litellm import Message as LiteLLMMessage
diff --git a/openhands/router/plan/llm_based.py b/openhands/router/plan/llm_based.py
index ba8a8b4315a5..8ada3d537980 100644
--- a/openhands/router/plan/llm_based.py
+++ b/openhands/router/plan/llm_based.py
@@ -3,7 +3,10 @@
 from openhands.core.config import LLMConfig
 from openhands.llm.llm import LLM
 from openhands.router.base import BaseRouter
-from openhands.router.plan.prompts import USER_MESSAGE_PLANNING_ANALYZE_PROMPT
+from openhands.router.plan.prompts import (
+    TRAJECTORY_JUDGE_REASONING_SYSTEM_PROMPT,
+    TRAJECTORY_JUDGE_REASONING_USER_PROMPT,
+)
 
 
 class LLMBasedPlanRouter(BaseRouter):
@@ -20,14 +23,18 @@ def __init__(self, llm_config: LLMConfig):
         self.judge_llm = LLM(judge_llm_config)
 
     def should_route_to_custom_model(self, prompt: str) -> bool:
-        messages = []
-
-        messages.append(
+        messages = [
+            {
+                'role': 'system',
+                'content': TRAJECTORY_JUDGE_REASONING_SYSTEM_PROMPT,
+            },
             {
                 'role': 'user',
-                'content': USER_MESSAGE_PLANNING_ANALYZE_PROMPT.format(message=prompt),
-            }
-        )
+                'content': TRAJECTORY_JUDGE_REASONING_USER_PROMPT.format(
+                    interaction_log=prompt
+                ),
+            },
+        ]
 
         response = self.judge_llm.completion(
             messages=messages,
diff --git a/openhands/router/plan/prompts.py b/openhands/router/plan/prompts.py
index 49ae5fed182f..90ecc336e8f5 100644
--- a/openhands/router/plan/prompts.py
+++ b/openhands/router/plan/prompts.py
@@ -58,7 +58,7 @@
 TRAJECTORY_JUDGE_REASONING_USER_PROMPT = """Please evaluate the following interaction (or part of the recent interaction) between an AI assistant and a user:
 
 === INTERACTION LOG ===
-{interaction}
+{interaction_log}
 === END INTERACTION ===
 
 Based on the above interaction, do we need to provide additional guidance to the AI assistant or is the task complex and requires careful reasoning to solve? Respond with 0 if no guidance is needed or the task is not complex. Otherwise, respond with 1."""
diff --git a/openhands/utils/trajectory.py b/openhands/utils/trajectory.py
index 5c83d839c26e..4d2e233292b1 100644
--- a/openhands/utils/trajectory.py
+++ b/openhands/utils/trajectory.py
@@ -1,4 +1,114 @@
-"""
-Utilities for formatting OpenHands's raw LLM completion.
+"""Utility functions for processing and formatting trajectories.
 Original code from: https://github.com/SWE-Gym/SWE-Gym/blob/main/scripts/openhands-verifier/aggregate_stats_pass_at_n.ipynb
 """
+
+import json
+
+
+def _convert_content(content) -> str:
+    ret = ''
+    if isinstance(content, list):
+        for item in content:
+            assert item['type'] == 'text', 'Only text is supported for now'
+            ret += f'{item["text"]}\n'
+    else:
+        assert isinstance(content, str), 'Only str is supported for now'
+        ret = content
+    return ret
+
+
+def _convert_tool_call_to_string(tool_call) -> str:
+    """Convert tool call to content in string format."""
+    if 'function' not in tool_call:
+        raise ValueError("Tool call must contain 'function' key.")
+    if 'id' not in tool_call:
+        raise ValueError("Tool call must contain 'id' key.")
+    if 'type' not in tool_call:
+        raise ValueError("Tool call must contain 'type' key.")
+    if tool_call['type'] != 'function':
+        raise ValueError("Tool call type must be 'function'.")
+
+    ret = f"<function={tool_call['function']['name']}>\n"
+    try:
+        args = json.loads(tool_call['function']['arguments'])
+    except json.JSONDecodeError as e:
+        raise ValueError(
+            f"Failed to parse arguments as JSON. Arguments: {tool_call['function']['arguments']}"
+        ) from e
+    for param_name, param_value in args.items():
+        is_multiline = isinstance(param_value, str) and '\n' in param_value
+        ret += f'<parameter={param_name}>'
+        if is_multiline:
+            ret += '\n'
+        ret += f'{param_value}'
+        if is_multiline:
+            ret += '\n'
+        ret += '</parameter>\n'
+    ret += '</function>'
+    return ret
+
+
+def format_trajectory(traj: list[dict]) -> str:
+    output = ''
+    system_message = None
+
+    # Handle system message if present
+    if traj[0]['role'] == 'system':
+        system_message = traj[0]
+        traj = traj[1:]
+        content = _convert_content(system_message['content'])
+        output += "*** System Message that describes the assistant's behavior ***\n"
+        output += f'{content}\n'
+
+    # Merge consecutive user messages first
+    merged_traj = []
+    current_messages = []
+
+    for i, message in enumerate(traj):
+        if message['role'] == 'user':
+            current_messages.append(message)
+        else:
+            if current_messages:
+                # Merge all accumulated user messages into one
+                merged_content = '\n'.join(
+                    _convert_content(msg['content']) for msg in current_messages
+                )
+                merged_traj.append({'role': 'user', 'content': merged_content})
+                current_messages = []
+            merged_traj.append(message)
+
+    # Don't forget to handle any remaining user messages
+    if current_messages:
+        merged_content = '\n'.join(
+            _convert_content(msg['content']) for msg in current_messages
+        )
+        merged_traj.append({'role': 'user', 'content': merged_content})
+
+    # Now process the merged trajectory
+    for i, message in enumerate(merged_traj):
+        role = message['role']
+        content_: str | list = message['content']
+        content = _convert_content(content_) if isinstance(content_, list) else content_
+        turn_id = i // 2 + 1
+        output += '-' * 100 + '\n'
+        output += f'*** Turn {turn_id} - {role.upper() if role != "tool" else "TOOL EXECUTION RESULT"} ***\n'
+
+        if role == 'user':
+            output += f'{content}\n'
+        elif role == 'tool':
+            output += f'{content}\n'
+        elif role == 'assistant':
+            output += f'{content}\n'
+            if (
+                'tool_calls' in message
+                and message['tool_calls'] is not None
+                and len(message['tool_calls']) > 0
+            ):
+                for toolcall_id, tool_call in enumerate(message['tool_calls']):
+                    output += f'### Tool Call {toolcall_id}\n'
+                    output += f'{_convert_tool_call_to_string(tool_call)}\n'
+        else:
+            raise ValueError(f'Unexpected role: {role}')
+
+    output += '-' * 100 + '\n'
+    return output

From ddc324831b89c52368aba66d69969773a6ee69b0 Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Tue, 14 Jan 2025 13:51:48 +0700
Subject: [PATCH 15/21] refactor traj formatter and add tests

---
 .../agenthub/codeact_agent/codeact_agent.py |   2 +-
 openhands/utils/trajectory.py               | 133 ++++++++----------
 tests/unit/test_trajectory_formatter.py     | 117 +++++++++++++++
 3 files changed, 178 insertions(+), 74 deletions(-)
 create mode 100644 tests/unit/test_trajectory_formatter.py

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index a9e01dd3b292..6f43f4397b14 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -391,7 +391,7 @@ def step(self, state: State) -> Action:
         messages_dict = self.llm.format_messages_for_llm(messages)
         params['messages'] = messages_dict
 
-        formatted_trajectory = format_trajectory(messages_dict)
+        formatted_trajectory = format_trajectory(messages)
 
         # check if model routing is needed
         if self.plan_router and self.plan_router.should_route_to_custom_model(
diff --git a/openhands/utils/trajectory.py b/openhands/utils/trajectory.py
index 4d2e233292b1..bba09729f120 100644
--- a/openhands/utils/trajectory.py
+++ b/openhands/utils/trajectory.py
@@ -1,112 +1,99 @@
-"""Utility functions for processing and formatting trajectories.
+"""
+Utility functions for processing and formatting trajectories.
 Original code from: https://github.com/SWE-Gym/SWE-Gym/blob/main/scripts/openhands-verifier/aggregate_stats_pass_at_n.ipynb
 """
 
 import json
 
+from litellm import ChatCompletionMessageToolCall
+
+from openhands.core.message import ImageContent, Message, TextContent
+
+
+def convert_content(content: list[TextContent | ImageContent]) -> str:
+    """Converts a list of message content to a single string."""
+    return '\n'.join(item.text for item in content if item.type == 'text')
+
 
-def _convert_content(content) -> str:
-    ret = ''
-    if isinstance(content, list):
-        for item in content:
-            assert item['type'] == 'text', 'Only text is supported for now'
-            ret += f'{item["text"]}\n'
-    else:
-        assert isinstance(content, str), 'Only str is supported for now'
-        ret = content
-    return ret
-
-
-def _convert_tool_call_to_string(tool_call) -> str:
-    """Convert tool call to content in string format."""
-    if 'function' not in tool_call:
-        raise ValueError("Tool call must contain 'function' key.")
-    if 'id' not in tool_call:
-        raise ValueError("Tool call must contain 'id' key.")
-    if 'type' not in tool_call:
-        raise ValueError("Tool call must contain 'type' key.")
-    if tool_call['type'] != 'function':
-        raise ValueError("Tool call type must be 'function'.")
-
-    ret = f"<function={tool_call['function']['name']}>\n"
+def convert_tool_call_to_string(tool_call: ChatCompletionMessageToolCall) -> str:
+    """Converts tool call arguments to a string representation."""
     try:
-        args = json.loads(tool_call['function']['arguments'])
+        args = json.loads(tool_call.function.arguments)
     except json.JSONDecodeError as e:
         raise ValueError(
             f"Failed to parse arguments as JSON. Arguments: {tool_call['function']['arguments']}"
         ) from e
+
+    tool_call_str = f'<function={tool_call.function.name}>\n'
     for param_name, param_value in args.items():
-        is_multiline = isinstance(param_value, str) and '\n' in param_value
-        ret += f'<parameter={param_name}>'
-        if is_multiline:
-            ret += '\n'
-        ret += f'{param_value}'
-        if is_multiline:
-            ret += '\n'
-        ret += '</parameter>\n'
-    ret += '</function>'
-    return ret
-
-
-def format_trajectory(traj: list[dict]) -> str:
-    output = ''
-    system_message = None
+        is_multiline_value = isinstance(param_value, str) and '\n' in param_value
+        param_value = '\n' + param_value + '\n' if is_multiline_value else param_value
+        tool_call_str += f'<parameter={param_name}>{param_value}</parameter>\n'
+    tool_call_str += '</function>'
+    return tool_call_str
 
-    # Handle system message if present
-    if traj[0]['role'] == 'system':
-        system_message = traj[0]
-        traj = traj[1:]
-        content = _convert_content(system_message['content'])
-        output += "*** System Message that describes the assistant's behavior ***\n"
-        output += f'{content}\n'
 
-    # Merge consecutive user messages first
+def merge_user_messages(traj: list[Message]) -> list[Message]:
+    """Merges consecutive user messages into a single message."""
     merged_traj = []
     current_messages = []
 
-    for i, message in enumerate(traj):
-        if message['role'] == 'user':
+    for message in traj:
+        if message.role == 'user':
             current_messages.append(message)
         else:
             if current_messages:
-                # Merge all accumulated user messages into one
                 merged_content = '\n'.join(
-                    _convert_content(msg['content']) for msg in current_messages
+                    convert_content(msg.content) for msg in current_messages
+                )
+                merged_traj.append(
+                    Message(role='user', content=[TextContent(text=merged_content)])
                 )
-                merged_traj.append({'role': 'user', 'content': merged_content})
                 current_messages = []
             merged_traj.append(message)
 
-    # Don't forget to handle any remaining user messages
     if current_messages:
         merged_content = '\n'.join(
-            _convert_content(msg['content']) for msg in current_messages
+            convert_content(msg.content) for msg in current_messages
+        )
+        merged_traj.append(
+            Message(role='user', content=[TextContent(text=merged_content)])
         )
-        merged_traj.append({'role': 'user', 'content': merged_content})
 
-    # Now process the merged trajectory
+    return merged_traj
+
+
+def format_trajectory(traj: list[Message]) -> str:
+    """Formats the message trajectory into a human-readable string."""
+    output = ''
+    system_message = None
+
+    if traj:
+        # Handle system message if present
+        if traj[0].role == 'system':
+            system_message = traj[0]
+            traj = traj[1:]
+            content = convert_content(system_message.content)
+            output += "*** System Message that describes the assistant's behavior ***\n"
+            output += f'{content}\n'
+
+    # Merge consecutive user messages
+    merged_traj = merge_user_messages(traj)
+
+    # Process the merged trajectory
     for i, message in enumerate(merged_traj):
-        role = message['role']
-        content_: str | list = message['content']
-        content = _convert_content(content_) if isinstance(content_, list) else content_
+        role = message.role
+        content = convert_content(message.content)
         turn_id = i // 2 + 1
         output += '-' * 100 + '\n'
         output += f'*** Turn {turn_id} - {role.upper() if role != "tool" else "TOOL EXECUTION RESULT"} ***\n'
 
-        if role == 'user':
-            output += f'{content}\n'
-        elif role == 'tool':
-            output += f'{content}\n'
-        elif role == 'assistant':
+        if role == 'user' or role == 'tool' or role == 'assistant':
             output += f'{content}\n'
-            if (
-                'tool_calls' in message
-                and message['tool_calls'] is not None
-                and len(message['tool_calls']) > 0
-            ):
-                for toolcall_id, tool_call in enumerate(message['tool_calls']):
+            if role == 'assistant' and message.tool_calls:
+                for toolcall_id, tool_call in enumerate(message.tool_calls):
                     output += f'### Tool Call {toolcall_id}\n'
-                    output += f'{_convert_tool_call_to_string(tool_call)}\n'
+                    output += f'{convert_tool_call_to_string(tool_call)}\n'
         else:
             raise ValueError(f'Unexpected role: {role}')
 
     output += '-' * 100 + '\n'
     return output
diff --git a/tests/unit/test_trajectory_formatter.py b/tests/unit/test_trajectory_formatter.py
new file mode 100644
index 000000000000..9dcbaa6cd08c
--- /dev/null
+++ b/tests/unit/test_trajectory_formatter.py
@@ -0,0 +1,117 @@
+import pytest
+from litellm import ChatCompletionMessageToolCall
+
+from openhands.core.message import Message, TextContent
+from openhands.utils.trajectory import format_trajectory
+
+
+# Helper function to create a mock ChatCompletionMessageToolCall
+def create_mock_tool_call(name: str, arguments: str):
+    return ChatCompletionMessageToolCall(
+        function={'name': name, 'arguments': arguments}
+    )
+
+
+def test_empty_trajectory():
+    traj = []
+    assert (
+        format_trajectory(traj)
+        == """----------------------------------------------------------------------------------------------------
+"""
+    )
+
+
+def test_system_message_only():
+    traj = [
+        Message(
+            role='system', content=[TextContent(text='System behavior description.')]
+        )
+    ]
+    expected_output = """*** System Message that describes the assistant's behavior ***
+System behavior description.
+----------------------------------------------------------------------------------------------------
+"""
+    assert format_trajectory(traj) == expected_output
+
+
+def test_user_messages_only():
+    traj = [
+        Message(
+            role='user',
+            content=[TextContent(text='Hello.'), TextContent(text='How are you?')],
+        )
+    ]
+    expected_output = """----------------------------------------------------------------------------------------------------
+*** Turn 1 - USER ***
+Hello.
+How are you?
+----------------------------------------------------------------------------------------------------
+"""
+    assert format_trajectory(traj) == expected_output
+
+
+def test_mixed_messages():
+    traj = [
+        Message(
+            role='system', content=[TextContent(text='System behavior description.')]
+        ),
+        Message(role='user', content=[TextContent(text='Hello.')]),
+        Message(role='assistant', content=[TextContent(text='Hi there!')]),
+        Message(role='user', content=[TextContent(text='你好')]),
+        Message(role='assistant', content=[TextContent(text='你好')]),
+    ]
+    expected_output = """*** System Message that describes the assistant's behavior ***
+System behavior description.
+----------------------------------------------------------------------------------------------------
+*** Turn 1 - USER ***
+Hello.
+----------------------------------------------------------------------------------------------------
+*** Turn 1 - ASSISTANT ***
+Hi there!
+----------------------------------------------------------------------------------------------------
+*** Turn 2 - USER ***
+你好
+----------------------------------------------------------------------------------------------------
+*** Turn 2 - ASSISTANT ***
+你好
+----------------------------------------------------------------------------------------------------
+"""
+    assert format_trajectory(traj) == expected_output
+
+
+def test_tool_call_handling():
+    tool_call = create_mock_tool_call(
+        name='fn', arguments='{"param1": "value1", "param2": "value2"}'
+    )
+    traj = [
+        Message(
+            role='assistant',
+            content=[TextContent(text='Running the tool.')],
+            tool_calls=[tool_call],
+        )
+    ]
+    expected_output = """----------------------------------------------------------------------------------------------------
+*** Turn 1 - ASSISTANT ***
+Running the tool.
+### Tool Call 0
+<function=fn>
+<parameter=param1>value1</parameter>
+<parameter=param2>value2</parameter>
+</function>
+----------------------------------------------------------------------------------------------------
+"""
+    print(format_trajectory(traj))
+    assert format_trajectory(traj) == expected_output
+
+
+def test_invalid_tool_call():
+    tool_call = create_mock_tool_call(name='fn', arguments='invalid json')
+    traj = [
+        Message(
+            role='assistant',
+            content=[TextContent(text='Running the tool.')],
+            tool_calls=[tool_call],
+        )
+    ]
+    with pytest.raises(ValueError, match='Failed to parse arguments as JSON'):
+        format_trajectory(traj)

From 472d95cdb15a446d7cd8f0a8cf239c4887b4192f Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Tue, 14 Jan 2025 14:00:24 +0700
Subject: [PATCH 16/21] add o1 to fn calling models

---
 evaluation/benchmarks/swe_bench/run_infer.py      |  1 +
 openhands/agenthub/codeact_agent/codeact_agent.py | 15 +++++++--------
 openhands/llm/llm.py                              |  1 +
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/evaluation/benchmarks/swe_bench/run_infer.py b/evaluation/benchmarks/swe_bench/run_infer.py
index bf065ada9734..6fee7f6da395 100644
--- a/evaluation/benchmarks/swe_bench/run_infer.py
+++ b/evaluation/benchmarks/swe_bench/run_infer.py
@@ -150,6 +150,7 @@ def get_config(
         codeact_enable_browsing=RUN_WITH_BROWSING,
         codeact_enable_llm_editor=False,
         condenser=metadata.condenser_config,
+        # enable_plan_routing=True,
     )
     config.set_agent_config(agent_config)
     return config
diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index 6f43f4397b14..e5c3afd29dae 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -388,16 +388,15 @@ def step(self, state: State) -> Action:
 
         # prepare what we want to send to the LLM
         messages = self._get_messages(state)
-        messages_dict = self.llm.format_messages_for_llm(messages)
-        params['messages'] = messages_dict
-
-        formatted_trajectory = format_trajectory(messages)
+        params['messages'] = self.llm.format_messages_for_llm(messages)
 
         # check if model routing is needed
-        if self.plan_router and self.plan_router.should_route_to_custom_model(
-            formatted_trajectory
-        ):
-            params['use_reasoning_model'] = True
+        if self.plan_router:
+            formatted_trajectory = format_trajectory(messages)
+
+            if self.plan_router.should_route_to_custom_model(formatted_trajectory):
+                logger.info('🧭 Routing to custom model...')
+                params['use_reasoning_model'] = True
 
         params['tools'] = self.tools
         if self.mock_function_calling:
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 743eb1dd13c5..4fad046e4faf 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -71,6 +71,7 @@
     'claude-3-5-haiku-20241022',
    'gpt-4o-mini',
     'gpt-4o',
+    'o1',
 ]

From dbc2412065dcc9ad3eba52d5cdeb4afcd7949841 Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Tue, 4 Feb 2025 10:58:49 +0000
Subject: [PATCH 17/21] refactor to use llm objects

---
 .../agenthub/codeact_agent/codeact_agent.py   | 69 +++++++++++++------
 openhands/controller/agent.py                 |  1 +
 openhands/core/config/agent_config.py         |  3 -
 openhands/core/config/app_config.py           |  9 ++-
 openhands/core/config/llm_config.py           |  1 +
 openhands/core/config/model_routing_config.py | 34 ++-------
 openhands/core/config/utils.py                |  2 +-
 openhands/core/setup.py                       | 10 ++-
 openhands/llm/llm.py                          | 17 +----
 openhands/router/__init__.py                  |  4 ++
 openhands/router/base.py                      | 15 +++-
 openhands/router/plan/__init__.py             |  3 +-
 openhands/router/plan/llm_based.py            | 47 ++++++++++---
 openhands/router/plan/rule_based.py           | 11 ---
 openhands/server/session/session.py           | 13 +++-
 15 files changed, 140 insertions(+), 99 deletions(-)
 create mode 100644 openhands/router/__init__.py
 delete mode 100644 openhands/router/plan/rule_based.py

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index ce02f578c752..96419c6bdcfc 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -8,7 +8,7 @@
 import openhands.agenthub.codeact_agent.function_calling as codeact_function_calling
 from openhands.controller.agent import Agent
 from openhands.controller.state.state import State
-from openhands.core.config import AgentConfig
+from openhands.core.config import AgentConfig, ModelRoutingConfig
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.message import ImageContent, Message, TextContent
 from openhands.core.schema import ActionType
@@ -39,7 +39,7 @@
 from openhands.events.serialization.event import truncate_content
 from openhands.llm.llm import LLM
 from openhands.memory.condenser import Condenser
-from openhands.router.plan import LLMBasedPlanRouter
+from openhands.router import BaseRouter, LLMBasedPlanRouter
 from openhands.runtime.plugins import (
     AgentSkillsRequirement,
     JupyterRequirement,
@@ -82,11 +82,14 @@ def __init__(
         self,
         llm: LLM,
         config: AgentConfig,
+        model_routing_config: ModelRoutingConfig | None = None,
+        routing_llms: dict[str, LLM] | None = None,
     ) -> None:
         """Initializes a new instance of the CodeActAgent class.
 
         Parameters:
         - llm (LLM): The llm to be used by this agent
+        - routing_llms (dict[str, LLM]): The llms to be selected for routing
         """
         super().__init__(llm, config)
         self.pending_actions: deque[Action] = deque()
@@ -123,9 +126,17 @@ def __init__(
         self.condenser = Condenser.from_config(self.config.condenser)
         logger.debug(f'Using condenser: {self.condenser}')
 
-        self.plan_router = (
-            LLMBasedPlanRouter(self.llm.config) if config.enable_plan_routing else None
-        )
+        self.router: BaseRouter | None = None
+
+        if config.enable_plan_routing:
+            assert model_routing_config is not None and routing_llms is not None
+            self.router = LLMBasedPlanRouter(
+                llm=self.llm,
+                routing_llms=routing_llms or dict(),
+                model_routing_config=model_routing_config,
+            )
+
+        self.active_llm: LLM | None = None  # The LLM chosen by the router
 
     def get_action_message(
         self,
@@ -162,6 +173,9 @@ def get_action_message(
         rather than being returned immediately. They will be processed later
         when all corresponding tool call results are available.
         """
+        # Handle the case where self.active_llm is None
+        active_llm_ = self.active_llm or self.llm
+
         # create a regular message from an event
         if isinstance(
             action,
@@ -227,7 +241,7 @@ def get_action_message(
         elif isinstance(action, MessageAction):
             role = 'user' if action.source == 'user' else 'assistant'
             content = [TextContent(text=action.content or '')]
-            if self.llm.vision_is_active() and action.image_urls:
+            if active_llm_.vision_is_active() and action.image_urls:
                 content.append(ImageContent(image_urls=action.image_urls))
             return [
                 Message(
@@ -278,8 +292,11 @@ def get_observation_message(
         Raises:
             ValueError: If the observation type is unknown
         """
+        # Handle the case where self.active_llm is None
+        active_llm_ = self.active_llm or self.llm
+
         message: Message
-        max_message_chars = self.llm.config.max_message_chars
+        max_message_chars = active_llm_.config.max_message_chars
         if isinstance(obs, CmdOutputObservation):
             # if it doesn't have tool call metadata, it was triggered by a user action
             if obs.tool_call_metadata is None:
@@ -402,22 +419,29 @@ def step(self, state: State) -> Action:
 
         params: dict = {}
 
-        # prepare what we want to send to the LLM
-        messages = self._get_messages(state)
-        params['messages'] = self.llm.format_messages_for_llm(messages)
-
         # check if model routing is needed
-        if self.plan_router:
-            formatted_trajectory = format_trajectory(messages)
-
-            if self.plan_router.should_route_to_custom_model(formatted_trajectory):
-                logger.info('🧭 Routing to custom model...')
-                params['use_reasoning_model'] = True
+        if self.router:
+            if self.active_llm is None:
+                messages = self._get_messages(state)
+                formatted_trajectory = format_trajectory(messages)
+                self.active_llm = self.router.should_route_to(formatted_trajectory)
+
+            if self.active_llm != self.llm:
+                logger.warning(f'🧭 Routing to custom model: {self.active_llm}')
+        else:
+            self.active_llm = self.llm
 
         params['tools'] = self.tools
-        if self.mock_function_calling:
+        if not self.active_llm.is_function_calling_active():
             params['mock_function_calling'] = True
-        response = self.llm.completion(**params)
+
+        # prepare what we want to send to the LLM
+        # NOTE: We need to call this here when self.active_llm is correctly set
+        messages = self._get_messages(state)
+        params['messages'] = self.active_llm.format_messages_for_llm(messages)
+
+        response = self.active_llm.completion(**params)
+
         actions = codeact_function_calling.response_to_actions(response)
         for action in actions:
             self.pending_actions.append(action)
@@ -458,13 +482,16 @@ def _get_messages(self, state: State) -> list[Message]:
         if not self.prompt_manager:
             raise Exception('Prompt Manager not instantiated.')
 
+        # Handle the case where self.active_llm is None
+        active_llm_ = self.active_llm or self.llm
+
         messages: list[Message] = [
             Message(
                 role='system',
                 content=[
                     TextContent(
                         text=self.prompt_manager.get_system_message(),
-                        cache_prompt=self.llm.is_caching_prompt_active(),
+                        cache_prompt=active_llm_.is_caching_prompt_active(),
                     )
                 ],
             )
@@ -535,7 +562,7 @@ def _get_messages(self, state: State) -> list[Message]:
 
             messages.append(msg)
 
-        if self.llm.is_caching_prompt_active():
+        if active_llm_.is_caching_prompt_active():
             # NOTE: this is only needed for anthropic
             # following logic here:
            # https://github.com/anthropics/anthropic-quickstarts/blob/8f734fd08c425c6ec91ddd613af04ff87d70c5a0/computer-use-demo/computer_use_demo/loop.py#L241-L262
diff --git a/openhands/controller/agent.py b/openhands/controller/agent.py
index 43a55d935249..8577b179b3d0 100644
--- a/openhands/controller/agent.py
+++
b/openhands/controller/agent.py @@ -32,6 +32,7 @@ def __init__( self, llm: LLM, config: 'AgentConfig', + **kwargs, ): self.llm = llm self.config = config diff --git a/openhands/core/config/agent_config.py b/openhands/core/config/agent_config.py index fa3f3985ecfb..3fb9071061d2 100644 --- a/openhands/core/config/agent_config.py +++ b/openhands/core/config/agent_config.py @@ -1,7 +1,6 @@ from pydantic import BaseModel, Field from openhands.core.config.condenser_config import CondenserConfig, NoOpCondenserConfig -from openhands.core.config.model_routing_config import ModelRoutingConfig class AgentConfig(BaseModel): @@ -19,7 +18,6 @@ class AgentConfig(BaseModel): enable_prompt_extensions: Whether to use prompt extensions (e.g., microagents, inject runtime info). Default is True. disabled_microagents: A list of microagents to disable. Default is None. condenser: Configuration for the memory condenser. Default is NoOpCondenserConfig. - enable_plan_routing: Whether to enable plan routing to reasoning models. Default is False. codeact_enable_browsing: bool = Field(default=True) @@ -34,4 +32,3 @@ class AgentConfig(BaseModel): disabled_microagents: list[str] | None = Field(default=None) condenser: CondenserConfig = Field(default_factory=NoOpCondenserConfig) enable_plan_routing: bool = Field(default=False) - model_routing: ModelRoutingConfig = Field(default_factory=ModelRoutingConfig) diff --git a/openhands/core/config/app_config.py b/openhands/core/config/app_config.py index 8c995d1ee3db..b3d38e78eede 100644 --- a/openhands/core/config/app_config.py +++ b/openhands/core/config/app_config.py @@ -10,6 +10,7 @@ model_defaults_to_dict, ) from openhands.core.config.llm_config import LLMConfig +from openhands.core.config.model_routing_config import ModelRoutingConfig from openhands.core.config.sandbox_config import SandboxConfig from openhands.core.config.security_config import SecurityConfig @@ -20,6 +21,7 @@ class AppConfig(BaseModel): Attributes: llms: Dictionary mapping LLM names to their configurations. The default configuration is stored under the 'llm' key. + routing_llms: Dictionary mapping routing LLM names to their configurations. agents: Dictionary mapping agent names to their configurations. The default configuration is stored under the 'agent' key. default_agent: Name of the default agent to use.
@@ -48,10 +50,12 @@ class AppConfig(BaseModel): """ llms: dict[str, LLMConfig] = Field(default_factory=dict) + routing_llms: dict[str, LLMConfig] = Field(default_factory=dict) agents: dict = Field(default_factory=dict) default_agent: str = Field(default=OH_DEFAULT_AGENT) sandbox: SandboxConfig = Field(default_factory=SandboxConfig) security: SecurityConfig = Field(default_factory=SecurityConfig) + model_routing: ModelRoutingConfig = Field(default_factory=ModelRoutingConfig) runtime: str = Field(default='docker') file_store: str = Field(default='local') file_store_path: str = Field(default='/tmp/openhands_file_store') @@ -94,7 +98,10 @@ def get_llm_config(self, name='llm') -> LLMConfig: return self.llms['llm'] def set_llm_config(self, value: LLMConfig, name='llm') -> None: - self.llms[name] = value + if value.for_routing: + self.routing_llms[name] = value + else: + self.llms[name] = value def get_agent_config(self, name='agent') -> AgentConfig: """'agent' is the name for default config (for backward compatibility prior to 0.8).""" diff --git a/openhands/core/config/llm_config.py b/openhands/core/config/llm_config.py index cb1581634da1..a00654f9e4c3 100644 --- a/openhands/core/config/llm_config.py +++ b/openhands/core/config/llm_config.py @@ -86,6 +86,7 @@ class LLMConfig(BaseModel): custom_tokenizer: str | None = Field(default=None) native_tool_calling: bool | None = Field(default=None) reasoning_effort: str | None = Field(default='high') + for_routing: bool = Field(default=False) model_config = {'extra': 'forbid'} diff --git a/openhands/core/config/model_routing_config.py b/openhands/core/config/model_routing_config.py index 902a7fcaa782..349389f3b88a 100644 --- a/openhands/core/config/model_routing_config.py +++ b/openhands/core/config/model_routing_config.py @@ -1,32 +1,6 @@ -from dataclasses import dataclass, fields +from pydantic import BaseModel, Field -from openhands.core.config.config_utils import get_field_info - -@dataclass -class ModelRoutingConfig: - reasoning_model: str = 'o1-preview-2024-09-12' - - def defaults_to_dict(self) -> dict: - """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional.""" - dict = {} - for f in fields(self): - dict[f.name] = get_field_info(f) - return dict - - def __str__(self): - attr_str = [] - for f in fields(self): - attr_name = f.name - attr_value = getattr(self, f.name) - - attr_str.append(f'{attr_name}={repr(attr_value)}') - - return f"ModelRoutingConfig({', '.join(attr_str)})" - - @classmethod - def from_dict(cls, model_routing_config_dict: dict) -> 'ModelRoutingConfig': - return cls(**model_routing_config_dict) - - def __repr__(self): - return self.__str__() +class ModelRoutingConfig(BaseModel): + reasoning_llm_config_name: str = Field(default='reasoning_model') + judge_llm_config_name: str = Field(default='judge_model') diff --git a/openhands/core/config/utils.py b/openhands/core/config/utils.py index 29aa9ca90600..c2bc5effaebd 100644 --- a/openhands/core/config/utils.py +++ b/openhands/core/config/utils.py @@ -162,7 +162,7 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'): logger.openhands_logger.debug( 'Attempt to load model routing config from config toml' ) - model_routing_config = ModelRoutingConfig.from_dict(value) + model_routing_config = ModelRoutingConfig(**value) cfg.model_routing = model_routing_config logger.openhands_logger.debug( diff --git a/openhands/core/setup.py b/openhands/core/setup.py index 6785d4fae88b..1adb36f95983 100644 --- a/openhands/core/setup.py 
+++ b/openhands/core/setup.py @@ -60,10 +60,18 @@ def create_agent(runtime: Runtime, config: AppConfig) -> Agent: agent_cls: Type[Agent] = Agent.get_cls(config.default_agent) agent_config = config.get_agent_config(config.default_agent) llm_config = config.get_llm_config_from_agent(config.default_agent) + routing_llms_config = config.routing_llms model_routing_config = config.model_routing + routing_llms = {} + for config_name, routing_llm_config in routing_llms_config.items(): + routing_llms[config_name] = LLM( + config=routing_llm_config, + ) agent = agent_cls( - llm=LLM(config=llm_config, model_routing_config=model_routing_config), + llm=LLM(config=llm_config), config=agent_config, + model_routing_config=model_routing_config, + routing_llms=routing_llms, ) if agent.prompt_manager: microagents = runtime.get_microagents_from_selected_repo(None) diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py index 1e28a436e69a..d609e1434764 100644 --- a/openhands/llm/llm.py +++ b/openhands/llm/llm.py @@ -7,7 +7,7 @@ import requests -from openhands.core.config import LLMConfig, ModelRoutingConfig +from openhands.core.config import LLMConfig with warnings.catch_warnings(): warnings.simplefilter('ignore') @@ -111,7 +111,6 @@ def __init__( config: LLMConfig, metrics: Metrics | None = None, retry_listener: Callable[[int, int], None] | None = None, - model_routing_config: ModelRoutingConfig | None = None, ): """Initializes the LLM. If LLMConfig is passed, its values will be the fallback. @@ -120,7 +119,6 @@ def __init__( Args: config: The LLM configuration. metrics: The metrics to use. - model_routing_config: The model routing configuration. """ self._tried_model_info = False self.metrics: Metrics = ( @@ -128,7 +126,6 @@ def __init__( ) self.cost_metric_supported: bool = True self.config: LLMConfig = copy.deepcopy(config) - self.model_routing_config = model_routing_config self.model_info: ModelInfo | None = None self.retry_listener = retry_listener @@ -202,7 +199,6 @@ def wrapper(*args, **kwargs): messages: list[dict[str, Any]] | dict[str, Any] = [] mock_function_calling = kwargs.pop('mock_function_calling', False) - use_reasoning_model = kwargs.pop('use_reasoning_model', False) # some callers might send the model and messages directly # litellm allows positional args, like completion(model, messages, **kwargs) @@ -235,15 +231,6 @@ def wrapper(*args, **kwargs): kwargs['stop'] = STOP_WORDS mock_fncall_tools = kwargs.pop('tools') - if use_reasoning_model: - if self.model_routing_config is None: - raise ValueError( - 'Model routing config is required for model routing.' 
- ) - - # Replace the model with the reasoning model - kwargs['model'] = self.model_routing_config.reasoning_model - # if we have no messages, something went very wrong if not messages: raise ValueError( @@ -693,7 +680,7 @@ def __str__(self): return f'LLM(model={self.config.model}, api_version={self.config.api_version}, base_url={self.config.base_url})' elif self.config.base_url: return f'LLM(model={self.config.model}, base_url={self.config.base_url})' - return f'LLM(model={self.config.model},reasoning_model={self.model_routing_config.reasoning_model if self.model_routing_config else None})' + return f'LLM(model={self.config.model})' def __repr__(self): return str(self) diff --git a/openhands/router/__init__.py b/openhands/router/__init__.py new file mode 100644 index 000000000000..32058b2b386f --- /dev/null +++ b/openhands/router/__init__.py @@ -0,0 +1,4 @@ +from openhands.router.base import BaseRouter +from openhands.router.plan.llm_based import LLMBasedPlanRouter + +__all__ = ['BaseRouter', 'LLMBasedPlanRouter'] diff --git a/openhands/router/base.py b/openhands/router/base.py index ccc7ad47f1c5..111cb23f6814 100644 --- a/openhands/router/base.py +++ b/openhands/router/base.py @@ -1,7 +1,20 @@ from abc import ABC, abstractmethod +from openhands.core.config.model_routing_config import ModelRoutingConfig +from openhands.llm.llm import LLM + class BaseRouter(ABC): + def __init__( + self, + llm: LLM, + routing_llms: dict[str, LLM], + model_routing_config: ModelRoutingConfig, + ): + self.llm = llm + self.routing_llms = routing_llms + self.model_routing_config = model_routing_config + @abstractmethod - def should_route_to_custom_model(self, prompt: str) -> bool: + def should_route_to(self, prompt: str) -> LLM: pass diff --git a/openhands/router/plan/__init__.py b/openhands/router/plan/__init__.py index 845831646df8..323c4dddf224 100644 --- a/openhands/router/plan/__init__.py +++ b/openhands/router/plan/__init__.py @@ -1,4 +1,3 @@ from openhands.router.plan.llm_based import LLMBasedPlanRouter -from openhands.router.plan.rule_based import RuleBasedPlanRouter -__all__ = ['RuleBasedPlanRouter', 'LLMBasedPlanRouter'] +__all__ = ['LLMBasedPlanRouter'] diff --git a/openhands/router/plan/llm_based.py b/openhands/router/plan/llm_based.py index 8ada3d537980..066ba8186922 100644 --- a/openhands/router/plan/llm_based.py +++ b/openhands/router/plan/llm_based.py @@ -1,6 +1,4 @@ -import copy - -from openhands.core.config import LLMConfig +from openhands.core.config import ModelRoutingConfig from openhands.llm.llm import LLM from openhands.router.base import BaseRouter from openhands.router.plan.prompts import ( @@ -14,15 +12,22 @@ class LLMBasedPlanRouter(BaseRouter): Router that routes the prompt that is judged by a LLM as complex and requires a step-by-step plan. 
""" - JUDGE_MODEL = 'gpt-4o' + def __init__( + self, + llm: LLM, + routing_llms: dict[str, LLM], + model_routing_config: ModelRoutingConfig, + ): + super().__init__(llm, routing_llms, model_routing_config) - def __init__(self, llm_config: LLMConfig): - super().__init__() + self._validate_model_routing_config(model_routing_config, routing_llms) - judge_llm_config = copy.deepcopy(llm_config) - self.judge_llm = LLM(judge_llm_config) + self.judge_llm = routing_llms[model_routing_config.judge_llm_config_name] + self.reasoning_llm = routing_llms[ + model_routing_config.reasoning_llm_config_name + ] - def should_route_to_custom_model(self, prompt: str) -> bool: + def should_route_to(self, prompt: str) -> LLM: messages = [ { 'role': 'system', @@ -38,6 +43,26 @@ def should_route_to_custom_model(self, prompt: str) -> bool: response = self.judge_llm.completion( messages=messages, - model=self.JUDGE_MODEL, ) - return int(response['choices'][0]['message']['content'].strip()) == 1 + if int(response['choices'][0]['message']['content'].strip()) == 1: + return self.reasoning_llm + return self.llm + + def _validate_model_routing_config( + self, model_routing_config: ModelRoutingConfig, routing_llms: dict[str, LLM] + ): + if ( + not model_routing_config.judge_llm_config_name + or not model_routing_config.reasoning_llm_config_name + ): + raise ValueError( + 'Judge LLM and Reasoning LLM config names must be provided' + ) + if model_routing_config.judge_llm_config_name not in routing_llms: + raise ValueError( + f'Judge LLM config {model_routing_config.judge_llm_config_name} not found' + ) + if model_routing_config.reasoning_llm_config_name not in routing_llms: + raise ValueError( + f'Reasoning LLM config {model_routing_config.reasoning_llm_config_name} not found' + ) diff --git a/openhands/router/plan/rule_based.py b/openhands/router/plan/rule_based.py deleted file mode 100644 index cb50dfdd9924..000000000000 --- a/openhands/router/plan/rule_based.py +++ /dev/null @@ -1,11 +0,0 @@ -from openhands.router.base import BaseRouter - - -class RuleBasedPlanRouter(BaseRouter): - """ - Router that detects if the prompt contains the string "plan". 
- """ - - def should_route_to_custom_model(self, prompt: str) -> bool: - # Returns True if the prompt contains the word "plan" - return 'plan' in prompt diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py index 4990122c4e5f..d3fc02530127 100644 --- a/openhands/server/session/session.py +++ b/openhands/server/session/session.py @@ -105,6 +105,11 @@ async def initialize_agent( # TODO: override other LLM config & agent config groups (#2075) llm = self._create_llm(agent_cls) + routing_llms = {} + for config_name, routing_llm_config in self.config.routing_llms.items(): + routing_llms[config_name] = LLM( + config=routing_llm_config, + ) agent_config = self.config.get_agent_config(agent_cls) if settings.enable_default_condenser: @@ -114,7 +119,12 @@ async def initialize_agent( logger.info(f'Enabling default condenser: {default_condenser_config}') agent_config.condenser = default_condenser_config - agent = Agent.get_cls(agent_cls)(llm, agent_config) + agent = Agent.get_cls(agent_cls)( + llm=llm, + config=agent_config, + model_routing_config=self.config.model_routing, + routing_llms=routing_llms, + ) github_token = None selected_repository = None @@ -149,7 +159,6 @@ def _create_llm(self, agent_cls: str | None) -> LLM: return LLM( config=self.config.get_llm_config_from_agent(agent_cls), retry_listener=self._notify_on_llm_retry, - model_routing_config=self.config.model_routing, ) def _notify_on_llm_retry(self, retries: int, max: int) -> None: From ea4474ddc85eec5e2a1a4df0d104d4f18f1bf7c1 Mon Sep 17 00:00:00 2001 From: Hoang Tran Date: Tue, 4 Feb 2025 13:05:10 +0000 Subject: [PATCH 18/21] fix bug --- .../agenthub/codeact_agent/codeact_agent.py | 11 +++++----- openhands/core/config/utils.py | 21 +++++++++---------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py index 96419c6bdcfc..a1b87d1f0ea4 100644 --- a/openhands/agenthub/codeact_agent/codeact_agent.py +++ b/openhands/agenthub/codeact_agent/codeact_agent.py @@ -421,13 +421,12 @@ def step(self, state: State) -> Action: # check if model routing is needed if self.router: - if self.active_llm is None: - messages = self._get_messages(state) - formatted_trajectory = format_trajectory(messages) - self.active_llm = self.router.should_route_to(formatted_trajectory) + messages = self._get_messages(state) + formatted_trajectory = format_trajectory(messages) + self.active_llm = self.router.should_route_to(formatted_trajectory) - if self.active_llm != self.llm: - logger.warning(f'🧭 Routing to custom model: {self.active_llm}') + if self.active_llm != self.llm: + logger.warning(f'🧭 Routing to custom model: {self.active_llm}') else: self.active_llm = self.llm diff --git a/openhands/core/config/utils.py b/openhands/core/config/utils.py index c2bc5effaebd..24d030792862 100644 --- a/openhands/core/config/utils.py +++ b/openhands/core/config/utils.py @@ -158,16 +158,6 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'): continue llm_group_ids.add(nested_key) cfg.set_llm_config(llm_config, nested_key) - elif key is not None and key.lower() == 'model_routing': - logger.openhands_logger.debug( - 'Attempt to load model routing config from config toml' - ) - model_routing_config = ModelRoutingConfig(**value) - cfg.model_routing = model_routing_config - - logger.openhands_logger.debug( - 'Attempt to load default LLM config from config toml' - ) # Extract generic LLM fields, which are not nested LLM configs 
generic_llm_fields = {} @@ -199,13 +189,22 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'): custom_llm_config = LLMConfig(**merged_llm_dict) cfg.set_llm_config(custom_llm_config, nested_key) - elif key is not None and key.lower() == 'security': logger.openhands_logger.debug( 'Attempt to load security config from config toml' ) security_config = SecurityConfig(**value) cfg.security = security_config + elif key is not None and key.lower() == 'model_routing': + logger.openhands_logger.debug( + 'Attempt to load model routing config from config toml' + ) + model_routing_config = ModelRoutingConfig(**value) + cfg.model_routing = model_routing_config + + logger.openhands_logger.debug( + 'Attempt to load default LLM config from config toml' + ) elif not key.startswith('sandbox') and key.lower() != 'core': logger.openhands_logger.warning( f'Unknown key in {toml_file}: "{key}"' From b7a0c9585c91eb58f00b2c99211a09992cb0f7ef Mon Sep 17 00:00:00 2001 From: Hoang Tran Date: Tue, 4 Feb 2025 13:11:13 +0000 Subject: [PATCH 19/21] update config template --- config.template.toml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/config.template.toml b/config.template.toml index 7650b06334e5..6d5de0348b9e 100644 --- a/config.template.toml +++ b/config.template.toml @@ -293,7 +293,16 @@ llm_config = 'gpt3' [model_routing] # The reasoning model to use for plan generation -reasoning_model = "o1-preview-2024-09-12" +reasoning_llm_config_name = 'reasoning_model' +judge_llm_config_name = 'judge_model' + +[llm.judge_model] +model = "gpt-4o" +api_key = "" + +[llm.reasoning_model] +model = "o1" +api_key = "" #################################### Eval #################################### # Configuration for the evaluation, please refer to the specific evaluation From 7d0132f625dcdc17b7cf207bd23a707c27406bc5 Mon Sep 17 00:00:00 2001 From: Hoang Tran Date: Tue, 4 Feb 2025 16:00:07 +0000 Subject: [PATCH 20/21] add gap control --- openhands/agenthub/dummy_agent/agent.py | 2 +- openhands/router/plan/llm_based.py | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/openhands/agenthub/dummy_agent/agent.py b/openhands/agenthub/dummy_agent/agent.py index f7a654bf75b4..ad69d01936f6 100644 --- a/openhands/agenthub/dummy_agent/agent.py +++ b/openhands/agenthub/dummy_agent/agent.py @@ -46,7 +46,7 @@ class DummyAgent(Agent): without making any LLM calls. """ - def __init__(self, llm: LLM, config: AgentConfig): + def __init__(self, llm: LLM, config: AgentConfig, **kwargs): super().__init__(llm, config) self.steps: list[ActionObs] = [ { diff --git a/openhands/router/plan/llm_based.py b/openhands/router/plan/llm_based.py index 066ba8186922..85515ea23139 100644 --- a/openhands/router/plan/llm_based.py +++ b/openhands/router/plan/llm_based.py @@ -12,6 +12,8 @@ class LLMBasedPlanRouter(BaseRouter): Router that routes the prompt that is judged by a LLM as complex and requires a step-by-step plan. 
""" + NUM_TURNS_GAP = 5 + def __init__( self, llm: LLM, @@ -26,8 +28,15 @@ def __init__( self.reasoning_llm = routing_llms[ model_routing_config.reasoning_llm_config_name ] + self.routed_turns: list[int] = [] + self.cur_turn_num = 0 def should_route_to(self, prompt: str) -> LLM: + self.cur_turn_num += 1 + + if self.cur_turn_num - max(self.routed_turns, default=0) < self.NUM_TURNS_GAP: + return self.llm + messages = [ { 'role': 'system', @@ -44,7 +53,10 @@ def should_route_to(self, prompt: str) -> LLM: response = self.judge_llm.completion( messages=messages, ) - if int(response['choices'][0]['message']['content'].strip()) == 1: + should_route = int(response['choices'][0]['message']['content'].strip()) == 1 + + if should_route: + self.routed_turns.append(self.cur_turn_num) return self.reasoning_llm return self.llm From a70e97906c97bddb7945856c81c8c19d62127823 Mon Sep 17 00:00:00 2001 From: Hoang Tran Date: Thu, 6 Feb 2025 04:36:27 +0000 Subject: [PATCH 21/21] working eval --- evaluation/benchmarks/swe_bench/run_infer.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/evaluation/benchmarks/swe_bench/run_infer.py b/evaluation/benchmarks/swe_bench/run_infer.py index c4305bfb6629..2f3a612c30ec 100644 --- a/evaluation/benchmarks/swe_bench/run_infer.py +++ b/evaluation/benchmarks/swe_bench/run_infer.py @@ -1,4 +1,5 @@ import asyncio +import copy import json import os import tempfile @@ -33,6 +34,7 @@ SandboxConfig, get_llm_config_arg, get_parser, + load_from_toml, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -155,15 +157,19 @@ def get_config( metadata.llm_config, metadata.eval_output_dir, instance['instance_id'] ) ) + config_copy = copy.deepcopy(config) + load_from_toml(config_copy) agent_config = AgentConfig( codeact_enable_jupyter=False, codeact_enable_browsing=RUN_WITH_BROWSING, codeact_enable_llm_editor=False, condenser=metadata.condenser_config, enable_prompt_extensions=False, - # enable_plan_routing=True, + enable_plan_routing=config_copy.get_agent_config().enable_plan_routing, ) config.set_agent_config(agent_config) + config.routing_llms = config_copy.routing_llms + config.model_routing = config_copy.model_routing return config