From cec10a1a40fb1a0c5912602c7671d6bb7b4a285a Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Mon, 6 Jan 2025 15:40:41 +0700
Subject: [PATCH 01/21] prototype

---
 openhands/router/base.py           |  7 ++++
 openhands/router/plan/.env.example |  3 ++
 openhands/router/plan/plan.py      | 56 ++++++++++++++++++++++++++++++
 openhands/router/plan/prompts.py   |  8 +++++
 4 files changed, 74 insertions(+)
 create mode 100644 openhands/router/base.py
 create mode 100644 openhands/router/plan/.env.example
 create mode 100644 openhands/router/plan/plan.py
 create mode 100644 openhands/router/plan/prompts.py

diff --git a/openhands/router/base.py b/openhands/router/base.py
new file mode 100644
index 000000000000..0fb3578793ba
--- /dev/null
+++ b/openhands/router/base.py
@@ -0,0 +1,7 @@
+from abc import ABC, abstractmethod
+
+
+class BaseRouter(ABC):
+    @abstractmethod
+    def route(self, prompt: str) -> str:
+        pass
diff --git a/openhands/router/plan/.env.example b/openhands/router/plan/.env.example
new file mode 100644
index 000000000000..0bb7550d1314
--- /dev/null
+++ b/openhands/router/plan/.env.example
@@ -0,0 +1,3 @@
+LITELLM_API_KEY=
+LITELLM_BASE_URL=
+LITELLM_MODEL=
diff --git a/openhands/router/plan/plan.py b/openhands/router/plan/plan.py
new file mode 100644
index 000000000000..860fefac0521
--- /dev/null
+++ b/openhands/router/plan/plan.py
@@ -0,0 +1,56 @@
+import os
+from os import path
+
+from dotenv import load_dotenv
+from litellm import completion
+
+from openhands.router.base import BaseRouter
+from openhands.router.plan.prompts import ANALYZE_PROMPT
+
+# Load the environment variables
+dotenv_path = path.join(path.dirname(__file__), '.env')
+load_dotenv(dotenv_path)
+
+litellm_config = {
+    'model': os.environ['LITELLM_MODEL'],
+    'api_key': os.environ['LITELLM_API_KEY'],
+    'base_url': os.environ['LITELLM_BASE_URL'],
+}
+
+
+class PlanRouter(BaseRouter):
+    """
+    Router that routes the prompt requiring plan generation to specialized reasoning models.
+    """
+
+    def route(self, prompt: str) -> str:
+        """
+        Routes the prompt to the specialized reasoning model.
+
+        Parameters:
+        - prompt (str): the prompt to be routed
+
+        Returns:
+        - str: the response from the specialized reasoning model
+        """
+
+        raise NotImplementedError
+
+    def _requires_plan_generation(self, prompt: str) -> bool:
+        messages = []
+
+        messages.append(
+            {
+                'role': 'user',
+                'content': ANALYZE_PROMPT.format(message=prompt),
+            }
+        )
+
+        response = completion(
+            messages=messages,
+            **litellm_config,
+            temperature=0.0,
+            max_tokens=10,
+            stream=False,
+        )
+        return int(response['choices'][0]['message']['content'].strip()) == 1
diff --git a/openhands/router/plan/prompts.py b/openhands/router/plan/prompts.py
new file mode 100644
index 000000000000..e9af5d75fd03
--- /dev/null
+++ b/openhands/router/plan/prompts.py
@@ -0,0 +1,8 @@
+ANALYZE_PROMPT = """Analyze this prompt to see if it already contains a step-by-step plan or requires more detailed plan generation:
+
+---
+{message}
+---
+
+Only respond with 0 for no plan generation required or 1 for plan generation required.
+"""

From c33ba45f263393f61d00938f3d106af5addf18bd Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Thu, 9 Jan 2025 21:06:44 +0700
Subject: [PATCH 02/21] add routing config

---
 openhands/core/config/__init__.py             |  2 ++
 openhands/core/config/app_config.py           |  2 ++
 openhands/core/config/model_routing_config.py | 32 +++++++++++++++++++
 openhands/core/config/utils.py                | 12 ++++---
 openhands/core/setup.py                       |  7 ++--
 openhands/llm/llm.py                          |  3 +-
 openhands/router/plan/plan.py                 |  5 +--
 7 files changed, 52 insertions(+), 11 deletions(-)
 create mode 100644 openhands/core/config/model_routing_config.py

diff --git a/openhands/core/config/__init__.py b/openhands/core/config/__init__.py
index 2e0f87e32143..d085101681a8 100644
--- a/openhands/core/config/__init__.py
+++ b/openhands/core/config/__init__.py
@@ -6,6 +6,7 @@
     get_field_info,
 )
 from openhands.core.config.llm_config import LLMConfig
+from openhands.core.config.model_routing_config import ModelRoutingConfig
 from openhands.core.config.sandbox_config import SandboxConfig
 from openhands.core.config.security_config import SecurityConfig
 from openhands.core.config.utils import (
@@ -27,6 +28,7 @@
     'LLMConfig',
     'SandboxConfig',
     'SecurityConfig',
+    'ModelRoutingConfig',
     'load_app_config',
     'load_from_env',
     'load_from_toml',
diff --git a/openhands/core/config/app_config.py b/openhands/core/config/app_config.py
index 2dbb4aeaa8c4..6249cc56d9b9 100644
--- a/openhands/core/config/app_config.py
+++ b/openhands/core/config/app_config.py
@@ -9,6 +9,7 @@
     get_field_info,
 )
 from openhands.core.config.llm_config import LLMConfig
+from openhands.core.config.model_routing_config import ModelRoutingConfig
 from openhands.core.config.sandbox_config import SandboxConfig
 from openhands.core.config.security_config import SecurityConfig
 
@@ -51,6 +52,7 @@ class AppConfig:
     default_agent: str = OH_DEFAULT_AGENT
     sandbox: SandboxConfig = field(default_factory=SandboxConfig)
     security: SecurityConfig = field(default_factory=SecurityConfig)
+    model_routing = field(default_factory=ModelRoutingConfig)
     runtime: str = 'docker'
     file_store: str = 'local'
     file_store_path: str = '/tmp/openhands_file_store'
diff --git a/openhands/core/config/model_routing_config.py b/openhands/core/config/model_routing_config.py
new file mode 100644
index 000000000000..902a7fcaa782
--- /dev/null
+++ b/openhands/core/config/model_routing_config.py
@@ -0,0 +1,32 @@
+from dataclasses import dataclass, fields
+
+from openhands.core.config.config_utils import get_field_info
+
+
+@dataclass
+class ModelRoutingConfig:
+    reasoning_model: str = 'o1-preview-2024-09-12'
+
+    def defaults_to_dict(self) -> dict:
+        """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
+        dict = {}
+        for f in fields(self):
+            dict[f.name] = get_field_info(f)
+        return dict
+
+    def __str__(self):
+        attr_str = []
+        for f in fields(self):
+            attr_name = f.name
+            attr_value = getattr(self, f.name)
+
+            attr_str.append(f'{attr_name}={repr(attr_value)}')
+
+        return f"ModelRoutingConfig({', '.join(attr_str)})"
+
+    @classmethod
+    def from_dict(cls, model_routing_config_dict: dict) -> 'ModelRoutingConfig':
+        return cls(**model_routing_config_dict)
+
+    def __repr__(self):
+        return self.__str__()
diff --git a/openhands/core/config/utils.py b/openhands/core/config/utils.py
index 7719ce0d59b1..93f22762a2b1 100644
--- a/openhands/core/config/utils.py
+++ b/openhands/core/config/utils.py
@@ -14,11 +14,9 @@
 from openhands.core import logger
 from openhands.core.config.agent_config import AgentConfig
 from openhands.core.config.app_config import AppConfig
-from openhands.core.config.config_utils import (
-    OH_DEFAULT_AGENT,
-    OH_MAX_ITERATIONS,
-)
+from openhands.core.config.config_utils import OH_DEFAULT_AGENT, OH_MAX_ITERATIONS
 from openhands.core.config.llm_config import LLMConfig
+from openhands.core.config.model_routing_config import ModelRoutingConfig
 from openhands.core.config.sandbox_config import SandboxConfig
 from openhands.core.config.security_config import SecurityConfig
 from openhands.storage import get_file_store
@@ -141,6 +139,12 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
                     )
                     agent_config = AgentConfig(**nested_value)
                     cfg.set_agent_config(agent_config, nested_key)
+            elif key is not None and key.lower() == 'model_routing':
+                logger.openhands_logger.debug(
+                    'Attempt to load model routing config from config toml'
+                )
+                model_routing_config = ModelRoutingConfig.from_dict(value)
+                cfg.model_routing = model_routing_config
             elif key is not None and key.lower() == 'llm':
                 logger.openhands_logger.debug(
                     'Attempt to load default LLM config from config toml'
diff --git a/openhands/core/setup.py b/openhands/core/setup.py
index 28888478017a..4fde6963b76f 100644
--- a/openhands/core/setup.py
+++ b/openhands/core/setup.py
@@ -6,9 +6,7 @@
 from openhands.controller import AgentController
 from openhands.controller.agent import Agent
 from openhands.controller.state.state import State
-from openhands.core.config import (
-    AppConfig,
-)
+from openhands.core.config import AppConfig
 from openhands.core.logger import openhands_logger as logger
 from openhands.events import EventStream
 from openhands.llm.llm import LLM
@@ -61,8 +59,9 @@ def create_agent(runtime: Runtime, config: AppConfig) -> Agent:
     agent_cls: Type[Agent] = Agent.get_cls(config.default_agent)
     agent_config = config.get_agent_config(config.default_agent)
     llm_config = config.get_llm_config_from_agent(config.default_agent)
+    model_routing_config = config.model_routing
     agent = agent_cls(
-        llm=LLM(config=llm_config),
+        llm=LLM(config=llm_config, model_routing_config=model_routing_config),
        config=agent_config,
     )
     if agent.prompt_manager:
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 743d6535ba3b..1f2191717264 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -7,7 +7,7 @@
 
 import requests
 
-from openhands.core.config import LLMConfig
+from openhands.core.config import LLMConfig, ModelRoutingConfig
 
 with warnings.catch_warnings():
     warnings.simplefilter('ignore')
@@ -85,6 +85,7 @@ def __init__(
         self,
         config: LLMConfig,
         metrics: Metrics | None = None,
+        model_routing_config: ModelRoutingConfig | None = None,
     ):
         """Initializes the LLM. If LLMConfig is passed, its values will be the fallback.
 
diff --git a/openhands/router/plan/plan.py b/openhands/router/plan/plan.py
index 860fefac0521..29ca12a531ab 100644
--- a/openhands/router/plan/plan.py
+++ b/openhands/router/plan/plan.py
@@ -23,6 +23,8 @@ class PlanRouter(BaseRouter):
     Router that routes the prompt requiring plan generation to specialized reasoning models.
     """
 
+    REASONING_MODEL: str = 'o1-preview-2024-09-12'
+
     def route(self, prompt: str) -> str:
         """
         Routes the prompt to the specialized reasoning model.
@@ -33,8 +35,7 @@ def route(self, prompt: str) -> str:
         Returns:
         - str: the response from the specialized reasoning model
         """
-
-        raise NotImplementedError
+        return self.REASONING_MODEL
 
     def _requires_plan_generation(self, prompt: str) -> bool:
         messages = []

From 7b0872472aaf66e332799e7366161159dff6f57f Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Thu, 9 Jan 2025 22:06:31 +0700
Subject: [PATCH 03/21] wire up with codeact and llm

---
 .../agenthub/codeact_agent/codeact_agent.py |  3 +++
 openhands/core/config/agent_config.py       |  1 +
 openhands/llm/llm.py                        | 11 ++++++++++
 openhands/router/base.py                    |  2 +-
 openhands/router/plan/__init__.py           |  4 ++++
 .../router/plan/{plan.py => llm_based.py}   | 20 +++----------------
 openhands/router/plan/rule_based.py         | 11 ++++++++++
 7 files changed, 34 insertions(+), 18 deletions(-)
 create mode 100644 openhands/router/plan/__init__.py
 rename openhands/router/plan/{plan.py => llm_based.py} (62%)
 create mode 100644 openhands/router/plan/rule_based.py

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index d8b5702a235d..e9df80cf4ac1 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -38,6 +38,7 @@
 from openhands.events.serialization.event import truncate_content
 from openhands.llm.llm import LLM
 from openhands.memory.condenser import Condenser
+from openhands.router.plan import RuleBasedPlanRouter
 from openhands.runtime.plugins import (
     AgentSkillsRequirement,
     JupyterRequirement,
@@ -120,6 +121,8 @@ def __init__(
         self.condenser = Condenser.from_config(self.config.condenser)
         logger.debug(f'Using condenser: {self.condenser}')
 
+        self.plan_router = None if config.enable_plan_routing else RuleBasedPlanRouter()
+
     def get_action_message(
         self,
         action: Action,
diff --git a/openhands/core/config/agent_config.py b/openhands/core/config/agent_config.py
index 77e9dbc1e32d..5b3a86b33f46 100644
--- a/openhands/core/config/agent_config.py
+++ b/openhands/core/config/agent_config.py
@@ -32,6 +32,7 @@ class AgentConfig:
     use_microagents: bool = True
     disabled_microagents: list[str] | None = None
     condenser: CondenserConfig = field(default_factory=NoOpCondenserConfig)  # type: ignore
+    enable_plan_routing: bool = False
 
     def defaults_to_dict(self) -> dict:
         """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 1f2191717264..fc9c731e299d 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -101,6 +101,7 @@ def __init__(
         )
         self.cost_metric_supported: bool = True
         self.config: LLMConfig = copy.deepcopy(config)
+        self.model_routing_config = model_routing_config
 
         self.model_info: ModelInfo | None = None
 
@@ -159,6 +160,7 @@ def wrapper(*args, **kwargs):
 
             messages: list[dict[str, Any]] | dict[str, Any] = []
             mock_function_calling = kwargs.pop('mock_function_calling', False)
+            use_reasoning_model = kwargs.pop('use_reasoning_model', False)
 
             # some callers might send the model and messages directly
             # litellm allows positional args, like completion(model, messages, **kwargs)
@@ -190,6 +192,15 @@ def wrapper(*args, **kwargs):
                 kwargs['stop'] = STOP_WORDS
                 mock_fncall_tools = kwargs.pop('tools')
 
+            if use_reasoning_model:
+                if self.model_routing_config is None:
+                    raise ValueError(
+                        'Model routing config is required for model routing.'
+                    )
+
+                # Replace the model with the reasoning model
+                kwargs['model'] = self.model_routing_config.reasoning_model
+
             # if we have no messages, something went very wrong
             if not messages:
                 raise ValueError(
diff --git a/openhands/router/base.py b/openhands/router/base.py
index 0fb3578793ba..ccc7ad47f1c5 100644
--- a/openhands/router/base.py
+++ b/openhands/router/base.py
@@ -3,5 +3,5 @@
 
 class BaseRouter(ABC):
     @abstractmethod
-    def route(self, prompt: str) -> str:
+    def should_route_to_custom_model(self, prompt: str) -> bool:
         pass
diff --git a/openhands/router/plan/__init__.py b/openhands/router/plan/__init__.py
new file mode 100644
index 000000000000..845831646df8
--- /dev/null
+++ b/openhands/router/plan/__init__.py
@@ -0,0 +1,4 @@
+from openhands.router.plan.llm_based import LLMBasedPlanRouter
+from openhands.router.plan.rule_based import RuleBasedPlanRouter
+
+__all__ = ['RuleBasedPlanRouter', 'LLMBasedPlanRouter']
diff --git a/openhands/router/plan/plan.py b/openhands/router/plan/llm_based.py
similarity index 62%
rename from openhands/router/plan/plan.py
rename to openhands/router/plan/llm_based.py
index 29ca12a531ab..f2cd8efe88e6 100644
--- a/openhands/router/plan/plan.py
+++ b/openhands/router/plan/llm_based.py
@@ -18,26 +18,12 @@
 }
 
 
-class PlanRouter(BaseRouter):
+class LLMBasedPlanRouter(BaseRouter):
     """
-    Router that routes the prompt requiring plan generation to specialized reasoning models.
+    Router that routes the prompt that is judged by a LLM as complex and requires a step-by-step plan.
     """
 
-    REASONING_MODEL: str = 'o1-preview-2024-09-12'
-
-    def route(self, prompt: str) -> str:
-        """
-        Routes the prompt to the specialized reasoning model.
-
-        Parameters:
-        - prompt (str): the prompt to be routed
-
-        Returns:
-        - str: the response from the specialized reasoning model
-        """
-        return self.REASONING_MODEL
-
-    def _requires_plan_generation(self, prompt: str) -> bool:
+    def should_route_to_custom_model(self, prompt: str) -> bool:
         messages = []
 
         messages.append(
diff --git a/openhands/router/plan/rule_based.py b/openhands/router/plan/rule_based.py
new file mode 100644
index 000000000000..2e45858d88dd
--- /dev/null
+++ b/openhands/router/plan/rule_based.py
@@ -0,0 +1,11 @@
+from openhands.router.base import BaseRouter
+
+
+class RuleBasedPlanRouter(BaseRouter):
+    """
+    Router that detects if the prompt contains the word "plan" or "planning".
+    """
+
+    def should_route_to_custom_model(self, prompt: str) -> bool:
+        # Returns True if the prompt contains the word "plan" or "planning"
+        return 'plan' in prompt or 'planning' in prompt

From 910ba8cb1d94dee538c041756bbf1b2a4a979d97 Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Fri, 10 Jan 2025 15:29:50 +0700
Subject: [PATCH 04/21] fix bug

---
 .../agenthub/codeact_agent/codeact_agent.py | 16 +++++++++++++---
 openhands/core/config/app_config.py         |  2 +-
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index e9df80cf4ac1..0fa8ec30c8ff 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -381,11 +381,21 @@ def step(self, state: State) -> Action:
         if latest_user_message and latest_user_message.content.strip() == '/exit':
             return AgentFinishAction()
 
+        params: dict = {}
+
+        # check if the user requests a plan
+        if (
+            latest_user_message
+            and self.plan_router
+            and self.plan_router.should_route_to_custom_model(
+                latest_user_message.content
+            )
+        ):
+            params['use_reasoning_model'] = True
+
         # prepare what we want to send to the LLM
         messages = self._get_messages(state)
-        params: dict = {
-            'messages': self.llm.format_messages_for_llm(messages),
-        }
+        params['messages'] = (self.llm.format_messages_for_llm(messages),)
         params['tools'] = self.tools
         if self.mock_function_calling:
             params['mock_function_calling'] = True
diff --git a/openhands/core/config/app_config.py b/openhands/core/config/app_config.py
index 6249cc56d9b9..db386c1e4ea9 100644
--- a/openhands/core/config/app_config.py
+++ b/openhands/core/config/app_config.py
@@ -52,7 +52,7 @@ class AppConfig:
     default_agent: str = OH_DEFAULT_AGENT
     sandbox: SandboxConfig = field(default_factory=SandboxConfig)
     security: SecurityConfig = field(default_factory=SecurityConfig)
-    model_routing = field(default_factory=ModelRoutingConfig)
+    model_routing: ModelRoutingConfig = field(default_factory=ModelRoutingConfig)
     runtime: str = 'docker'
     file_store: str = 'local'
     file_store_path: str = '/tmp/openhands_file_store'

From b73f3ecbda5062fe2d5b6eca21e66f59e3a6887e Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Fri, 10 Jan 2025 09:32:27 +0000
Subject: [PATCH 05/21] working cli

---
 openhands/agenthub/codeact_agent/codeact_agent.py | 2 +-
 openhands/llm/llm.py                              | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index 0fa8ec30c8ff..cf642db42af9 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -395,7 +395,7 @@ def step(self, state: State) -> Action:
 
         # prepare what we want to send to the LLM
         messages = self._get_messages(state)
-        params['messages'] = (self.llm.format_messages_for_llm(messages),)
+        params['messages'] = self.llm.format_messages_for_llm(messages)
         params['tools'] = self.tools
         if self.mock_function_calling:
             params['mock_function_calling'] = True
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index fc9c731e299d..b8bc039b00ae 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -12,6 +12,7 @@
 with warnings.catch_warnings():
     warnings.simplefilter('ignore')
     import litellm
+    # litellm.set_verbose = True
     from litellm import ChatCompletionMessageToolCall, ModelInfo, PromptTokensDetails
     from litellm import Message as LiteLLMMessage

From 54d44013dfff6a0b0bcbaf5036ba52ae83439c8e Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Fri, 10 Jan 2025 09:51:04 +0000
Subject: [PATCH 06/21] update config template

---
 config.template.toml                              | 11 +++++++++++
 openhands/agenthub/codeact_agent/codeact_agent.py |  2 +-
 openhands/core/config/agent_config.py             |  1 +
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/config.template.toml b/config.template.toml
index de0ebf3a578f..2acd09ef6a7b 100644
--- a/config.template.toml
+++ b/config.template.toml
@@ -226,6 +226,9 @@ codeact_enable_jupyter = true
 # List of microagents to disable
 #disabled_microagents = []
 
+# Whether to enable plan routing to reasoning models
+#enable_plan_routing = false
+
 [agent.RepoExplorerAgent]
 # Example: use a cheaper model for RepoExplorerAgent to reduce cost, especially
 # useful when an agent doesn't demand high quality but uses a lot of tokens
@@ -276,6 +279,14 @@ llm_config = 'gpt3'
 
 # The security analyzer to use (For Headless / CLI only - In Web this is overridden by Session Init)
 #security_analyzer = ""
 
+################################ Model Routing ###############################
+# Configuration for model routing features
+##############################################################################
+[model_routing]
+
+# The reasoning model to use for plan generation
+reasoning_model = "o1-preview-2024-09-12"
+
 #################################### Eval ####################################
 # Configuration for the evaluation, please refer to the specific evaluation
 # plugin for the available options
diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index cf642db42af9..f67aa83699f7 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -121,7 +121,7 @@ def __init__(
         self.condenser = Condenser.from_config(self.config.condenser)
         logger.debug(f'Using condenser: {self.condenser}')
 
-        self.plan_router = None if config.enable_plan_routing else RuleBasedPlanRouter()
+        self.plan_router = RuleBasedPlanRouter() if config.enable_plan_routing else None
 
     def get_action_message(
         self,
diff --git a/openhands/core/config/agent_config.py b/openhands/core/config/agent_config.py
index 5b3a86b33f46..9bc192f0fed4 100644
--- a/openhands/core/config/agent_config.py
+++ b/openhands/core/config/agent_config.py
@@ -20,6 +20,7 @@ class AgentConfig:
         use_microagents: Whether to use microagents at all. Default is True.
         disabled_microagents: A list of microagents to disable. Default is None.
         condenser: Configuration for the memory condenser. Default is NoOpCondenserConfig.
+        enable_plan_routing: Whether to enable plan routing to reasoning models. Default is False.
     """
 
     codeact_enable_browsing: bool = True

From 06db2d640a24c9fb14dbf75c3ee41ce658116747 Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Fri, 10 Jan 2025 10:16:37 +0000
Subject: [PATCH 07/21] use via ui

---
 openhands/llm/llm.py                | 2 +-
 openhands/server/session/session.py | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index b8bc039b00ae..c684ed17bce7 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -649,7 +649,7 @@ def __str__(self):
             return f'LLM(model={self.config.model}, api_version={self.config.api_version}, base_url={self.config.base_url})'
         elif self.config.base_url:
             return f'LLM(model={self.config.model}, base_url={self.config.base_url})'
-        return f'LLM(model={self.config.model})'
+        return f'LLM(model={self.config.model},reasoning_model={self.model_routing_config.reasoning_model if self.model_routing_config else None})'
 
     def __repr__(self):
         return str(self)
diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py
index a481fbd27078..a8dbf88ea7da 100644
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -90,7 +90,10 @@ async def initialize_agent(
 
         # TODO: override other LLM config & agent config groups (#2075)
 
-        llm = LLM(config=self.config.get_llm_config_from_agent(agent_cls))
+        llm = LLM(
+            config=self.config.get_llm_config_from_agent(agent_cls),
+            model_routing_config=self.config.model_routing,
+        )
         agent_config = self.config.get_agent_config(agent_cls)
         agent = Agent.get_cls(agent_cls)(llm, agent_config)

From b5973cd7452452ecfaca3a3e0f7d8fe82fcc0b56 Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Fri, 10 Jan 2025 10:38:54 +0000
Subject: [PATCH 08/21] remove dotenv

---
 .../agenthub/codeact_agent/codeact_agent.py |  7 +++--
 openhands/router/plan/.env.example          |  3 --
 openhands/router/plan/llm_based.py          | 30 +++++++------------
 3 files changed, 15 insertions(+), 25 deletions(-)
 delete mode 100644 openhands/router/plan/.env.example

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index f67aa83699f7..e25ae1084036 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -38,7 +38,7 @@
 from openhands.events.serialization.event import truncate_content
 from openhands.llm.llm import LLM
 from openhands.memory.condenser import Condenser
-from openhands.router.plan import RuleBasedPlanRouter
+from openhands.router.plan import LLMBasedPlanRouter
 from openhands.runtime.plugins import (
     AgentSkillsRequirement,
     JupyterRequirement,
@@ -121,7 +121,10 @@ def __init__(
         self.condenser = Condenser.from_config(self.config.condenser)
         logger.debug(f'Using condenser: {self.condenser}')
 
-        self.plan_router = RuleBasedPlanRouter() if config.enable_plan_routing else None
+        # self.plan_router = RuleBasedPlanRouter() if config.enable_plan_routing else None
+        self.plan_router = (
+            LLMBasedPlanRouter(self.llm.config) if config.enable_plan_routing else None
+        )
 
     def get_action_message(
         self,
diff --git a/openhands/router/plan/.env.example b/openhands/router/plan/.env.example
deleted file mode 100644
index 0bb7550d1314..000000000000
--- a/openhands/router/plan/.env.example
+++ /dev/null
@@ -1,3 +0,0 @@
-LITELLM_API_KEY=
-LITELLM_BASE_URL=
-LITELLM_MODEL=
diff --git a/openhands/router/plan/llm_based.py b/openhands/router/plan/llm_based.py
index f2cd8efe88e6..a1f2ce41b797 100644
--- a/openhands/router/plan/llm_based.py
+++ b/openhands/router/plan/llm_based.py
@@ -1,28 +1,22 @@
-import os
-from os import path
-
-from dotenv import load_dotenv
-from litellm import completion
+import copy
 
+from openhands.core.config import LLMConfig
+from openhands.llm.llm import LLM
 from openhands.router.base import BaseRouter
 from openhands.router.plan.prompts import ANALYZE_PROMPT
 
-# Load the environment variables
-dotenv_path = path.join(path.dirname(__file__), '.env')
-load_dotenv(dotenv_path)
-
-litellm_config = {
-    'model': os.environ['LITELLM_MODEL'],
-    'api_key': os.environ['LITELLM_API_KEY'],
-    'base_url': os.environ['LITELLM_BASE_URL'],
-}
-
 
 class LLMBasedPlanRouter(BaseRouter):
     """
     Router that routes the prompt that is judged by a LLM as complex and requires a step-by-step plan.
     """
 
+    def __init__(self, llm_config: LLMConfig):
+        super().__init__()
+
+        judge_llm_config = copy.deepcopy(llm_config)
+        self.judge_llm = LLM(judge_llm_config)
+
     def should_route_to_custom_model(self, prompt: str) -> bool:
         messages = []
 
@@ -33,11 +27,7 @@ def should_route_to_custom_model(self, prompt: str) -> bool:
             }
         )
 
-        response = completion(
+        response = self.judge_llm.completion(
             messages=messages,
-            **litellm_config,
-            temperature=0.0,
-            max_tokens=10,
-            stream=False,
         )
         return int(response['choices'][0]['message']['content'].strip()) == 1

From e3c8a9ed28c55697889609613b4bb2490bbb0b89 Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Fri, 10 Jan 2025 12:49:50 +0000
Subject: [PATCH 09/21] update judge prompt

---
 openhands/router/plan/llm_based.py |  3 +++
 openhands/router/plan/prompts.py   | 38 +++++++++++++++++++++++++++---
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/openhands/router/plan/llm_based.py b/openhands/router/plan/llm_based.py
index a1f2ce41b797..365cbee3f985 100644
--- a/openhands/router/plan/llm_based.py
+++ b/openhands/router/plan/llm_based.py
@@ -11,6 +11,8 @@ class LLMBasedPlanRouter(BaseRouter):
     Router that routes the prompt that is judged by a LLM as complex and requires a step-by-step plan.
     """
 
+    JUDGE_MODEL = 'gpt-4o'
+
     def __init__(self, llm_config: LLMConfig):
         super().__init__()
 
@@ -29,5 +31,6 @@ def should_route_to_custom_model(self, prompt: str) -> bool:
 
         response = self.judge_llm.completion(
             messages=messages,
+            model=self.JUDGE_MODEL,
         )
         return int(response['choices'][0]['message']['content'].strip()) == 1
diff --git a/openhands/router/plan/prompts.py b/openhands/router/plan/prompts.py
index e9af5d75fd03..e06d7b61843b 100644
--- a/openhands/router/plan/prompts.py
+++ b/openhands/router/plan/prompts.py
@@ -1,8 +1,40 @@
-ANALYZE_PROMPT = """Analyze this prompt to see if it already contains a step-by-step plan or requires more detailed plan generation:
+ANALYZE_PROMPT = """Analyze this prompt to see if it already contains a step-by-step plan or requires more detailed plan generation.
 
----
+Some example scenarios that require generating a step-by-step plan:
+
+1. Structured Rule-Based Tasks with Well-Defined Constraints
+   * Example: In a synthetic task, adhering to a sequence like loosening nuts before removing wheels is critical
+
+2. Tasks Requiring Step-by-Step Reasoning to plan a structured chain of actions
+   * Example: In a synthetic task, objects must be manipulated in a sequence to achieve a configuration
+
+3. Scenarios with Limited Resources or Strict Constraints
+   * Tasks that require resource-sensitive planning, such as minimizing actions or handling tools efficiently
+   * Example: In a synthetic task, we need to efficiently coordinate robot actions across rooms and minimize energy consumption costs
+
+4. Generalization in Familiar Symbolic Representations
+   * Tasks where the rules remain consistent, and the specific instances change.
+   * Example: When we need to adapt strategies to new but structured instances of tasks.
+
+5. Requests Requiring Self-Evaluation
+   * Self-evaluation mechanism enables the identification and correction of errors mid-process.
+   * Example: When we need to reevaluate actions and adjust plans or actions based on constraints.
+
+In context of software engineering, below are some scenarios where plan generation is required:
+
+1. Dependency and Workflow Management
+    * Automating and optimizing CI/CD pipelines, build processes, and package dependency resolution.
+    * Example: Resolving complex dependency graphs or sequencing multi-step deployments.
+2. Code Refactoring and Debugging
+    * Planning systematic changes for refactoring large codebases and isolating root causes during debugging.
+    * Example: Refactoring monolithic code into modular components while preserving functionality.
+3. Infrastructure and Resource Planning
+    * Designing and optimizing Infrastructure as Code (IaC) changes and dynamic resource allocation.
+    * Example: Planning cloud resource provisioning while adhering to dependency constraints.
+
+=== BEGIN USER MESSAGE ===
 {message}
----
+=== END USER MESSAGE ===
 
 Only respond with 0 for no plan generation required or 1 for plan generation required.
 """

From 27a83dbd3ad879ea6c8674fd9fe9901f31f3f67f Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Fri, 10 Jan 2025 12:53:57 +0000
Subject: [PATCH 10/21] update prompt

---
 openhands/llm/llm.py             | 1 -
 openhands/router/plan/prompts.py | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index c684ed17bce7..2e6ccd9960c7 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -12,7 +12,6 @@
 with warnings.catch_warnings():
     warnings.simplefilter('ignore')
     import litellm
-    # litellm.set_verbose = True
     from litellm import ChatCompletionMessageToolCall, ModelInfo, PromptTokensDetails
     from litellm import Message as LiteLLMMessage
diff --git a/openhands/router/plan/prompts.py b/openhands/router/plan/prompts.py
index e06d7b61843b..154bd1180941 100644
--- a/openhands/router/plan/prompts.py
+++ b/openhands/router/plan/prompts.py
@@ -31,6 +31,8 @@
 3. Infrastructure and Resource Planning
     * Designing and optimizing Infrastructure as Code (IaC) changes and dynamic resource allocation.
     * Example: Planning cloud resource provisioning while adhering to dependency constraints.
+4. High-level Requirements to Low-level Implementation Mapping
+   * Translating high-level requirements into detailed implementation steps and ensuring consistency.
 
 === BEGIN USER MESSAGE ===
 {message}

From 6f86ad9841e71660869ca128d0168d6d1bd17583 Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Fri, 10 Jan 2025 13:08:31 +0000
Subject: [PATCH 11/21] update prompt

---
 openhands/router/plan/prompts.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/router/plan/prompts.py b/openhands/router/plan/prompts.py
index 154bd1180941..9bea73e261af 100644
--- a/openhands/router/plan/prompts.py
+++ b/openhands/router/plan/prompts.py
@@ -1,4 +1,4 @@
-ANALYZE_PROMPT = """Analyze this prompt to see if it already contains a step-by-step plan or requires more detailed plan generation.
+ANALYZE_PROMPT = """Analyze this prompt to see if it requires a detailed plan generation.
 
 Some example scenarios that require generating a step-by-step plan:
 

From ec2d16262cb2d2172084ae71891687350247a350 Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Fri, 10 Jan 2025 13:22:38 +0000
Subject: [PATCH 12/21] adjust rule-based router

---
 openhands/router/plan/rule_based.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/openhands/router/plan/rule_based.py b/openhands/router/plan/rule_based.py
index 2e45858d88dd..cb50dfdd9924 100644
--- a/openhands/router/plan/rule_based.py
+++ b/openhands/router/plan/rule_based.py
@@ -3,9 +3,9 @@
 
 class RuleBasedPlanRouter(BaseRouter):
     """
-    Router that detects if the prompt contains the word "plan" or "planning".
+    Router that detects if the prompt contains the string "plan".
     """
 
     def should_route_to_custom_model(self, prompt: str) -> bool:
-        # Returns True if the prompt contains the word "plan" or "planning"
-        return 'plan' in prompt or 'planning' in prompt
+        # Returns True if the prompt contains the word "plan"
+        return 'plan' in prompt

From 9bf5a7f225db552a0f564f2d79ee755e8925e12f Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Sun, 12 Jan 2025 14:19:45 +0000
Subject: [PATCH 13/21] fix indentation

---
 .../agenthub/codeact_agent/codeact_agent.py |  1 -
 openhands/llm/llm.py                        |  2 ++
 openhands/router/plan/llm_based.py          |  4 +--
 openhands/router/plan/prompts.py            | 34 +++++++++++++++----
 openhands/utils/trajectory.py               |  4 +++
 5 files changed, 36 insertions(+), 9 deletions(-)
 create mode 100644 openhands/utils/trajectory.py

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index e25ae1084036..7d22eb50c51e 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -121,7 +121,6 @@ def __init__(
         self.condenser = Condenser.from_config(self.config.condenser)
         logger.debug(f'Using condenser: {self.condenser}')
 
-        # self.plan_router = RuleBasedPlanRouter() if config.enable_plan_routing else None
         self.plan_router = (
             LLMBasedPlanRouter(self.llm.config) if config.enable_plan_routing else None
         )
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 2e6ccd9960c7..d8fd1b018eb3 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -12,6 +12,7 @@
 with warnings.catch_warnings():
     warnings.simplefilter('ignore')
     import litellm
+    # litellm.set_verbose = True
     from litellm import ChatCompletionMessageToolCall, ModelInfo, PromptTokensDetails
     from litellm import Message as LiteLLMMessage
@@ -94,6 +95,7 @@ def __init__(
         Args:
             config: The LLM configuration.
             metrics: The metrics to use.
+            model_routing_config: The model routing configuration.
         """
         self._tried_model_info = False
         self.metrics: Metrics = (
diff --git a/openhands/router/plan/llm_based.py b/openhands/router/plan/llm_based.py
index 365cbee3f985..ba8a8b4315a5 100644
--- a/openhands/router/plan/llm_based.py
+++ b/openhands/router/plan/llm_based.py
@@ -3,7 +3,7 @@
 from openhands.core.config import LLMConfig
 from openhands.llm.llm import LLM
 from openhands.router.base import BaseRouter
-from openhands.router.plan.prompts import ANALYZE_PROMPT
+from openhands.router.plan.prompts import USER_MESSAGE_PLANNING_ANALYZE_PROMPT
 
 
 class LLMBasedPlanRouter(BaseRouter):
@@ -25,7 +25,7 @@ def should_route_to_custom_model(self, prompt: str) -> bool:
         messages.append(
             {
                 'role': 'user',
-                'content': ANALYZE_PROMPT.format(message=prompt),
+                'content': USER_MESSAGE_PLANNING_ANALYZE_PROMPT.format(message=prompt),
             }
         )
 
diff --git a/openhands/router/plan/prompts.py b/openhands/router/plan/prompts.py
index 9bea73e261af..49ae5fed182f 100644
--- a/openhands/router/plan/prompts.py
+++ b/openhands/router/plan/prompts.py
@@ -1,4 +1,8 @@
-ANALYZE_PROMPT = """Analyze this prompt to see if it requires a detailed plan generation.
+############################################
+########  PLAN GENERATION PROMPTS   ########
+############################################
+
+USER_MESSAGE_PLANNING_ANALYZE_PROMPT = """Analyze this prompt to see if it requires a detailed plan generation.
 
 Some example scenarios that require generating a step-by-step plan:
 
@@ -24,13 +28,13 @@
 
 1. Dependency and Workflow Management
    * Automating and optimizing CI/CD pipelines, build processes, and package dependency resolution.
-    * Example: Resolving complex dependency graphs or sequencing multi-step deployments.
+   * Example: Resolving complex dependency graphs or sequencing multi-step deployments.
 2. Code Refactoring and Debugging
-    * Planning systematic changes for refactoring large codebases and isolating root causes during debugging.
-    * Example: Refactoring monolithic code into modular components while preserving functionality.
+   * Planning systematic changes for refactoring large codebases and isolating root causes during debugging.
+   * Example: Refactoring monolithic code into modular components while preserving functionality.
 3. Infrastructure and Resource Planning
-    * Designing and optimizing Infrastructure as Code (IaC) changes and dynamic resource allocation.
-    * Example: Planning cloud resource provisioning while adhering to dependency constraints.
+   * Designing and optimizing Infrastructure as Code (IaC) changes and dynamic resource allocation.
+   * Example: Planning cloud resource provisioning while adhering to dependency constraints.
 4. High-level Requirements to Low-level Implementation Mapping
    * Translating high-level requirements into detailed implementation steps and ensuring consistency.
 
@@ -40,3 +44,21 @@
 
 Only respond with 0 for no plan generation required or 1 for plan generation required.
 """
+
+############################################
+########  REASONING JUDGE PROMPTS   ########
+############################################
+
+TRAJECTORY_JUDGE_REASONING_SYSTEM_PROMPT = """You are an expert judge evaluating AI assistant interactions. Your task is to determine if:
+- the AI assistant is struggling with some issues when performing the task and needs help from a human expert to guide it
+- the next step is complex and needs to be carefully reasoned to solve e.g. identifying a hard-to-find bug in a codebase
+
+Respond only with 0 if the AI assistant is not struggling or the task is not complex. Otherwise, respond with 1."""
+
+TRAJECTORY_JUDGE_REASONING_USER_PROMPT = """Please evaluate the following interaction (or part of the recent interaction) between an AI assistant and a user:
+
+=== INTERACTION LOG ===
+{interaction}
+=== END INTERACTION ===
+
+Based on the above interaction, do we need to provide additional guidance to the AI assistant or is the task complex and requires careful reasoning to solve? Respond with 0 if no guidance is needed or the task is not complex. Otherwise, respond with 1."""
diff --git a/openhands/utils/trajectory.py b/openhands/utils/trajectory.py
new file mode 100644
index 000000000000..5c83d839c26e
--- /dev/null
+++ b/openhands/utils/trajectory.py
@@ -0,0 +1,4 @@
+"""
+Utilities for formatting OpenHands's raw LLM completion.
+Original code from: https://github.com/SWE-Gym/SWE-Gym/blob/main/scripts/openhands-verifier/aggregate_stats_pass_at_n.ipynb
+"""

From 8e05f3f12c27e3162c79da6eddd1dea0abd625dd Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Sun, 12 Jan 2025 15:31:57 +0000
Subject: [PATCH 14/21] use full trajectory

---
 .../agenthub/codeact_agent/codeact_agent.py |  21 ++--
 openhands/llm/llm.py                        |   1 -
 openhands/router/plan/llm_based.py          |  21 ++--
 openhands/router/plan/prompts.py            |   2 +-
 openhands/utils/trajectory.py               | 114 +++++++++++++-
 5 files changed, 138 insertions(+), 21 deletions(-)

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index 7d22eb50c51e..a9e01dd3b292 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -45,6 +45,7 @@
     PluginRequirement,
 )
 from openhands.utils.prompt import PromptManager
+from openhands.utils.trajectory import format_trajectory
 
 
 class CodeActAgent(Agent):
@@ -385,19 +386,19 @@ def step(self, state: State) -> Action:
 
         params: dict = {}
 
-        # check if the user requests a plan
-        if (
-            latest_user_message
-            and self.plan_router
-            and self.plan_router.should_route_to_custom_model(
-                latest_user_message.content
-            )
-        ):
-            params['use_reasoning_model'] = True
-
         # prepare what we want to send to the LLM
         messages = self._get_messages(state)
-        params['messages'] = self.llm.format_messages_for_llm(messages)
+        messages_dict = self.llm.format_messages_for_llm(messages)
+        params['messages'] = messages_dict
+
+        formatted_trajectory = format_trajectory(messages_dict)
+
+        # check if model routing is needed
+        if self.plan_router and self.plan_router.should_route_to_custom_model(
+            formatted_trajectory
+        ):
+            params['use_reasoning_model'] = True
+
         params['tools'] = self.tools
         if self.mock_function_calling:
             params['mock_function_calling'] = True
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index d8fd1b018eb3..743eb1dd13c5 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -12,7 +12,6 @@
 with warnings.catch_warnings():
     warnings.simplefilter('ignore')
     import litellm
-    # litellm.set_verbose = True
     from litellm import ChatCompletionMessageToolCall, ModelInfo, PromptTokensDetails
     from litellm import Message as LiteLLMMessage
diff --git a/openhands/router/plan/llm_based.py b/openhands/router/plan/llm_based.py
index ba8a8b4315a5..8ada3d537980 100644
--- a/openhands/router/plan/llm_based.py
+++ b/openhands/router/plan/llm_based.py
@@ -3,7 +3,10 @@
 from openhands.core.config import LLMConfig
 from openhands.llm.llm import LLM
 from openhands.router.base import BaseRouter
-from openhands.router.plan.prompts import USER_MESSAGE_PLANNING_ANALYZE_PROMPT
+from openhands.router.plan.prompts import (
+    TRAJECTORY_JUDGE_REASONING_SYSTEM_PROMPT,
+    TRAJECTORY_JUDGE_REASONING_USER_PROMPT,
+)
 
 
 class LLMBasedPlanRouter(BaseRouter):
@@ -20,14 +23,18 @@ def __init__(self, llm_config: LLMConfig):
         self.judge_llm = LLM(judge_llm_config)
 
     def should_route_to_custom_model(self, prompt: str) -> bool:
-        messages = []
-
-        messages.append(
+        messages = [
+            {
+                'role': 'system',
+                'content': TRAJECTORY_JUDGE_REASONING_SYSTEM_PROMPT,
+            },
             {
                 'role': 'user',
-                'content': USER_MESSAGE_PLANNING_ANALYZE_PROMPT.format(message=prompt),
-            }
-        )
+                'content': TRAJECTORY_JUDGE_REASONING_USER_PROMPT.format(
+                    interaction_log=prompt
+                ),
+            },
+        ]
 
         response = self.judge_llm.completion(
             messages=messages,
diff --git a/openhands/router/plan/prompts.py b/openhands/router/plan/prompts.py
index 49ae5fed182f..90ecc336e8f5 100644
--- a/openhands/router/plan/prompts.py
+++ b/openhands/router/plan/prompts.py
@@ -58,7 +58,7 @@
 TRAJECTORY_JUDGE_REASONING_USER_PROMPT = """Please evaluate the following interaction (or part of the recent interaction) between an AI assistant and a user:
 
 === INTERACTION LOG ===
-{interaction}
+{interaction_log}
 === END INTERACTION ===
 
 Based on the above interaction, do we need to provide additional guidance to the AI assistant or is the task complex and requires careful reasoning to solve? Respond with 0 if no guidance is needed or the task is not complex. Otherwise, respond with 1."""
diff --git a/openhands/utils/trajectory.py b/openhands/utils/trajectory.py
index 5c83d839c26e..4d2e233292b1 100644
--- a/openhands/utils/trajectory.py
+++ b/openhands/utils/trajectory.py
@@ -1,4 +1,114 @@
-"""
-Utilities for formatting OpenHands's raw LLM completion.
+"""Utility functions for processing and formatting trajectories.
 Original code from: https://github.com/SWE-Gym/SWE-Gym/blob/main/scripts/openhands-verifier/aggregate_stats_pass_at_n.ipynb
 """
+
+import json
+
+
+def _convert_content(content) -> str:
+    ret = ''
+    if isinstance(content, list):
+        for item in content:
+            assert item['type'] == 'text', 'Only text is supported for now'
+            ret += f'{item["text"]}\n'
+    else:
+        assert isinstance(content, str), 'Only str is supported for now'
+        ret = content
+    return ret
+
+
+def _convert_tool_call_to_string(tool_call) -> str:
+    """Convert tool call to content in string format."""
+    if 'function' not in tool_call:
+        raise ValueError("Tool call must contain 'function' key.")
+    if 'id' not in tool_call:
+        raise ValueError("Tool call must contain 'id' key.")
+    if 'type' not in tool_call:
+        raise ValueError("Tool call must contain 'type' key.")
+    if tool_call['type'] != 'function':
+        raise ValueError("Tool call type must be 'function'.")
+
+    ret = f"<function={tool_call['function']['name']}>\n"
+    try:
+        args = json.loads(tool_call['function']['arguments'])
+    except json.JSONDecodeError as e:
+        raise ValueError(
+            f"Failed to parse arguments as JSON. Arguments: {tool_call['function']['arguments']}"
+        ) from e
+    for param_name, param_value in args.items():
+        is_multiline = isinstance(param_value, str) and '\n' in param_value
+        ret += f'<parameter={param_name}>'
+        if is_multiline:
+            ret += '\n'
+        ret += f'{param_value}'
+        if is_multiline:
+            ret += '\n'
+        ret += '</parameter>\n'
+    ret += '</function>'
+    return ret
+
+
+def format_trajectory(traj: list[dict]) -> str:
+    output = ''
+    system_message = None
+
+    # Handle system message if present
+    if traj[0]['role'] == 'system':
+        system_message = traj[0]
+        traj = traj[1:]
+        content = _convert_content(system_message['content'])
+        output += "*** System Message that describes the assistant's behavior ***\n"
+        output += f'{content}\n'
+
+    # Merge consecutive user messages first
+    merged_traj = []
+    current_messages = []
+
+    for i, message in enumerate(traj):
+        if message['role'] == 'user':
+            current_messages.append(message)
+        else:
+            if current_messages:
+                # Merge all accumulated user messages into one
+                merged_content = '\n'.join(
+                    _convert_content(msg['content']) for msg in current_messages
+                )
+                merged_traj.append({'role': 'user', 'content': merged_content})
+                current_messages = []
+            merged_traj.append(message)
+
+    # Don't forget to handle any remaining user messages
+    if current_messages:
+        merged_content = '\n'.join(
+            _convert_content(msg['content']) for msg in current_messages
+        )
+        merged_traj.append({'role': 'user', 'content': merged_content})
+
+    # Now process the merged trajectory
+    for i, message in enumerate(merged_traj):
+        role = message['role']
+        content_: str | list = message['content']
+        content = _convert_content(content_) if isinstance(content_, list) else content_
+        turn_id = i // 2 + 1
+        output += '-' * 100 + '\n'
+        output += f'*** Turn {turn_id} - {role.upper() if role != "tool" else "TOOL EXECUTION RESULT"} ***\n'
+
+        if role == 'user':
+            output += f'{content}\n'
+        elif role == 'tool':
+            output += f'{content}\n'
+        elif role == 'assistant':
+            output += f'{content}\n'
+            if (
+                'tool_calls' in message
+                and message['tool_calls'] is not None
+                and len(message['tool_calls']) > 0
+            ):
+                for toolcall_id, tool_call in enumerate(message['tool_calls']):
+                    output += f'### Tool Call {toolcall_id}\n'
+                    output += f'{_convert_tool_call_to_string(tool_call)}\n'
+        else:
+            raise ValueError(f'Unexpected role: {role}')
+
+    output += '-' * 100 + '\n'
+    return output

From ddc324831b89c52368aba66d69969773a6ee69b0 Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Tue, 14 Jan 2025 13:51:48 +0700
Subject: [PATCH 15/21] refactor traj formatter and add tests

---
 .../agenthub/codeact_agent/codeact_agent.py |   2 +-
 openhands/utils/trajectory.py               | 133 ++++++++----------
 tests/unit/test_trajectory_formatter.py     | 117 +++++++++++++++
 3 files changed, 178 insertions(+), 74 deletions(-)
 create mode 100644 tests/unit/test_trajectory_formatter.py

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index a9e01dd3b292..6f43f4397b14 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -391,7 +391,7 @@ def step(self, state: State) -> Action:
         messages_dict = self.llm.format_messages_for_llm(messages)
         params['messages'] = messages_dict
 
-        formatted_trajectory = format_trajectory(messages_dict)
+        formatted_trajectory = format_trajectory(messages)
 
         # check if model routing is needed
         if self.plan_router and self.plan_router.should_route_to_custom_model(
diff --git a/openhands/utils/trajectory.py b/openhands/utils/trajectory.py
index 4d2e233292b1..bba09729f120 100644
--- a/openhands/utils/trajectory.py
+++ b/openhands/utils/trajectory.py
@@ -1,112 +1,99 @@
-"""Utility functions for processing and formatting trajectories.
+"""
+Utility functions for processing and formatting trajectories.
 Original code from: https://github.com/SWE-Gym/SWE-Gym/blob/main/scripts/openhands-verifier/aggregate_stats_pass_at_n.ipynb
 """
 
 import json
 
+from litellm import ChatCompletionMessageToolCall
+
+from openhands.core.message import ImageContent, Message, TextContent
+
+
+def convert_content(content: list[TextContent | ImageContent]) -> str:
+    """Converts a list of message content to a single string."""
+    return '\n'.join(item.text for item in content if item.type == 'text')
+
 
-def _convert_content(content) -> str:
-    ret = ''
-    if isinstance(content, list):
-        for item in content:
-            assert item['type'] == 'text', 'Only text is supported for now'
-            ret += f'{item["text"]}\n'
-    else:
-        assert isinstance(content, str), 'Only str is supported for now'
-        ret = content
-    return ret
-
-
-def _convert_tool_call_to_string(tool_call) -> str:
-    """Convert tool call to content in string format."""
-    if 'function' not in tool_call:
-        raise ValueError("Tool call must contain 'function' key.")
-    if 'id' not in tool_call:
-        raise ValueError("Tool call must contain 'id' key.")
-    if 'type' not in tool_call:
-        raise ValueError("Tool call must contain 'type' key.")
-    if tool_call['type'] != 'function':
-        raise ValueError("Tool call type must be 'function'.")
-
-    ret = f"<function={tool_call['function']['name']}>\n"
+def convert_tool_call_to_string(tool_call: ChatCompletionMessageToolCall) -> str:
+    """Converts tool call arguments to a string representation."""
     try:
-        args = json.loads(tool_call['function']['arguments'])
+        args = json.loads(tool_call.function.arguments)
     except json.JSONDecodeError as e:
         raise ValueError(
             f"Failed to parse arguments as JSON. Arguments: {tool_call['function']['arguments']}"
         ) from e
+
+    tool_call_str = f'<function={tool_call.function.name}>\n'
     for param_name, param_value in args.items():
-        is_multiline = isinstance(param_value, str) and '\n' in param_value
-        ret += f'<parameter={param_name}>'
-        if is_multiline:
-            ret += '\n'
-        ret += f'{param_value}'
-        if is_multiline:
-            ret += '\n'
-        ret += '</parameter>\n'
-    ret += '</function>'
-    return ret
-
-
-def format_trajectory(traj: list[dict]) -> str:
-    output = ''
-    system_message = None
+        is_multiline_value = isinstance(param_value, str) and '\n' in param_value
+        param_value = '\n' + param_value + '\n' if is_multiline_value else param_value
+        tool_call_str += f'<parameter={param_name}>{param_value}</parameter>\n'
+    tool_call_str += '</function>'
+    return tool_call_str
 
-    # Handle system message if present
-    if traj[0]['role'] == 'system':
-        system_message = traj[0]
-        traj = traj[1:]
-        content = _convert_content(system_message['content'])
-        output += "*** System Message that describes the assistant's behavior ***\n"
-        output += f'{content}\n'
 
-    # Merge consecutive user messages first
+def merge_user_messages(traj: list[Message]) -> list[Message]:
+    """Merges consecutive user messages into a single message."""
     merged_traj = []
     current_messages = []
 
-    for i, message in enumerate(traj):
-        if message['role'] == 'user':
+    for message in traj:
+        if message.role == 'user':
             current_messages.append(message)
         else:
             if current_messages:
-                # Merge all accumulated user messages into one
                 merged_content = '\n'.join(
-                    _convert_content(msg['content']) for msg in current_messages
+                    convert_content(msg.content) for msg in current_messages
+                )
+                merged_traj.append(
+                    Message(role='user', content=[TextContent(text=merged_content)])
                 )
-                merged_traj.append({'role': 'user', 'content': merged_content})
                 current_messages = []
             merged_traj.append(message)
 
-    # Don't forget to handle any remaining user messages
     if current_messages:
         merged_content = '\n'.join(
-            _convert_content(msg['content']) for msg in current_messages
+            convert_content(msg.content) for msg in current_messages
+        )
+        merged_traj.append(
+            Message(role='user', content=[TextContent(text=merged_content)])
         )
-        merged_traj.append({'role': 'user', 'content': merged_content})
 
-    # Now process the merged trajectory
+    return merged_traj
+
+
+def format_trajectory(traj: list[Message]) -> str:
+    """Formats the message trajectory into a human-readable string."""
+    output = ''
+    system_message = None
+
+    if traj:
+        # Handle system message if present
+        if traj[0].role == 'system':
+            system_message = traj[0]
+            traj = traj[1:]
+            content = convert_content(system_message.content)
+            output += "*** System Message that describes the assistant's behavior ***\n"
+            output += f'{content}\n'
+
+    # Merge consecutive user messages
+    merged_traj = merge_user_messages(traj)
+
+    # Process the merged trajectory
     for i, message in enumerate(merged_traj):
-        role = message['role']
-        content_: str | list = message['content']
-        content = _convert_content(content_) if isinstance(content_, list) else content_
+        role = message.role
+        content = convert_content(message.content)
         turn_id = i // 2 + 1
         output += '-' * 100 + '\n'
         output += f'*** Turn {turn_id} - {role.upper() if role != "tool" else "TOOL EXECUTION RESULT"} ***\n'
 
-        if role == 'user':
-            output += f'{content}\n'
-        elif role == 'tool':
-            output += f'{content}\n'
-        elif role == 'assistant':
+        if role == 'user' or role == 'tool' or role == 'assistant':
             output += f'{content}\n'
-            if (
-                'tool_calls' in message
-                and message['tool_calls'] is not None
-                and len(message['tool_calls']) > 0
-            ):
-                for toolcall_id, tool_call in enumerate(message['tool_calls']):
+            if role == 'assistant' and message.tool_calls:
+                for toolcall_id, tool_call in enumerate(message.tool_calls):
                     output += f'### Tool Call {toolcall_id}\n'
-                    output += f'{_convert_tool_call_to_string(tool_call)}\n'
+                    output += f'{convert_tool_call_to_string(tool_call)}\n'
         else:
             raise ValueError(f'Unexpected role: {role}')
 
     output += '-' * 100 + '\n'
     return output
diff --git a/tests/unit/test_trajectory_formatter.py b/tests/unit/test_trajectory_formatter.py
new file mode 100644
index 000000000000..9dcbaa6cd08c
--- /dev/null
+++ b/tests/unit/test_trajectory_formatter.py
@@ -0,0 +1,117 @@
+import pytest
+from litellm import ChatCompletionMessageToolCall
+
+from openhands.core.message import Message, TextContent
+from openhands.utils.trajectory import format_trajectory
+
+
+# Helper function to create a mock ChatCompletionMessageToolCall
+def create_mock_tool_call(name: str, arguments: str):
+    return ChatCompletionMessageToolCall(
+        function={'name': name, 'arguments': arguments}
+    )
+
+
+def test_empty_trajectory():
+    traj = []
+    assert (
+        format_trajectory(traj)
+        == """----------------------------------------------------------------------------------------------------
+"""
+    )
+
+
+def test_system_message_only():
+    traj = [
+        Message(
+            role='system', content=[TextContent(text='System behavior description.')]
+        )
+    ]
+    expected_output = """*** System Message that describes the assistant's behavior ***
+System behavior description.
+----------------------------------------------------------------------------------------------------
+"""
+    assert format_trajectory(traj) == expected_output
+
+
+def test_user_messages_only():
+    traj = [
+        Message(
+            role='user',
+            content=[TextContent(text='Hello.'), TextContent(text='How are you?')],
+        )
+    ]
+    expected_output = """----------------------------------------------------------------------------------------------------
+*** Turn 1 - USER ***
+Hello.
+How are you?
+----------------------------------------------------------------------------------------------------
+"""
+    assert format_trajectory(traj) == expected_output
+
+
+def test_mixed_messages():
+    traj = [
+        Message(
+            role='system', content=[TextContent(text='System behavior description.')]
+        ),
+        Message(role='user', content=[TextContent(text='Hello.')]),
+        Message(role='assistant', content=[TextContent(text='Hi there!')]),
+        Message(role='user', content=[TextContent(text='你好')]),
+        Message(role='assistant', content=[TextContent(text='你好')]),
+    ]
+    expected_output = """*** System Message that describes the assistant's behavior ***
+System behavior description.
+----------------------------------------------------------------------------------------------------
+*** Turn 1 - USER ***
+Hello.
+----------------------------------------------------------------------------------------------------
+*** Turn 1 - ASSISTANT ***
+Hi there!
+----------------------------------------------------------------------------------------------------
+*** Turn 2 - USER ***
+你好
+----------------------------------------------------------------------------------------------------
+*** Turn 2 - ASSISTANT ***
+你好
+----------------------------------------------------------------------------------------------------
+"""
+    assert format_trajectory(traj) == expected_output
+
+
+def test_tool_call_handling():
+    tool_call = create_mock_tool_call(
+        name='fn', arguments='{"param1": "value1", "param2": "value2"}'
+    )
+    traj = [
+        Message(
+            role='assistant',
+            content=[TextContent(text='Running the tool.')],
+            tool_calls=[tool_call],
+        )
+    ]
+    expected_output = """----------------------------------------------------------------------------------------------------
+*** Turn 1 - ASSISTANT ***
+Running the tool.
+### Tool Call 0
+<function=fn>
+<parameter=param1>value1</parameter>
+<parameter=param2>value2</parameter>
+</function>
+----------------------------------------------------------------------------------------------------
+"""
+    print(format_trajectory(traj))
+    assert format_trajectory(traj) == expected_output
+
+
+def test_invalid_tool_call():
+    tool_call = create_mock_tool_call(name='fn', arguments='invalid json')
+    traj = [
+        Message(
+            role='assistant',
+            content=[TextContent(text='Running the tool.')],
+            tool_calls=[tool_call],
+        )
+    ]
+    with pytest.raises(ValueError, match='Failed to parse arguments as JSON'):
+        format_trajectory(traj)

From 472d95cdb15a446d7cd8f0a8cf239c4887b4192f Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Tue, 14 Jan 2025 14:00:24 +0700
Subject: [PATCH 16/21] add o1 to fn calling models

---
 evaluation/benchmarks/swe_bench/run_infer.py      |  1 +
 openhands/agenthub/codeact_agent/codeact_agent.py | 15 +++++++--------
 openhands/llm/llm.py                              |  1 +
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/evaluation/benchmarks/swe_bench/run_infer.py b/evaluation/benchmarks/swe_bench/run_infer.py
index bf065ada9734..6fee7f6da395 100644
--- a/evaluation/benchmarks/swe_bench/run_infer.py
+++ b/evaluation/benchmarks/swe_bench/run_infer.py
@@ -150,6 +150,7 @@ def get_config(
         codeact_enable_browsing=RUN_WITH_BROWSING,
         codeact_enable_llm_editor=False,
         condenser=metadata.condenser_config,
+        # enable_plan_routing=True,
     )
     config.set_agent_config(agent_config)
     return config
diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index 6f43f4397b14..e5c3afd29dae 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -388,16 +388,15 @@ def step(self, state: State) -> Action:
 
         # prepare what we want to send to the LLM
         messages = self._get_messages(state)
-        messages_dict = self.llm.format_messages_for_llm(messages)
-        params['messages'] = messages_dict
-
-        formatted_trajectory = format_trajectory(messages)
+        params['messages'] = self.llm.format_messages_for_llm(messages)
 
         # check if model routing is needed
-        if self.plan_router and self.plan_router.should_route_to_custom_model(
-            formatted_trajectory
-        ):
-            params['use_reasoning_model'] = True
+        if self.plan_router:
+            formatted_trajectory = format_trajectory(messages)
+
+            if self.plan_router.should_route_to_custom_model(formatted_trajectory):
+                logger.info('🧭 Routing to custom model...')
+                params['use_reasoning_model'] = True
 
         params['tools'] = self.tools
         if self.mock_function_calling:
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 743eb1dd13c5..4fad046e4faf 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -71,6 +71,7 @@
     'claude-3-5-haiku-20241022',
    'gpt-4o-mini',
     'gpt-4o',
+    'o1',
 ]

From dbc2412065dcc9ad3eba52d5cdeb4afcd7949841 Mon Sep 17 00:00:00 2001
From: Hoang Tran
Date: Tue, 4 Feb 2025 10:58:49 +0000
Subject: [PATCH 17/21] refactor to use llm objects

---
 .../agenthub/codeact_agent/codeact_agent.py   | 69 +++++++++++++------
 openhands/controller/agent.py                 |  1 +
 openhands/core/config/agent_config.py         |  3 -
 openhands/core/config/app_config.py           |  9 ++-
 openhands/core/config/llm_config.py           |  1 +
 openhands/core/config/model_routing_config.py | 34 ++-------
 openhands/core/config/utils.py                |  2 +-
 openhands/core/setup.py                       | 10 ++-
 openhands/llm/llm.py                          | 17 +----
 openhands/router/__init__.py                  |  4 ++
 openhands/router/base.py                      | 15 +++-
 openhands/router/plan/__init__.py             |  3 +-
 openhands/router/plan/llm_based.py            | 47 ++++++++++---
 openhands/router/plan/rule_based.py           | 11 ---
 openhands/server/session/session.py           | 13 +++-
 15 files changed, 140 insertions(+), 99 deletions(-)
 create mode 100644 openhands/router/__init__.py
 delete mode 100644 openhands/router/plan/rule_based.py

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index ce02f578c752..96419c6bdcfc 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -8,7 +8,7 @@
 import openhands.agenthub.codeact_agent.function_calling as codeact_function_calling
 from openhands.controller.agent import Agent
 from openhands.controller.state.state import State
-from openhands.core.config import AgentConfig
+from openhands.core.config import AgentConfig, ModelRoutingConfig
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.message import ImageContent, Message, TextContent
 from openhands.core.schema import ActionType
@@ -39,7 +39,7 @@
 from openhands.events.serialization.event import truncate_content
 from openhands.llm.llm import LLM
 from openhands.memory.condenser import Condenser
-from openhands.router.plan import LLMBasedPlanRouter
+from openhands.router import BaseRouter, LLMBasedPlanRouter
 from openhands.runtime.plugins import (
     AgentSkillsRequirement,
     JupyterRequirement,
@@ -82,11 +82,14 @@ def __init__(
         self,
         llm: LLM,
         config: AgentConfig,
+        model_routing_config: ModelRoutingConfig | None = None,
+        routing_llms: dict[str, LLM] | None = None,
     ) -> None:
         """Initializes a new instance of the CodeActAgent class.
 
         Parameters:
         - llm (LLM): The llm to be used by this agent
+        - routing_llms (dict[str, LLM]): The llms to be selected for routing
         """
         super().__init__(llm, config)
         self.pending_actions: deque[Action] = deque()
@@ -123,9 +126,17 @@ def __init__(
         self.condenser = Condenser.from_config(self.config.condenser)
         logger.debug(f'Using condenser: {self.condenser}')
 
-        self.plan_router = (
-            LLMBasedPlanRouter(self.llm.config) if config.enable_plan_routing else None
-        )
+        self.router: BaseRouter | None = None
+
+        if config.enable_plan_routing:
+            assert model_routing_config is not None and routing_llms is not None
+            self.router = LLMBasedPlanRouter(
+                llm=self.llm,
+                routing_llms=routing_llms or dict(),
+                model_routing_config=model_routing_config,
+            )
+
+        self.active_llm: LLM | None = None  # The LLM chosen by the router
 
     def get_action_message(
         self,
@@ -162,6 +173,9 @@ def get_action_message(
         rather than being returned immediately. They will be processed later
         when all corresponding tool call results are available.
         """
+        # Handle the case where self.active_llm is None
+        active_llm_ = self.active_llm or self.llm
+
         # create a regular message from an event
         if isinstance(
             action,
@@ -227,7 +241,7 @@ def get_action_message(
         elif isinstance(action, MessageAction):
             role = 'user' if action.source == 'user' else 'assistant'
             content = [TextContent(text=action.content or '')]
-            if self.llm.vision_is_active() and action.image_urls:
+            if active_llm_.vision_is_active() and action.image_urls:
                 content.append(ImageContent(image_urls=action.image_urls))
             return [
                 Message(
@@ -278,8 +292,11 @@ def get_observation_message(
         Raises:
             ValueError: If the observation type is unknown
         """
+        # Handle the case where self.active_llm is None
+        active_llm_ = self.active_llm or self.llm
+
         message: Message
-        max_message_chars = self.llm.config.max_message_chars
+        max_message_chars = active_llm_.config.max_message_chars
         if isinstance(obs, CmdOutputObservation):
             # if it doesn't have tool call metadata, it was triggered by a user action
             if obs.tool_call_metadata is None:
@@ -402,22 +419,29 @@ def step(self, state: State) -> Action:
 
         params: dict = {}
 
-        # prepare what we want to send to the LLM
-        messages = self._get_messages(state)
-        params['messages'] = self.llm.format_messages_for_llm(messages)
-
         # check if model routing is needed
-        if self.plan_router:
-            formatted_trajectory = format_trajectory(messages)
-
-            if self.plan_router.should_route_to_custom_model(formatted_trajectory):
-                logger.info('🧭 Routing to custom model...')
-                params['use_reasoning_model'] = True
+        if self.router:
+            if self.active_llm is None:
+                messages = self._get_messages(state)
+                formatted_trajectory = format_trajectory(messages)
+                self.active_llm = self.router.should_route_to(formatted_trajectory)
+
+            if self.active_llm != self.llm:
+                logger.warning(f'🧭 Routing to custom model: {self.active_llm}')
+        else:
+            self.active_llm = self.llm
 
         params['tools'] = self.tools
-        if self.mock_function_calling:
+        if not self.active_llm.is_function_calling_active():
             params['mock_function_calling'] = True
-        response = self.llm.completion(**params)
+
+        # prepare what we want to send to the LLM
+        # NOTE: We need to call this here when self.active_llm is correctly set
+        messages = self._get_messages(state)
+        params['messages'] = self.active_llm.format_messages_for_llm(messages)
+
+        response = self.active_llm.completion(**params)
+
         actions = codeact_function_calling.response_to_actions(response)
         for action in actions:
             self.pending_actions.append(action)
@@ -458,13 +482,16 @@ def _get_messages(self, state: State) -> list[Message]:
         if not self.prompt_manager:
             raise Exception('Prompt Manager not instantiated.')
 
+        # Handle the case where self.active_llm is None
+        active_llm_ = self.active_llm or self.llm
+
         messages: list[Message] = [
             Message(
                 role='system',
                 content=[
                     TextContent(
                         text=self.prompt_manager.get_system_message(),
-                        cache_prompt=self.llm.is_caching_prompt_active(),
+                        cache_prompt=active_llm_.is_caching_prompt_active(),
                     )
                 ],
             )
@@ -535,7 +562,7 @@ def _get_messages(self, state: State) -> list[Message]:
 
             messages.append(msg)
 
-        if self.llm.is_caching_prompt_active():
+        if active_llm_.is_caching_prompt_active():
             # NOTE: this is only needed for anthropic
             # following logic here:
            # https://github.com/anthropics/anthropic-quickstarts/blob/8f734fd08c425c6ec91ddd613af04ff87d70c5a0/computer-use-demo/computer_use_demo/loop.py#L241-L262
diff --git a/openhands/controller/agent.py b/openhands/controller/agent.py
index 43a55d935249..8577b179b3d0 100644
--- a/openhands/controller/agent.py
+++
b/openhands/controller/agent.py @@ -32,6 +32,7 @@ def __init__( self, llm: LLM, config: 'AgentConfig', + **kwargs, ): self.llm = llm self.config = config diff --git a/openhands/core/config/agent_config.py b/openhands/core/config/agent_config.py index fa3f3985ecfb..3fb9071061d2 100644 --- a/openhands/core/config/agent_config.py +++ b/openhands/core/config/agent_config.py @@ -1,7 +1,6 @@ from pydantic import BaseModel, Field from openhands.core.config.condenser_config import CondenserConfig, NoOpCondenserConfig -from openhands.core.config.model_routing_config import ModelRoutingConfig class AgentConfig(BaseModel): @@ -19,7 +18,6 @@ class AgentConfig(BaseModel): enable_prompt_extensions: Whether to use prompt extensions (e.g., microagents, inject runtime info). Default is True. disabled_microagents: A list of microagents to disable. Default is None. condenser: Configuration for the memory condenser. Default is NoOpCondenserConfig. - enable_plan_routing: Whether to enable plan routing to reasoning models. Default is False. codeact_enable_browsing: bool = Field(default=True) @@ -34,4 +32,3 @@ class AgentConfig(BaseModel): disabled_microagents: list[str] | None = Field(default=None) condenser: CondenserConfig = Field(default_factory=NoOpCondenserConfig) enable_plan_routing: bool = Field(default=False) - model_routing: ModelRoutingConfig = Field(default_factory=ModelRoutingConfig) diff --git a/openhands/core/config/app_config.py b/openhands/core/config/app_config.py index 8c995d1ee3db..b3d38e78eede 100644 --- a/openhands/core/config/app_config.py +++ b/openhands/core/config/app_config.py @@ -10,6 +10,7 @@ model_defaults_to_dict, ) from openhands.core.config.llm_config import LLMConfig +from openhands.core.config.model_routing_config import ModelRoutingConfig from openhands.core.config.sandbox_config import SandboxConfig from openhands.core.config.security_config import SecurityConfig @@ -20,6 +21,7 @@ class AppConfig(BaseModel): Attributes: llms: Dictionary mapping LLM names to their configurations. The default configuration is stored under the 'llm' key. + routing_llms: Dictionary mapping routing LLM names to their configurations. agents: Dictionary mapping agent names to their configurations. The default configuration is stored under the 'agent' key. default_agent: Name of the default agent to use.
@@ -48,10 +50,12 @@ class AppConfig(BaseModel): """ llms: dict[str, LLMConfig] = Field(default_factory=dict) + routing_llms: dict[str, LLMConfig] = Field(default_factory=dict) agents: dict = Field(default_factory=dict) default_agent: str = Field(default=OH_DEFAULT_AGENT) sandbox: SandboxConfig = Field(default_factory=SandboxConfig) security: SecurityConfig = Field(default_factory=SecurityConfig) + model_routing: ModelRoutingConfig = Field(default_factory=ModelRoutingConfig) runtime: str = Field(default='docker') file_store: str = Field(default='local') file_store_path: str = Field(default='/tmp/openhands_file_store') @@ -94,7 +98,10 @@ def get_llm_config(self, name='llm') -> LLMConfig: return self.llms['llm'] def set_llm_config(self, value: LLMConfig, name='llm') -> None: - self.llms[name] = value + if value.for_routing: + self.routing_llms[name] = value + else: + self.llms[name] = value def get_agent_config(self, name='agent') -> AgentConfig: """'agent' is the name for default config (for backward compatibility prior to 0.8).""" diff --git a/openhands/core/config/llm_config.py b/openhands/core/config/llm_config.py index cb1581634da1..a00654f9e4c3 100644 --- a/openhands/core/config/llm_config.py +++ b/openhands/core/config/llm_config.py @@ -86,6 +86,7 @@ class LLMConfig(BaseModel): custom_tokenizer: str | None = Field(default=None) native_tool_calling: bool | None = Field(default=None) reasoning_effort: str | None = Field(default='high') + for_routing: bool = Field(default=False) model_config = {'extra': 'forbid'} diff --git a/openhands/core/config/model_routing_config.py b/openhands/core/config/model_routing_config.py index 902a7fcaa782..349389f3b88a 100644 --- a/openhands/core/config/model_routing_config.py +++ b/openhands/core/config/model_routing_config.py @@ -1,32 +1,6 @@ -from dataclasses import dataclass, fields +from pydantic import BaseModel, Field -from openhands.core.config.config_utils import get_field_info - -@dataclass -class ModelRoutingConfig: - reasoning_model: str = 'o1-preview-2024-09-12' - - def defaults_to_dict(self) -> dict: - """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional.""" - dict = {} - for f in fields(self): - dict[f.name] = get_field_info(f) - return dict - - def __str__(self): - attr_str = [] - for f in fields(self): - attr_name = f.name - attr_value = getattr(self, f.name) - - attr_str.append(f'{attr_name}={repr(attr_value)}') - - return f"ModelRoutingConfig({', '.join(attr_str)})" - - @classmethod - def from_dict(cls, model_routing_config_dict: dict) -> 'ModelRoutingConfig': - return cls(**model_routing_config_dict) - - def __repr__(self): - return self.__str__() +class ModelRoutingConfig(BaseModel): + reasoning_llm_config_name: str = Field(default='reasoning_model') + judge_llm_config_name: str = Field(default='judge_model') diff --git a/openhands/core/config/utils.py b/openhands/core/config/utils.py index 29aa9ca90600..c2bc5effaebd 100644 --- a/openhands/core/config/utils.py +++ b/openhands/core/config/utils.py @@ -162,7 +162,7 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'): logger.openhands_logger.debug( 'Attempt to load model routing config from config toml' ) - model_routing_config = ModelRoutingConfig.from_dict(value) + model_routing_config = ModelRoutingConfig(**value) cfg.model_routing = model_routing_config logger.openhands_logger.debug( diff --git a/openhands/core/setup.py b/openhands/core/setup.py index 6785d4fae88b..1adb36f95983 100644 --- a/openhands/core/setup.py 
+++ b/openhands/core/setup.py @@ -60,10 +60,18 @@ def create_agent(runtime: Runtime, config: AppConfig) -> Agent: agent_cls: Type[Agent] = Agent.get_cls(config.default_agent) agent_config = config.get_agent_config(config.default_agent) llm_config = config.get_llm_config_from_agent(config.default_agent) + routing_llms_config = config.routing_llms model_routing_config = config.model_routing + routing_llms = {} + for config_name, routing_llm_config in routing_llms_config.items(): + routing_llms[config_name] = LLM( + config=routing_llm_config, + ) agent = agent_cls( - llm=LLM(config=llm_config, model_routing_config=model_routing_config), + llm=LLM(config=llm_config), config=agent_config, + model_routing_config=model_routing_config, + routing_llms=routing_llms, ) if agent.prompt_manager: microagents = runtime.get_microagents_from_selected_repo(None) diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py index 1e28a436e69a..d609e1434764 100644 --- a/openhands/llm/llm.py +++ b/openhands/llm/llm.py @@ -7,7 +7,7 @@ import requests -from openhands.core.config import LLMConfig, ModelRoutingConfig +from openhands.core.config import LLMConfig with warnings.catch_warnings(): warnings.simplefilter('ignore') @@ -111,7 +111,6 @@ def __init__( config: LLMConfig, metrics: Metrics | None = None, retry_listener: Callable[[int, int], None] | None = None, - model_routing_config: ModelRoutingConfig | None = None, ): """Initializes the LLM. If LLMConfig is passed, its values will be the fallback. @@ -120,7 +119,6 @@ def __init__( Args: config: The LLM configuration. metrics: The metrics to use. - model_routing_config: The model routing configuration. """ self._tried_model_info = False self.metrics: Metrics = ( @@ -128,7 +126,6 @@ def __init__( ) self.cost_metric_supported: bool = True self.config: LLMConfig = copy.deepcopy(config) - self.model_routing_config = model_routing_config self.model_info: ModelInfo | None = None self.retry_listener = retry_listener @@ -202,7 +199,6 @@ def wrapper(*args, **kwargs): messages: list[dict[str, Any]] | dict[str, Any] = [] mock_function_calling = kwargs.pop('mock_function_calling', False) - use_reasoning_model = kwargs.pop('use_reasoning_model', False) # some callers might send the model and messages directly # litellm allows positional args, like completion(model, messages, **kwargs) @@ -235,15 +231,6 @@ def wrapper(*args, **kwargs): kwargs['stop'] = STOP_WORDS mock_fncall_tools = kwargs.pop('tools') - if use_reasoning_model: - if self.model_routing_config is None: - raise ValueError( - 'Model routing config is required for model routing.' 
- ) - - # Replace the model with the reasoning model - kwargs['model'] = self.model_routing_config.reasoning_model - # if we have no messages, something went very wrong if not messages: raise ValueError( @@ -693,7 +680,7 @@ def __str__(self): return f'LLM(model={self.config.model}, api_version={self.config.api_version}, base_url={self.config.base_url})' elif self.config.base_url: return f'LLM(model={self.config.model}, base_url={self.config.base_url})' - return f'LLM(model={self.config.model},reasoning_model={self.model_routing_config.reasoning_model if self.model_routing_config else None})' + return f'LLM(model={self.config.model})' def __repr__(self): return str(self) diff --git a/openhands/router/__init__.py b/openhands/router/__init__.py new file mode 100644 index 000000000000..32058b2b386f --- /dev/null +++ b/openhands/router/__init__.py @@ -0,0 +1,4 @@ +from openhands.router.base import BaseRouter +from openhands.router.plan.llm_based import LLMBasedPlanRouter + +__all__ = ['BaseRouter', 'LLMBasedPlanRouter'] diff --git a/openhands/router/base.py b/openhands/router/base.py index ccc7ad47f1c5..111cb23f6814 100644 --- a/openhands/router/base.py +++ b/openhands/router/base.py @@ -1,7 +1,20 @@ from abc import ABC, abstractmethod +from openhands.core.config.model_routing_config import ModelRoutingConfig +from openhands.llm.llm import LLM + class BaseRouter(ABC): + def __init__( + self, + llm: LLM, + routing_llms: dict[str, LLM], + model_routing_config: ModelRoutingConfig, + ): + self.llm = llm + self.routing_llms = routing_llms + self.model_routing_config = model_routing_config + @abstractmethod - def should_route_to_custom_model(self, prompt: str) -> bool: + def should_route_to(self, prompt: str) -> LLM: pass diff --git a/openhands/router/plan/__init__.py b/openhands/router/plan/__init__.py index 845831646df8..323c4dddf224 100644 --- a/openhands/router/plan/__init__.py +++ b/openhands/router/plan/__init__.py @@ -1,4 +1,3 @@ from openhands.router.plan.llm_based import LLMBasedPlanRouter -from openhands.router.plan.rule_based import RuleBasedPlanRouter -__all__ = ['RuleBasedPlanRouter', 'LLMBasedPlanRouter'] +__all__ = ['LLMBasedPlanRouter'] diff --git a/openhands/router/plan/llm_based.py b/openhands/router/plan/llm_based.py index 8ada3d537980..066ba8186922 100644 --- a/openhands/router/plan/llm_based.py +++ b/openhands/router/plan/llm_based.py @@ -1,6 +1,4 @@ -import copy - -from openhands.core.config import LLMConfig +from openhands.core.config import ModelRoutingConfig from openhands.llm.llm import LLM from openhands.router.base import BaseRouter from openhands.router.plan.prompts import ( @@ -14,15 +12,22 @@ class LLMBasedPlanRouter(BaseRouter): Router that routes the prompt that is judged by a LLM as complex and requires a step-by-step plan. 
""" - JUDGE_MODEL = 'gpt-4o' + def __init__( + self, + llm: LLM, + routing_llms: dict[str, LLM], + model_routing_config: ModelRoutingConfig, + ): + super().__init__(llm, routing_llms, model_routing_config) - def __init__(self, llm_config: LLMConfig): - super().__init__() + self._validate_model_routing_config(model_routing_config, routing_llms) - judge_llm_config = copy.deepcopy(llm_config) - self.judge_llm = LLM(judge_llm_config) + self.judge_llm = routing_llms[model_routing_config.judge_llm_config_name] + self.reasoning_llm = routing_llms[ + model_routing_config.reasoning_llm_config_name + ] - def should_route_to_custom_model(self, prompt: str) -> bool: + def should_route_to(self, prompt: str) -> LLM: messages = [ { 'role': 'system', @@ -38,6 +43,26 @@ def should_route_to_custom_model(self, prompt: str) -> bool: response = self.judge_llm.completion( messages=messages, - model=self.JUDGE_MODEL, ) - return int(response['choices'][0]['message']['content'].strip()) == 1 + if int(response['choices'][0]['message']['content'].strip()) == 1: + return self.reasoning_llm + return self.llm + + def _validate_model_routing_config( + self, model_routing_config: ModelRoutingConfig, routing_llms: dict[str, LLM] + ): + if ( + not model_routing_config.judge_llm_config_name + or not model_routing_config.reasoning_llm_config_name + ): + raise ValueError( + 'Judge LLM and Reasoning LLM config names must be provided' + ) + if model_routing_config.judge_llm_config_name not in routing_llms: + raise ValueError( + f'Judge LLM config {model_routing_config.judge_llm_config_name} not found' + ) + if model_routing_config.reasoning_llm_config_name not in routing_llms: + raise ValueError( + f'Reasoning LLM config {model_routing_config.reasoning_llm_config_name} not found' + ) diff --git a/openhands/router/plan/rule_based.py b/openhands/router/plan/rule_based.py deleted file mode 100644 index cb50dfdd9924..000000000000 --- a/openhands/router/plan/rule_based.py +++ /dev/null @@ -1,11 +0,0 @@ -from openhands.router.base import BaseRouter - - -class RuleBasedPlanRouter(BaseRouter): - """ - Router that detects if the prompt contains the string "plan". 
- """ - - def should_route_to_custom_model(self, prompt: str) -> bool: - # Returns True if the prompt contains the word "plan" - return 'plan' in prompt diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py index 4990122c4e5f..d3fc02530127 100644 --- a/openhands/server/session/session.py +++ b/openhands/server/session/session.py @@ -105,6 +105,11 @@ async def initialize_agent( # TODO: override other LLM config & agent config groups (#2075) llm = self._create_llm(agent_cls) + routing_llms = {} + for config_name, routing_llm_config in self.config.routing_llms.items(): + routing_llms[config_name] = LLM( + config=routing_llm_config, + ) agent_config = self.config.get_agent_config(agent_cls) if settings.enable_default_condenser: @@ -114,7 +119,12 @@ async def initialize_agent( logger.info(f'Enabling default condenser: {default_condenser_config}') agent_config.condenser = default_condenser_config - agent = Agent.get_cls(agent_cls)(llm, agent_config) + agent = Agent.get_cls(agent_cls)( + llm=llm, + config=agent_config, + model_routing_config=self.config.model_routing, + routing_llms=routing_llms, + ) github_token = None selected_repository = None @@ -149,7 +159,6 @@ def _create_llm(self, agent_cls: str | None) -> LLM: return LLM( config=self.config.get_llm_config_from_agent(agent_cls), retry_listener=self._notify_on_llm_retry, - model_routing_config=self.config.model_routing, ) def _notify_on_llm_retry(self, retries: int, max: int) -> None: From ea4474ddc85eec5e2a1a4df0d104d4f18f1bf7c1 Mon Sep 17 00:00:00 2001 From: Hoang Tran Date: Tue, 4 Feb 2025 13:05:10 +0000 Subject: [PATCH 18/21] fix bug --- .../agenthub/codeact_agent/codeact_agent.py | 11 +++++----- openhands/core/config/utils.py | 21 +++++++++---------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py index 96419c6bdcfc..a1b87d1f0ea4 100644 --- a/openhands/agenthub/codeact_agent/codeact_agent.py +++ b/openhands/agenthub/codeact_agent/codeact_agent.py @@ -421,13 +421,12 @@ def step(self, state: State) -> Action: # check if model routing is needed if self.router: - if self.active_llm is None: - messages = self._get_messages(state) - formatted_trajectory = format_trajectory(messages) - self.active_llm = self.router.should_route_to(formatted_trajectory) + messages = self._get_messages(state) + formatted_trajectory = format_trajectory(messages) + self.active_llm = self.router.should_route_to(formatted_trajectory) - if self.active_llm != self.llm: - logger.warning(f'🧭 Routing to custom model: {self.active_llm}') + if self.active_llm != self.llm: + logger.warning(f'🧭 Routing to custom model: {self.active_llm}') else: self.active_llm = self.llm diff --git a/openhands/core/config/utils.py b/openhands/core/config/utils.py index c2bc5effaebd..24d030792862 100644 --- a/openhands/core/config/utils.py +++ b/openhands/core/config/utils.py @@ -158,16 +158,6 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'): continue llm_group_ids.add(nested_key) cfg.set_llm_config(llm_config, nested_key) - elif key is not None and key.lower() == 'model_routing': - logger.openhands_logger.debug( - 'Attempt to load model routing config from config toml' - ) - model_routing_config = ModelRoutingConfig(**value) - cfg.model_routing = model_routing_config - - logger.openhands_logger.debug( - 'Attempt to load default LLM config from config toml' - ) # Extract generic LLM fields, which are not nested LLM configs 
generic_llm_fields = {} @@ -199,13 +189,22 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'): custom_llm_config = LLMConfig(**merged_llm_dict) cfg.set_llm_config(custom_llm_config, nested_key) - elif key is not None and key.lower() == 'security': logger.openhands_logger.debug( 'Attempt to load security config from config toml' ) security_config = SecurityConfig(**value) cfg.security = security_config + elif key is not None and key.lower() == 'model_routing': + logger.openhands_logger.debug( + 'Attempt to load model routing config from config toml' + ) + model_routing_config = ModelRoutingConfig(**value) + cfg.model_routing = model_routing_config + + logger.openhands_logger.debug( + 'Attempt to load default LLM config from config toml' + ) elif not key.startswith('sandbox') and key.lower() != 'core': logger.openhands_logger.warning( f'Unknown key in {toml_file}: "{key}"' From b7a0c9585c91eb58f00b2c99211a09992cb0f7ef Mon Sep 17 00:00:00 2001 From: Hoang Tran Date: Tue, 4 Feb 2025 13:11:13 +0000 Subject: [PATCH 19/21] update config template --- config.template.toml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/config.template.toml b/config.template.toml index 7650b06334e5..6d5de0348b9e 100644 --- a/config.template.toml +++ b/config.template.toml @@ -293,7 +293,16 @@ llm_config = 'gpt3' [model_routing] # The reasoning model to use for plan generation -reasoning_model = "o1-preview-2024-09-12" +reasoning_llm_config_name = 'reasoning_model' +judge_llm_config_name = 'judge_model' + +[llm.judge_model] +model = "gpt-4o" +api_key = "" + +[llm.reasoning_model] +model = "o1" +api_key = "" #################################### Eval #################################### # Configuration for the evaluation, please refer to the specific evaluation From 7d0132f625dcdc17b7cf207bd23a707c27406bc5 Mon Sep 17 00:00:00 2001 From: Hoang Tran Date: Tue, 4 Feb 2025 16:00:07 +0000 Subject: [PATCH 20/21] add gap control --- openhands/agenthub/dummy_agent/agent.py | 2 +- openhands/router/plan/llm_based.py | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/openhands/agenthub/dummy_agent/agent.py b/openhands/agenthub/dummy_agent/agent.py index f7a654bf75b4..ad69d01936f6 100644 --- a/openhands/agenthub/dummy_agent/agent.py +++ b/openhands/agenthub/dummy_agent/agent.py @@ -46,7 +46,7 @@ class DummyAgent(Agent): without making any LLM calls. """ - def __init__(self, llm: LLM, config: AgentConfig): + def __init__(self, llm: LLM, config: AgentConfig, **kwargs): super().__init__(llm, config) self.steps: list[ActionObs] = [ { diff --git a/openhands/router/plan/llm_based.py b/openhands/router/plan/llm_based.py index 066ba8186922..85515ea23139 100644 --- a/openhands/router/plan/llm_based.py +++ b/openhands/router/plan/llm_based.py @@ -12,6 +12,8 @@ class LLMBasedPlanRouter(BaseRouter): Router that routes the prompt that is judged by a LLM as complex and requires a step-by-step plan. 
""" + NUM_TURNS_GAP = 5 + def __init__( self, llm: LLM, @@ -26,8 +28,15 @@ def __init__( self.reasoning_llm = routing_llms[ model_routing_config.reasoning_llm_config_name ] + self.routed_turns: list[int] = [] + self.cur_turn_num = 0 def should_route_to(self, prompt: str) -> LLM: + self.cur_turn_num += 1 + + if self.cur_turn_num - max(self.routed_turns, default=0) < self.NUM_TURNS_GAP: + return self.llm + messages = [ { 'role': 'system', @@ -44,7 +53,10 @@ def should_route_to(self, prompt: str) -> LLM: response = self.judge_llm.completion( messages=messages, ) - if int(response['choices'][0]['message']['content'].strip()) == 1: + should_route = int(response['choices'][0]['message']['content'].strip()) == 1 + + if should_route: + self.routed_turns.append(self.cur_turn_num) return self.reasoning_llm return self.llm From a70e97906c97bddb7945856c81c8c19d62127823 Mon Sep 17 00:00:00 2001 From: Hoang Tran Date: Thu, 6 Feb 2025 04:36:27 +0000 Subject: [PATCH 21/21] working eval --- evaluation/benchmarks/swe_bench/run_infer.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/evaluation/benchmarks/swe_bench/run_infer.py b/evaluation/benchmarks/swe_bench/run_infer.py index c4305bfb6629..2f3a612c30ec 100644 --- a/evaluation/benchmarks/swe_bench/run_infer.py +++ b/evaluation/benchmarks/swe_bench/run_infer.py @@ -1,4 +1,5 @@ import asyncio +import copy import json import os import tempfile @@ -33,6 +34,7 @@ SandboxConfig, get_llm_config_arg, get_parser, + load_from_toml, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -155,15 +157,19 @@ def get_config( metadata.llm_config, metadata.eval_output_dir, instance['instance_id'] ) ) + config_copy = copy.deepcopy(config) + load_from_toml(config_copy) agent_config = AgentConfig( codeact_enable_jupyter=False, codeact_enable_browsing=RUN_WITH_BROWSING, codeact_enable_llm_editor=False, condenser=metadata.condenser_config, enable_prompt_extensions=False, - # enable_plan_routing=True, + enable_plan_routing=config_copy.get_agent_config().enable_plan_routing, ) config.set_agent_config(agent_config) + config.routing_llms = config_copy.routing_llms + config.model_routing = config_copy.model_routing return config