
Fix issue #5609: Use litellm's modify_params with default True #5611

Merged: 10 commits on Dec 16, 2024
5 changes: 5 additions & 0 deletions config.template.toml
@@ -154,6 +154,11 @@ model = "gpt-4o"
 # Drop any unmapped (unsupported) params without causing an exception
 #drop_params = false
 
+# Allow litellm to modify parameters to make them compatible with providers
+# for example by inserting a default message (like 'continue') when a message is empty
+# and the provider's API would give an error otherwise
+#modify_params = true
+
 # Using the prompt caching feature if provided by the LLM and supported
 #caching_prompt = true
 
2 changes: 1 addition & 1 deletion evaluation/benchmarks/EDA/run_infer.py
@@ -201,7 +201,7 @@ def process_instance(
 
 llm_config = None
 if args.llm_config:
-    llm_config = get_llm_config_arg(args.llm_config)
+    llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
 if llm_config is None:
     raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 
2 changes: 1 addition & 1 deletion evaluation/benchmarks/agent_bench/run_infer.py
@@ -306,7 +306,7 @@ def process_instance(
 
 llm_config = None
 if args.llm_config:
-    llm_config = get_llm_config_arg(args.llm_config)
+    llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
 
 if llm_config is None:
     raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

2 changes: 1 addition & 1 deletion evaluation/benchmarks/aider_bench/run_infer.py
@@ -278,7 +278,7 @@ def process_instance(
 
 llm_config = None
 if args.llm_config:
-    llm_config = get_llm_config_arg(args.llm_config)
+    llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
 
 if llm_config is None:
     raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

2 changes: 1 addition & 1 deletion evaluation/benchmarks/biocoder/run_infer.py
@@ -327,7 +327,7 @@ def process_instance(
 
 llm_config = None
 if args.llm_config:
-    llm_config = get_llm_config_arg(args.llm_config)
+    llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
 
 if llm_config is None:
     raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

2 changes: 1 addition & 1 deletion evaluation/benchmarks/bird/run_infer.py
@@ -455,7 +455,7 @@ def execute_sql(db_path, sql):
 
 llm_config = None
 if args.llm_config:
-    llm_config = get_llm_config_arg(args.llm_config)
+    llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
 if llm_config is None:
     raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 
2 changes: 1 addition & 1 deletion evaluation/benchmarks/browsing_delegation/run_infer.py
@@ -141,7 +141,7 @@ def process_instance(
 
 llm_config = None
 if args.llm_config:
-    llm_config = get_llm_config_arg(args.llm_config)
+    llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
 
 if llm_config is None:
     raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

2 changes: 1 addition & 1 deletion evaluation/benchmarks/commit0_bench/run_infer.py
@@ -570,7 +570,7 @@ def commit0_setup(dataset: pd.DataFrame, repo_split: str) -> pd.DataFrame:
 
 llm_config = None
 if args.llm_config:
-    llm_config = get_llm_config_arg(args.llm_config)
+    llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
     llm_config.log_completions = True
 
 if llm_config is None:

2 changes: 1 addition & 1 deletion evaluation/benchmarks/discoverybench/run_infer.py
@@ -465,7 +465,7 @@ def create_dataset(repo_location: str, split: str = 'test'):
 
 llm_config = None
 if args.llm_config:
-    llm_config = get_llm_config_arg(args.llm_config)
+    llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
 if llm_config is None:
     raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 
2 changes: 1 addition & 1 deletion evaluation/benchmarks/gaia/run_infer.py
@@ -237,7 +237,7 @@ def process_instance(
 
 llm_config = None
 if args.llm_config:
-    llm_config = get_llm_config_arg(args.llm_config)
+    llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
 if llm_config is None:
     raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 
2 changes: 1 addition & 1 deletion evaluation/benchmarks/gorilla/run_infer.py
@@ -145,7 +145,7 @@ def process_instance(
 
 llm_config = None
 if args.llm_config:
-    llm_config = get_llm_config_arg(args.llm_config)
+    llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
 if llm_config is None:
     raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 
2 changes: 1 addition & 1 deletion evaluation/benchmarks/gpqa/run_infer.py
@@ -325,7 +325,7 @@ def process_instance(
 
 llm_config = None
 if args.llm_config:
-    llm_config = get_llm_config_arg(args.llm_config)
+    llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
 if llm_config is None:
     raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 
2 changes: 1 addition & 1 deletion evaluation/benchmarks/humanevalfix/run_infer.py
@@ -284,7 +284,7 @@ def process_instance(
 
 llm_config = None
 if args.llm_config:
-    llm_config = get_llm_config_arg(args.llm_config)
+    llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
 if llm_config is None:
     raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 
2 changes: 1 addition & 1 deletion evaluation/benchmarks/logic_reasoning/run_infer.py
@@ -287,7 +287,7 @@ def process_instance(
 
 llm_config = None
 if args.llm_config:
-    llm_config = get_llm_config_arg(args.llm_config)
+    llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
 if llm_config is None:
     raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 
2 changes: 1 addition & 1 deletion evaluation/benchmarks/miniwob/run_infer.py
@@ -230,7 +230,7 @@ def process_instance(
 
 llm_config = None
 if args.llm_config:
-    llm_config = get_llm_config_arg(args.llm_config)
+    llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
 if llm_config is None:
     raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 
2 changes: 1 addition & 1 deletion evaluation/benchmarks/mint/run_infer.py
@@ -278,7 +278,7 @@ def process_instance(
 
 llm_config = None
 if args.llm_config:
-    llm_config = get_llm_config_arg(args.llm_config)
+    llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
 if llm_config is None:
     raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 
2 changes: 1 addition & 1 deletion evaluation/benchmarks/ml_bench/run_infer.py
@@ -291,7 +291,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
 
 llm_config = None
 if args.llm_config:
-    llm_config = get_llm_config_arg(args.llm_config)
+    llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
 if llm_config is None:
     raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 
2 changes: 1 addition & 1 deletion evaluation/benchmarks/scienceagentbench/run_infer.py
@@ -271,7 +271,7 @@ def process_instance(
 
 llm_config = None
 if args.llm_config:
-    llm_config = get_llm_config_arg(args.llm_config)
+    llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
 if llm_config is None:
     raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 
5 changes: 2 additions & 3 deletions evaluation/benchmarks/swe_bench/run_infer.py
@@ -9,7 +9,6 @@
 from datasets import load_dataset
 
 import openhands.agenthub
-
 from evaluation.utils.shared import (
     EvalException,
     EvalMetadata,
@@ -76,7 +75,7 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
         '4. Rerun your reproduce script and confirm that the error is fixed!\n'
         '5. Think about edgecases and make sure your fix handles them as well\n'
         "Your thinking should be thorough and so it's fine if it's very long.\n"
-    )
+    )
 
     if RUN_WITH_BROWSING:
         instruction += (
@@ -489,7 +488,7 @@ def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:
 
 llm_config = None
 if args.llm_config:
-    llm_config = get_llm_config_arg(args.llm_config)
+    llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
     llm_config.log_completions = True
 
 if llm_config is None:

2 changes: 1 addition & 1 deletion evaluation/benchmarks/toolqa/run_infer.py
@@ -180,7 +180,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
 
 llm_config = None
 if args.llm_config:
-    llm_config = get_llm_config_arg(args.llm_config)
+    llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
 if llm_config is None:
     raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 
2 changes: 1 addition & 1 deletion evaluation/benchmarks/webarena/run_infer.py
@@ -211,7 +211,7 @@ def process_instance(
 
 llm_config = None
 if args.llm_config:
-    llm_config = get_llm_config_arg(args.llm_config)
+    llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
 if llm_config is None:
     raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 
2 changes: 2 additions & 0 deletions openhands/core/config/llm_config.py
@@ -44,6 +44,7 @@ class LLMConfig:
         log_completions_folder: The folder to log LLM completions to. Required if log_completions is True.
         draft_editor: A more efficient LLM to use for file editing. Introduced in [PR 3985](https://github.com/All-Hands-AI/OpenHands/pull/3985).
         custom_tokenizer: A custom tokenizer to use for token counting.
+        modify_params: Allow litellm to modify parameters to make them compatible with the provider. For example, insert default messages when empty. Defaults to True.
     """
 
     model: str = 'claude-3-5-sonnet-20241022'
@@ -79,6 +80,7 @@ class LLMConfig:
     log_completions_folder: str = os.path.join(LOG_DIR, 'completions')
     draft_editor: Optional['LLMConfig'] = None
     custom_tokenizer: str | None = None
+    modify_params: bool = True
 
     def defaults_to_dict(self) -> dict:
         """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""

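For reference, a minimal sketch of what the new field means for callers. It assumes LLMConfig is re-exported from openhands.core.config; only the modify_params default introduced by this PR is asserted.

import litellm  # noqa: F401  (only to mirror the runtime environment; not used here)
from openhands.core.config import LLMConfig

# Default construction: litellm is allowed to adapt requests for the provider.
cfg = LLMConfig(model='claude-3-5-sonnet-20241022')
assert cfg.modify_params is True

# Explicit opt-out, which is what evaluation runs now receive.
eval_cfg = LLMConfig(model='gpt-4o', modify_params=False)
assert eval_cfg.modify_params is False
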
10 changes: 7 additions & 3 deletions openhands/core/config/utils.py
@@ -243,9 +243,9 @@ def finalize_config(cfg: AppConfig):
         )
 
 
-# Utility function for command line --group argument
+# Utility function for command line -l (--llm-config) argument
 def get_llm_config_arg(
-    llm_config_arg: str, toml_file: str = 'config.toml'
+    llm_config_arg: str, toml_file: str = 'config.toml', evaluation: bool = False
 ) -> LLMConfig | None:
     """Get a group of llm settings from the config file.
 
@@ -268,6 +268,7 @@ def get_llm_config_arg(
     Args:
         llm_config_arg: The group of llm settings to get from the config.toml file.
         toml_file: Path to the configuration file to read from. Defaults to 'config.toml'.
+        evaluation: If True, sets modify_params=False for evaluation purposes. Defaults to False.
 
     Returns:
         LLMConfig: The LLMConfig object with the settings from the config file.
@@ -296,7 +297,10 @@
 
     # update the llm config with the specified section
     if 'llm' in toml_config and llm_config_arg in toml_config['llm']:
-        return LLMConfig.from_dict(toml_config['llm'][llm_config_arg])
+        config = LLMConfig.from_dict(toml_config['llm'][llm_config_arg])
+        if evaluation:
+            config.modify_params = False
+        return config
     logger.openhands_logger.debug(f'Loading from toml failed for {llm_config_arg}')
     return None
 
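A short usage sketch of the new evaluation flag, mirroring the benchmark scripts above. It assumes get_llm_config_arg is re-exported from openhands.core.config as the benchmark scripts import it, and 'eval_gpt4o' stands for a hypothetical [llm.eval_gpt4o] section in config.toml.

from openhands.core.config import get_llm_config_arg

# Load a named LLM config from config.toml; evaluation=True forces modify_params off.
llm_config = get_llm_config_arg('eval_gpt4o', evaluation=True)
if llm_config is None:
    raise ValueError('Could not find LLM config: --llm_config eval_gpt4o')

# Benchmark requests are sent exactly as the agent produced them.
assert llm_config.modify_params is False
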
1 change: 1 addition & 0 deletions openhands/llm/llm.py
@@ -142,6 +142,7 @@ def __init__(
             temperature=self.config.temperature,
             top_p=self.config.top_p,
             drop_params=self.config.drop_params,
+            modify_params=self.config.modify_params,
         )
 
         self._completion_unwrapped = self._completion

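The flag is simply forwarded into litellm's completion call. A rough sketch of the difference it makes, simplified from the partial set up in LLM.__init__; the model name and message list are illustrative only.

import litellm

# With modify_params=True, litellm may rewrite the request so the provider accepts it,
# e.g. substituting a default message such as 'continue' for an empty one, as described
# in the config.template.toml comment above. With modify_params=False (evaluation runs),
# the request is sent exactly as constructed and the provider may reject it.
response = litellm.completion(
    model='claude-3-5-sonnet-20241022',
    messages=[{'role': 'user', 'content': ''}],  # an empty message some providers reject
    drop_params=True,
    modify_params=True,
)
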
2 changes: 2 additions & 0 deletions pyproject.toml
@@ -100,6 +100,7 @@ reportlab = "*"
 [tool.coverage.run]
 concurrency = ["gevent"]
 
+
 [tool.poetry.group.runtime.dependencies]
 jupyterlab = "*"
 notebook = "*"
@@ -130,6 +131,7 @@ ignore = ["D1"]
 [tool.ruff.lint.pydocstyle]
 convention = "google"
 
+
 [tool.poetry.group.evaluation.dependencies]
 streamlit = "*"
 whatthepatch = "*"