Use litellm's modify params (#5636)
enyst authored Dec 17, 2024
1 parent f9d052c commit 3297e4d
Showing 24 changed files with 60 additions and 1 deletion.
4 changes: 4 additions & 0 deletions config.template.toml
@@ -154,6 +154,10 @@ model = "gpt-4o"
# Drop any unmapped (unsupported) params without causing an exception
#drop_params = false

# Allow litellm to modify params, doing transformations like adding a default message when a message is empty.
# Note: this setting is global; unlike drop_params, it cannot be overridden in each call to litellm.
#modify_params = true

# Using the prompt caching feature if provided by the LLM and supported
#caching_prompt = true

3 changes: 3 additions & 0 deletions evaluation/benchmarks/EDA/run_infer.py
@@ -202,6 +202,9 @@ def process_instance(
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
llm_config.modify_params = False

if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

2 changes: 2 additions & 0 deletions evaluation/benchmarks/agent_bench/run_infer.py
@@ -307,6 +307,8 @@ def process_instance(
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
llm_config.modify_params = False

if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
2 changes: 2 additions & 0 deletions evaluation/benchmarks/aider_bench/run_infer.py
@@ -279,6 +279,8 @@ def process_instance(
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
llm_config.modify_params = False

if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
2 changes: 2 additions & 0 deletions evaluation/benchmarks/biocoder/run_infer.py
@@ -328,6 +328,8 @@ def process_instance(
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
llm_config.modify_params = False

if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
2 changes: 2 additions & 0 deletions evaluation/benchmarks/bird/run_infer.py
@@ -456,6 +456,8 @@ def execute_sql(db_path, sql):
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
llm_config.modify_params = False
if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

2 changes: 2 additions & 0 deletions evaluation/benchmarks/browsing_delegation/run_infer.py
@@ -142,6 +142,8 @@ def process_instance(
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
llm_config.modify_params = False

if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
2 changes: 2 additions & 0 deletions evaluation/benchmarks/commit0_bench/run_infer.py
@@ -571,6 +571,8 @@ def commit0_setup(dataset: pd.DataFrame, repo_split: str) -> pd.DataFrame:
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
llm_config.modify_params = False
llm_config.log_completions = True

if llm_config is None:
2 changes: 2 additions & 0 deletions evaluation/benchmarks/discoverybench/run_infer.py
@@ -466,6 +466,8 @@ def create_dataset(repo_location: str, split: str = 'test'):
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
llm_config.modify_params = False
if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

3 changes: 3 additions & 0 deletions evaluation/benchmarks/gaia/run_infer.py
@@ -238,6 +238,9 @@ def process_instance(
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
llm_config.modify_params = False

if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

2 changes: 2 additions & 0 deletions evaluation/benchmarks/gorilla/run_infer.py
@@ -146,6 +146,8 @@ def process_instance(
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
llm_config.modify_params = False
if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

3 changes: 3 additions & 0 deletions evaluation/benchmarks/gpqa/run_infer.py
@@ -326,6 +326,9 @@ def process_instance(
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
llm_config.modify_params = False

if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

2 changes: 2 additions & 0 deletions evaluation/benchmarks/humanevalfix/run_infer.py
@@ -285,6 +285,8 @@ def process_instance(
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
llm_config.modify_params = False
if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

2 changes: 2 additions & 0 deletions evaluation/benchmarks/logic_reasoning/run_infer.py
@@ -288,6 +288,8 @@ def process_instance(
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
llm_config.modify_params = False
if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

2 changes: 2 additions & 0 deletions evaluation/benchmarks/miniwob/run_infer.py
@@ -231,6 +231,8 @@ def process_instance(
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
llm_config.modify_params = False
if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

2 changes: 2 additions & 0 deletions evaluation/benchmarks/mint/run_infer.py
@@ -279,6 +279,8 @@ def process_instance(
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
llm_config.modify_params = False
if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

3 changes: 3 additions & 0 deletions evaluation/benchmarks/ml_bench/run_analysis.py
@@ -124,6 +124,9 @@ def classify_error(llm: LLM, failed_case: dict) -> str:
# for details of how to set `llm_config`
if args.llm_config:
specified_llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
specified_llm_config.modify_params = False

if specified_llm_config:
config.llm = specified_llm_config
logger.info(f'Config for evaluation: {config}')
2 changes: 2 additions & 0 deletions evaluation/benchmarks/ml_bench/run_infer.py
@@ -292,6 +292,8 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
llm_config.modify_params = False
if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

2 changes: 2 additions & 0 deletions evaluation/benchmarks/scienceagentbench/run_infer.py
@@ -272,6 +272,8 @@ def process_instance(
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
llm_config.modify_params = False
if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

2 changes: 2 additions & 0 deletions evaluation/benchmarks/swe_bench/run_infer.py
@@ -490,6 +490,8 @@ def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
llm_config.log_completions = True
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
llm_config.modify_params = False

if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
3 changes: 3 additions & 0 deletions evaluation/benchmarks/toolqa/run_infer.py
@@ -181,6 +181,9 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
llm_config.modify_params = False

if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

2 changes: 2 additions & 0 deletions evaluation/benchmarks/webarena/run_infer.py
@@ -212,6 +212,8 @@ def process_instance(
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
# modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
llm_config.modify_params = False
if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

4 changes: 4 additions & 0 deletions openhands/core/config/llm_config.py
@@ -38,6 +38,7 @@ class LLMConfig:
output_cost_per_token: The cost per output token. This will be available in logs for the user to check.
ollama_base_url: The base URL for the OLLAMA API.
drop_params: Drop any unmapped (unsupported) params without causing an exception.
modify_params: Allow litellm to modify params, doing transformations like adding a default message when a message is empty.
disable_vision: If the model is vision capable, this option allows disabling image processing (useful for cost reduction).
caching_prompt: Use the prompt caching feature if provided by the LLM and supported by the provider.
log_completions: Whether to log LLM completions to the state.
@@ -72,7 +73,10 @@ class LLMConfig:
input_cost_per_token: float | None = None
output_cost_per_token: float | None = None
ollama_base_url: str | None = None
# This setting can be sent in each call to litellm
drop_params: bool = True
# Note: this setting is actually global, unlike drop_params
modify_params: bool = True
disable_vision: bool | None = None
caching_prompt: bool = True
log_completions: bool = False
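
Since modify_params is now a field on LLMConfig, callers can opt out per config object, which is the pattern the benchmark scripts above rely on. A minimal sketch of that pattern, assuming LLMConfig is a dataclass that accepts keyword arguments and that the import path matches this diff's file layout:

    from openhands.core.config.llm_config import LLMConfig  # import path assumed from this diff

    # Default: litellm may transform requests, e.g. add a default message
    # when a message is empty.
    chat_config = LLMConfig(model='gpt-4o')
    assert chat_config.modify_params is True

    # Evaluation: turn the transformations off so runs stay reproducible.
    eval_config = LLMConfig(model='gpt-4o')
    eval_config.modify_params = False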
6 changes: 5 additions & 1 deletion openhands/llm/llm.py
@@ -101,7 +101,6 @@ def __init__(
self.cost_metric_supported: bool = True
self.config: LLMConfig = copy.deepcopy(config)

# litellm actually uses base Exception here for unknown model
self.model_info: ModelInfo | None = None

if self.config.log_completions:
@@ -206,6 +205,11 @@ def wrapper(*args, **kwargs):
'anthropic-beta': 'prompt-caching-2024-07-31',
}

# set litellm modify_params to the configured value
# True by default to allow litellm to do transformations like adding a default message when a message is empty
# NOTE: this setting is global; unlike drop_params, it cannot be overridden in the litellm completion partial
litellm.modify_params = self.config.modify_params

try:
# Record start time for latency measurement
start_time = time.time()
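As the NOTE in the hunk above says, modify_params differs from drop_params in scope: drop_params can be passed to each litellm call, while modify_params exists only as a module-level flag. A hedged sketch of the difference (model name and message are placeholders):

    import litellm

    # drop_params can be overridden per call:
    litellm.completion(
        model='gpt-4o',
        messages=[{'role': 'user', 'content': 'hello'}],
        drop_params=True,  # applies to this call only
    )

    # modify_params has no per-call equivalent; assigning it affects every
    # completion() in the process, which is why each LLM instance sets it
    # just before calling litellm.
    litellm.modify_params = True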
