Skip to content

Commit

Permalink
feat: Enable llm_completions logging in aider_bench
Browse files · Browse the repository at this point in the history
- Added update_llm_config_for_completions_logging to imports
- Modified get_config to accept instance parameter
- Updated llm_config to enable completions logging
- Updated process_instance to pass instance to get_config

This change makes aider_bench save llm_completions in the same way as swe_bench,
with completions being saved in {eval_output_dir}/llm_completions/{instance_id}/
  • Loading branch information
openhands-agent committed Feb 25, 2025
1 parent 1c72676 commit 92e98f6
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions evaluation/benchmarks/aider_bench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_llm_config_for_completions_logging,
)
from openhands.controller.state.state import State
from openhands.core.config import (
Expand All @@ -45,6 +46,7 @@


def get_config(
instance: pd.Series,
metadata: EvalMetadata,
) -> AppConfig:
config = AppConfig(
Expand All @@ -67,7 +69,13 @@ def get_config(
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
# Update llm_config to enable completions logging
llm_config = update_llm_config_for_completions_logging(
metadata.llm_config,
metadata.eval_output_dir,
str(instance.instance_id)
)
config.set_llm_config(llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config.enable_prompt_extensions = False

Expand Down Expand Up @@ -170,7 +178,7 @@ def process_instance(
metadata: EvalMetadata,
reset_logger: bool = True,
) -> EvalOutput:
config = get_config(metadata)
config = get_config(instance, metadata)

# Setup the logger properly, so you can run multi-processing to parallelize the evaluation
if reset_logger:
Expand Down

0 comments on commit 92e98f6

Please sign in to comment.