Skip to content

Commit

Permalink
fix refs in evaluation dir
Browse files Browse the repository at this point in the history
  • Loading branch information
rbren committed Dec 30, 2024
1 parent 73063c8 commit 16ea0bd
Show file tree
Hide file tree
Showing 22 changed files with 22 additions and 22 deletions.
2 changes: 1 addition & 1 deletion evaluation/benchmarks/EDA/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='eventstream',
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='python:3.12-bookworm',
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/agent_bench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime=os.environ.get('RUNTIME', 'eventstream'),
runtime=os.environ.get('RUNTIME', 'docker'),
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='python:3.12-slim',
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/aider_bench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime=os.environ.get('RUNTIME', 'eventstream'),
runtime=os.environ.get('RUNTIME', 'docker'),
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='python:3.11-bookworm',
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/biocoder/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='eventstream',
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image=BIOCODER_BENCH_CONTAINER_IMAGE,
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/bird/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='eventstream',
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='python:3.12-bookworm',
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/browsing_delegation/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='eventstream',
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='python:3.12-bookworm',
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/commit0_bench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def get_config(
default_agent=metadata.agent_class,
run_as_openhands=False,
max_iterations=metadata.max_iterations,
runtime=os.environ.get('RUNTIME', 'eventstream'),
runtime=os.environ.get('RUNTIME', 'docker'),
sandbox=SandboxConfig(
base_container_image=base_container_image,
enable_auto_lint=True,
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/discoverybench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='eventstream',
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='python:3.12-bookworm',
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/gaia/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='eventstream',
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='python:3.12-bookworm',
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/gorilla/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='eventstream',
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='python:3.12-bookworm',
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/gpqa/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='eventstream',
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='python:3.12-bookworm',
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/humanevalfix/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='eventstream',
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='python:3.12-bookworm',
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/logic_reasoning/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='eventstream',
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='xingyaoww/od-eval-logic-reasoning:v1.0',
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/miniwob/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime=os.environ.get('RUNTIME', 'eventstream'),
runtime=os.environ.get('RUNTIME', 'docker'),
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='xingyaoww/od-eval-miniwob:v1.0',
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/mint/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='eventstream',
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='xingyaoww/od-eval-mint:v1.0',
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/ml_bench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='eventstream',
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='public.ecr.aws/i5g0m1f6/ml-bench',
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/scienceagentbench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime=os.environ.get('RUNTIME', 'eventstream'),
runtime=os.environ.get('RUNTIME', 'docker'),
max_budget_per_task=4,
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/swe_bench/eval_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def get_config(instance: pd.Series) -> AppConfig:
)
config = AppConfig(
run_as_openhands=False,
runtime=os.environ.get('RUNTIME', 'eventstream'),
runtime=os.environ.get('RUNTIME', 'docker'),
sandbox=SandboxConfig(
base_container_image=base_container_image,
use_host_network=False,
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/swe_bench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def get_config(
default_agent=metadata.agent_class,
run_as_openhands=False,
max_iterations=metadata.max_iterations,
runtime=os.environ.get('RUNTIME', 'eventstream'),
runtime=os.environ.get('RUNTIME', 'docker'),
sandbox=SandboxConfig(
base_container_image=base_container_image,
enable_auto_lint=True,
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/toolqa/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='eventstream',
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='python:3.12-bookworm',
Expand Down
2 changes: 1 addition & 1 deletion evaluation/benchmarks/webarena/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='eventstream',
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='python:3.12-bookworm',
Expand Down
2 changes: 1 addition & 1 deletion evaluation/integration_tests/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime=os.environ.get('RUNTIME', 'eventstream'),
runtime=os.environ.get('RUNTIME', 'docker'),
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
# use default base_container_image
Expand Down

0 comments on commit 16ea0bd

Please sign in to comment.