Skip to content

Commit

Permalink
Merge branch 'main' into kevin
Browse files: browse the repository at this point in the history
  • Loading branch information
SmartManoj committed Feb 16, 2025
2 parents 287b8dc + 30e39e8 commit c9c21e2
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 6 deletions.
4 changes: 3 additions & 1 deletion evaluation/benchmarks/the_agent_company/browsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,9 @@ def pre_login(
obs: BrowserOutputObservation = runtime.run_action(browser_action)
logger.debug(obs, extra={'msg_type': 'OBSERVATION'})
if save_screenshots:
image_data = base64.b64decode(obs.screenshot)
image_data = base64.b64decode(
obs.screenshot.replace('data:image/png;base64,', '')
)
with open(os.path.join(directory, f'{image_id}.png'), 'wb') as file:
file.write(image_data)
image_id += 1
14 changes: 12 additions & 2 deletions evaluation/benchmarks/the_agent_company/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def get_config(
task_short_name: str,
mount_path_on_host: str,
llm_config: LLMConfig,
agent_config: AgentConfig,
agent_config: AgentConfig | None,
) -> AppConfig:
config = AppConfig(
run_as_openhands=False,
Expand Down Expand Up @@ -159,11 +159,21 @@ def run_solver(
os.makedirs(screenshots_dir, exist_ok=True)
for image_id, obs in enumerate(state.history):
if isinstance(obs, BrowserOutputObservation):
image_data = base64.b64decode(obs.screenshot)
image_data = base64.b64decode(
obs.screenshot.replace('data:image/png;base64,', '')
)
with open(
os.path.join(screenshots_dir, f'{image_id}.png'), 'wb'
) as file:
file.write(image_data)
if obs.set_of_marks:
som_image_data = base64.b64decode(
obs.set_of_marks.replace('data:image/png;base64,', '')
)
with open(
os.path.join(screenshots_dir, f'{image_id}_som.png'), 'wb'
) as file:
file.write(som_image_data)

if save_final_state:
os.makedirs(state_dir, exist_ok=True)
Expand Down
4 changes: 2 additions & 2 deletions evaluation/benchmarks/the_agent_company/scripts/run_infer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,6 @@ temp_file="tasks_${START_PERCENTILE}_${END_PERCENTILE}.md"
sed -n "${start_line},${end_line}p" tasks.md > "$temp_file"

while IFS= read -r task_image; do
docker pull $task_image

# Remove prefix using ## to remove longest matching pattern from start
task_name=${task_image##ghcr.io/theagentcompany/}

Expand All @@ -144,6 +142,8 @@ while IFS= read -r task_image; do
continue
fi

docker pull $task_image

# Build the Python command
COMMAND="poetry run python run_infer.py \
--agent-llm-config \"$AGENT_LLM_CONFIG\" \
Expand Down
3 changes: 3 additions & 0 deletions openhands/core/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,11 +165,13 @@ class RollingLogger:
max_lines: int
char_limit: int
log_lines: list[str]
all_lines: str

def __init__(self, max_lines=10, char_limit=80):
self.max_lines = max_lines
self.char_limit = char_limit
self.log_lines = [''] * self.max_lines
self.all_lines = ''

def is_enabled(self):
return DEBUG and sys.stdout.isatty()
Expand All @@ -184,6 +186,7 @@ def add_line(self, line):
self.log_lines.pop(0)
self.log_lines.append(line[: self.char_limit])
self.print_lines()
self.all_lines += line + '\n'

def write_immediately(self, line):
self._write(line)
Expand Down
4 changes: 3 additions & 1 deletion openhands/runtime/builder/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,10 @@ def build(
)

except subprocess.CalledProcessError as e:
logger.error(f'Image build failed:\n{e}')
logger.error(f'Image build failed:\n{e}') # TODO: {e} is empty
logger.error(f'Command output:\n{e.output}')
if self.rolling_logger.is_enabled():
logger.error("Docker build output:\n" + self.rolling_logger.all_lines) # Show the error
raise

except subprocess.TimeoutExpired:
Expand Down
27 changes: 27 additions & 0 deletions tests/unit/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
finalize_config,
get_agent_config_arg,
get_llm_config_arg,
load_app_config,
load_from_env,
load_from_toml,
)
Expand Down Expand Up @@ -809,3 +810,29 @@ def test_get_agent_config_arg(temp_toml_file):
assert not agent_config2.memory_enabled
assert agent_config2.enable_prompt_extensions
assert agent_config2.memory_max_threads == 10


def test_agent_config_custom_group_name(temp_toml_file):
    """Agent config sections with arbitrary group names can be loaded.

    Writes a TOML file containing a ``[core]`` section and two agent
    sections (``[agent.group1]`` and ``[agent.group2]``), then checks that:

    * ``load_app_config`` reads the file and picks up ``max_iterations``;
    * ``get_agent_config_arg`` returns the ``memory_enabled`` value of
      whichever group name it is asked for.
    """
    toml_text = """
[core]
max_iterations = 99
[agent.group1]
memory_enabled = true
[agent.group2]
memory_enabled = false
"""
    with open(temp_toml_file, 'w') as toml_handle:
        toml_handle.write(toml_text)

    # Sanity check only: loading the whole app config from this file must
    # succeed and reflect the [core] value.
    loaded_config = load_app_config(config_file=temp_toml_file)
    assert loaded_config.max_iterations == 99

    # Evaluation's run_infer can call `get_agent_config_arg` with any group
    # name, not only an agent's own name, to pick up a custom agent config.
    group1_config = get_agent_config_arg('group1', temp_toml_file)
    assert group1_config.memory_enabled

    group2_config = get_agent_config_arg('group2', temp_toml_file)
    assert not group2_config.memory_enabled

0 comments on commit c9c21e2

Please sign in to comment.