Skip to content

Commit

Permalink
remove keep_prompt from everywhere
Browse files Browse the repository at this point in the history
  • Loading branch information
xingyaoww committed Nov 18, 2024
1 parent 7ef9e37 commit 20721e3
Show file tree
Hide file tree
Showing 16 changed files with 32 additions and 62 deletions.
4 changes: 1 addition & 3 deletions evaluation/agent_bench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ def complete_runtime(

action = CmdRunAction(
command=f'chmod +x ./{script_name} && ./{script_name}',
keep_prompt=False,
)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
Expand All @@ -158,8 +157,7 @@ def complete_runtime(
logger.info(f'Running get ground truth cmd: {script_name}')

action = CmdRunAction(
command=f'chmod +x ./{script_name} && ./{script_name}',
keep_prompt=False,
command=f'chmod +x ./{script_name} && ./{script_name}'
)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
Expand Down
5 changes: 1 addition & 4 deletions evaluation/aider_bench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,7 @@ def complete_runtime(
)
logger.info(f'Running test file: {script_name}')

action = CmdRunAction(
command=f'python3 -m unittest {script_name}',
keep_prompt=False,
)
action = CmdRunAction(command=f'python3 -m unittest {script_name}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
Expand Down
6 changes: 2 additions & 4 deletions evaluation/biocoder/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def complete_runtime(
if obs.exit_code == 0:
test_result['metadata']['1_copy_change_success'] = True

action = CmdRunAction(command=f'cat {generated_path}', keep_prompt=False)
action = CmdRunAction(command=f'cat {generated_path}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0
Expand All @@ -221,9 +221,7 @@ def complete_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0

action = CmdRunAction(
command='cat /testing_files/results_biocoder.json', keep_prompt=False
)
action = CmdRunAction(command='cat /testing_files/results_biocoder.json')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
if obs.exit_code == 0:
Expand Down
10 changes: 2 additions & 8 deletions evaluation/bird/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,10 +266,7 @@ def initialize_runtime(
runtime.copy_to(db_file, '/workspace')

# Check the database is copied
action = CmdRunAction(
command='cd /workspace && ls -l',
keep_prompt=False,
)
action = CmdRunAction(command='cd /workspace && ls -l')
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0
Expand Down Expand Up @@ -298,10 +295,7 @@ def complete_runtime(
instance_id = instance.instance_id.replace('/', '__')
path = os.path.join('/workspace', f'{instance_id}.py')

action = CmdRunAction(
command=f'cat {path}',
keep_prompt=False,
)
action = CmdRunAction(command=f'cat {path}')
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})

Expand Down
4 changes: 1 addition & 3 deletions evaluation/humanevalfix/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,7 @@ def complete_runtime(
num_workers = LANGUAGE_TO_NUM_WORKERS[language]
python_imports = '\n'.join(IMPORT_HELPER[language])

action = CmdRunAction(
command=f'cat /workspace/{_get_instance_id(instance)}.py', keep_prompt=False
)
action = CmdRunAction(command=f'cat /workspace/{_get_instance_id(instance)}.py')
obs = runtime.run_action(action)
assert obs.exit_code == 0

Expand Down
2 changes: 1 addition & 1 deletion evaluation/integration_tests/tests/t01_fix_simple_typo.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def initialize_runtime(cls, runtime: Runtime) -> None:
@classmethod
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
# check if the file /workspace/bad.txt has been fixed
action = CmdRunAction(command='cat /workspace/bad.txt', keep_prompt=False)
action = CmdRunAction(command='cat /workspace/bad.txt')
obs = runtime.run_action(action)
if obs.exit_code != 0:
return TestResult(
Expand Down
6 changes: 3 additions & 3 deletions evaluation/integration_tests/tests/t02_add_bash_hello.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ class Test(BaseIntegrationTest):

@classmethod
def initialize_runtime(cls, runtime: Runtime) -> None:
action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
action = CmdRunAction(command='mkdir -p /workspace')
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')

@classmethod
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
# check if the file /workspace/hello.sh exists
action = CmdRunAction(command='cat /workspace/hello.sh', keep_prompt=False)
action = CmdRunAction(command='cat /workspace/hello.sh')
obs = runtime.run_action(action)
if obs.exit_code != 0:
return TestResult(
Expand All @@ -26,7 +26,7 @@ def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
)

# execute the script
action = CmdRunAction(command='bash /workspace/hello.sh', keep_prompt=False)
action = CmdRunAction(command='bash /workspace/hello.sh')
obs = runtime.run_action(action)
if obs.exit_code != 0:
return TestResult(
Expand Down
6 changes: 3 additions & 3 deletions evaluation/integration_tests/tests/t03_jupyter_write_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ class Test(BaseIntegrationTest):

@classmethod
def initialize_runtime(cls, runtime: Runtime) -> None:
action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
action = CmdRunAction(command='mkdir -p /workspace')
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')

@classmethod
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
# check if the file /workspace/hello.sh exists
action = CmdRunAction(command='cat /workspace/test.txt', keep_prompt=False)
action = CmdRunAction(command='cat /workspace/test.txt')
obs = runtime.run_action(action)
if obs.exit_code != 0:
return TestResult(
Expand All @@ -26,7 +26,7 @@ def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
)

# execute the script
action = CmdRunAction(command='cat /workspace/test.txt', keep_prompt=False)
action = CmdRunAction(command='cat /workspace/test.txt')
obs = runtime.run_action(action)

if obs.exit_code != 0:
Expand Down
14 changes: 6 additions & 8 deletions evaluation/integration_tests/tests/t04_git_staging.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,31 +10,29 @@ class Test(BaseIntegrationTest):

@classmethod
def initialize_runtime(cls, runtime: Runtime) -> None:
action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
action = CmdRunAction(command='mkdir -p /workspace')
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')

# git init
action = CmdRunAction(command='git init', keep_prompt=False)
action = CmdRunAction(command='git init')
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')

# create README.md
action = CmdRunAction(
command='echo \'print("hello world")\' > hello.py', keep_prompt=False
)
action = CmdRunAction(command='echo \'print("hello world")\' > hello.py')
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')

# git add README.md
action = CmdRunAction(command='git add hello.py', keep_prompt=False)
action = CmdRunAction(command='git add hello.py')
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')

@classmethod
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
# check if the file /workspace/hello.py exists
action = CmdRunAction(command='cat /workspace/hello.py', keep_prompt=False)
action = CmdRunAction(command='cat /workspace/hello.py')
obs = runtime.run_action(action)
if obs.exit_code != 0:
return TestResult(
Expand All @@ -43,7 +41,7 @@ def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
)

# check if the staging area is empty
action = CmdRunAction(command='git status', keep_prompt=False)
action = CmdRunAction(command='git status')
obs = runtime.run_action(action)
if obs.exit_code != 0:
return TestResult(
Expand Down
7 changes: 3 additions & 4 deletions evaluation/integration_tests/tests/t05_simple_browsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,11 @@ class Test(BaseIntegrationTest):

@classmethod
def initialize_runtime(cls, runtime: Runtime) -> None:
action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
action = CmdRunAction(command='mkdir -p /workspace')
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')

action = CmdRunAction(command='mkdir -p /tmp/server', keep_prompt=False)
action = CmdRunAction(command='mkdir -p /tmp/server')
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')

Expand All @@ -101,8 +101,7 @@ def initialize_runtime(cls, runtime: Runtime) -> None:

# create README.md
action = CmdRunAction(
command='cd /tmp/server && nohup python3 -m http.server 8000 &',
keep_prompt=False,
command='cd /tmp/server && nohup python3 -m http.server 8000 &'
)
obs = runtime.run_action(action)

Expand Down
2 changes: 1 addition & 1 deletion evaluation/ml_bench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def complete_runtime(
eval_script = os.path.join(task_path, 'run.sh')
logger.info(f'Running evaluation script: {eval_script}')

action = CmdRunAction(command=f'cat {eval_script}', keep_prompt=False)
action = CmdRunAction(command=f'cat {eval_script}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
if obs.exit_code == 0:
Expand Down
10 changes: 2 additions & 8 deletions evaluation/scienceagentbench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,7 @@ def initialize_runtime(
runtime.copy_to(dataset_dir, '/workspace/benchmark/datasets', recursive=True)

# Check the dataset exists
action = CmdRunAction(
command='cd /workspace/benchmark/datasets && ls',
keep_prompt=False,
)
action = CmdRunAction(command='cd /workspace/benchmark/datasets && ls')
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0
Expand Down Expand Up @@ -154,10 +151,7 @@ def complete_runtime(

assert obs.exit_code == 0

action = CmdRunAction(
command=f'cat pred_programs/{instance.pred_program_name}',
keep_prompt=False,
)
action = CmdRunAction(command=f'cat pred_programs/{instance.pred_program_name}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)

Expand Down
10 changes: 4 additions & 6 deletions evaluation/swe_bench/eval_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def process_instance(
"(patch --batch --fuzz=5 -p1 -i /tmp/patch.diff && echo 'APPLY_PATCH_PASS' || "
"echo 'APPLY_PATCH_FAIL')))"
)
action = CmdRunAction(command=exec_command, keep_prompt=False)
action = CmdRunAction(command=exec_command)
action.timeout = 600
obs = runtime.run_action(action)
assert isinstance(obs, CmdOutputObservation)
Expand All @@ -200,9 +200,7 @@ def process_instance(

# Run eval script in background and save output to log file
log_file = '/tmp/eval_output.log'
action = CmdRunAction(
command=f'/tmp/eval.sh > {log_file} 2>&1 & echo $!', keep_prompt=False
)
action = CmdRunAction(command=f'/tmp/eval.sh > {log_file} 2>&1 & echo $!')
action.timeout = 60 # Short timeout just to get the process ID
obs = runtime.run_action(action)

Expand All @@ -224,7 +222,7 @@ def process_instance(
instance['test_result']['report']['test_timeout'] = True
break
check_action = CmdRunAction(
command=f'ps -p {pid} > /dev/null; echo $?', keep_prompt=False
command=f'ps -p {pid} > /dev/null; echo $?'
)
check_action.timeout = 60
check_obs = runtime.run_action(check_action)
Expand All @@ -242,7 +240,7 @@ def process_instance(
time.sleep(30) # Wait for 30 seconds before checking again

# Read the log file
cat_action = CmdRunAction(command=f'cat {log_file}', keep_prompt=False)
cat_action = CmdRunAction(command=f'cat {log_file}')
cat_action.timeout = 300
cat_obs = runtime.run_action(cat_action)

Expand Down
3 changes: 1 addition & 2 deletions evaluation/swe_bench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,8 +350,7 @@ def complete_runtime(
git_patch = None
while n_retries < 5:
action = CmdRunAction(
command=f'git diff --no-color --cached {instance["base_commit"]}',
keep_prompt=False,
command=f'git diff --no-color --cached {instance["base_commit"]}'
)
action.timeout = 600 + 100 * n_retries
logger.info(action, extra={'msg_type': 'ACTION'})
Expand Down
1 change: 0 additions & 1 deletion openhands/events/serialization/event.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ def event_to_memory(event: 'Event', max_message_chars: int) -> dict:
# runnable actions have some extra fields used in the BE/FE, which should not be sent to the LLM
if 'args' in d:
d['args'].pop('blocking', None)
d['args'].pop('keep_prompt', None)
d['args'].pop('confirmation_state', None)

if 'extras' in d:
Expand Down
4 changes: 1 addition & 3 deletions tests/unit/test_action_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,10 @@ def serialization_deserialization(
serialized_action_memory = event_to_memory(action_instance, max_message_chars)
original_memory_dict = original_action_dict.copy()

# we don't send backend properties like id or 'keep_prompt'
# we don't send backend properties like id
original_memory_dict.pop('id', None)
original_memory_dict.pop('timestamp', None)
if 'args' in original_memory_dict:
original_memory_dict['args'].pop('keep_prompt', None)
original_memory_dict['args'].pop('blocking', None)
original_memory_dict['args'].pop('confirmation_state', None)

Expand Down Expand Up @@ -101,7 +100,6 @@ def test_cmd_run_action_serialization_deserialization():
'blocking': False,
'command': 'echo "Hello world"',
'thought': '',
'keep_prompt': True,
'hidden': False,
'confirmation_state': ActionConfirmationStatus.CONFIRMED,
},
Expand Down

0 comments on commit 20721e3

Please sign in to comment.