Remove the `keep_prompt` argument from all `CmdRunAction` call sites, event serialization, and unit tests

All-Hands-AI · Nov 18, 2024 · 20721e3
1 parent 7ef9e37
commit 20721e3
Show file tree

Hide file tree

Showing 16 changed files with 32 additions and 62 deletions.
diff --git a/evaluation/agent_bench/run_infer.py b/evaluation/agent_bench/run_infer.py
@@ -131,7 +131,6 @@ def complete_runtime(
 
         action = CmdRunAction(
             command=f'chmod +x ./{script_name} && ./{script_name}',
-            keep_prompt=False,
         )
         logger.info(action, extra={'msg_type': 'ACTION'})
         obs = runtime.run_action(action)
@@ -158,8 +157,7 @@ def complete_runtime(
             logger.info(f'Running get ground truth cmd: {script_name}')
 
             action = CmdRunAction(
-                command=f'chmod +x ./{script_name} && ./{script_name}',
-                keep_prompt=False,
+                command=f'chmod +x ./{script_name} && ./{script_name}'
             )
             logger.info(action, extra={'msg_type': 'ACTION'})
             obs = runtime.run_action(action)

diff --git a/evaluation/aider_bench/run_infer.py b/evaluation/aider_bench/run_infer.py
@@ -143,10 +143,7 @@ def complete_runtime(
         )
         logger.info(f'Running test file: {script_name}')
 
-    action = CmdRunAction(
-        command=f'python3 -m unittest {script_name}',
-        keep_prompt=False,
-    )
+    action = CmdRunAction(command=f'python3 -m unittest {script_name}')
     logger.info(action, extra={'msg_type': 'ACTION'})
     obs = runtime.run_action(action)
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})

diff --git a/evaluation/biocoder/run_infer.py b/evaluation/biocoder/run_infer.py
@@ -197,7 +197,7 @@ def complete_runtime(
     if obs.exit_code == 0:
         test_result['metadata']['1_copy_change_success'] = True
 
-        action = CmdRunAction(command=f'cat {generated_path}', keep_prompt=False)
+        action = CmdRunAction(command=f'cat {generated_path}')
         logger.info(action, extra={'msg_type': 'ACTION'})
         obs = runtime.run_action(action)
         assert obs.exit_code == 0
@@ -221,9 +221,7 @@ def complete_runtime(
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert obs.exit_code == 0
 
-    action = CmdRunAction(
-        command='cat /testing_files/results_biocoder.json', keep_prompt=False
-    )
+    action = CmdRunAction(command='cat /testing_files/results_biocoder.json')
     logger.info(action, extra={'msg_type': 'ACTION'})
     obs = runtime.run_action(action)
     if obs.exit_code == 0:

diff --git a/evaluation/bird/run_infer.py b/evaluation/bird/run_infer.py
@@ -266,10 +266,7 @@ def initialize_runtime(
     runtime.copy_to(db_file, '/workspace')
 
     # Check the database is copied
-    action = CmdRunAction(
-        command='cd /workspace && ls -l',
-        keep_prompt=False,
-    )
+    action = CmdRunAction(command='cd /workspace && ls -l')
     obs = runtime.run_action(action)
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert obs.exit_code == 0
@@ -298,10 +295,7 @@ def complete_runtime(
     instance_id = instance.instance_id.replace('/', '__')
     path = os.path.join('/workspace', f'{instance_id}.py')
 
-    action = CmdRunAction(
-        command=f'cat {path}',
-        keep_prompt=False,
-    )
+    action = CmdRunAction(command=f'cat {path}')
     obs = runtime.run_action(action)
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
 

diff --git a/evaluation/humanevalfix/run_infer.py b/evaluation/humanevalfix/run_infer.py
@@ -169,9 +169,7 @@ def complete_runtime(
     num_workers = LANGUAGE_TO_NUM_WORKERS[language]
     python_imports = '\n'.join(IMPORT_HELPER[language])
 
-    action = CmdRunAction(
-        command=f'cat /workspace/{_get_instance_id(instance)}.py', keep_prompt=False
-    )
+    action = CmdRunAction(command=f'cat /workspace/{_get_instance_id(instance)}.py')
     obs = runtime.run_action(action)
     assert obs.exit_code == 0
 

diff --git a/evaluation/integration_tests/tests/t01_fix_simple_typo.py b/evaluation/integration_tests/tests/t01_fix_simple_typo.py
@@ -24,7 +24,7 @@ def initialize_runtime(cls, runtime: Runtime) -> None:
     @classmethod
     def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
         # check if the file /workspace/bad.txt has been fixed
-        action = CmdRunAction(command='cat /workspace/bad.txt', keep_prompt=False)
+        action = CmdRunAction(command='cat /workspace/bad.txt')
         obs = runtime.run_action(action)
         if obs.exit_code != 0:
             return TestResult(

diff --git a/evaluation/integration_tests/tests/t02_add_bash_hello.py b/evaluation/integration_tests/tests/t02_add_bash_hello.py
@@ -10,14 +10,14 @@ class Test(BaseIntegrationTest):
 
     @classmethod
     def initialize_runtime(cls, runtime: Runtime) -> None:
-        action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
+        action = CmdRunAction(command='mkdir -p /workspace')
         obs = runtime.run_action(action)
         assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
 
     @classmethod
     def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
         # check if the file /workspace/hello.sh exists
-        action = CmdRunAction(command='cat /workspace/hello.sh', keep_prompt=False)
+        action = CmdRunAction(command='cat /workspace/hello.sh')
         obs = runtime.run_action(action)
         if obs.exit_code != 0:
             return TestResult(
@@ -26,7 +26,7 @@ def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
             )
 
         # execute the script
-        action = CmdRunAction(command='bash /workspace/hello.sh', keep_prompt=False)
+        action = CmdRunAction(command='bash /workspace/hello.sh')
         obs = runtime.run_action(action)
         if obs.exit_code != 0:
             return TestResult(

diff --git a/evaluation/integration_tests/tests/t03_jupyter_write_file.py b/evaluation/integration_tests/tests/t03_jupyter_write_file.py
@@ -10,14 +10,14 @@ class Test(BaseIntegrationTest):
 
     @classmethod
     def initialize_runtime(cls, runtime: Runtime) -> None:
-        action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
+        action = CmdRunAction(command='mkdir -p /workspace')
         obs = runtime.run_action(action)
         assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
 
     @classmethod
     def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
         # check if the file /workspace/hello.sh exists
-        action = CmdRunAction(command='cat /workspace/test.txt', keep_prompt=False)
+        action = CmdRunAction(command='cat /workspace/test.txt')
         obs = runtime.run_action(action)
         if obs.exit_code != 0:
             return TestResult(
@@ -26,7 +26,7 @@ def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
             )
 
         # execute the script
-        action = CmdRunAction(command='cat /workspace/test.txt', keep_prompt=False)
+        action = CmdRunAction(command='cat /workspace/test.txt')
         obs = runtime.run_action(action)
 
         if obs.exit_code != 0:

diff --git a/evaluation/integration_tests/tests/t04_git_staging.py b/evaluation/integration_tests/tests/t04_git_staging.py
@@ -10,31 +10,29 @@ class Test(BaseIntegrationTest):
 
     @classmethod
     def initialize_runtime(cls, runtime: Runtime) -> None:
-        action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
+        action = CmdRunAction(command='mkdir -p /workspace')
         obs = runtime.run_action(action)
         assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
 
         # git init
-        action = CmdRunAction(command='git init', keep_prompt=False)
+        action = CmdRunAction(command='git init')
         obs = runtime.run_action(action)
         assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
 
         # create README.md
-        action = CmdRunAction(
-            command='echo \'print("hello world")\' > hello.py', keep_prompt=False
-        )
+        action = CmdRunAction(command='echo \'print("hello world")\' > hello.py')
         obs = runtime.run_action(action)
         assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
 
         # git add README.md
-        action = CmdRunAction(command='git add hello.py', keep_prompt=False)
+        action = CmdRunAction(command='git add hello.py')
         obs = runtime.run_action(action)
         assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
 
     @classmethod
     def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
         # check if the file /workspace/hello.py exists
-        action = CmdRunAction(command='cat /workspace/hello.py', keep_prompt=False)
+        action = CmdRunAction(command='cat /workspace/hello.py')
         obs = runtime.run_action(action)
         if obs.exit_code != 0:
             return TestResult(
@@ -43,7 +41,7 @@ def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
             )
 
         # check if the staging area is empty
-        action = CmdRunAction(command='git status', keep_prompt=False)
+        action = CmdRunAction(command='git status')
         obs = runtime.run_action(action)
         if obs.exit_code != 0:
             return TestResult(

diff --git a/evaluation/integration_tests/tests/t05_simple_browsing.py b/evaluation/integration_tests/tests/t05_simple_browsing.py
@@ -83,11 +83,11 @@ class Test(BaseIntegrationTest):
 
     @classmethod
     def initialize_runtime(cls, runtime: Runtime) -> None:
-        action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
+        action = CmdRunAction(command='mkdir -p /workspace')
         obs = runtime.run_action(action)
         assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
 
-        action = CmdRunAction(command='mkdir -p /tmp/server', keep_prompt=False)
+        action = CmdRunAction(command='mkdir -p /tmp/server')
         obs = runtime.run_action(action)
         assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
 
@@ -101,8 +101,7 @@ def initialize_runtime(cls, runtime: Runtime) -> None:
 
         # create README.md
         action = CmdRunAction(
-            command='cd /tmp/server && nohup python3 -m http.server 8000 &',
-            keep_prompt=False,
+            command='cd /tmp/server && nohup python3 -m http.server 8000 &'
         )
         obs = runtime.run_action(action)
 

diff --git a/evaluation/ml_bench/run_infer.py b/evaluation/ml_bench/run_infer.py
@@ -161,7 +161,7 @@ def complete_runtime(
     eval_script = os.path.join(task_path, 'run.sh')
     logger.info(f'Running evaluation script: {eval_script}')
 
-    action = CmdRunAction(command=f'cat {eval_script}', keep_prompt=False)
+    action = CmdRunAction(command=f'cat {eval_script}')
     logger.info(action, extra={'msg_type': 'ACTION'})
     obs = runtime.run_action(action)
     if obs.exit_code == 0:

diff --git a/evaluation/scienceagentbench/run_infer.py b/evaluation/scienceagentbench/run_infer.py
@@ -121,10 +121,7 @@ def initialize_runtime(
     runtime.copy_to(dataset_dir, '/workspace/benchmark/datasets', recursive=True)
 
     # Check the dataset exists
-    action = CmdRunAction(
-        command='cd /workspace/benchmark/datasets && ls',
-        keep_prompt=False,
-    )
+    action = CmdRunAction(command='cd /workspace/benchmark/datasets && ls')
     obs = runtime.run_action(action)
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert obs.exit_code == 0
@@ -154,10 +151,7 @@ def complete_runtime(
 
     assert obs.exit_code == 0
 
-    action = CmdRunAction(
-        command=f'cat pred_programs/{instance.pred_program_name}',
-        keep_prompt=False,
-    )
+    action = CmdRunAction(command=f'cat pred_programs/{instance.pred_program_name}')
     logger.info(action, extra={'msg_type': 'ACTION'})
     obs = runtime.run_action(action)
 

diff --git a/evaluation/swe_bench/eval_infer.py b/evaluation/swe_bench/eval_infer.py
@@ -177,7 +177,7 @@ def process_instance(
         "(patch --batch --fuzz=5 -p1 -i /tmp/patch.diff && echo 'APPLY_PATCH_PASS' || "
         "echo 'APPLY_PATCH_FAIL')))"
     )
-    action = CmdRunAction(command=exec_command, keep_prompt=False)
+    action = CmdRunAction(command=exec_command)
     action.timeout = 600
     obs = runtime.run_action(action)
     assert isinstance(obs, CmdOutputObservation)
@@ -200,9 +200,7 @@ def process_instance(
 
             # Run eval script in background and save output to log file
             log_file = '/tmp/eval_output.log'
-            action = CmdRunAction(
-                command=f'/tmp/eval.sh > {log_file} 2>&1 & echo $!', keep_prompt=False
-            )
+            action = CmdRunAction(command=f'/tmp/eval.sh > {log_file} 2>&1 & echo $!')
             action.timeout = 60  # Short timeout just to get the process ID
             obs = runtime.run_action(action)
 
@@ -224,7 +222,7 @@ def process_instance(
                         instance['test_result']['report']['test_timeout'] = True
                         break
                     check_action = CmdRunAction(
-                        command=f'ps -p {pid} > /dev/null; echo $?', keep_prompt=False
+                        command=f'ps -p {pid} > /dev/null; echo $?'
                     )
                     check_action.timeout = 60
                     check_obs = runtime.run_action(check_action)
@@ -242,7 +240,7 @@ def process_instance(
                     time.sleep(30)  # Wait for 30 seconds before checking again
 
                 # Read the log file
-                cat_action = CmdRunAction(command=f'cat {log_file}', keep_prompt=False)
+                cat_action = CmdRunAction(command=f'cat {log_file}')
                 cat_action.timeout = 300
                 cat_obs = runtime.run_action(cat_action)
 

diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py
@@ -350,8 +350,7 @@ def complete_runtime(
     git_patch = None
     while n_retries < 5:
         action = CmdRunAction(
-            command=f'git diff --no-color --cached {instance["base_commit"]}',
-            keep_prompt=False,
+            command=f'git diff --no-color --cached {instance["base_commit"]}'
         )
         action.timeout = 600 + 100 * n_retries
         logger.info(action, extra={'msg_type': 'ACTION'})

diff --git a/openhands/events/serialization/event.py b/openhands/events/serialization/event.py
@@ -106,7 +106,6 @@ def event_to_memory(event: 'Event', max_message_chars: int) -> dict:
     # runnable actions have some extra fields used in the BE/FE, which should not be sent to the LLM
     if 'args' in d:
         d['args'].pop('blocking', None)
-        d['args'].pop('keep_prompt', None)
         d['args'].pop('confirmation_state', None)
 
     if 'extras' in d:

diff --git a/tests/unit/test_action_serialization.py b/tests/unit/test_action_serialization.py
@@ -43,11 +43,10 @@ def serialization_deserialization(
     serialized_action_memory = event_to_memory(action_instance, max_message_chars)
     original_memory_dict = original_action_dict.copy()
 
-    # we don't send backend properties like id or 'keep_prompt'
+    # we don't send backend properties like id
     original_memory_dict.pop('id', None)
     original_memory_dict.pop('timestamp', None)
     if 'args' in original_memory_dict:
-        original_memory_dict['args'].pop('keep_prompt', None)
         original_memory_dict['args'].pop('blocking', None)
         original_memory_dict['args'].pop('confirmation_state', None)
 
@@ -101,7 +100,6 @@ def test_cmd_run_action_serialization_deserialization():
             'blocking': False,
             'command': 'echo "Hello world"',
             'thought': '',
-            'keep_prompt': True,
             'hidden': False,
             'confirmation_state': ActionConfirmationStatus.CONFIRMED,
         },