diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index a2b70078fce0..7189371f0bc3 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -662,6 +662,7 @@ async def _step(self) -> None:
action = self.agent.step(self.state)
if action is None:
raise LLMNoActionError('No action was returned')
+ action._source = EventSource.AGENT # type: ignore [attr-defined]
except (
LLMMalformedActionError,
LLMNoActionError,
@@ -720,7 +721,7 @@ async def _step(self) -> None:
== ActionConfirmationStatus.AWAITING_CONFIRMATION
):
await self.set_agent_state_to(AgentState.AWAITING_USER_CONFIRMATION)
- self.event_stream.add_event(action, EventSource.AGENT)
+ self.event_stream.add_event(action, action._source) # type: ignore [attr-defined]
await self.update_state_after_step()
diff --git a/openhands/controller/replay.py b/openhands/controller/replay.py
index 6441e6b3196e..bae54b10e7c6 100644
--- a/openhands/controller/replay.py
+++ b/openhands/controller/replay.py
@@ -1,6 +1,8 @@
from openhands.core.logger import openhands_logger as logger
from openhands.events.action.action import Action
+from openhands.events.action.message import MessageAction
from openhands.events.event import Event, EventSource
+from openhands.events.observation.empty import NullObservation
class ReplayManager:
@@ -15,9 +17,31 @@ class ReplayManager:
initial state of the trajectory.
"""
- def __init__(self, replay_events: list[Event] | None):
+ def __init__(self, events: list[Event] | None):
+ replay_events = []
+ for event in events or []:
+ if event.source == EventSource.ENVIRONMENT:
+ # ignore ENVIRONMENT events as they are not issued by
+ # the user or agent, and should not be replayed
+ continue
+ if isinstance(event, NullObservation):
+ # ignore NullObservation
+ continue
+ replay_events.append(event)
+
if replay_events:
- logger.info(f'Replay logs loaded, events length = {len(replay_events)}')
+ logger.info(f'Replay events loaded, events length = {len(replay_events)}')
+ for index in range(len(replay_events) - 1):
+ event = replay_events[index]
+ if isinstance(event, MessageAction) and event.wait_for_response:
+ # For any message waiting for response that is not the last
+ # event, we override wait_for_response to False, as a response
+ # would have been included in the next event, and we don't
+ # want the user to interfere with the replay process
+ logger.info(
+ 'Replay events contains wait_for_response message action, ignoring wait_for_response'
+ )
+ event.wait_for_response = False
self.replay_events = replay_events
self.replay_mode = bool(replay_events)
self.replay_index = 0
@@ -27,7 +51,6 @@ def _replayable(self) -> bool:
self.replay_events is not None
and self.replay_index < len(self.replay_events)
and isinstance(self.replay_events[self.replay_index], Action)
- and self.replay_events[self.replay_index].source != EventSource.USER
)
def should_replay(self) -> bool:
diff --git a/openhands/core/main.py b/openhands/core/main.py
index 5c3b38a21b64..474757d9c70c 100644
--- a/openhands/core/main.py
+++ b/openhands/core/main.py
@@ -231,6 +231,10 @@ def load_replay_log(trajectory_path: str) -> tuple[list[Event] | None, Action]:
events = []
for item in data:
event = event_from_dict(item)
+ if event.source == EventSource.ENVIRONMENT:
+ # ignore ENVIRONMENT events as they are not issued by
+ # the user or agent, and should not be replayed
+ continue
# cannot add an event with _id to event stream
event._id = None # type: ignore[attr-defined]
events.append(event)
diff --git a/tests/runtime/test_replay.py b/tests/runtime/test_replay.py
index b66ef30a7a7e..1fc374a38ce5 100644
--- a/tests/runtime/test_replay.py
+++ b/tests/runtime/test_replay.py
@@ -10,6 +10,8 @@
from openhands.core.main import run_controller
from openhands.core.schema.agent import AgentState
from openhands.events.action.empty import NullAction
+from openhands.events.action.message import MessageAction
+from openhands.events.event import EventSource
from openhands.events.observation.commands import CmdOutputObservation
@@ -46,6 +48,36 @@ def test_simple_replay(temp_dir, runtime_cls, run_as_openhands):
_close_test_runtime(runtime)
+def test_simple_gui_replay(temp_dir, runtime_cls, run_as_openhands):
+ """
+ A simple replay test that involves simple terminal operations and edits
+ (writing a Vue.js App), using the default agent
+
+ Note:
+ 1. This trajectory is exported from GUI mode, meaning it has extra
+ environmental actions that don't appear in headless mode's trajectories
+ 2. In GUI mode, agents typically don't finish; rather, they wait for the next
+ task from the user, so this exported trajectory ends with awaiting_user_input
+ """
+ runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
+
+ config = _get_config('basic_gui_mode')
+
+ state: State | None = asyncio.run(
+ run_controller(
+ config=config,
+ initial_user_action=NullAction(),
+ runtime=runtime,
+ # exit on message, otherwise this would be stuck on waiting for user input
+ exit_on_message=True,
+ )
+ )
+
+ assert state.agent_state == AgentState.FINISHED
+
+ _close_test_runtime(runtime)
+
+
def test_replay_wrong_initial_state(temp_dir, runtime_cls, run_as_openhands):
"""
Replay requires a consistent initial state to start with, otherwise it might
@@ -78,3 +110,43 @@ def test_replay_wrong_initial_state(temp_dir, runtime_cls, run_as_openhands):
assert has_error_in_action
_close_test_runtime(runtime)
+
+
+def test_replay_basic_interactions(temp_dir, runtime_cls, run_as_openhands):
+ """
+ Replay a trajectory that involves interactions, i.e. with user messages
+ in the middle. This tests two things:
+ 1) The controller should be able to replay all actions without human
+ interference (no asking for user input).
+ 2) The user messages in the trajectory should appear in the history.
+ """
+ runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
+
+ config = _get_config('basic_interactions')
+
+ state: State | None = asyncio.run(
+ run_controller(
+ config=config,
+ initial_user_action=NullAction(),
+ runtime=runtime,
+ )
+ )
+
+ assert state.agent_state == AgentState.FINISHED
+
+ # all user messages appear in the history, so that after a replay (assuming
+ # the trajectory doesn't end with `finish` action), LLM knows about all the
+ # context and can continue
+ user_messages = [
+ "what's 1+1?",
+ "No, I mean by Goldbach's conjecture!",
+ 'Finish please',
+ ]
+ i = 0
+ for event in state.history:
+ if isinstance(event, MessageAction) and event._source == EventSource.USER:
+ assert event.message == user_messages[i]
+ i += 1
+ assert i == len(user_messages)
+
+ _close_test_runtime(runtime)
diff --git a/tests/runtime/trajs/basic_gui_mode.json b/tests/runtime/trajs/basic_gui_mode.json
new file mode 100644
index 000000000000..a11dd473aece
--- /dev/null
+++ b/tests/runtime/trajs/basic_gui_mode.json
@@ -0,0 +1,631 @@
+[
+ {
+ "id": 0,
+ "timestamp": "2025-01-20T20:29:13.519638",
+ "source": "environment",
+ "message": "",
+ "observation": "agent_state_changed",
+ "content": "",
+ "extras": {
+ "agent_state": "loading"
+ }
+ },
+ {
+ "id": 1,
+ "timestamp": "2025-01-20T20:29:32.163218",
+ "source": "environment",
+ "message": "Agent state changed to init",
+ "action": "change_agent_state",
+ "args": {
+ "agent_state": "init",
+ "thought": ""
+ }
+ },
+ {
+ "id": 2,
+ "timestamp": "2025-01-20T20:29:32.165837",
+ "source": "environment",
+ "message": "No observation",
+ "cause": 1,
+ "observation": "null",
+ "content": "",
+ "extras": {}
+ },
+ {
+ "id": 3,
+ "timestamp": "2025-01-20T20:29:32.176175",
+ "source": "environment",
+ "message": "",
+ "observation": "agent_state_changed",
+ "content": "",
+ "extras": {
+ "agent_state": "init"
+ }
+ },
+ {
+ "id": 4,
+ "timestamp": "2025-01-20T20:29:32.262843",
+ "source": "user",
+ "message": "I want to create a VueJS app that allows me to:\n* See all the items on my todo list\n* add a new item to the list\n* mark an item as done\n* totally remove an item from the list\n* change the text of an item\n* set a due date on the item\n\nThis should be a client-only app with no backend. The list should persist in localStorage.",
+ "action": "message",
+ "args": {
+ "content": "I want to create a VueJS app that allows me to:\n* See all the items on my todo list\n* add a new item to the list\n* mark an item as done\n* totally remove an item from the list\n* change the text of an item\n* set a due date on the item\n\nThis should be a client-only app with no backend. The list should persist in localStorage.",
+ "image_urls": [],
+ "wait_for_response": false
+ }
+ },
+ {
+ "id": 5,
+ "timestamp": "2025-01-20T20:29:32.286160",
+ "source": "environment",
+ "message": "",
+ "observation": "agent_state_changed",
+ "content": "",
+ "extras": {
+ "agent_state": "running"
+ }
+ },
+ {
+ "id": 6,
+ "timestamp": "2025-01-20T20:29:32.289136",
+ "source": "user",
+ "message": "No observation",
+ "cause": 4,
+ "observation": "null",
+ "content": "",
+ "extras": {}
+ },
+ {
+ "id": 7,
+ "timestamp": "2025-01-20T20:29:35.040676",
+ "source": "agent",
+ "message": "Running command: mkdir -p /workspace/todo-app",
+ "action": "run",
+ "tool_call_metadata": {
+ "function_name": "execute_bash",
+ "tool_call_id": "toolu_01W9Z8jBctr8X2frZV9p1RYs",
+ "model_response": {
+ "id": "chatcmpl-6bb9f175-c587-44b4-8c8d-f91fc4827590",
+ "created": 1737433775,
+ "model": "claude-3-5-sonnet-20241022",
+ "object": "chat.completion",
+ "system_fingerprint": null,
+ "choices": [
+ {
+ "finish_reason": "tool_calls",
+ "index": 0,
+ "message": {
+ "content": "I'll help you create a Vue.js application with all the requested todo list functionality. Let's create this step by step.\n\nFirst, let's create a new directory and set up the basic files:",
+ "role": "assistant",
+ "tool_calls": [
+ {
+ "index": 1,
+ "function": {
+ "arguments": "{\"command\": \"mkdir -p /workspace/todo-app\"}",
+ "name": "execute_bash"
+ },
+ "id": "toolu_01W9Z8jBctr8X2frZV9p1RYs",
+ "type": "function"
+ }
+ ],
+ "function_call": null
+ }
+ }
+ ],
+ "usage": {
+ "completion_tokens": 104,
+ "prompt_tokens": 3439,
+ "total_tokens": 3543,
+ "completion_tokens_details": null,
+ "prompt_tokens_details": {
+ "audio_tokens": null,
+ "cached_tokens": 0
+ },
+ "cache_creation_input_tokens": 3435,
+ "cache_read_input_tokens": 0
+ },
+ "service_tier": null
+ },
+ "total_calls_in_response": 1
+ },
+ "args": {
+ "command": "mkdir -p /workspace/todo-app",
+ "is_input": false,
+ "thought": "I'll help you create a Vue.js application with all the requested todo list functionality. Let's create this step by step.\n\nFirst, let's create a new directory and set up the basic files:",
+ "blocking": false,
+ "hidden": false,
+ "confirmation_state": "confirmed"
+ }
+ },
+ {
+ "id": 8,
+ "timestamp": "2025-01-20T20:29:35.699033",
+ "source": "agent",
+ "message": "Command `mkdir -p /workspace/todo-app` executed with exit code 0.",
+ "cause": 7,
+ "observation": "run",
+ "tool_call_metadata": {
+ "function_name": "execute_bash",
+ "tool_call_id": "toolu_01W9Z8jBctr8X2frZV9p1RYs",
+ "model_response": {
+ "id": "chatcmpl-6bb9f175-c587-44b4-8c8d-f91fc4827590",
+ "created": 1737433775,
+ "model": "claude-3-5-sonnet-20241022",
+ "object": "chat.completion",
+ "system_fingerprint": null,
+ "choices": [
+ {
+ "finish_reason": "tool_calls",
+ "index": 0,
+ "message": {
+ "content": "I'll help you create a Vue.js application with all the requested todo list functionality. Let's create this step by step.\n\nFirst, let's create a new directory and set up the basic files:",
+ "role": "assistant",
+ "tool_calls": [
+ {
+ "index": 1,
+ "function": {
+ "arguments": "{\"command\": \"mkdir -p /workspace/todo-app\"}",
+ "name": "execute_bash"
+ },
+ "id": "toolu_01W9Z8jBctr8X2frZV9p1RYs",
+ "type": "function"
+ }
+ ],
+ "function_call": null
+ }
+ }
+ ],
+ "usage": {
+ "completion_tokens": 104,
+ "prompt_tokens": 3439,
+ "total_tokens": 3543,
+ "completion_tokens_details": null,
+ "prompt_tokens_details": {
+ "audio_tokens": null,
+ "cached_tokens": 0
+ },
+ "cache_creation_input_tokens": 3435,
+ "cache_read_input_tokens": 0
+ },
+ "service_tier": null
+ },
+ "total_calls_in_response": 1
+ },
+ "content": "",
+ "extras": {
+ "command": "mkdir -p /workspace/todo-app",
+ "metadata": {
+ "exit_code": 0,
+ "pid": -1,
+ "username": "openhands",
+ "hostname": "18f4db943d2f",
+ "working_dir": "/workspace",
+ "py_interpreter_path": "/openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python",
+ "prefix": "",
+ "suffix": "\n[The command completed with exit code 0.]"
+ },
+ "hidden": false
+ },
+ "success": true
+ },
+ {
+ "id": 9,
+ "timestamp": "2025-01-20T20:29:48.925379",
+ "source": "agent",
+ "message": "",
+ "action": "edit",
+ "tool_call_metadata": {
+ "function_name": "str_replace_editor",
+ "tool_call_id": "toolu_01YYLXwwdBLwtMmjr5Sfsieg",
+ "model_response": {
+ "id": "chatcmpl-55936480-cbf9-4d24-bd4a-329bd2f1f65c",
+ "created": 1737433788,
+ "model": "claude-3-5-sonnet-20241022",
+ "object": "chat.completion",
+ "system_fingerprint": null,
+ "choices": [
+ {
+ "finish_reason": "tool_calls",
+ "index": 0,
+ "message": {
+ "content": "Let's create the main HTML file:",
+ "role": "assistant",
+ "tool_calls": [
+ {
+ "index": 1,
+ "function": {
+ "arguments": "{\"command\": \"create\", \"path\": \"/workspace/todo-app/index.html\", \"file_text\": \"\\n\\n
\\n \\n \\n Vue Todo App\\n \\n \\n \\n\\n\\n \\n
Todo List
\\n \\n \\n
\\n\\n \\n
\\n
\\n
\\n No todos yet. Add one above!\\n
\\n
\\n
\\n
\\n
\\n\\n \\n\\n\"}",
+ "name": "str_replace_editor"
+ },
+ "id": "toolu_01YYLXwwdBLwtMmjr5Sfsieg",
+ "type": "function"
+ }
+ ],
+ "function_call": null
+ }
+ }
+ ],
+ "usage": {
+ "completion_tokens": 1142,
+ "prompt_tokens": 3601,
+ "total_tokens": 4743,
+ "completion_tokens_details": null,
+ "prompt_tokens_details": {
+ "audio_tokens": null,
+ "cached_tokens": 3435
+ },
+ "cache_creation_input_tokens": 160,
+ "cache_read_input_tokens": 3435
+ },
+ "service_tier": null
+ },
+ "total_calls_in_response": 1
+ },
+ "args": {
+ "path": "/workspace/todo-app/index.html",
+ "content": "",
+ "start": 1,
+ "end": -1,
+ "thought": "Let's create the main HTML file:",
+ "impl_source": "oh_aci",
+ "translated_ipython_code": "print(file_editor(**{'command': 'create', 'path': '/workspace/todo-app/index.html', 'file_text': '\\n\\n\\n \\n \\n Vue Todo App\\n \\n \\n \\n\\n\\n \\n
Todo List
\\n \\n \\n
\\n\\n \\n
\\n
\\n
\\n No todos yet. Add one above!\\n
\\n
\\n
\\n
\\n
\\n\\n \\n\\n'}))"
+ }
+ },
+ {
+ "id": 10,
+ "timestamp": "2025-01-20T20:29:48.967655",
+ "source": "agent",
+ "message": "I edited the file /workspace/todo-app/index.html.",
+ "cause": 9,
+ "observation": "edit",
+ "tool_call_metadata": {
+ "function_name": "str_replace_editor",
+ "tool_call_id": "toolu_01YYLXwwdBLwtMmjr5Sfsieg",
+ "model_response": {
+ "id": "chatcmpl-55936480-cbf9-4d24-bd4a-329bd2f1f65c",
+ "created": 1737433788,
+ "model": "claude-3-5-sonnet-20241022",
+ "object": "chat.completion",
+ "system_fingerprint": null,
+ "choices": [
+ {
+ "finish_reason": "tool_calls",
+ "index": 0,
+ "message": {
+ "content": "Let's create the main HTML file:",
+ "role": "assistant",
+ "tool_calls": [
+ {
+ "index": 1,
+ "function": {
+ "arguments": "{\"command\": \"create\", \"path\": \"/workspace/todo-app/index.html\", \"file_text\": \"\\n\\n\\n \\n \\n Vue Todo App\\n \\n \\n \\n\\n\\n \\n
Todo List
\\n \\n \\n
\\n\\n \\n
\\n
\\n
\\n No todos yet. Add one above!\\n
\\n
\\n
\\n
\\n
\\n\\n \\n\\n\"}",
+ "name": "str_replace_editor"
+ },
+ "id": "toolu_01YYLXwwdBLwtMmjr5Sfsieg",
+ "type": "function"
+ }
+ ],
+ "function_call": null
+ }
+ }
+ ],
+ "usage": {
+ "completion_tokens": 1142,
+ "prompt_tokens": 3601,
+ "total_tokens": 4743,
+ "completion_tokens_details": null,
+ "prompt_tokens_details": {
+ "audio_tokens": null,
+ "cached_tokens": 3435
+ },
+ "cache_creation_input_tokens": 160,
+ "cache_read_input_tokens": 3435
+ },
+ "service_tier": null
+ },
+ "total_calls_in_response": 1
+ },
+ "content": "--- /workspace/todo-app/index.html\n+++ /workspace/todo-app/index.html\n@@ -0,0 +1,43 @@\n+\n+\n+\n+ \n+ \n+ Vue Todo App\n+ \n+ \n+ \n+\n+\n+ \n+
Todo List
\n+\n+ \n+
\n@@ -1,0 +45,56 @@\n+ \n+
\n+
\n+
\n+ No todos yet. Add one above!\n+
\n+
\n+
\n+
\n+
\n+\n+ \n+\n+",
+ "extras": {
+ "path": "/workspace/todo-app/index.html",
+ "prev_exist": false,
+ "old_content": null,
+ "new_content": "\n\n\n \n \n Vue Todo App\n \n \n \n\n\n \n
Todo List
\n \n \n
\n\n \n
\n
\n
\n No todos yet. Add one above!\n
\n
\n
\n
\n
\n\n \n\n",
+ "impl_source": "oh_aci",
+ "formatted_output_and_error": "File created successfully at: /workspace/todo-app/index.html"
+ }
+ },
+ {
+ "id": 11,
+ "timestamp": "2025-01-20T20:29:56.899226",
+ "source": "agent",
+ "message": "",
+ "action": "edit",
+ "tool_call_metadata": {
+ "function_name": "str_replace_editor",
+ "tool_call_id": "toolu_01GpJj2ShdFjb9ZTY4CVxi9C",
+ "model_response": {
+ "id": "chatcmpl-34fef176-276b-4a18-892f-688b08f51225",
+ "created": 1737433796,
+ "model": "claude-3-5-sonnet-20241022",
+ "object": "chat.completion",
+ "system_fingerprint": null,
+ "choices": [
+ {
+ "finish_reason": "tool_calls",
+ "index": 0,
+ "message": {
+ "content": "Now, let's create the JavaScript file with the Vue application logic:",
+ "role": "assistant",
+ "tool_calls": [
+ {
+ "index": 1,
+ "function": {
+ "arguments": "{\"command\": \"create\", \"path\": \"/workspace/todo-app/app.js\", \"file_text\": \"const { createApp } = Vue\\n\\nconst STORAGE_KEY = 'vue-todo-list'\\n\\ncreateApp({\\n data() {\\n return {\\n todos: [],\\n newTodoText: '',\\n newTodoDueDate: '',\\n editingTodo: null,\\n editingText: '',\\n editingDueDate: ''\\n }\\n },\\n methods: {\\n loadTodos() {\\n const savedTodos = localStorage.getItem(STORAGE_KEY)\\n if (savedTodos) {\\n this.todos = JSON.parse(savedTodos)\\n }\\n },\\n saveTodos() {\\n localStorage.setItem(STORAGE_KEY, JSON.stringify(this.todos))\\n },\\n addTodo() {\\n if (!this.newTodoText.trim()) return\\n\\n const todo = {\\n id: Date.now(),\\n text: this.newTodoText.trim(),\\n completed: false,\\n dueDate: this.newTodoDueDate || null\\n }\\n\\n this.todos.push(todo)\\n this.newTodoText = ''\\n this.newTodoDueDate = ''\\n this.saveTodos()\\n },\\n toggleTodo(todo) {\\n todo.completed = !todo.completed\\n this.saveTodos()\\n },\\n removeTodo(todo) {\\n const index = this.todos.indexOf(todo)\\n if (index > -1) {\\n this.todos.splice(index, 1)\\n this.saveTodos()\\n }\\n },\\n startEdit(todo) {\\n this.editingTodo = todo.id\\n this.editingText = todo.text\\n this.editingDueDate = todo.dueDate || ''\\n },\\n saveTodoEdit(todo) {\\n if (!this.editingText.trim()) return\\n\\n todo.text = this.editingText.trim()\\n todo.dueDate = this.editingDueDate || null\\n this.editingTodo = null\\n this.saveTodos()\\n },\\n cancelEdit() {\\n this.editingTodo = null\\n },\\n formatDate(dateString) {\\n return new Date(dateString).toLocaleDateString()\\n }\\n },\\n mounted() {\\n this.loadTodos()\\n }\\n}).mount('#app')\"}",
+ "name": "str_replace_editor"
+ },
+ "id": "toolu_01GpJj2ShdFjb9ZTY4CVxi9C",
+ "type": "function"
+ }
+ ],
+ "function_call": null
+ }
+ }
+ ],
+ "usage": {
+ "completion_tokens": 693,
+ "prompt_tokens": 4769,
+ "total_tokens": 5462,
+ "completion_tokens_details": null,
+ "prompt_tokens_details": {
+ "audio_tokens": null,
+ "cached_tokens": 3595
+ },
+ "cache_creation_input_tokens": 1168,
+ "cache_read_input_tokens": 3595
+ },
+ "service_tier": null
+ },
+ "total_calls_in_response": 1
+ },
+ "args": {
+ "path": "/workspace/todo-app/app.js",
+ "content": "",
+ "start": 1,
+ "end": -1,
+ "thought": "Now, let's create the JavaScript file with the Vue application logic:",
+ "impl_source": "oh_aci",
+ "translated_ipython_code": "print(file_editor(**{'command': 'create', 'path': '/workspace/todo-app/app.js', 'file_text': \"const { createApp } = Vue\\n\\nconst STORAGE_KEY = 'vue-todo-list'\\n\\ncreateApp({\\n data() {\\n return {\\n todos: [],\\n newTodoText: '',\\n newTodoDueDate: '',\\n editingTodo: null,\\n editingText: '',\\n editingDueDate: ''\\n }\\n },\\n methods: {\\n loadTodos() {\\n const savedTodos = localStorage.getItem(STORAGE_KEY)\\n if (savedTodos) {\\n this.todos = JSON.parse(savedTodos)\\n }\\n },\\n saveTodos() {\\n localStorage.setItem(STORAGE_KEY, JSON.stringify(this.todos))\\n },\\n addTodo() {\\n if (!this.newTodoText.trim()) return\\n\\n const todo = {\\n id: Date.now(),\\n text: this.newTodoText.trim(),\\n completed: false,\\n dueDate: this.newTodoDueDate || null\\n }\\n\\n this.todos.push(todo)\\n this.newTodoText = ''\\n this.newTodoDueDate = ''\\n this.saveTodos()\\n },\\n toggleTodo(todo) {\\n todo.completed = !todo.completed\\n this.saveTodos()\\n },\\n removeTodo(todo) {\\n const index = this.todos.indexOf(todo)\\n if (index > -1) {\\n this.todos.splice(index, 1)\\n this.saveTodos()\\n }\\n },\\n startEdit(todo) {\\n this.editingTodo = todo.id\\n this.editingText = todo.text\\n this.editingDueDate = todo.dueDate || ''\\n },\\n saveTodoEdit(todo) {\\n if (!this.editingText.trim()) return\\n\\n todo.text = this.editingText.trim()\\n todo.dueDate = this.editingDueDate || null\\n this.editingTodo = null\\n this.saveTodos()\\n },\\n cancelEdit() {\\n this.editingTodo = null\\n },\\n formatDate(dateString) {\\n return new Date(dateString).toLocaleDateString()\\n }\\n },\\n mounted() {\\n this.loadTodos()\\n }\\n}).mount('#app')\"}))"
+ }
+ },
+ {
+ "id": 12,
+ "timestamp": "2025-01-20T20:29:56.933329",
+ "source": "agent",
+ "message": "I edited the file /workspace/todo-app/app.js.",
+ "cause": 11,
+ "observation": "edit",
+ "tool_call_metadata": {
+ "function_name": "str_replace_editor",
+ "tool_call_id": "toolu_01GpJj2ShdFjb9ZTY4CVxi9C",
+ "model_response": {
+ "id": "chatcmpl-34fef176-276b-4a18-892f-688b08f51225",
+ "created": 1737433796,
+ "model": "claude-3-5-sonnet-20241022",
+ "object": "chat.completion",
+ "system_fingerprint": null,
+ "choices": [
+ {
+ "finish_reason": "tool_calls",
+ "index": 0,
+ "message": {
+ "content": "Now, let's create the JavaScript file with the Vue application logic:",
+ "role": "assistant",
+ "tool_calls": [
+ {
+ "index": 1,
+ "function": {
+ "arguments": "{\"command\": \"create\", \"path\": \"/workspace/todo-app/app.js\", \"file_text\": \"const { createApp } = Vue\\n\\nconst STORAGE_KEY = 'vue-todo-list'\\n\\ncreateApp({\\n data() {\\n return {\\n todos: [],\\n newTodoText: '',\\n newTodoDueDate: '',\\n editingTodo: null,\\n editingText: '',\\n editingDueDate: ''\\n }\\n },\\n methods: {\\n loadTodos() {\\n const savedTodos = localStorage.getItem(STORAGE_KEY)\\n if (savedTodos) {\\n this.todos = JSON.parse(savedTodos)\\n }\\n },\\n saveTodos() {\\n localStorage.setItem(STORAGE_KEY, JSON.stringify(this.todos))\\n },\\n addTodo() {\\n if (!this.newTodoText.trim()) return\\n\\n const todo = {\\n id: Date.now(),\\n text: this.newTodoText.trim(),\\n completed: false,\\n dueDate: this.newTodoDueDate || null\\n }\\n\\n this.todos.push(todo)\\n this.newTodoText = ''\\n this.newTodoDueDate = ''\\n this.saveTodos()\\n },\\n toggleTodo(todo) {\\n todo.completed = !todo.completed\\n this.saveTodos()\\n },\\n removeTodo(todo) {\\n const index = this.todos.indexOf(todo)\\n if (index > -1) {\\n this.todos.splice(index, 1)\\n this.saveTodos()\\n }\\n },\\n startEdit(todo) {\\n this.editingTodo = todo.id\\n this.editingText = todo.text\\n this.editingDueDate = todo.dueDate || ''\\n },\\n saveTodoEdit(todo) {\\n if (!this.editingText.trim()) return\\n\\n todo.text = this.editingText.trim()\\n todo.dueDate = this.editingDueDate || null\\n this.editingTodo = null\\n this.saveTodos()\\n },\\n cancelEdit() {\\n this.editingTodo = null\\n },\\n formatDate(dateString) {\\n return new Date(dateString).toLocaleDateString()\\n }\\n },\\n mounted() {\\n this.loadTodos()\\n }\\n}).mount('#app')\"}",
+ "name": "str_replace_editor"
+ },
+ "id": "toolu_01GpJj2ShdFjb9ZTY4CVxi9C",
+ "type": "function"
+ }
+ ],
+ "function_call": null
+ }
+ }
+ ],
+ "usage": {
+ "completion_tokens": 693,
+ "prompt_tokens": 4769,
+ "total_tokens": 5462,
+ "completion_tokens_details": null,
+ "prompt_tokens_details": {
+ "audio_tokens": null,
+ "cached_tokens": 3595
+ },
+ "cache_creation_input_tokens": 1168,
+ "cache_read_input_tokens": 3595
+ },
+ "service_tier": null
+ },
+ "total_calls_in_response": 1
+ },
+ "content": "--- /workspace/todo-app/app.js\n+++ /workspace/todo-app/app.js\n@@ -0,0 +1 @@\n+const { createApp } = Vue\n@@ -1,0 +3,73 @@\n+const STORAGE_KEY = 'vue-todo-list'\n+\n+createApp({\n+ data() {\n+ return {\n+ todos: [],\n+ newTodoText: '',\n+ newTodoDueDate: '',\n+ editingTodo: null,\n+ editingText: '',\n+ editingDueDate: ''\n+ }\n+ },\n+ methods: {\n+ loadTodos() {\n+ const savedTodos = localStorage.getItem(STORAGE_KEY)\n+ if (savedTodos) {\n+ this.todos = JSON.parse(savedTodos)\n+ }\n+ },\n+ saveTodos() {\n+ localStorage.setItem(STORAGE_KEY, JSON.stringify(this.todos))\n+ },\n+ addTodo() {\n+ if (!this.newTodoText.trim()) return\n+\n+ const todo = {\n+ id: Date.now(),\n+ text: this.newTodoText.trim(),\n+ completed: false,\n+ dueDate: this.newTodoDueDate || null\n+ }\n+\n+ this.todos.push(todo)\n+ this.newTodoText = ''\n+ this.newTodoDueDate = ''\n+ this.saveTodos()\n+ },\n+ toggleTodo(todo) {\n+ todo.completed = !todo.completed\n+ this.saveTodos()\n+ },\n+ removeTodo(todo) {\n+ const index = this.todos.indexOf(todo)\n+ if (index > -1) {\n+ this.todos.splice(index, 1)\n+ this.saveTodos()\n+ }\n+ },\n+ startEdit(todo) {\n+ this.editingTodo = todo.id\n+ this.editingText = todo.text\n+ this.editingDueDate = todo.dueDate || ''\n+ },\n+ saveTodoEdit(todo) {\n+ if (!this.editingText.trim()) return\n+\n+ todo.text = this.editingText.trim()\n+ todo.dueDate = this.editingDueDate || null\n+ this.editingTodo = null\n+ this.saveTodos()\n+ },\n+ cancelEdit() {\n+ this.editingTodo = null\n+ },\n+ formatDate(dateString) {\n+ return new Date(dateString).toLocaleDateString()\n+ }\n+ },\n+ mounted() {\n+ this.loadTodos()\n+ }\n+}).mount('#app')",
+ "extras": {
+ "path": "/workspace/todo-app/app.js",
+ "prev_exist": false,
+ "old_content": null,
+ "new_content": "const { createApp } = Vue\n\nconst STORAGE_KEY = 'vue-todo-list'\n\ncreateApp({\n data() {\n return {\n todos: [],\n newTodoText: '',\n newTodoDueDate: '',\n editingTodo: null,\n editingText: '',\n editingDueDate: ''\n }\n },\n methods: {\n loadTodos() {\n const savedTodos = localStorage.getItem(STORAGE_KEY)\n if (savedTodos) {\n this.todos = JSON.parse(savedTodos)\n }\n },\n saveTodos() {\n localStorage.setItem(STORAGE_KEY, JSON.stringify(this.todos))\n },\n addTodo() {\n if (!this.newTodoText.trim()) return\n\n const todo = {\n id: Date.now(),\n text: this.newTodoText.trim(),\n completed: false,\n dueDate: this.newTodoDueDate || null\n }\n\n this.todos.push(todo)\n this.newTodoText = ''\n this.newTodoDueDate = ''\n this.saveTodos()\n },\n toggleTodo(todo) {\n todo.completed = !todo.completed\n this.saveTodos()\n },\n removeTodo(todo) {\n const index = this.todos.indexOf(todo)\n if (index > -1) {\n this.todos.splice(index, 1)\n this.saveTodos()\n }\n },\n startEdit(todo) {\n this.editingTodo = todo.id\n this.editingText = todo.text\n this.editingDueDate = todo.dueDate || ''\n },\n saveTodoEdit(todo) {\n if (!this.editingText.trim()) return\n\n todo.text = this.editingText.trim()\n todo.dueDate = this.editingDueDate || null\n this.editingTodo = null\n this.saveTodos()\n },\n cancelEdit() {\n this.editingTodo = null\n },\n formatDate(dateString) {\n return new Date(dateString).toLocaleDateString()\n }\n },\n mounted() {\n this.loadTodos()\n }\n}).mount('#app')",
+ "impl_source": "oh_aci",
+ "formatted_output_and_error": "File created successfully at: /workspace/todo-app/app.js"
+ }
+ },
+ {
+ "id": 13,
+ "timestamp": "2025-01-20T20:29:59.282034",
+ "source": "agent",
+ "message": "Running command: cd /workspace/todo-app && python3 -m http.server 8000",
+ "action": "run",
+ "tool_call_metadata": {
+ "function_name": "execute_bash",
+ "tool_call_id": "toolu_01DwihWr4ooPZS1CGTLAWrFP",
+ "model_response": {
+ "id": "chatcmpl-51a85fbb-dafc-4519-a7d2-df369e38d76f",
+ "created": 1737433799,
+ "model": "claude-3-5-sonnet-20241022",
+ "object": "chat.completion",
+ "system_fingerprint": null,
+ "choices": [
+ {
+ "finish_reason": "tool_calls",
+ "index": 0,
+ "message": {
+ "content": "Let's serve this application using a simple Python HTTP server:",
+ "role": "assistant",
+ "tool_calls": [
+ {
+ "index": 1,
+ "function": {
+ "arguments": "{\"command\": \"cd /workspace/todo-app && python3 -m http.server 8000\"}",
+ "name": "execute_bash"
+ },
+ "id": "toolu_01DwihWr4ooPZS1CGTLAWrFP",
+ "type": "function"
+ }
+ ],
+ "function_call": null
+ }
+ }
+ ],
+ "usage": {
+ "completion_tokens": 85,
+ "prompt_tokens": 5488,
+ "total_tokens": 5573,
+ "completion_tokens_details": null,
+ "prompt_tokens_details": {
+ "audio_tokens": null,
+ "cached_tokens": 4763
+ },
+ "cache_creation_input_tokens": 719,
+ "cache_read_input_tokens": 4763
+ },
+ "service_tier": null
+ },
+ "total_calls_in_response": 1
+ },
+ "args": {
+ "command": "cd /workspace/todo-app && python3 -m http.server 8000",
+ "is_input": false,
+ "thought": "Let's serve this application using a simple Python HTTP server:",
+ "blocking": false,
+ "hidden": false,
+ "confirmation_state": "confirmed"
+ }
+ },
+ {
+ "id": 14,
+ "timestamp": "2025-01-20T20:30:29.862264",
+ "source": "agent",
+ "message": "Command `cd /workspace/todo-app && python3 -m http.server 8000` executed with exit code -1.",
+ "cause": 13,
+ "observation": "run",
+ "tool_call_metadata": {
+ "function_name": "execute_bash",
+ "tool_call_id": "toolu_01DwihWr4ooPZS1CGTLAWrFP",
+ "model_response": {
+ "id": "chatcmpl-51a85fbb-dafc-4519-a7d2-df369e38d76f",
+ "created": 1737433799,
+ "model": "claude-3-5-sonnet-20241022",
+ "object": "chat.completion",
+ "system_fingerprint": null,
+ "choices": [
+ {
+ "finish_reason": "tool_calls",
+ "index": 0,
+ "message": {
+ "content": "Let's serve this application using a simple Python HTTP server:",
+ "role": "assistant",
+ "tool_calls": [
+ {
+ "index": 1,
+ "function": {
+ "arguments": "{\"command\": \"cd /workspace/todo-app && python3 -m http.server 8000\"}",
+ "name": "execute_bash"
+ },
+ "id": "toolu_01DwihWr4ooPZS1CGTLAWrFP",
+ "type": "function"
+ }
+ ],
+ "function_call": null
+ }
+ }
+ ],
+ "usage": {
+ "completion_tokens": 85,
+ "prompt_tokens": 5488,
+ "total_tokens": 5573,
+ "completion_tokens_details": null,
+ "prompt_tokens_details": {
+ "audio_tokens": null,
+ "cached_tokens": 4763
+ },
+ "cache_creation_input_tokens": 719,
+ "cache_read_input_tokens": 4763
+ },
+ "service_tier": null
+ },
+ "total_calls_in_response": 1
+ },
+ "content": "Serving HTTP on 0.0.0.0 port 8000 (http://0.0.0.0:8000/) ...",
+ "extras": {
+ "command": "cd /workspace/todo-app && python3 -m http.server 8000",
+ "metadata": {
+ "exit_code": -1,
+ "pid": -1,
+ "username": null,
+ "hostname": null,
+ "working_dir": null,
+ "py_interpreter_path": null,
+ "prefix": "",
+ "suffix": "\n[The command has no new output after 30 seconds. You may wait longer to see additional output by sending empty command '', send other commands to interact with the current process, or send keys to interrupt/kill the command.]"
+ },
+ "hidden": false
+ },
+ "success": false
+ },
+ {
+ "id": 15,
+ "timestamp": "2025-01-20T20:30:35.414886",
+ "source": "agent",
+ "message": "I've created a complete Vue.js todo application with all the requested features. Here's what the application includes:\n\n1. Todo List Features:\n - View all todos\n - Add new todos with optional due dates\n - Mark todos as complete/incomplete\n - Delete todos\n - Edit todo text and due date\n - Persistent storage using localStorage\n\n2. UI Features:\n - Clean, responsive design using Bootstrap\n - Completed todos are shown with strikethrough\n - Due dates are displayed in a readable format\n - Edit mode with save/cancel functionality\n - Form validation to prevent empty todos\n\nThe application is now running on port 8000. You can access it by opening a web browser and navigating to http://localhost:8000\n\nTo use the application:\n1. Add a new todo by typing in the text field and optionally selecting a due date\n2. Click the checkbox to mark a todo as complete\n3. Click the \"Edit\" button to modify a todo's text or due date\n4. Click the \"Delete\" button to remove a todo\n5. All changes are automatically saved to localStorage\n\nThe data will persist even if you close and reopen your browser. Would you like me to explain any specific part of the implementation or make any adjustments to the functionality?",
+ "action": "message",
+ "args": {
+ "content": "I've created a complete Vue.js todo application with all the requested features. Here's what the application includes:\n\n1. Todo List Features:\n - View all todos\n - Add new todos with optional due dates\n - Mark todos as complete/incomplete\n - Delete todos\n - Edit todo text and due date\n - Persistent storage using localStorage\n\n2. UI Features:\n - Clean, responsive design using Bootstrap\n - Completed todos are shown with strikethrough\n - Due dates are displayed in a readable format\n - Edit mode with save/cancel functionality\n - Form validation to prevent empty todos\n\nThe application is now running on port 8000. You can access it by opening a web browser and navigating to http://localhost:8000\n\nTo use the application:\n1. Add a new todo by typing in the text field and optionally selecting a due date\n2. Click the checkbox to mark a todo as complete\n3. Click the \"Edit\" button to modify a todo's text or due date\n4. Click the \"Delete\" button to remove a todo\n5. All changes are automatically saved to localStorage\n\nThe data will persist even if you close and reopen your browser. Would you like me to explain any specific part of the implementation or make any adjustments to the functionality?",
+ "image_urls": null,
+ "wait_for_response": true
+ }
+ },
+ {
+ "id": 16,
+ "timestamp": "2025-01-20T20:30:35.421080",
+ "source": "agent",
+ "message": "No observation",
+ "cause": 15,
+ "observation": "null",
+ "content": "",
+ "extras": {}
+ },
+ {
+ "id": 17,
+ "timestamp": "2025-01-20T20:30:35.429675",
+ "source": "environment",
+ "message": "",
+ "observation": "agent_state_changed",
+ "content": "",
+ "extras": {
+ "agent_state": "awaiting_user_input"
+ }
+ }
+]
diff --git a/tests/runtime/trajs/basic_interactions.json b/tests/runtime/trajs/basic_interactions.json
new file mode 100644
index 000000000000..f97bcaa654e3
--- /dev/null
+++ b/tests/runtime/trajs/basic_interactions.json
@@ -0,0 +1,128 @@
+[
+ {
+ "id": 0,
+ "timestamp": "2025-02-01T00:14:10.787622",
+ "source": "user",
+ "message": "what's 1+1?",
+ "action": "message",
+ "args": {
+ "content": "what's 1+1?",
+ "image_urls": null,
+ "wait_for_response": false
+ },
+ "timeout": 120
+ },
+ {
+ "id": 3,
+ "timestamp": "2025-02-01T00:14:12.686046",
+ "source": "agent",
+ "message": "1 + 1 equals 2.",
+ "action": "message",
+ "args": {
+ "content": "1 + 1 equals 2.",
+ "image_urls": null,
+ "wait_for_response": true
+ },
+ "timeout": 120
+ },
+ {
+ "id": 6,
+ "timestamp": "2025-02-01T00:17:23.228442",
+ "source": "user",
+ "message": "No, I mean by Goldbach's conjecture!",
+ "action": "message",
+ "args": {
+ "content": "No, I mean by Goldbach's conjecture!",
+ "image_urls": null,
+ "wait_for_response": false
+ },
+ "timeout": 120
+ },
+ {
+ "id": 9,
+ "timestamp": "2025-02-01T00:17:25.586532",
+ "source": "agent",
+ "message": "Goldbach's conjecture is an unsolved problem in number theory that suggests every even integer greater than 2 can be expressed as the sum of two prime numbers. However, it doesn't directly apply to the sum of 1 + 1, as 1 is not considered a prime number. The conjecture is more relevant to larger even numbers.",
+ "action": "message",
+ "args": {
+ "content": "Goldbach's conjecture is an unsolved problem in number theory that suggests every even integer greater than 2 can be expressed as the sum of two prime numbers. However, it doesn't directly apply to the sum of 1 + 1, as 1 is not considered a prime number. The conjecture is more relevant to larger even numbers.",
+ "image_urls": null,
+ "wait_for_response": true
+ },
+ "timeout": 120
+ },
+ {
+ "id": 12,
+ "timestamp": "2025-02-01T00:18:59.847433",
+ "source": "user",
+ "message": "Finish please",
+ "action": "message",
+ "args": {
+ "content": "Finish please",
+ "image_urls": null,
+ "wait_for_response": false
+ },
+ "timeout": 120
+ },
+ {
+ "id": 15,
+ "timestamp": "2025-02-01T00:19:01.168854",
+ "source": "agent",
+ "message": "All done! What's next on the agenda?",
+ "action": "finish",
+ "tool_call_metadata": {
+ "function_name": "finish",
+ "tool_call_id": "call_IpyczPgEAwtpRWFZ2RNokTdH",
+ "model_response": {
+ "id": "chatcmpl-Aw30O6Ka8n1X3F6F7YUh91NxXBE0y",
+ "created": 1738397940,
+ "model": "gpt-4o-2024-08-06",
+ "object": "chat.completion",
+ "system_fingerprint": "fp_4691090a87",
+ "choices": [
+ {
+ "finish_reason": "tool_calls",
+ "index": 0,
+ "message": {
+ "content": null,
+ "role": "assistant",
+ "tool_calls": [
+ {
+ "function": {
+ "arguments": "{}",
+ "name": "finish"
+ },
+ "id": "call_IpyczPgEAwtpRWFZ2RNokTdH",
+ "type": "function"
+ }
+ ],
+ "function_call": null
+ }
+ }
+ ],
+ "usage": {
+ "completion_tokens": 10,
+ "prompt_tokens": 2473,
+ "total_tokens": 2483,
+ "completion_tokens_details": {
+ "accepted_prediction_tokens": 0,
+ "audio_tokens": 0,
+ "reasoning_tokens": 0,
+ "rejected_prediction_tokens": 0
+ },
+ "prompt_tokens_details": {
+ "audio_tokens": 0,
+ "cached_tokens": 2304
+ }
+ },
+ "service_tier": "default"
+ },
+ "total_calls_in_response": 1
+ },
+ "args": {
+ "outputs": {},
+ "thought": ""
+ },
+ "timeout": 120
+ }
+]