diff --git a/openhands/storage/google_cloud.py b/openhands/storage/google_cloud.py
index da763363ab3d..19465c0442ce 100644
--- a/openhands/storage/google_cloud.py
+++ b/openhands/storage/google_cloud.py
@@ -60,5 +60,19 @@ def list(self, path: str) -> List[str]:
         return list(blobs)

     def delete(self, path: str) -> None:
-        blob = self.bucket.blob(path)
-        blob.delete()
+        # Sanitize path
+        if not path or path == '/':
+            path = ''
+        if path.endswith('/'):
+            path = path[:-1]
+
+        # Try to delete any child resources (Assume the path is a directory)
+        for blob in self.bucket.list_blobs(prefix=f'{path}/'):
+            blob.delete()
+
+        # Next try to delete item as a file
+        try:
+            blob = self.bucket.blob(path)
+            blob.delete()
+        except NotFound:
+            pass
diff --git a/openhands/storage/s3.py b/openhands/storage/s3.py
index 86be398c5172..429e660011e4 100644
--- a/openhands/storage/s3.py
+++ b/openhands/storage/s3.py
@@ -66,36 +66,53 @@ def read(self, path: str) -> str:
         )

     def list(self, path: str) -> list[str]:
-        if path and path != '/' and not path.endswith('/'):
+        if not path or path == '/':
+            path = ''
+        elif not path.endswith('/'):
             path += '/'
-        try:
-            response = self.client.list_objects_v2(Bucket=self.bucket, Prefix=path)
-            # Check if 'Contents' exists in the response
-            if 'Contents' in response:
-                objects = [obj['Key'] for obj in response['Contents']]
-                return objects
-            else:
-                return list()
-        except botocore.exceptions.ClientError as e:
-            # Catch all S3-related errors
-            if e.response['Error']['Code'] == 'NoSuchBucket':
-                raise FileNotFoundError(
-                    f"Error: The bucket '{self.bucket}' does not exist."
-                )
-            elif e.response['Error']['Code'] == 'AccessDenied':
-                raise FileNotFoundError(
-                    f"Error: Access denied to bucket '{self.bucket}'."
-                )
-            else:
-                raise FileNotFoundError(f"Error: {e.response['Error']['Message']}")
-        except Exception as e:
-            raise FileNotFoundError(
-                f"Error: Failed to read from bucket '{self.bucket}' at path {path}: {e}"
-            )
+        # The delimiter logic screens out directories, so we can't use it. :(
+        # For example, given a structure:
+        #   foo/bar/zap.txt
+        #   foo/bar/bang.txt
+        #   ping.txt
+        # prefix=None, delimiter="/" yields ["ping.txt"] # :(
+        # prefix="foo", delimiter="/" yields [] # :(
+        results = set()
+        prefix_len = len(path)
+        response = self.client.list_objects_v2(Bucket=self.bucket, Prefix=path)
+        contents = response.get('Contents')
+        if not contents:
+            return []
+        paths = [obj['Key'] for obj in response['Contents']]
+        for sub_path in paths:
+            if sub_path == path:
+                continue
+            try:
+                index = sub_path.index('/', prefix_len + 1)
+                if index != prefix_len:
+                    results.add(sub_path[: index + 1])
+            except ValueError:
+                results.add(sub_path)
+        return list(results)

     def delete(self, path: str) -> None:
         try:
+            # Sanitize path
+            if not path or path == '/':
+                path = ''
+            if path.endswith('/'):
+                path = path[:-1]
+
+            # Try to delete any child resources (Assume the path is a directory)
+            response = self.client.list_objects_v2(
+                Bucket=self.bucket, Prefix=f'{path}/'
+            )
+            for content in response.get('Contents') or []:
+                self.client.delete_object(Bucket=self.bucket, Key=content['Key'])
+
+            # Next try to delete item as a file
             self.client.delete_object(Bucket=self.bucket, Key=path)
+
         except botocore.exceptions.ClientError as e:
             if e.response['Error']['Code'] == 'NoSuchBucket':
                 raise FileNotFoundError(
diff --git a/poetry.lock b/poetry.lock
index ea3d47abcf94..1f7aa66cd3ca 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -3944,19 +3944,19 @@ pydantic = ">=1.10"

 [[package]]
 name = "llama-index"
-version = "0.12.13"
+version = "0.12.14"
 description = "Interface between LLMs and your data"
 optional = false
 python-versions = "<4.0,>=3.9"
 files = [
-    {file = "llama_index-0.12.13-py3-none-any.whl", hash = "sha256:0b285aa451ced6bd8da40df99068ac96badf8b5725c4edc29f2bce4da2ffd8bc"},
-    {file = "llama_index-0.12.13.tar.gz", hash = "sha256:1e39a397dcc51dabe280c121fd8d5451a6a84595233a8b26caa54d9b7ecf9ffc"},
+    {file = "llama_index-0.12.14-py3-none-any.whl", hash = "sha256:cafbac9f08f1f7293169bfd3c75545db3b761742ea829ba6940c3f2c3b1c2d26"},
+    {file = "llama_index-0.12.14.tar.gz", hash = "sha256:aa74315b32e93a77e285519459d77b98be7db9ae4c5aa64aac2c54cc919c838f"},
 ]

 [package.dependencies]
 llama-index-agent-openai = ">=0.4.0,<0.5.0"
 llama-index-cli = ">=0.4.0,<0.5.0"
-llama-index-core = ">=0.12.13,<0.13.0"
+llama-index-core = ">=0.12.14,<0.13.0"
 llama-index-embeddings-openai = ">=0.3.0,<0.4.0"
 llama-index-indices-managed-llama-cloud = ">=0.4.0"
 llama-index-llms-openai = ">=0.3.0,<0.4.0"
@@ -4001,13 +4001,13 @@ llama-index-llms-openai = ">=0.3.0,<0.4.0"

 [[package]]
 name = "llama-index-core"
-version = "0.12.13"
+version = "0.12.14"
 description = "Interface between LLMs and your data"
 optional = false
 python-versions = "<4.0,>=3.9"
 files = [
-    {file = "llama_index_core-0.12.13-py3-none-any.whl", hash = "sha256:9708bb594bbddffd6ff0767242e49d8978d1ba60a2e62e071d9d123ad2f17e6f"},
-    {file = "llama_index_core-0.12.13.tar.gz", hash = "sha256:77af0161246ce1de38efc17cb6438dfff9e9558af00bcfac7dd4d0b7325efa4b"},
+    {file = "llama_index_core-0.12.14-py3-none-any.whl", hash = "sha256:6fdb30e3fadf98e7df75f9db5d06f6a7f8503ca545a71e048d786ff88012bd50"},
+    {file = "llama_index_core-0.12.14.tar.gz", hash = "sha256:378bbf5bf4d1a8c692d3a980c1a6ed3be7a9afb676a4960429dea15f62d06cd3"},
 ]

 [package.dependencies]
diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py
index 07ff1310b689..b22c87331a90 100644
--- a/tests/unit/test_agent_controller.py
+++ b/tests/unit/test_agent_controller.py
@@ -3,6 +3,7 @@
 from uuid import uuid4

 import pytest
+from litellm import ContextWindowExceededError

 from openhands.controller.agent import Agent
 from openhands.controller.agent_controller import AgentController
@@ -552,3 +553,49 @@ def on_event(event: Event):
     assert (
         state.metrics.accumulated_cost == 10.0 * 3
     ), f'Expected accumulated cost to be 30.0, but got {state.metrics.accumulated_cost}'
+
+
+@pytest.mark.asyncio
+async def test_context_window_exceeded_error_handling(mock_agent, mock_event_stream):
+    """Test that context window exceeded errors are handled correctly by truncating history."""
+
+    class StepState:
+        def __init__(self):
+            self.has_errored = False
+
+        def step(self, state: State):
+            # Append a few messages to the history -- these will be truncated when we throw the error
+            state.history = [
+                MessageAction(content='Test message 0'),
+                MessageAction(content='Test message 1'),
+            ]
+
+            error = ContextWindowExceededError(
+                message='prompt is too long: 233885 tokens > 200000 maximum',
+                model='',
+                llm_provider='',
+            )
+            self.has_errored = True
+            raise error
+
+    state = StepState()
+    mock_agent.step = state.step
+
+    controller = AgentController(
+        agent=mock_agent,
+        event_stream=mock_event_stream,
+        max_iterations=10,
+        sid='test',
+        confirmation_mode=False,
+        headless_mode=True,
+    )
+
+    # Set the agent running and take a step in the controller -- this is similar
+    # to taking a single step using `run_controller`, but much easier to control
+    # termination for testing purposes
+    controller.state.agent_state = AgentState.RUNNING
+    await controller._step()
+
+    # Check that the error was thrown and the history has been truncated
+    assert state.has_errored
+    assert controller.state.history == [MessageAction(content='Test message 1')]
diff --git a/tests/unit/test_storage.py b/tests/unit/test_storage.py
index 46915c181492..37b52f0a924a 100644
--- a/tests/unit/test_storage.py
+++ b/tests/unit/test_storage.py
@@ -4,15 +4,19 @@
 import shutil
 from abc import ABC
 from dataclasses import dataclass, field
-from io import StringIO
+from io import BytesIO, StringIO
 from typing import Dict, List, Optional
 from unittest import TestCase
 from unittest.mock import patch

+import botocore.exceptions
+from google.api_core.exceptions import NotFound
+
 from openhands.storage.files import FileStore
 from openhands.storage.google_cloud import GoogleCloudFileStore
 from openhands.storage.local import LocalFileStore
 from openhands.storage.memory import InMemoryFileStore
+from openhands.storage.s3 import S3FileStore


 class _StorageTest(ABC):
@@ -73,6 +77,35 @@ def test_deep_list(self):
         store.delete('foo/bar/qux.txt')
         store.delete('foo/bar/quux.txt')

+    def test_directory_deletion(self):
+        store = self.get_store()
+        # Create a directory structure
+        store.write('foo/bar/baz.txt', 'Hello, world!')
+        store.write('foo/bar/qux.txt', 'Hello, world!')
+        store.write('foo/other.txt', 'Hello, world!')
+        store.write('foo/bar/subdir/file.txt', 'Hello, world!')
+
+        # Verify initial structure
+        self.assertEqual(store.list(''), ['foo/'])
+        self.assertEqual(sorted(store.list('foo')), ['foo/bar/', 'foo/other.txt'])
+        self.assertEqual(
+            sorted(store.list('foo/bar')),
+            ['foo/bar/baz.txt', 'foo/bar/qux.txt', 'foo/bar/subdir/'],
+        )
+
+        # Delete a directory
+        store.delete('foo/bar')
+
+        # Verify directory and its contents are gone, but other files remain
+        self.assertEqual(store.list(''), ['foo/'])
+        self.assertEqual(store.list('foo'), ['foo/other.txt'])
+
+        # Delete root directory
+        store.delete('foo')
+
+        # Verify everything is gone
+        self.assertEqual(store.list(''), [])
+

 class TestLocalFileStore(TestCase, _StorageTest):
     def setUp(self):
@@ -94,6 +127,12 @@ def setUp(self):
         self.store = GoogleCloudFileStore('dear-liza')


+class TestS3FileStore(TestCase, _StorageTest):
+    def setUp(self):
+        with patch('boto3.client', lambda service, **kwargs: _MockS3Client()):
+            self.store = S3FileStore('dear-liza')
+
+
 # I would have liked to use cloud-storage-mocker here but the python versions were incompatible :(
 # If we write tests for the S3 storage class I would definitely recommend we use moto.
 class _MockGoogleCloudClient:
@@ -131,6 +170,8 @@ def open(self, op: str):
         return _MockGoogleCloudBlobWriter(self)

     def delete(self):
+        if self.name not in self.bucket.blobs_by_path:
+            raise NotFound('Blob not found')
         del self.bucket.blobs_by_path[self.name]


@@ -152,3 +193,77 @@ def __exit__(self, exc_type, exc_val, exc_tb):
         blob = self.blob
         blob.content = self.content
         blob.bucket.blobs_by_path[blob.name] = blob
+
+
+class _MockS3Client:
+    def __init__(self):
+        self.objects_by_bucket: Dict[str, Dict[str, _MockS3Object]] = {}
+
+    def put_object(self, Bucket: str, Key: str, Body: str | bytes) -> None:
+        if Bucket not in self.objects_by_bucket:
+            self.objects_by_bucket[Bucket] = {}
+        self.objects_by_bucket[Bucket][Key] = _MockS3Object(Key, Body)
+
+    def get_object(self, Bucket: str, Key: str) -> Dict:
+        if Bucket not in self.objects_by_bucket:
+            raise botocore.exceptions.ClientError(
+                {
+                    'Error': {
+                        'Code': 'NoSuchBucket',
+                        'Message': f"The bucket '{Bucket}' does not exist",
+                    }
+                },
+                'GetObject',
+            )
+        if Key not in self.objects_by_bucket[Bucket]:
+            raise botocore.exceptions.ClientError(
+                {
+                    'Error': {
+                        'Code': 'NoSuchKey',
+                        'Message': f"The specified key '{Key}' does not exist",
+                    }
+                },
+                'GetObject',
+            )
+        content = self.objects_by_bucket[Bucket][Key].content
+        if isinstance(content, bytes):
+            return {'Body': BytesIO(content)}
+        return {'Body': StringIO(content)}
+
+    def list_objects_v2(self, Bucket: str, Prefix: str = '') -> Dict:
+        if Bucket not in self.objects_by_bucket:
+            raise botocore.exceptions.ClientError(
+                {
+                    'Error': {
+                        'Code': 'NoSuchBucket',
+                        'Message': f"The bucket '{Bucket}' does not exist",
+                    }
+                },
+                'ListObjectsV2',
+            )
+        objects = self.objects_by_bucket[Bucket]
+        contents = [
+            {'Key': key}
+            for key in objects.keys()
+            if not Prefix or key.startswith(Prefix)
+        ]
+        return {'Contents': contents} if contents else {}
+
+    def delete_object(self, Bucket: str, Key: str) -> None:
+        if Bucket not in self.objects_by_bucket:
+            raise botocore.exceptions.ClientError(
+                {
+                    'Error': {
+                        'Code': 'NoSuchBucket',
+                        'Message': f"The bucket '{Bucket}' does not exist",
+                    }
+                },
+                'DeleteObject',
+            )
+        self.objects_by_bucket[Bucket].pop(Key, None)
+
+
+@dataclass
+class _MockS3Object:
+    key: str
+    content: str | bytes
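Note (not part of the diff above): the patched S3FileStore.list() avoids the S3 Delimiter parameter and instead collapses a flat key listing into the immediate children of a prefix. The standalone sketch below illustrates that grouping logic; the immediate_children helper is hypothetical and only mirrors the approach, it is not code from this PR.

# Illustrative sketch of the prefix-scan grouping used by the patched
# S3FileStore.list(); `immediate_children` is a hypothetical helper.
def immediate_children(keys: list[str], path: str) -> list[str]:
    # Normalize the prefix the same way the patched list() does.
    if not path or path == '/':
        path = ''
    elif not path.endswith('/'):
        path += '/'
    results: set[str] = set()
    for key in keys:
        if not key.startswith(path) or key == path:
            continue
        remainder = key[len(path):]
        if '/' in remainder:
            # Nested object: report only its first-level directory, with a trailing slash.
            results.add(path + remainder.split('/', 1)[0] + '/')
        else:
            results.add(key)
    return sorted(results)


if __name__ == '__main__':
    keys = ['foo/bar/zap.txt', 'foo/bar/bang.txt', 'ping.txt']
    print(immediate_children(keys, ''))     # ['foo/', 'ping.txt']
    print(immediate_children(keys, 'foo'))  # ['foo/bar/']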