diff --git a/openhands/storage/google_cloud.py b/openhands/storage/google_cloud.py
index da763363ab3d..19465c0442ce 100644
--- a/openhands/storage/google_cloud.py
+++ b/openhands/storage/google_cloud.py
@@ -60,5 +60,19 @@ def list(self, path: str) -> List[str]:
         return list(blobs)

     def delete(self, path: str) -> None:
-        blob = self.bucket.blob(path)
-        blob.delete()
+        # Sanitize path
+        if not path or path == '/':
+            path = ''
+        if path.endswith('/'):
+            path = path[:-1]
+
+        # Try to delete any child resources (Assume the path is a directory)
+        for blob in self.bucket.list_blobs(prefix=f'{path}/'):
+            blob.delete()
+
+        # Next try to delete item as a file
+        try:
+            blob = self.bucket.blob(path)
+            blob.delete()
+        except NotFound:
+            pass
diff --git a/openhands/storage/s3.py b/openhands/storage/s3.py
index 86be398c5172..429e660011e4 100644
--- a/openhands/storage/s3.py
+++ b/openhands/storage/s3.py
@@ -66,36 +66,53 @@ def read(self, path: str) -> str:
         )

     def list(self, path: str) -> list[str]:
-        if path and path != '/' and not path.endswith('/'):
+        if not path or path == '/':
+            path = ''
+        elif not path.endswith('/'):
             path += '/'
-        try:
-            response = self.client.list_objects_v2(Bucket=self.bucket, Prefix=path)
-            # Check if 'Contents' exists in the response
-            if 'Contents' in response:
-                objects = [obj['Key'] for obj in response['Contents']]
-                return objects
-            else:
-                return list()
-        except botocore.exceptions.ClientError as e:
-            # Catch all S3-related errors
-            if e.response['Error']['Code'] == 'NoSuchBucket':
-                raise FileNotFoundError(
-                    f"Error: The bucket '{self.bucket}' does not exist."
-                )
-            elif e.response['Error']['Code'] == 'AccessDenied':
-                raise FileNotFoundError(
-                    f"Error: Access denied to bucket '{self.bucket}'."
-                )
-            else:
-                raise FileNotFoundError(f"Error: {e.response['Error']['Message']}")
-        except Exception as e:
-            raise FileNotFoundError(
-                f"Error: Failed to read from bucket '{self.bucket}' at path {path}: {e}"
-            )
+        # The delimiter logic screens out directories, so we can't use it. :(
+        # For example, given a structure:
+        #   foo/bar/zap.txt
+        #   foo/bar/bang.txt
+        #   ping.txt
+        # prefix=None, delimiter="/" yields ["ping.txt"] # :(
+        # prefix="foo", delimiter="/" yields [] # :(
+        results = set()
+        prefix_len = len(path)
+        response = self.client.list_objects_v2(Bucket=self.bucket, Prefix=path)
+        contents = response.get('Contents')
+        if not contents:
+            return []
+        paths = [obj['Key'] for obj in response['Contents']]
+        for sub_path in paths:
+            if sub_path == path:
+                continue
+            try:
+                index = sub_path.index('/', prefix_len + 1)
+                if index != prefix_len:
+                    results.add(sub_path[: index + 1])
+            except ValueError:
+                results.add(sub_path)
+        return list(results)

     def delete(self, path: str) -> None:
         try:
+            # Sanitize path
+            if not path or path == '/':
+                path = ''
+            if path.endswith('/'):
+                path = path[:-1]
+
+            # Try to delete any child resources (Assume the path is a directory)
+            response = self.client.list_objects_v2(
+                Bucket=self.bucket, Prefix=f'{path}/'
+            )
+            for content in response.get('Contents') or []:
+                self.client.delete_object(Bucket=self.bucket, Key=content['Key'])
+
+            # Next try to delete item as a file
             self.client.delete_object(Bucket=self.bucket, Key=path)
+
         except botocore.exceptions.ClientError as e:
             if e.response['Error']['Code'] == 'NoSuchBucket':
                 raise FileNotFoundError(
diff --git a/poetry.lock b/poetry.lock
index ea3d47abcf94..1f7aa66cd3ca 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -3944,19 +3944,19 @@ pydantic = ">=1.10"

 [[package]]
 name = "llama-index"
-version = "0.12.13"
+version = "0.12.14"
 description = "Interface between LLMs and your data"
 optional = false
 python-versions = "<4.0,>=3.9"
 files = [
-    {file = "llama_index-0.12.13-py3-none-any.whl", hash = "sha256:0b285aa451ced6bd8da40df99068ac96badf8b5725c4edc29f2bce4da2ffd8bc"},
-    {file = "llama_index-0.12.13.tar.gz", hash = "sha256:1e39a397dcc51dabe280c121fd8d5451a6a84595233a8b26caa54d9b7ecf9ffc"},
+    {file = "llama_index-0.12.14-py3-none-any.whl", hash = "sha256:cafbac9f08f1f7293169bfd3c75545db3b761742ea829ba6940c3f2c3b1c2d26"},
+    {file = "llama_index-0.12.14.tar.gz", hash = "sha256:aa74315b32e93a77e285519459d77b98be7db9ae4c5aa64aac2c54cc919c838f"},
 ]

 [package.dependencies]
 llama-index-agent-openai = ">=0.4.0,<0.5.0"
 llama-index-cli = ">=0.4.0,<0.5.0"
-llama-index-core = ">=0.12.13,<0.13.0"
+llama-index-core = ">=0.12.14,<0.13.0"
 llama-index-embeddings-openai = ">=0.3.0,<0.4.0"
 llama-index-indices-managed-llama-cloud = ">=0.4.0"
 llama-index-llms-openai = ">=0.3.0,<0.4.0"
@@ -4001,13 +4001,13 @@ llama-index-llms-openai = ">=0.3.0,<0.4.0"

 [[package]]
 name = "llama-index-core"
-version = "0.12.13"
+version = "0.12.14"
 description = "Interface between LLMs and your data"
 optional = false
 python-versions = "<4.0,>=3.9"
 files = [
-    {file = "llama_index_core-0.12.13-py3-none-any.whl", hash = "sha256:9708bb594bbddffd6ff0767242e49d8978d1ba60a2e62e071d9d123ad2f17e6f"},
-    {file = "llama_index_core-0.12.13.tar.gz", hash = "sha256:77af0161246ce1de38efc17cb6438dfff9e9558af00bcfac7dd4d0b7325efa4b"},
+    {file = "llama_index_core-0.12.14-py3-none-any.whl", hash = "sha256:6fdb30e3fadf98e7df75f9db5d06f6a7f8503ca545a71e048d786ff88012bd50"},
+    {file = "llama_index_core-0.12.14.tar.gz", hash = "sha256:378bbf5bf4d1a8c692d3a980c1a6ed3be7a9afb676a4960429dea15f62d06cd3"},
 ]

 [package.dependencies]
diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py
index 07ff1310b689..b22c87331a90 100644
--- a/tests/unit/test_agent_controller.py
+++ b/tests/unit/test_agent_controller.py
@@ -3,6 +3,7 @@
 from uuid import uuid4

 import pytest
+from litellm import ContextWindowExceededError

 from openhands.controller.agent import Agent
 from openhands.controller.agent_controller import AgentController
@@ -552,3 +553,49 @@ def on_event(event: Event):
     assert (
         state.metrics.accumulated_cost == 10.0 * 3
     ), f'Expected accumulated cost to be 30.0, but got {state.metrics.accumulated_cost}'
+
+
+@pytest.mark.asyncio
+async def test_context_window_exceeded_error_handling(mock_agent, mock_event_stream):
+    """Test that context window exceeded errors are handled correctly by truncating history."""
+
+    class StepState:
+        def __init__(self):
+            self.has_errored = False
+
+        def step(self, state: State):
+            # Append a few messages to the history -- these will be truncated when we throw the error
+            state.history = [
+                MessageAction(content='Test message 0'),
+                MessageAction(content='Test message 1'),
+            ]
+
+            error = ContextWindowExceededError(
+                message='prompt is too long: 233885 tokens > 200000 maximum',
+                model='',
+                llm_provider='',
+            )
+            self.has_errored = True
+            raise error
+
+    state = StepState()
+    mock_agent.step = state.step
+
+    controller = AgentController(
+        agent=mock_agent,
+        event_stream=mock_event_stream,
+        max_iterations=10,
+        sid='test',
+        confirmation_mode=False,
+        headless_mode=True,
+    )
+
+    # Set the agent running and take a step in the controller -- this is similar
+    # to taking a single step using `run_controller`, but much easier to control
+    # termination for testing purposes
+    controller.state.agent_state = AgentState.RUNNING
+    await controller._step()
+
+    # Check that the error was thrown and the history has been truncated
+    assert state.has_errored
+    assert controller.state.history == [MessageAction(content='Test message 1')]
diff --git a/tests/unit/test_storage.py b/tests/unit/test_storage.py
index 46915c181492..37b52f0a924a 100644
--- a/tests/unit/test_storage.py
+++ b/tests/unit/test_storage.py
@@ -4,15 +4,19 @@
 import shutil
 from abc import ABC
 from dataclasses import dataclass, field
-from io import StringIO
+from io import BytesIO, StringIO
 from typing import Dict, List, Optional
 from unittest import TestCase
 from unittest.mock import patch

+import botocore.exceptions
+from google.api_core.exceptions import NotFound
+
 from openhands.storage.files import FileStore
 from openhands.storage.google_cloud import GoogleCloudFileStore
 from openhands.storage.local import LocalFileStore
 from openhands.storage.memory import InMemoryFileStore
+from openhands.storage.s3 import S3FileStore


 class _StorageTest(ABC):
@@ -73,6 +77,35 @@ def test_deep_list(self):
         store.delete('foo/bar/qux.txt')
         store.delete('foo/bar/quux.txt')

+    def test_directory_deletion(self):
+        store = self.get_store()
+        # Create a directory structure
+        store.write('foo/bar/baz.txt', 'Hello, world!')
+        store.write('foo/bar/qux.txt', 'Hello, world!')
+        store.write('foo/other.txt', 'Hello, world!')
+        store.write('foo/bar/subdir/file.txt', 'Hello, world!')
+
+        # Verify initial structure
+        self.assertEqual(store.list(''), ['foo/'])
+        self.assertEqual(sorted(store.list('foo')), ['foo/bar/', 'foo/other.txt'])
+        self.assertEqual(
+            sorted(store.list('foo/bar')),
+            ['foo/bar/baz.txt', 'foo/bar/qux.txt', 'foo/bar/subdir/'],
+        )
+
+        # Delete a directory
+        store.delete('foo/bar')
+
+        # Verify directory and its contents are gone, but other files remain
+        self.assertEqual(store.list(''), ['foo/'])
+        self.assertEqual(store.list('foo'), ['foo/other.txt'])
+
+        # Delete root directory
+        store.delete('foo')
+
+        # Verify everything is gone
+        self.assertEqual(store.list(''), [])
+

 class TestLocalFileStore(TestCase, _StorageTest):
     def setUp(self):
@@ -94,6 +127,12 @@ def setUp(self):
         self.store = GoogleCloudFileStore('dear-liza')


+class TestS3FileStore(TestCase, _StorageTest):
+    def setUp(self):
+        with patch('boto3.client', lambda service, **kwargs: _MockS3Client()):
+            self.store = S3FileStore('dear-liza')
+
+
 # I would have liked to use cloud-storage-mocker here but the python versions were incompatible :(
 # If we write tests for the S3 storage class I would definitely recommend we use moto.
 class _MockGoogleCloudClient:
@@ -131,6 +170,8 @@ def open(self, op: str):
         return _MockGoogleCloudBlobWriter(self)

     def delete(self):
+        if self.name not in self.bucket.blobs_by_path:
+            raise NotFound('Blob not found')
         del self.bucket.blobs_by_path[self.name]


@@ -152,3 +193,77 @@ def __exit__(self, exc_type, exc_val, exc_tb):
         blob = self.blob
         blob.content = self.content
         blob.bucket.blobs_by_path[blob.name] = blob
+
+
+class _MockS3Client:
+    def __init__(self):
+        self.objects_by_bucket: Dict[str, Dict[str, _MockS3Object]] = {}
+
+    def put_object(self, Bucket: str, Key: str, Body: str | bytes) -> None:
+        if Bucket not in self.objects_by_bucket:
+            self.objects_by_bucket[Bucket] = {}
+        self.objects_by_bucket[Bucket][Key] = _MockS3Object(Key, Body)
+
+    def get_object(self, Bucket: str, Key: str) -> Dict:
+        if Bucket not in self.objects_by_bucket:
+            raise botocore.exceptions.ClientError(
+                {
+                    'Error': {
+                        'Code': 'NoSuchBucket',
+                        'Message': f"The bucket '{Bucket}' does not exist",
+                    }
+                },
+                'GetObject',
+            )
+        if Key not in self.objects_by_bucket[Bucket]:
+            raise botocore.exceptions.ClientError(
+                {
+                    'Error': {
+                        'Code': 'NoSuchKey',
+                        'Message': f"The specified key '{Key}' does not exist",
+                    }
+                },
+                'GetObject',
+            )
+        content = self.objects_by_bucket[Bucket][Key].content
+        if isinstance(content, bytes):
+            return {'Body': BytesIO(content)}
+        return {'Body': StringIO(content)}
+
+    def list_objects_v2(self, Bucket: str, Prefix: str = '') -> Dict:
+        if Bucket not in self.objects_by_bucket:
+            raise botocore.exceptions.ClientError(
+                {
+                    'Error': {
+                        'Code': 'NoSuchBucket',
+                        'Message': f"The bucket '{Bucket}' does not exist",
+                    }
+                },
+                'ListObjectsV2',
+            )
+        objects = self.objects_by_bucket[Bucket]
+        contents = [
+            {'Key': key}
+            for key in objects.keys()
+            if not Prefix or key.startswith(Prefix)
+        ]
+        return {'Contents': contents} if contents else {}
+
+    def delete_object(self, Bucket: str, Key: str) -> None:
+        if Bucket not in self.objects_by_bucket:
+            raise botocore.exceptions.ClientError(
+                {
+                    'Error': {
+                        'Code': 'NoSuchBucket',
+                        'Message': f"The bucket '{Bucket}' does not exist",
+                    }
+                },
+                'DeleteObject',
+            )
+        self.objects_by_bucket[Bucket].pop(Key, None)
+
+
+@dataclass
+class _MockS3Object:
+    key: str
+    content: str | bytes
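Note (not part of the diff above): the patched S3FileStore.list() avoids the S3 Delimiter parameter and instead collapses a flat key listing into the immediate children of a prefix. The standalone sketch below illustrates that grouping logic; the immediate_children helper is hypothetical and only mirrors the approach, it is not code from this PR.

# Illustrative sketch of the prefix-scan grouping used by the patched
# S3FileStore.list(); `immediate_children` is a hypothetical helper.
def immediate_children(keys: list[str], path: str) -> list[str]:
    # Normalize the prefix the same way the patched list() does.
    if not path or path == '/':
        path = ''
    elif not path.endswith('/'):
        path += '/'
    results: set[str] = set()
    for key in keys:
        if not key.startswith(path) or key == path:
            continue
        remainder = key[len(path):]
        if '/' in remainder:
            # Nested object: report only its first-level directory, with a trailing slash.
            results.add(path + remainder.split('/', 1)[0] + '/')
        else:
            results.add(key)
    return sorted(results)


if __name__ == '__main__':
    keys = ['foo/bar/zap.txt', 'foo/bar/bang.txt', 'ping.txt']
    print(immediate_children(keys, ''))     # ['foo/', 'ping.txt']
    print(immediate_children(keys, 'foo'))  # ['foo/bar/']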