Skip to content

Commit

Permalink
Revert "Fix pr #5242: Switch dependency to browsergym-core"
Browse files Browse the repository at this point in the history
This reverts commit c45cb45.
  • Loading branch information
enyst committed Nov 25, 2024
1 parent c45cb45 commit d9ffbea
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 166 deletions.
4 changes: 1 addition & 3 deletions tests/unit/resolver/test_resolve_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,10 +551,8 @@ def test_file_instruction_with_repo_instruction():
- Setup: `poetry install --with test --with dev`
- Testing: `poetry run pytest tests/test_*.py`
When you think you have fixed the issue through code changes, please finish the interaction."""
# Normalize line endings to avoid test failures
assert instruction.replace('\r\n', '\n') == expected_instruction.replace('\r\n', '\n')
assert instruction == expected_instruction
assert issue_handler.issue_type == 'issue'
assert image_urls == []

Expand Down
12 changes: 5 additions & 7 deletions tests/unit/test_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@

import pytest

# Mock chromadb before importing LongTermMemory
with patch.dict('sys.modules', {'chromadb': MagicMock()}):
from openhands.core.config import AgentConfig, LLMConfig
from openhands.events.event import Event, EventSource
from openhands.events.stream import EventStream
from openhands.memory.memory import LongTermMemory
from openhands.storage.files import FileStore
from openhands.core.config import AgentConfig, LLMConfig
from openhands.events.event import Event, EventSource
from openhands.events.stream import EventStream
from openhands.memory.memory import LongTermMemory
from openhands.storage.files import FileStore


@pytest.fixture
Expand Down
89 changes: 43 additions & 46 deletions tests/unit/test_runtime_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@ def mock_docker_client():


@pytest.fixture
def docker_runtime_builder(mock_docker_client):
return DockerRuntimeBuilder(mock_docker_client)
def docker_runtime_builder():
client = docker.from_env()
return DockerRuntimeBuilder(client)


def _check_source_code_in_dir(temp_dir):
Expand Down Expand Up @@ -493,16 +494,9 @@ def test_build_image_from_scratch(docker_runtime_builder, tmp_path):
f.write("""FROM php:latest
CMD ["sh", "-c", "echo 'Hello, World!'"]
""")

# Mock Docker client
mock_client = MagicMock()
mock_image = MagicMock()
mock_client.images.get.return_value = mock_image
docker_runtime_builder.docker_client = mock_client

# Mock build method
docker_runtime_builder.build = MagicMock(return_value=f'{tags[0]}')

built_image_name = None
container = None
client = docker.from_env()
try:
built_image_name = docker_runtime_builder.build(
context_path,
Expand All @@ -512,10 +506,23 @@ def test_build_image_from_scratch(docker_runtime_builder, tmp_path):
assert built_image_name == f'{tags[0]}'

# Verify the image was created
mock_client.images.get.assert_called_once_with(tags[0])
image = client.images.get(tags[0])
assert image is not None

except docker.errors.ImageNotFound:
pytest.fail('test_build_image_from_scratch: test image not found!')
except Exception as e:
pytest.fail(f'test_build_image_from_scratch: Build failed with error: {str(e)}')

finally:
# Clean up the container
if container:
try:
container.remove(force=True)
logger.info(f'Removed test container: `{container.id}`')
except Exception as e:
logger.warning(
f'Failed to remove test container `{container.id}`: {str(e)}'
)

# Clean up the image
Expand All @@ -537,25 +544,17 @@ def _format_size_to_gb(bytes_size):


def test_list_dangling_images():
# Mock Docker client
mock_client = MagicMock()
mock_image = MagicMock()
mock_image.tags = ['test:latest']
mock_image.attrs = {'Size': 1024 * 1024 * 1024} # 1 GB
mock_client.images.list.return_value = [mock_image]

with patch('docker.from_env', return_value=mock_client):
client = docker.from_env()
dangling_images = client.images.list(filters={'dangling': True})
if dangling_images and len(dangling_images) > 0:
for image in dangling_images:
if 'Size' in image.attrs and isinstance(image.attrs['Size'], int):
size_gb = _format_size_to_gb(image.attrs['Size'])
logger.info(f'Dangling image: {image.tags}, Size: {size_gb} GB')
else:
logger.info(f'Dangling image: {image.tags}, Size: n/a')
else:
logger.info('No dangling images found')
client = docker.from_env()
dangling_images = client.images.list(filters={'dangling': True})
if dangling_images and len(dangling_images) > 0:
for image in dangling_images:
if 'Size' in image.attrs and isinstance(image.attrs['Size'], int):
size_gb = _format_size_to_gb(image.attrs['Size'])
logger.info(f'Dangling image: {image.tags}, Size: {size_gb} GB')
else:
logger.info(f'Dangling image: {image.tags}, Size: n/a')
else:
logger.info('No dangling images found')


def test_build_image_from_repo(docker_runtime_builder, tmp_path):
Expand All @@ -567,16 +566,9 @@ def test_build_image_from_repo(docker_runtime_builder, tmp_path):
f.write(f"""FROM {DEFAULT_BASE_IMAGE}
CMD ["sh", "-c", "echo 'Hello, World!'"]
""")

# Mock Docker client
mock_client = MagicMock()
mock_image = MagicMock()
mock_client.images.get.return_value = mock_image
docker_runtime_builder.docker_client = mock_client

# Mock build method
docker_runtime_builder.build = MagicMock(return_value=f'{tags[0]}')

built_image_name = None
container = None
client = docker.from_env()
try:
built_image_name = docker_runtime_builder.build(
context_path,
Expand All @@ -585,11 +577,16 @@ def test_build_image_from_repo(docker_runtime_builder, tmp_path):
)
assert built_image_name == f'{tags[0]}'

# Verify the image was created
mock_client.images.get.assert_called_once_with(tags[0])
image = client.images.get(tags[0])
assert image is not None

except Exception as e:
pytest.fail(f'test_build_image_from_repo: Build failed with error: {str(e)}')
except docker.errors.ImageNotFound:
pytest.fail('test_build_image_from_repo: test image not found!')

finally:
# Clean up the container
if container:
try:
container.remove(force=True)
logger.info(f'Removed test container: `{container.id}`')
except Exception as e:
Expand Down
160 changes: 50 additions & 110 deletions tests/unit/test_security.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,87 +49,47 @@ def add_events(event_stream: EventStream, data: list[tuple[Event, EventSource]])


def test_msg(temp_dir: str):
mock_container = MagicMock()
mock_container.status = 'running'
mock_container.attrs = {
'NetworkSettings': {'Ports': {'8000/tcp': [{'HostPort': 34567}]}}
}
mock_docker = MagicMock()
mock_docker.from_env().containers.list.return_value = [mock_container]

mock_requests = MagicMock()
mock_requests.get().json.return_value = {'id': 'mock-session-id'}
mock_requests.post().json.side_effect = [
{'monitor_id': 'mock-monitor-id'},
[],
[],
file_store = get_file_store('local', temp_dir)
event_stream = EventStream('main', file_store)
policy = """
raise "Disallow ABC [risk=medium]" if:
(msg: Message)
"ABC" in msg.content
"""
InvariantAnalyzer(event_stream, policy)
data = [
(MessageAction('Hello world!'), EventSource.USER),
(MessageAction('AB!'), EventSource.AGENT),
(MessageAction('Hello world!'), EventSource.USER),
(MessageAction('ABC!'), EventSource.AGENT),
]

with (
patch(f'{InvariantAnalyzer.__module__}.docker', mock_docker),
patch(f'{InvariantClient.__module__}.requests', mock_requests),
):
file_store = get_file_store('local', temp_dir)
event_stream = EventStream('main', file_store)
policy = """
raise "Disallow ABC [risk=medium]" if:
(msg: Message)
"ABC" in msg.content
"""
InvariantAnalyzer(event_stream, policy)
data = [
(MessageAction('Hello world!'), EventSource.USER),
(MessageAction('AB!'), EventSource.AGENT),
(MessageAction('Hello world!'), EventSource.USER),
(MessageAction('ABC!'), EventSource.AGENT),
]
add_events(event_stream, data)
for i in range(3):
assert data[i][0].security_risk == ActionSecurityRisk.LOW
assert data[3][0].security_risk == ActionSecurityRisk.MEDIUM
add_events(event_stream, data)
for i in range(3):
assert data[i][0].security_risk == ActionSecurityRisk.LOW
assert data[3][0].security_risk == ActionSecurityRisk.MEDIUM


@pytest.mark.parametrize(
'cmd,expected_risk',
[('rm -rf root_dir', ActionSecurityRisk.MEDIUM), ['ls', ActionSecurityRisk.LOW]],
)
def test_cmd(cmd, expected_risk, temp_dir: str):
mock_container = MagicMock()
mock_container.status = 'running'
mock_container.attrs = {
'NetworkSettings': {'Ports': {'8000/tcp': [{'HostPort': 34567}]}}
}
mock_docker = MagicMock()
mock_docker.from_env().containers.list.return_value = [mock_container]

mock_requests = MagicMock()
mock_requests.get().json.return_value = {'id': 'mock-session-id'}
mock_requests.post().json.side_effect = [
{'monitor_id': 'mock-monitor-id'},
[],
[],
file_store = get_file_store('local', temp_dir)
event_stream = EventStream('main', file_store)
policy = """
raise "Disallow rm -rf [risk=medium]" if:
(call: ToolCall)
call is tool:run
match("rm -rf", call.function.arguments.command)
"""
InvariantAnalyzer(event_stream, policy)
data = [
(MessageAction('Hello world!'), EventSource.USER),
(CmdRunAction(cmd), EventSource.USER),
]

with (
patch(f'{InvariantAnalyzer.__module__}.docker', mock_docker),
patch(f'{InvariantClient.__module__}.requests', mock_requests),
):
file_store = get_file_store('local', temp_dir)
event_stream = EventStream('main', file_store)
policy = """
raise "Disallow rm -rf [risk=medium]" if:
(call: ToolCall)
call is tool:run
match("rm -rf", call.function.arguments.command)
"""
InvariantAnalyzer(event_stream, policy)
data = [
(MessageAction('Hello world!'), EventSource.USER),
(CmdRunAction(cmd), EventSource.USER),
]
add_events(event_stream, data)
assert data[0][0].security_risk == ActionSecurityRisk.LOW
assert data[1][0].security_risk == expected_risk
add_events(event_stream, data)
assert data[0][0].security_risk == ActionSecurityRisk.LOW
assert data[1][0].security_risk == expected_risk


@pytest.mark.parametrize(
Expand All @@ -140,46 +100,26 @@ def test_cmd(cmd, expected_risk, temp_dir: str):
],
)
def test_leak_secrets(code, expected_risk, temp_dir: str):
mock_container = MagicMock()
mock_container.status = 'running'
mock_container.attrs = {
'NetworkSettings': {'Ports': {'8000/tcp': [{'HostPort': 34567}]}}
}
mock_docker = MagicMock()
mock_docker.from_env().containers.list.return_value = [mock_container]
file_store = get_file_store('local', temp_dir)
event_stream = EventStream('main', file_store)
policy = """
from invariant.detectors import secrets
mock_requests = MagicMock()
mock_requests.get().json.return_value = {'id': 'mock-session-id'}
mock_requests.post().json.side_effect = [
{'monitor_id': 'mock-monitor-id'},
[],
[],
raise "Disallow writing secrets [risk=medium]" if:
(call: ToolCall)
call is tool:run_ipython
any(secrets(call.function.arguments.code))
"""
InvariantAnalyzer(event_stream, policy)
data = [
(MessageAction('Hello world!'), EventSource.USER),
(IPythonRunCellAction(code), EventSource.AGENT),
(IPythonRunCellAction('hello'), EventSource.AGENT),
]

with (
patch(f'{InvariantAnalyzer.__module__}.docker', mock_docker),
patch(f'{InvariantClient.__module__}.requests', mock_requests),
):
file_store = get_file_store('local', temp_dir)
event_stream = EventStream('main', file_store)
policy = """
from invariant.detectors import secrets
raise "Disallow writing secrets [risk=medium]" if:
(call: ToolCall)
call is tool:run_ipython
any(secrets(call.function.arguments.code))
"""
InvariantAnalyzer(event_stream, policy)
data = [
(MessageAction('Hello world!'), EventSource.USER),
(IPythonRunCellAction(code), EventSource.AGENT),
(IPythonRunCellAction('hello'), EventSource.AGENT),
]
add_events(event_stream, data)
assert data[0][0].security_risk == ActionSecurityRisk.LOW
assert data[1][0].security_risk == expected_risk
assert data[2][0].security_risk == ActionSecurityRisk.LOW
add_events(event_stream, data)
assert data[0][0].security_risk == ActionSecurityRisk.LOW
assert data[1][0].security_risk == expected_risk
assert data[2][0].security_risk == ActionSecurityRisk.LOW


def test_unsafe_python_code(temp_dir: str):
Expand Down

0 comments on commit d9ffbea

Please sign in to comment.