Skip to content

Commit

Permalink
feat: browsing agent test
Browse files Browse the repository at this point in the history
  • Loading branch information
tomfrombayesians committed Feb 19, 2025
1 parent f4e5fb2 commit 06f53b1
Showing 1 changed file with 156 additions and 0 deletions.
156 changes: 156 additions & 0 deletions tests/unit/test_browsing_agent_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
"""Tests for the browsing agent utils."""

import pytest

from openhands.agenthub.browsing_agent.utils import (
ParseError,
compress_string,
extract_html_tags,
parse_html_tags,
parse_html_tags_raise,
yaml_parser,
)


def test_yaml_parser_valid():
    """Check that yaml_parser accepts a well-formed YAML mapping.

    The parser is expected to return the decoded mapping, a True validity
    flag, and an empty retry message.
    """
    document = """
key1: value1
key2: value2
"""
    parsed, is_valid, retry_hint = yaml_parser(document)

    expected = {'key1': 'value1', 'key2': 'value2'}
    assert is_valid is True
    assert parsed == expected
    assert retry_hint == ''


def test_yaml_parser_invalid():
    """Check that yaml_parser rejects malformed YAML.

    The parser is expected to return an empty dict, a False validity flag,
    and a retry message that mentions "valid yaml" (case-insensitive).
    """
    malformed = """
key1: value1
key2: : invalid : syntax :
"""
    parsed, is_valid, retry_hint = yaml_parser(malformed)

    assert is_valid is False
    assert parsed == {}
    # Compare case-insensitively so the exact capitalisation is not pinned.
    lowered_hint = retry_hint.lower()
    assert 'valid yaml' in lowered_hint


def test_compress_string():
    """Check that compress_string factors repeated text into definitions.

    The compressed output must contain a <definitions> section, reference
    the repeated content through the '§-0' identifier, and keep the repeated
    text inside the definitions body.
    """
    # NOTE(review): the literal below is reproduced exactly as seen during
    # review; confirm its indentation and blank lines against the repository.
    source_text = """
First paragraph
with multiple lines
that are unique.
Second paragraph
that repeats.
Third unique
paragraph here.
Second paragraph
that repeats.
"""
    compressed = compress_string(source_text)

    # Both the opening and the closing definitions marker must be present.
    assert '<definitions>' in compressed
    assert '</definitions>' in compressed

    # Repeated content is swapped for an identifier and stored in definitions.
    extracted = extract_html_tags(compressed, ['definitions'])
    definitions_body = extracted['definitions'][0]
    assert '§-0' in compressed  # Paragraph identifier
    assert 'Second paragraph' in definitions_body


def test_extract_html_tags_single():
"""Test extracting a single HTML tag."""
text = '<test>Content</test>'
result = extract_html_tags(text, ['test'])
assert result == {'test': ['Content']}


def test_extract_html_tags_multiple():
    """Check extraction of several tags, one of which occurs twice.

    Each tag maps to the list of its contents in order of appearance.
    """
    markup = '<tag1>First</tag1><tag2>Second</tag2><tag1>Third</tag1>'
    wanted_tags = ['tag1', 'tag2']

    extracted = extract_html_tags(markup, wanted_tags)

    expected = {'tag1': ['First', 'Third'], 'tag2': ['Second']}
    assert extracted == expected


def test_extract_html_tags_nested():
    """Check extraction when one requested tag is nested inside another.

    The outer tag's content keeps the inner markup verbatim, and the inner
    tag is also reported on its own.
    """
    markup = '<outer>Outside<inner>Inside</inner>End</outer>'

    extracted = extract_html_tags(markup, ['outer', 'inner'])

    expected = {
        'outer': ['Outside<inner>Inside</inner>End'],
        'inner': ['Inside'],
    }
    assert extracted == expected


def test_parse_html_tags_basic():
    """Check parse_html_tags with one required and one optional key present.

    Both values are returned as plain strings, the parse is flagged valid,
    and the retry message is empty.
    """
    markup = '<required>Content</required><optional>Extra</optional>'

    parsed, is_valid, retry_hint = parse_html_tags(
        markup,
        keys=('required',),
        optional_keys=('optional',),
    )

    expected = {'required': 'Content', 'optional': 'Extra'}
    assert is_valid is True
    assert parsed == expected
    assert retry_hint == ''


def test_parse_html_tags_missing_required():
    """Check parse_html_tags when the required key is absent.

    Only the optional tag is supplied, so the parse must be flagged invalid
    and the retry message must name the missing <required> key.
    """
    markup = '<optional>Present</optional>'

    # The parsed dict is not inspected here; only validity and message are.
    _parsed, is_valid, retry_hint = parse_html_tags(
        markup,
        keys=('required',),
        optional_keys=('optional',),
    )

    assert is_valid is False
    assert 'Missing the key <required>' in retry_hint


def test_parse_html_tags_multiple_instances():
    """Check parse_html_tags when the same tag appears twice.

    Without merge_multiple the parse is flagged invalid and the message
    mentions "multiple instances" (case-insensitive). With
    merge_multiple=True the parse is valid and the merged value contains
    both contents.
    """
    markup = '<tag>First</tag><tag>Second</tag>'

    # Default behaviour: duplicates are rejected.
    _parsed, is_valid, retry_hint = parse_html_tags(markup, keys=('tag',))
    assert is_valid is False
    assert 'multiple instances' in retry_hint.lower()

    # Opt-in merging: duplicates are combined into a single value.
    merged, is_valid, retry_hint = parse_html_tags(
        markup, keys=('tag',), merge_multiple=True
    )
    assert is_valid is True
    merged_value = merged['tag']
    assert 'First' in merged_value
    assert 'Second' in merged_value


def test_parse_html_tags_raise():
    """Check parse_html_tags_raise on both a valid and an invalid input.

    With the required tag present it returns the parsed dict; with the
    required tag missing it raises ParseError.
    """
    required_keys = ('required',)

    # Success path: the required tag is present.
    parsed = parse_html_tags_raise('<required>Content</required>', keys=required_keys)
    assert parsed == {'required': 'Content'}

    # Failure path: only an unrelated tag is present.
    with pytest.raises(ParseError):
        parse_html_tags_raise('<wrong>Content</wrong>', keys=required_keys)


def test_parse_html_tags_empty():
    """Check parse_html_tags on a tag with no content.

    An empty tag is still valid and parses to an empty string.
    """
    parsed, is_valid, _retry_hint = parse_html_tags('<tag></tag>', keys=('tag',))

    assert is_valid is True
    assert parsed == {'tag': ''}


def test_parse_html_tags_whitespace():
    """Check parse_html_tags on a tag holding only whitespace.

    The parse is valid and the whitespace-only content comes back as an
    empty string.
    """
    markup = '<tag> \n </tag>'

    parsed, is_valid, _retry_hint = parse_html_tags(markup, keys=('tag',))

    assert is_valid is True
    assert parsed == {'tag': ''}


def test_parse_html_tags_case_sensitivity():
    """Check that tag matching in extract_html_tags is case-sensitive.

    An upper-case <TAG> must not be returned when lower-case 'tag' is
    requested, so the result is an empty dict.
    """
    upper_case_markup = '<TAG>Content</TAG>'

    extracted = extract_html_tags(upper_case_markup, ['tag'])

    assert extracted == {}  # Tags should be case-sensitive

0 comments on commit 06f53b1

Please sign in to comment.