diff --git a/tests/unit/test_browsing_agent_utils.py b/tests/unit/test_browsing_agent_utils.py new file mode 100644 index 000000000000..941b1ed6aa46 --- /dev/null +++ b/tests/unit/test_browsing_agent_utils.py @@ -0,0 +1,156 @@ +"""Tests for the browsing agent utils.""" + +import pytest + +from openhands.agenthub.browsing_agent.utils import ( + ParseError, + compress_string, + extract_html_tags, + parse_html_tags, + parse_html_tags_raise, + yaml_parser, +) + + +def test_yaml_parser_valid(): + """Test yaml_parser with valid input.""" + valid_yaml = """ + key1: value1 + key2: value2 + """ + value, valid, retry_message = yaml_parser(valid_yaml) + assert valid is True + assert value == {'key1': 'value1', 'key2': 'value2'} + assert retry_message == '' + + +def test_yaml_parser_invalid(): + """Test yaml_parser with invalid input.""" + invalid_yaml = """ + key1: value1 + key2: : invalid : syntax : + """ + value, valid, retry_message = yaml_parser(invalid_yaml) + assert valid is False + assert value == {} + assert 'valid yaml' in retry_message.lower() + + +def test_compress_string(): + """Test string compression with redundant paragraphs and lines.""" + text = """ +First paragraph +with multiple lines +that are unique. + +Second paragraph +that repeats. + +Third unique +paragraph here. + +Second paragraph +that repeats. +""" + result = compress_string(text) + + # Check that definitions section exists + assert '' in result + assert '' in result + + # Check that repeated content is replaced with identifiers + definitions = extract_html_tags(result, ['definitions'])['definitions'][0] + assert 'ยง-0' in result # Paragraph identifier + assert 'Second paragraph' in definitions + + +def test_extract_html_tags_single(): + """Test extracting a single HTML tag.""" + text = 'Content' + result = extract_html_tags(text, ['test']) + assert result == {'test': ['Content']} + + +def test_extract_html_tags_multiple(): + """Test extracting multiple HTML tags.""" + text = 'FirstSecondThird' + result = extract_html_tags(text, ['tag1', 'tag2']) + assert result == {'tag1': ['First', 'Third'], 'tag2': ['Second']} + + +def test_extract_html_tags_nested(): + """Test extracting nested HTML tags.""" + text = 'OutsideInsideEnd' + result = extract_html_tags(text, ['outer', 'inner']) + assert result == {'outer': ['OutsideInsideEnd'], 'inner': ['Inside']} + + +def test_parse_html_tags_basic(): + """Test basic HTML tag parsing.""" + text = 'ContentExtra' + result, valid, message = parse_html_tags( + text, keys=('required',), optional_keys=('optional',) + ) + assert valid is True + assert result == {'required': 'Content', 'optional': 'Extra'} + assert message == '' + + +def test_parse_html_tags_missing_required(): + """Test parsing with missing required tags.""" + text = 'Present' + result, valid, message = parse_html_tags( + text, keys=('required',), optional_keys=('optional',) + ) + assert valid is False + assert 'Missing the key ' in message + + +def test_parse_html_tags_multiple_instances(): + """Test parsing with multiple instances of the same tag.""" + text = 'FirstSecond' + + # Test without merge_multiple + result, valid, message = parse_html_tags(text, keys=('tag',)) + assert valid is False + assert 'multiple instances' in message.lower() + + # Test with merge_multiple + result, valid, message = parse_html_tags(text, keys=('tag',), merge_multiple=True) + assert valid is True + assert 'First' in result['tag'] + assert 'Second' in result['tag'] + + +def test_parse_html_tags_raise(): + """Test parse_html_tags_raise function.""" + valid_text = 'Content' + result = parse_html_tags_raise(valid_text, keys=('required',)) + assert result == {'required': 'Content'} + + invalid_text = 'Content' + with pytest.raises(ParseError): + parse_html_tags_raise(invalid_text, keys=('required',)) + + +def test_parse_html_tags_empty(): + """Test parsing with empty content.""" + text = '' + result, valid, message = parse_html_tags(text, keys=('tag',)) + assert valid is True + assert result == {'tag': ''} + + +def test_parse_html_tags_whitespace(): + """Test parsing with whitespace content.""" + text = ' \n ' + result, valid, message = parse_html_tags(text, keys=('tag',)) + assert valid is True + assert result == {'tag': ''} + + +def test_parse_html_tags_case_sensitivity(): + """Test case sensitivity in tag parsing.""" + text = 'Content' + result = extract_html_tags(text, ['tag']) + assert result == {} # Tags should be case-sensitive