diff --git a/openhands/resolver/issue_definitions.py b/openhands/resolver/issue_definitions.py
index a0d0eb570aa7..c4ef33fb351e 100644
--- a/openhands/resolver/issue_definitions.py
+++ b/openhands/resolver/issue_definitions.py
@@ -5,17 +5,18 @@
 from typing import Any, ClassVar
 
 import jinja2
-import litellm
 import requests
 
 from openhands.core.config import LLMConfig
 from openhands.core.logger import openhands_logger as logger
 from openhands.events.event import Event
+from openhands.llm.llm import LLM
 from openhands.resolver.github_issue import GithubIssue, ReviewThread
 
 
 class IssueHandlerInterface(ABC):
     issue_type: ClassVar[str]
+    llm: LLM
 
     @abstractmethod
     def get_converted_issues(
@@ -36,7 +37,7 @@ def get_instruction(
 
     @abstractmethod
     def guess_success(
-        self, issue: GithubIssue, history: list[Event], llm_config: LLMConfig
+        self, issue: GithubIssue, history: list[Event]
     ) -> tuple[bool, list[bool] | None, str]:
         """Guess if the issue has been resolved based on the agent's output."""
         pass
@@ -45,11 +46,12 @@ def guess_success(
 class IssueHandler(IssueHandlerInterface):
     issue_type: ClassVar[str] = 'issue'
 
-    def __init__(self, owner: str, repo: str, token: str):
+    def __init__(self, owner: str, repo: str, token: str, llm_config: LLMConfig):
         self.download_url = 'https://api.github.com/repos/{}/{}/issues'
         self.owner = owner
         self.repo = repo
         self.token = token
+        self.llm = LLM(llm_config)
 
     def _download_issues_from_github(self) -> list[Any]:
         url = self.download_url.format(self.owner, self.repo)
@@ -218,7 +220,7 @@ def get_instruction(
         )
 
     def guess_success(
-        self, issue: GithubIssue, history: list[Event], llm_config: LLMConfig
+        self, issue: GithubIssue, history: list[Event]
     ) -> tuple[bool, None | list[bool], str]:
         """Guess if the issue is fixed based on the history and the issue description."""
         last_message = history[-1].message
@@ -239,12 +241,7 @@ def guess_success(
             template = jinja2.Template(f.read())
 
         prompt = template.render(issue_context=issue_context, last_message=last_message)
-        response = litellm.completion(
-            model=llm_config.model,
-            messages=[{'role': 'user', 'content': prompt}],
-            api_key=llm_config.api_key,
-            base_url=llm_config.base_url,
-        )
+        response = self.llm.completion(messages=[{'role': 'user', 'content': prompt}])
 
         answer = response.choices[0].message.content.strip()
         pattern = r'--- success\n*(true|false)\n*--- explanation*\n((?:.|\n)*)'
@@ -258,8 +255,8 @@ def guess_success(
 class PRHandler(IssueHandler):
     issue_type: ClassVar[str] = 'pr'
 
-    def __init__(self, owner: str, repo: str, token: str):
-        super().__init__(owner, repo, token)
+    def __init__(self, owner: str, repo: str, token: str, llm_config: LLMConfig):
+        super().__init__(owner, repo, token, llm_config)
         self.download_url = 'https://api.github.com/repos/{}/{}/pulls'
 
     def __download_pr_metadata(
@@ -612,16 +609,9 @@ def get_instruction(
         )
         return instruction, images
 
-    def _check_feedback_with_llm(
-        self, prompt: str, llm_config: LLMConfig
-    ) -> tuple[bool, str]:
+    def _check_feedback_with_llm(self, prompt: str) -> tuple[bool, str]:
         """Helper function to check feedback with LLM and parse response."""
-        response = litellm.completion(
-            model=llm_config.model,
-            messages=[{'role': 'user', 'content': prompt}],
-            api_key=llm_config.api_key,
-            base_url=llm_config.base_url,
-        )
+        response = self.llm.completion(messages=[{'role': 'user', 'content': prompt}])
 
         answer = response.choices[0].message.content.strip()
         pattern = r'--- success\n*(true|false)\n*--- explanation*\n((?:.|\n)*)'
@@ -635,7 +625,6 @@ def _check_review_thread(
         review_thread: ReviewThread,
         issues_context: str,
         last_message: str,
-        llm_config: LLMConfig,
     ) -> tuple[bool, str]:
         """Check if a review thread's feedback has been addressed."""
         files_context = json.dumps(review_thread.files, indent=4)
@@ -656,14 +645,13 @@ def _check_review_thread(
             last_message=last_message,
         )
 
-        return self._check_feedback_with_llm(prompt, llm_config)
+        return self._check_feedback_with_llm(prompt)
 
     def _check_thread_comments(
         self,
         thread_comments: list[str],
         issues_context: str,
         last_message: str,
-        llm_config: LLMConfig,
     ) -> tuple[bool, str]:
         """Check if thread comments feedback has been addressed."""
         thread_context = '\n---\n'.join(thread_comments)
@@ -682,14 +670,13 @@ def _check_thread_comments(
             last_message=last_message,
         )
 
-        return self._check_feedback_with_llm(prompt, llm_config)
+        return self._check_feedback_with_llm(prompt)
 
     def _check_review_comments(
         self,
         review_comments: list[str],
         issues_context: str,
         last_message: str,
-        llm_config: LLMConfig,
     ) -> tuple[bool, str]:
         """Check if review comments feedback has been addressed."""
         review_context = '\n---\n'.join(review_comments)
@@ -708,10 +695,10 @@ def _check_review_comments(
             last_message=last_message,
         )
 
-        return self._check_feedback_with_llm(prompt, llm_config)
+        return self._check_feedback_with_llm(prompt)
 
     def guess_success(
-        self, issue: GithubIssue, history: list[Event], llm_config: LLMConfig
+        self, issue: GithubIssue, history: list[Event]
     ) -> tuple[bool, None | list[bool], str]:
         """Guess if the issue is fixed based on the history and the issue description."""
         last_message = history[-1].message
@@ -724,7 +711,7 @@ def guess_success(
             for review_thread in issue.review_threads:
                 if issues_context and last_message:
                     success, explanation = self._check_review_thread(
-                        review_thread, issues_context, last_message, llm_config
+                        review_thread, issues_context, last_message
                     )
                 else:
                     success, explanation = False, 'Missing context or message'
@@ -734,7 +721,7 @@ def guess_success(
         elif issue.thread_comments:
             if issue.thread_comments and issues_context and last_message:
                 success, explanation = self._check_thread_comments(
-                    issue.thread_comments, issues_context, last_message, llm_config
+                    issue.thread_comments, issues_context, last_message
                 )
             else:
                 success, explanation = (
@@ -747,7 +734,7 @@ def guess_success(
             # Handle PRs with only review comments (no file-specific review comments or thread comments)
             if issue.review_comments and issues_context and last_message:
                 success, explanation = self._check_review_comments(
-                    issue.review_comments, issues_context, last_message, llm_config
+                    issue.review_comments, issues_context, last_message
                 )
             else:
                 success, explanation = (
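Note on the hunks above: `IssueHandler` and `PRHandler` now build their own `LLM` from the `LLMConfig` supplied at construction, so every success check goes through the `LLM` wrapper (and inherits its retry and logging behavior) rather than calling `litellm.completion` directly. A minimal sketch of the resulting call pattern — the model name, token, and repo values here are placeholders, not values from this patch:

```python
# Sketch only: the handler owns its LLM; guess_success no longer takes llm_config.
from openhands.core.config import LLMConfig
from openhands.resolver.issue_definitions import IssueHandler

llm_config = LLMConfig(model='gpt-4o', api_key='<api-key>')  # placeholder values
handler = IssueHandler('some-owner', 'some-repo', '<github-token>', llm_config)
# success, comment_success, explanation = handler.guess_success(issue, history)
```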
""" - issue_handler = issue_handler_factory(issue_type, owner, repo, token) + issue_handler = issue_handler_factory(issue_type, owner, repo, token, llm_config) # Load dataset issues: list[GithubIssue] = issue_handler.get_converted_issues( diff --git a/openhands/resolver/resolve_issue.py b/openhands/resolver/resolve_issue.py index b6cbb9f03c05..63a9e40a05ba 100644 --- a/openhands/resolver/resolve_issue.py +++ b/openhands/resolver/resolve_issue.py @@ -249,7 +249,7 @@ async def on_event(evt): metrics = state.metrics.get() if state.metrics else None # determine success based on the history and the issue description success, comment_success, success_explanation = issue_handler.guess_success( - issue, state.history, llm_config + issue, state.history ) if issue_handler.issue_type == 'pr' and comment_success: @@ -291,12 +291,12 @@ async def on_event(evt): def issue_handler_factory( - issue_type: str, owner: str, repo: str, token: str + issue_type: str, owner: str, repo: str, token: str, llm_config: LLMConfig ) -> IssueHandlerInterface: if issue_type == 'issue': - return IssueHandler(owner, repo, token) + return IssueHandler(owner, repo, token, llm_config) elif issue_type == 'pr': - return PRHandler(owner, repo, token) + return PRHandler(owner, repo, token, llm_config) else: raise ValueError(f'Invalid issue type: {issue_type}') @@ -337,7 +337,7 @@ async def resolve_issue( target_branch: Optional target branch to create PR against (for PRs). reset_logger: Whether to reset the logger for multiprocessing. """ - issue_handler = issue_handler_factory(issue_type, owner, repo, token) + issue_handler = issue_handler_factory(issue_type, owner, repo, token, llm_config) # Load dataset issues: list[GithubIssue] = issue_handler.get_converted_issues( diff --git a/tests/unit/resolver/test_guess_success.py b/tests/unit/resolver/test_guess_success.py index d6b0e946adda..9d1b55487118 100644 --- a/tests/unit/resolver/test_guess_success.py +++ b/tests/unit/resolver/test_guess_success.py @@ -1,7 +1,10 @@ +from unittest.mock import MagicMock, patch + from openhands.core.config import LLMConfig from openhands.events.action.message import MessageAction +from openhands.llm import LLM from openhands.resolver.github_issue import GithubIssue -from openhands.resolver.issue_definitions import IssueHandler +from openhands.resolver.issue_definitions import IssueHandler, PRHandler def test_guess_success_multiline_explanation(): @@ -19,7 +22,11 @@ def test_guess_success_multiline_explanation(): llm_config = LLMConfig(model='test', api_key='test') # Create a mock response with multi-line explanation - mock_response = """--- success + mock_response = MagicMock() + mock_response.choices = [ + MagicMock( + message=MagicMock( + content="""--- success true --- explanation @@ -29,35 +36,17 @@ def test_guess_success_multiline_explanation(): - Updated documentation C Automatic fix generated by OpenHands 🙌""" + ) + ) + ] - # Create a handler instance - handler = IssueHandler('test', 'test', 'test') - - # Mock the litellm.completion call - def mock_completion(*args, **kwargs): - class MockResponse: - class Choice: - class Message: - def __init__(self, content): - self.content = content - - def __init__(self, content): - self.message = self.Message(content) - - def __init__(self, content): - self.choices = [self.Choice(content)] - - return MockResponse(mock_response) - - # Patch the litellm.completion function - import litellm - - original_completion = litellm.completion - litellm.completion = mock_completion + # Use patch to mock the LLM 
completion call + with patch.object(LLM, 'completion', return_value=mock_response) as mock_completion: + # Create a handler instance + handler = IssueHandler('test', 'test', 'test', llm_config) - try: # Call guess_success - success, _, explanation = handler.guess_success(issue, history, llm_config) + success, _, explanation = handler.guess_success(issue, history) # Verify the results assert success is True @@ -66,6 +55,136 @@ def __init__(self, content): assert 'Added test B' in explanation assert 'Updated documentation C' in explanation assert 'Automatic fix generated by OpenHands' in explanation - finally: - # Restore the original function - litellm.completion = original_completion + + # Verify that LLM completion was called exactly once + mock_completion.assert_called_once() + + +def test_pr_handler_guess_success_with_thread_comments(): + # Create a PR handler instance + llm_config = LLMConfig(model='test', api_key='test') + handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config) + + # Create a mock issue with thread comments but no review comments + issue = GithubIssue( + owner='test-owner', + repo='test-repo', + number=1, + title='Test PR', + body='Test Body', + thread_comments=['First comment', 'Second comment'], + closing_issues=['Issue description'], + review_comments=None, + thread_ids=None, + head_branch='test-branch', + ) + + # Create mock history + history = [MessageAction(content='Fixed the issue by implementing X and Y')] + + # Create mock LLM config + llm_config = LLMConfig(model='test-model', api_key='test-key') + + # Mock the LLM response + mock_response = MagicMock() + mock_response.choices = [ + MagicMock( + message=MagicMock( + content="""--- success +true + +--- explanation +The changes successfully address the feedback.""" + ) + ) + ] + + # Test the guess_success method + with patch.object(LLM, 'completion', return_value=mock_response): + success, success_list, explanation = handler.guess_success(issue, history) + + # Verify the results + assert success is True + assert success_list == [True] + assert 'successfully address' in explanation + + +def test_pr_handler_guess_success_only_review_comments(): + # Create a PR handler instance + llm_config = LLMConfig(model='test', api_key='test') + handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config) + + # Create a mock issue with only review comments + issue = GithubIssue( + owner='test-owner', + repo='test-repo', + number=1, + title='Test PR', + body='Test Body', + thread_comments=None, + closing_issues=['Issue description'], + review_comments=['Please fix the formatting', 'Add more tests'], + thread_ids=None, + head_branch='test-branch', + ) + + # Create mock history + history = [MessageAction(content='Fixed the formatting and added more tests')] + + # Create mock LLM config + llm_config = LLMConfig(model='test-model', api_key='test-key') + + # Mock the LLM response + mock_response = MagicMock() + mock_response.choices = [ + MagicMock( + message=MagicMock( + content="""--- success +true + +--- explanation +The changes successfully address the review comments.""" + ) + ) + ] + + # Test the guess_success method + with patch.object(LLM, 'completion', return_value=mock_response): + success, success_list, explanation = handler.guess_success(issue, history) + + # Verify the results + assert success is True + assert success_list == [True] + assert 'successfully address' in explanation + + +def test_pr_handler_guess_success_no_comments(): + # Create a PR handler instance + llm_config = 
LLMConfig(model='test', api_key='test') + handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config) + + # Create a mock issue with no comments + issue = GithubIssue( + owner='test-owner', + repo='test-repo', + number=1, + title='Test PR', + body='Test Body', + thread_comments=None, + closing_issues=['Issue description'], + review_comments=None, + thread_ids=None, + head_branch='test-branch', + ) + + # Create mock history + history = [MessageAction(content='Fixed the issue')] + + # Create mock LLM config + llm_config = LLMConfig(model='test-model', api_key='test-key') + + # Test that it returns appropriate message when no comments are present + success, success_list, explanation = handler.guess_success(issue, history) + assert success is False + assert success_list is None + assert explanation == 'No feedback was found to process' diff --git a/tests/unit/resolver/test_issue_handler.py b/tests/unit/resolver/test_issue_handler.py index d0c17d9088e9..7eb20a585e5b 100644 --- a/tests/unit/resolver/test_issue_handler.py +++ b/tests/unit/resolver/test_issue_handler.py @@ -1,8 +1,7 @@ from unittest.mock import MagicMock, patch from openhands.core.config import LLMConfig -from openhands.events.action.message import MessageAction -from openhands.resolver.github_issue import GithubIssue, ReviewThread +from openhands.resolver.github_issue import ReviewThread from openhands.resolver.issue_definitions import IssueHandler, PRHandler @@ -27,7 +26,8 @@ def test_get_converted_issues_initializes_review_comments(): ] # Need two comment responses because we make two API calls # Create an instance of IssueHandler - handler = IssueHandler('test-owner', 'test-repo', 'test-token') + llm_config = LLMConfig(model='test', api_key='test') + handler = IssueHandler('test-owner', 'test-repo', 'test-token', llm_config) # Get converted issues issues = handler.get_converted_issues(issue_numbers=[1]) @@ -46,56 +46,6 @@ def test_get_converted_issues_initializes_review_comments(): assert issues[0].repo == 'test-repo' -def test_pr_handler_guess_success_with_thread_comments(): - # Create a PR handler instance - handler = PRHandler('test-owner', 'test-repo', 'test-token') - - # Create a mock issue with thread comments but no review comments - issue = GithubIssue( - owner='test-owner', - repo='test-repo', - number=1, - title='Test PR', - body='Test Body', - thread_comments=['First comment', 'Second comment'], - closing_issues=['Issue description'], - review_comments=None, - thread_ids=None, - head_branch='test-branch', - ) - - # Create mock history - history = [MessageAction(content='Fixed the issue by implementing X and Y')] - - # Create mock LLM config - llm_config = LLMConfig(model='test-model', api_key='test-key') - - # Mock the LLM response - mock_response = MagicMock() - mock_response.choices = [ - MagicMock( - message=MagicMock( - content="""--- success -true - ---- explanation -The changes successfully address the feedback.""" - ) - ) - ] - - # Test the guess_success method - with patch('litellm.completion', return_value=mock_response): - success, success_list, explanation = handler.guess_success( - issue, history, llm_config - ) - - # Verify the results - assert success is True - assert success_list == [True] - assert 'successfully address' in explanation - - def test_pr_handler_get_converted_issues_with_comments(): # Mock the necessary dependencies with patch('requests.get') as mock_get: @@ -155,7 +105,8 @@ def test_pr_handler_get_converted_issues_with_comments(): mock_post.return_value = 
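The rewritten test above also shows the new mocking pattern: because each handler constructs its `LLM` inside `__init__`, there is no instance to patch ahead of time, so the tests patch `completion` on the `LLM` class itself. A condensed sketch of the pattern (the response text here is illustrative):

```python
# Sketch: class-level patching covers any LLM instance a handler creates.
from unittest.mock import MagicMock, patch

from openhands.llm.llm import LLM

mock_response = MagicMock()
mock_response.choices = [
    MagicMock(message=MagicMock(content='--- success\ntrue\n\n--- explanation\nDone'))
]

with patch.object(LLM, 'completion', return_value=mock_response) as mock_completion:
    # Handlers constructed here get the mocked completion; the mock records
    # calls, so tests can assert call counts and inspect prompts afterwards.
    ...
```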
diff --git a/tests/unit/resolver/test_issue_handler.py b/tests/unit/resolver/test_issue_handler.py
index d0c17d9088e9..7eb20a585e5b 100644
--- a/tests/unit/resolver/test_issue_handler.py
+++ b/tests/unit/resolver/test_issue_handler.py
@@ -1,8 +1,7 @@
 from unittest.mock import MagicMock, patch
 
 from openhands.core.config import LLMConfig
-from openhands.events.action.message import MessageAction
-from openhands.resolver.github_issue import GithubIssue, ReviewThread
+from openhands.resolver.github_issue import ReviewThread
 from openhands.resolver.issue_definitions import IssueHandler, PRHandler
 
 
@@ -27,7 +26,8 @@ def test_get_converted_issues_initializes_review_comments():
     ]  # Need two comment responses because we make two API calls
 
     # Create an instance of IssueHandler
-    handler = IssueHandler('test-owner', 'test-repo', 'test-token')
+    llm_config = LLMConfig(model='test', api_key='test')
+    handler = IssueHandler('test-owner', 'test-repo', 'test-token', llm_config)
 
     # Get converted issues
     issues = handler.get_converted_issues(issue_numbers=[1])
@@ -46,56 +46,6 @@ def test_get_converted_issues_initializes_review_comments():
     assert issues[0].repo == 'test-repo'
 
 
-def test_pr_handler_guess_success_with_thread_comments():
-    # Create a PR handler instance
-    handler = PRHandler('test-owner', 'test-repo', 'test-token')
-
-    # Create a mock issue with thread comments but no review comments
-    issue = GithubIssue(
-        owner='test-owner',
-        repo='test-repo',
-        number=1,
-        title='Test PR',
-        body='Test Body',
-        thread_comments=['First comment', 'Second comment'],
-        closing_issues=['Issue description'],
-        review_comments=None,
-        thread_ids=None,
-        head_branch='test-branch',
-    )
-
-    # Create mock history
-    history = [MessageAction(content='Fixed the issue by implementing X and Y')]
-
-    # Create mock LLM config
-    llm_config = LLMConfig(model='test-model', api_key='test-key')
-
-    # Mock the LLM response
-    mock_response = MagicMock()
-    mock_response.choices = [
-        MagicMock(
-            message=MagicMock(
-                content="""--- success
-true
-
---- explanation
-The changes successfully address the feedback."""
-            )
-        )
-    ]
-
-    # Test the guess_success method
-    with patch('litellm.completion', return_value=mock_response):
-        success, success_list, explanation = handler.guess_success(
-            issue, history, llm_config
-        )
-
-    # Verify the results
-    assert success is True
-    assert success_list == [True]
-    assert 'successfully address' in explanation
-
-
 def test_pr_handler_get_converted_issues_with_comments():
     # Mock the necessary dependencies
     with patch('requests.get') as mock_get:
@@ -155,7 +105,8 @@ def test_pr_handler_get_converted_issues_with_comments():
         mock_post.return_value = mock_graphql_response
 
         # Create an instance of PRHandler
-        handler = PRHandler('test-owner', 'test-repo', 'test-token')
+        llm_config = LLMConfig(model='test', api_key='test')
+        handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
 
         # Get converted issues
         prs = handler.get_converted_issues(issue_numbers=[1])
@@ -178,89 +129,6 @@ def test_pr_handler_get_converted_issues_with_comments():
         ]
 
 
-def test_pr_handler_guess_success_only_review_comments():
-    # Create a PR handler instance
-    handler = PRHandler('test-owner', 'test-repo', 'test-token')
-
-    # Create a mock issue with only review comments
-    issue = GithubIssue(
-        owner='test-owner',
-        repo='test-repo',
-        number=1,
-        title='Test PR',
-        body='Test Body',
-        thread_comments=None,
-        closing_issues=['Issue description'],
-        review_comments=['Please fix the formatting', 'Add more tests'],
-        thread_ids=None,
-        head_branch='test-branch',
-    )
-
-    # Create mock history
-    history = [MessageAction(content='Fixed the formatting and added more tests')]
-
-    # Create mock LLM config
-    llm_config = LLMConfig(model='test-model', api_key='test-key')
-
-    # Mock the LLM response
-    mock_response = MagicMock()
-    mock_response.choices = [
-        MagicMock(
-            message=MagicMock(
-                content="""--- success
-true
-
---- explanation
-The changes successfully address the review comments."""
-            )
-        )
-    ]
-
-    # Test the guess_success method
-    with patch('litellm.completion', return_value=mock_response):
-        success, success_list, explanation = handler.guess_success(
-            issue, history, llm_config
-        )
-
-    # Verify the results
-    assert success is True
-    assert success_list == [True]
-    assert 'successfully address' in explanation
-
-
-def test_pr_handler_guess_success_no_comments():
-    # Create a PR handler instance
-    handler = PRHandler('test-owner', 'test-repo', 'test-token')
-
-    # Create a mock issue with no comments
-    issue = GithubIssue(
-        owner='test-owner',
-        repo='test-repo',
-        number=1,
-        title='Test PR',
-        body='Test Body',
-        thread_comments=None,
-        closing_issues=['Issue description'],
-        review_comments=None,
-        thread_ids=None,
-        head_branch='test-branch',
-    )
-
-    # Create mock history
-    history = [MessageAction(content='Fixed the issue')]
-
-    # Create mock LLM config
-    llm_config = LLMConfig(model='test-model', api_key='test-key')
-
-    # Test that it returns appropriate message when no comments are present
-    success, success_list, explanation = handler.guess_success(
-        issue, history, llm_config
-    )
-    assert success is False
-    assert success_list is None
-    assert explanation == 'No feedback was found to process'
-
-
 def test_get_issue_comments_with_specific_comment_id():
     # Mock the necessary dependencies
     with patch('requests.get') as mock_get:
@@ -274,7 +142,8 @@ def test_get_issue_comments_with_specific_comment_id():
         mock_get.return_value = mock_comments_response
 
         # Create an instance of IssueHandler
-        handler = IssueHandler('test-owner', 'test-repo', 'test-token')
+        llm_config = LLMConfig(model='test', api_key='test')
+        handler = IssueHandler('test-owner', 'test-repo', 'test-token', llm_config)
 
         # Get comments with a specific comment_id
         specific_comment = handler._get_issue_comments(issue_number=1, comment_id=123)
@@ -361,7 +230,8 @@ def test_pr_handler_get_converted_issues_with_specific_thread_comment():
         mock_post.return_value = mock_graphql_response
 
         # Create an instance of PRHandler
-        handler = PRHandler('test-owner', 'test-repo', 'test-token')
+        llm_config = LLMConfig(model='test', api_key='test')
+        handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
 
         # Get converted issues
         prs = handler.get_converted_issues(
@@ -463,7 +333,8 @@ def test_pr_handler_get_converted_issues_with_specific_review_thread_comment():
         mock_post.return_value = mock_graphql_response
 
         # Create an instance of PRHandler
-        handler = PRHandler('test-owner', 'test-repo', 'test-token')
+        llm_config = LLMConfig(model='test', api_key='test')
+        handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
 
         # Get converted issues
         prs = handler.get_converted_issues(
@@ -585,7 +456,8 @@ def test_pr_handler_get_converted_issues_with_specific_comment_and_issue_refs():
         mock_post.return_value = mock_graphql_response
 
         # Create an instance of PRHandler
-        handler = PRHandler('test-owner', 'test-repo', 'test-token')
+        llm_config = LLMConfig(model='test', api_key='test')
+        handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
 
         # Get converted issues
         prs = handler.get_converted_issues(
@@ -684,7 +556,8 @@ def test_pr_handler_get_converted_issues_with_duplicate_issue_refs():
         mock_post.return_value = mock_graphql_response
 
         # Create an instance of PRHandler
-        handler = PRHandler('test-owner', 'test-repo', 'test-token')
+        llm_config = LLMConfig(model='test', api_key='test')
+        handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
 
         # Get converted issues
         prs = handler.get_converted_issues(issue_numbers=[1])
diff --git a/tests/unit/resolver/test_issue_handler_error_handling.py b/tests/unit/resolver/test_issue_handler_error_handling.py
index 2b04e3b13111..93a98437168e 100644
--- a/tests/unit/resolver/test_issue_handler_error_handling.py
+++ b/tests/unit/resolver/test_issue_handler_error_handling.py
@@ -1,12 +1,39 @@
 from unittest.mock import MagicMock, patch
 
+import pytest
 import requests
+from litellm.exceptions import RateLimitError
 
-from openhands.resolver.issue_definitions import PRHandler
+from openhands.core.config import LLMConfig
+from openhands.events.action.message import MessageAction
+from openhands.llm.llm import LLM
+from openhands.resolver.github_issue import GithubIssue
+from openhands.resolver.issue_definitions import IssueHandler, PRHandler
+
+
+@pytest.fixture(autouse=True)
+def mock_logger(monkeypatch):
+    # suppress logging of completion data to file
+    mock_logger = MagicMock()
+    monkeypatch.setattr('openhands.llm.debug_mixin.llm_prompt_logger', mock_logger)
+    monkeypatch.setattr('openhands.llm.debug_mixin.llm_response_logger', mock_logger)
+    return mock_logger
+
+
+@pytest.fixture
+def default_config():
+    return LLMConfig(
+        model='gpt-4o',
+        api_key='test_key',
+        num_retries=2,
+        retry_min_wait=1,
+        retry_max_wait=2,
+    )
 
 
 def test_handle_nonexistent_issue_reference():
-    handler = PRHandler('test-owner', 'test-repo', 'test-token')
+    llm_config = LLMConfig(model='test', api_key='test')
+    handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
 
     # Mock the requests.get to simulate a 404 error
     mock_response = MagicMock()
@@ -30,7 +57,8 @@ def test_handle_nonexistent_issue_reference():
 
 
 def test_handle_rate_limit_error():
-    handler = PRHandler('test-owner', 'test-repo', 'test-token')
+    llm_config = LLMConfig(model='test', api_key='test')
+    handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
 
     # Mock the requests.get to simulate a rate limit error
     mock_response = MagicMock()
@@ -54,7 +82,8 @@ def test_handle_rate_limit_error():
 
 
 def test_handle_network_error():
-    handler = PRHandler('test-owner', 'test-repo', 'test-token')
+    llm_config = LLMConfig(model='test', api_key='test')
+    handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
 
     # Mock the requests.get to simulate a network error
     with patch(
@@ -75,7 +104,8 @@ def test_handle_network_error():
 
 
 def test_successful_issue_reference():
-    handler = PRHandler('test-owner', 'test-repo', 'test-token')
+    llm_config = LLMConfig(model='test', api_key='test')
+    handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
 
     # Mock a successful response
     mock_response = MagicMock()
@@ -95,3 +125,141 @@ def test_successful_issue_reference():
 
     # The method should return a list with the referenced issue body
     assert result == ['This is the referenced issue body']
+
+
+class MockLLMResponse:
+    """Mock LLM Response class to mimic the actual LLM response structure."""
+
+    class Choice:
+        class Message:
+            def __init__(self, content):
+                self.content = content
+
+        def __init__(self, content):
+            self.message = self.Message(content)
+
+    def __init__(self, content):
+        self.choices = [self.Choice(content)]
+
+
+class DotDict(dict):
+    """
+    A dictionary that supports dot notation access.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        for key, value in self.items():
+            if isinstance(value, dict):
+                self[key] = DotDict(value)
+            elif isinstance(value, list):
+                self[key] = [
+                    DotDict(item) if isinstance(item, dict) else item for item in value
+                ]
+
+    def __getattr__(self, key):
+        if key in self:
+            return self[key]
+        else:
+            raise AttributeError(
+                f"'{self.__class__.__name__}' object has no attribute '{key}'"
+            )
+
+    def __setattr__(self, key, value):
+        self[key] = value
+
+    def __delattr__(self, key):
+        if key in self:
+            del self[key]
+        else:
+            raise AttributeError(
+                f"'{self.__class__.__name__}' object has no attribute '{key}'"
+            )
+
+
+@patch('openhands.llm.llm.litellm_completion')
+def test_guess_success_rate_limit_wait_time(mock_litellm_completion, default_config):
+    """Test that the retry mechanism in guess_success respects wait time between retries."""
+
+    with patch('time.sleep') as mock_sleep:
+        # Simulate a rate limit error followed by a successful response
+        mock_litellm_completion.side_effect = [
+            RateLimitError(
+                'Rate limit exceeded', llm_provider='test_provider', model='test_model'
+            ),
+            DotDict(
+                {
+                    'choices': [
+                        {
+                            'message': {
+                                'content': '--- success\ntrue\n--- explanation\nRetry successful'
+                            }
+                        }
+                    ]
+                }
+            ),
+        ]
+
+        llm = LLM(config=default_config)
+        handler = IssueHandler('test-owner', 'test-repo', 'test-token', default_config)
+        handler.llm = llm
+
+        # Mock issue and history
+        issue = GithubIssue(
+            owner='test-owner',
+            repo='test-repo',
+            number=1,
+            title='Test Issue',
+            body='This is a test issue.',
+            thread_comments=['Please improve error handling'],
+        )
+        history = [MessageAction(content='Fixed error handling.')]
+
+        # Call guess_success
+        success, _, explanation = handler.guess_success(issue, history)
+
+        # Assertions
+        assert success is True
+        assert explanation == 'Retry successful'
+        assert mock_litellm_completion.call_count == 2  # Two attempts made
+        mock_sleep.assert_called_once()  # Sleep called once between retries
+
+        # Validate wait time
+        wait_time = mock_sleep.call_args[0][0]
+        assert (
+            default_config.retry_min_wait <= wait_time <= default_config.retry_max_wait
+        ), f'Expected wait time between {default_config.retry_min_wait} and {default_config.retry_max_wait} seconds, but got {wait_time}'
+
+
+@patch('openhands.llm.llm.litellm_completion')
+def test_guess_success_exhausts_retries(mock_completion, default_config):
+    """Test the retry mechanism in guess_success exhausts retries and raises an error."""
+    # Simulate persistent rate limit errors by always raising RateLimitError
+    mock_completion.side_effect = RateLimitError(
+        'Rate limit exceeded', llm_provider='test_provider', model='test_model'
+    )
+
+    # Initialize LLM and handler
+    llm = LLM(config=default_config)
+    handler = PRHandler('test-owner', 'test-repo', 'test-token', default_config)
+    handler.llm = llm
+
+    # Mock issue and history
+    issue = GithubIssue(
+        owner='test-owner',
+        repo='test-repo',
+        number=1,
+        title='Test Issue',
+        body='This is a test issue.',
+        thread_comments=['Please improve error handling'],
+    )
+    history = [MessageAction(content='Fixed error handling.')]
+
+    # Call guess_success and expect it to raise an error after retries
+    with pytest.raises(RateLimitError):
+        handler.guess_success(issue, history)
+
+    # Assertions
+    assert (
+        mock_completion.call_count == default_config.num_retries
+    )  # Initial call + retries
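A note on the fixtures and helpers above: `default_config` keeps `num_retries` and the retry wait bounds small so the retry tests run quickly, and `DotDict` exists because the retried call returns the raw litellm-style response, which the handler reads via attribute access. A quick illustration of what `DotDict` provides (assuming the class defined in the hunk above is in scope):

```python
# Assumes the DotDict class added in the hunk above is in scope.
resp = DotDict({'choices': [{'message': {'content': 'hello'}}]})
assert resp.choices[0].message.content == 'hello'  # attribute access on nested dicts
assert resp['choices'][0]['message']['content'] == 'hello'  # plain dict access still works
```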
diff --git a/tests/unit/resolver/test_issue_references.py b/tests/unit/resolver/test_issue_references.py
index 1252f8555540..409f276d5abc 100644
--- a/tests/unit/resolver/test_issue_references.py
+++ b/tests/unit/resolver/test_issue_references.py
@@ -1,8 +1,10 @@
+from openhands.core.config.llm_config import LLMConfig
 from openhands.resolver.issue_definitions import IssueHandler
 
 
 def test_extract_issue_references():
-    handler = IssueHandler('test-owner', 'test-repo', 'test-token')
+    llm_config = LLMConfig(model='test', api_key='test')
+    handler = IssueHandler('test-owner', 'test-repo', 'test-token', llm_config)
 
     # Test basic issue reference
     assert handler._extract_issue_references('Fixes #123') == [123]
diff --git a/tests/unit/resolver/test_pr_handler_guess_success.py b/tests/unit/resolver/test_pr_handler_guess_success.py
index e7e7705e8747..de0495691529 100644
--- a/tests/unit/resolver/test_pr_handler_guess_success.py
+++ b/tests/unit/resolver/test_pr_handler_guess_success.py
@@ -3,14 +3,23 @@
 
 from openhands.core.config import LLMConfig
 from openhands.events.action.message import MessageAction
+from openhands.llm.llm import LLM
 from openhands.resolver.github_issue import GithubIssue, ReviewThread
 from openhands.resolver.issue_definitions import PRHandler
 
 
+def mock_llm_response(content):
+    """Helper function to create a mock LLM response."""
+    mock_response = MagicMock()
+    mock_response.choices = [MagicMock(message=MagicMock(content=content))]
+    return mock_response
+
+
 def test_guess_success_review_threads_litellm_call():
     """Test that the litellm.completion() call for review threads contains the expected content."""
     # Create a PR handler instance
-    handler = PRHandler('test-owner', 'test-repo', 'test-token')
+    llm_config = LLMConfig(model='test', api_key='test')
+    handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
 
     # Create a mock issue with review threads
     issue = GithubIssue(
@@ -64,11 +73,9 @@ def test_guess_success_review_threads_litellm_call():
     ]
 
     # Test the guess_success method
-    with patch('litellm.completion') as mock_completion:
+    with patch.object(LLM, 'completion') as mock_completion:
         mock_completion.return_value = mock_response
-        success, success_list, explanation = handler.guess_success(
-            issue, history, llm_config
-        )
+        success, success_list, explanation = handler.guess_success(issue, history)
 
         # Verify the litellm.completion() calls
         assert mock_completion.call_count == 2  # One call per review thread
@@ -114,7 +121,8 @@ def test_guess_success_thread_comments_litellm_call():
     """Test that the litellm.completion() call for thread comments contains the expected content."""
     # Create a PR handler instance
-    handler = PRHandler('test-owner', 'test-repo', 'test-token')
+    llm_config = LLMConfig(model='test', api_key='test')
+    handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
 
     # Create a mock issue with thread comments
     issue = GithubIssue(
@@ -162,11 +170,9 @@ def test_guess_success_thread_comments_litellm_call():
     ]
 
     # Test the guess_success method
-    with patch('litellm.completion') as mock_completion:
+    with patch.object(LLM, 'completion') as mock_completion:
         mock_completion.return_value = mock_response
-        success, success_list, explanation = handler.guess_success(
-            issue, history, llm_config
-        )
+        success, success_list, explanation = handler.guess_success(issue, history)
 
         # Verify the litellm.completion() call
         mock_completion.assert_called_once()
@@ -186,10 +192,8 @@ def test_guess_success_thread_comments_litellm_call():
 def test_check_feedback_with_llm():
     """Test the _check_feedback_with_llm helper function."""
     # Create a PR handler instance
-    handler = PRHandler('test-owner', 'test-repo', 'test-token')
-
-    # Create mock LLM config
-    llm_config = LLMConfig(model='test-model', api_key='test-key')
+    llm_config = LLMConfig(model='test', api_key='test')
+    handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
 
     # Test cases for different LLM responses
     test_cases = [
@@ -220,17 +224,16 @@ def test_check_feedback_with_llm():
         mock_response.choices = [MagicMock(message=MagicMock(content=case['response']))]
 
         # Test the function
-        with patch('litellm.completion', return_value=mock_response):
-            success, explanation = handler._check_feedback_with_llm(
-                'test prompt', llm_config
-            )
+        with patch.object(LLM, 'completion', return_value=mock_response):
+            success, explanation = handler._check_feedback_with_llm('test prompt')
             assert (success, explanation) == case['expected']
 
 
 def test_check_review_thread():
     """Test the _check_review_thread helper function."""
     # Create a PR handler instance
-    handler = PRHandler('test-owner', 'test-repo', 'test-token')
+    llm_config = LLMConfig(model='test', api_key='test')
+    handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
 
     # Create test data
     review_thread = ReviewThread(
@@ -241,7 +244,6 @@ def test_check_review_thread():
         ['Issue 1 description', 'Issue 2 description'], indent=4
     )
     last_message = 'I have fixed the formatting and added docstrings'
-    llm_config = LLMConfig(model='test-model', api_key='test-key')
 
     # Mock the LLM response
     mock_response = MagicMock()
@@ -258,10 +260,10 @@ def test_check_review_thread():
     ]
 
     # Test the function
-    with patch('litellm.completion') as mock_completion:
+    with patch.object(LLM, 'completion') as mock_completion:
         mock_completion.return_value = mock_response
         success, explanation = handler._check_review_thread(
-            review_thread, issues_context, last_message, llm_config
+            review_thread, issues_context, last_message
         )
 
         # Verify the litellm.completion() call
@@ -285,7 +287,8 @@ def test_check_review_thread():
 def test_check_thread_comments():
     """Test the _check_thread_comments helper function."""
     # Create a PR handler instance
-    handler = PRHandler('test-owner', 'test-repo', 'test-token')
+    llm_config = LLMConfig(model='test', api_key='test')
+    handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
 
     # Create test data
     thread_comments = [
@@ -297,7 +300,6 @@ def test_check_thread_comments():
         ['Issue 1 description', 'Issue 2 description'], indent=4
    )
     last_message = 'I have added error handling and input validation'
-    llm_config = LLMConfig(model='test-model', api_key='test-key')
 
     # Mock the LLM response
     mock_response = MagicMock()
@@ -314,10 +316,10 @@ def test_check_thread_comments():
     ]
 
     # Test the function
-    with patch('litellm.completion') as mock_completion:
+    with patch.object(LLM, 'completion') as mock_completion:
         mock_completion.return_value = mock_response
         success, explanation = handler._check_thread_comments(
-            thread_comments, issues_context, last_message, llm_config
+            thread_comments, issues_context, last_message
         )
 
         # Verify the litellm.completion() call
@@ -338,7 +340,8 @@ def test_check_thread_comments():
 def test_check_review_comments():
     """Test the _check_review_comments helper function."""
     # Create a PR handler instance
-    handler = PRHandler('test-owner', 'test-repo', 'test-token')
+    llm_config = LLMConfig(model='test', api_key='test')
+    handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
 
     # Create test data
     review_comments = [
@@ -350,7 +353,6 @@ def test_check_review_comments():
         ['Issue 1 description', 'Issue 2 description'], indent=4
     )
     last_message = 'I have improved code readability and added comments'
-    llm_config = LLMConfig(model='test-model', api_key='test-key')
 
     # Mock the LLM response
     mock_response = MagicMock()
@@ -367,10 +369,10 @@ def test_check_review_comments():
     ]
 
     # Test the function
-    with patch('litellm.completion') as mock_completion:
+    with patch.object(LLM, 'completion') as mock_completion:
         mock_completion.return_value = mock_response
         success, explanation = handler._check_review_comments(
-            review_comments, issues_context, last_message, llm_config
+            review_comments, issues_context, last_message
         )
 
         # Verify the litellm.completion() call
@@ -391,7 +393,8 @@ def test_check_review_comments():
 def test_guess_success_review_comments_litellm_call():
     """Test that the litellm.completion() call for review comments contains the expected content."""
     # Create a PR handler instance
-    handler = PRHandler('test-owner', 'test-repo', 'test-token')
+    llm_config = LLMConfig(model='test', api_key='test')
+    handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
 
     # Create a mock issue with review comments
     issue = GithubIssue(
@@ -421,9 +424,6 @@ def test_guess_success_review_comments_litellm_call():
         )
     ]
 
-    # Create mock LLM config
-    llm_config = LLMConfig(model='test-model', api_key='test-key')
-
     # Mock the LLM response
     mock_response = MagicMock()
     mock_response.choices = [
@@ -439,11 +439,9 @@ def test_guess_success_review_comments_litellm_call():
     ]
 
     # Test the guess_success method
-    with patch('litellm.completion') as mock_completion:
+    with patch.object(LLM, 'completion') as mock_completion:
         mock_completion.return_value = mock_response
-        success, success_list, explanation = handler.guess_success(
-            issue, history, llm_config
-        )
+        success, success_list, explanation = handler.guess_success(issue, history)
 
         # Verify the litellm.completion() call
         mock_completion.assert_called_once()
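The `mock_llm_response` helper added at the top of this file builds the same nested `MagicMock` shape that the tests construct inline, so future tests can shorten their setup. For example:

```python
# Example use of the mock_llm_response helper defined in the hunk above.
response = mock_llm_response('--- success\ntrue\n\n--- explanation\nLooks good')
assert response.choices[0].message.content.endswith('Looks good')
```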
diff --git a/tests/unit/resolver/test_resolve_issues.py b/tests/unit/resolver/test_resolve_issues.py
index 95da25fc2190..8d54adb87627 100644
--- a/tests/unit/resolver/test_resolve_issues.py
+++ b/tests/unit/resolver/test_resolve_issues.py
@@ -7,6 +7,7 @@
 from openhands.core.config import LLMConfig
 from openhands.events.action import CmdRunAction
 from openhands.events.observation import CmdOutputObservation, NullObservation
+from openhands.llm.llm import LLM
 from openhands.resolver.github_issue import GithubIssue, ReviewThread
 from openhands.resolver.issue_definitions import IssueHandler, PRHandler
 from openhands.resolver.resolve_issue import (
@@ -84,7 +85,8 @@ def test_initialize_runtime():
 
 
 def test_download_issues_from_github():
-    handler = IssueHandler('owner', 'repo', 'token')
+    llm_config = LLMConfig(model='test', api_key='test')
+    handler = IssueHandler('owner', 'repo', 'token', llm_config)
 
     mock_issues_response = MagicMock()
     mock_issues_response.json.side_effect = [
@@ -125,7 +127,8 @@ def get_mock_response(url, *args, **kwargs):
 
 
 def test_download_pr_from_github():
-    handler = PRHandler('owner', 'repo', 'token')
+    llm_config = LLMConfig(model='test', api_key='test')
+    handler = PRHandler('owner', 'repo', 'token', llm_config)
     mock_pr_response = MagicMock()
     mock_pr_response.json.side_effect = [
         [
@@ -442,7 +445,8 @@ def test_get_instruction(mock_prompt_template, mock_followup_prompt_template):
         title='Test Issue',
         body='This is a test issue refer to image ![First Image](https://sampleimage.com/image1.png)',
     )
-    issue_handler = IssueHandler('owner', 'repo', 'token')
+    mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
+    issue_handler = IssueHandler('owner', 'repo', 'token', mock_llm_config)
     instruction, images_urls = issue_handler.get_instruction(
         issue, mock_prompt_template, None
     )
@@ -470,7 +474,7 @@ def test_get_instruction(mock_prompt_template, mock_followup_prompt_template):
         ],
     )
 
-    pr_handler = PRHandler('owner', 'repo', 'token')
+    pr_handler = PRHandler('owner', 'repo', 'token', mock_llm_config)
     instruction, images_urls = pr_handler.get_instruction(
         issue, mock_followup_prompt_template, None
     )
@@ -493,7 +497,8 @@ def test_file_instruction():
     with open('openhands/resolver/prompts/resolve/basic.jinja', 'r') as f:
         prompt = f.read()
     # Test without thread comments
-    issue_handler = IssueHandler('owner', 'repo', 'token')
+    mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
+    issue_handler = IssueHandler('owner', 'repo', 'token', mock_llm_config)
     instruction, images_urls = issue_handler.get_instruction(issue, prompt, None)
     expected_instruction = """Please fix the following issue for the repository in /workspace. An environment has been set up for you to start working. You may assume all necessary tools are installed.
@@ -530,7 +535,8 @@ def test_file_instruction_with_repo_instruction():
     ) as f:
         repo_instruction = f.read()
 
-    issue_handler = IssueHandler('owner', 'repo', 'token')
+    mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
+    issue_handler = IssueHandler('owner', 'repo', 'token', mock_llm_config)
     instruction, image_urls = issue_handler.get_instruction(
         issue, prompt, repo_instruction
     )
@@ -581,11 +587,13 @@ def test_guess_success():
             )
         )
     ]
-    issue_handler = IssueHandler('owner', 'repo', 'token')
+    issue_handler = IssueHandler('owner', 'repo', 'token', mock_llm_config)
 
-    with patch('litellm.completion', MagicMock(return_value=mock_completion_response)):
+    with patch.object(
+        LLM, 'completion', MagicMock(return_value=mock_completion_response)
+    ):
         success, comment_success, explanation = issue_handler.guess_success(
-            mock_issue, mock_history, mock_llm_config
+            mock_issue, mock_history
         )
         assert issue_handler.issue_type == 'issue'
         assert comment_success is None
@@ -617,11 +625,13 @@ def test_guess_success_with_thread_comments():
             )
         )
     ]
-    issue_handler = IssueHandler('owner', 'repo', 'token')
+    issue_handler = IssueHandler('owner', 'repo', 'token', mock_llm_config)
 
-    with patch('litellm.completion', MagicMock(return_value=mock_completion_response)):
+    with patch.object(
+        LLM, 'completion', MagicMock(return_value=mock_completion_response)
+    ):
         success, comment_success, explanation = issue_handler.guess_success(
-            mock_issue, mock_history, mock_llm_config
+            mock_issue, mock_history
         )
         assert issue_handler.issue_type == 'issue'
         assert comment_success is None
@@ -648,7 +658,8 @@ def test_instruction_with_thread_comments():
     with open('openhands/resolver/prompts/resolve/basic.jinja', 'r') as f:
         prompt = f.read()
 
-    issue_handler = IssueHandler('owner', 'repo', 'token')
+    llm_config = LLMConfig(model='test', api_key='test')
+    issue_handler = IssueHandler('owner', 'repo', 'token', llm_config)
     instruction, images_urls = issue_handler.get_instruction(issue, prompt, None)
 
     # Verify that thread comments are included in the instruction
@@ -683,11 +694,13 @@ def test_guess_success_failure():
             )
         )
     ]
-    issue_handler = IssueHandler('owner', 'repo', 'token')
+    issue_handler = IssueHandler('owner', 'repo', 'token', mock_llm_config)
 
-    with patch('litellm.completion', MagicMock(return_value=mock_completion_response)):
+    with patch.object(
+        LLM, 'completion', MagicMock(return_value=mock_completion_response)
+    ):
         success, comment_success, explanation = issue_handler.guess_success(
-            mock_issue, mock_history, mock_llm_config
+            mock_issue, mock_history
         )
         assert issue_handler.issue_type == 'issue'
         assert comment_success is None
@@ -718,11 +731,13 @@ def test_guess_success_negative_case():
             )
         )
     ]
-    issue_handler = IssueHandler('owner', 'repo', 'token')
+    issue_handler = IssueHandler('owner', 'repo', 'token', mock_llm_config)
 
-    with patch('litellm.completion', MagicMock(return_value=mock_completion_response)):
+    with patch.object(
+        LLM, 'completion', MagicMock(return_value=mock_completion_response)
+    ):
         success, comment_success, explanation = issue_handler.guess_success(
-            mock_issue, mock_history, mock_llm_config
+            mock_issue, mock_history
         )
         assert issue_handler.issue_type == 'issue'
         assert comment_success is None
@@ -749,11 +764,13 @@ def test_guess_success_invalid_output():
     mock_completion_response.choices = [
         MagicMock(message=MagicMock(content='This is not a valid output'))
     ]
-    issue_handler = IssueHandler('owner', 'repo', 'token')
+    issue_handler = IssueHandler('owner', 'repo', 'token', mock_llm_config)
 
-    with patch('litellm.completion', MagicMock(return_value=mock_completion_response)):
+    with patch.object(
+        LLM, 'completion', MagicMock(return_value=mock_completion_response)
+    ):
         success, comment_success, explanation = issue_handler.guess_success(
-            mock_issue, mock_history, mock_llm_config
+            mock_issue, mock_history
        )
         assert issue_handler.issue_type == 'issue'
         assert comment_success is None
@@ -765,7 +782,8 @@ def test_guess_success_invalid_output():
 
 
 def test_download_pr_with_review_comments():
-    handler = PRHandler('owner', 'repo', 'token')
+    llm_config = LLMConfig(model='test', api_key='test')
+    handler = PRHandler('owner', 'repo', 'token', llm_config)
     mock_pr_response = MagicMock()
     mock_pr_response.json.side_effect = [
         [
@@ -831,7 +849,8 @@ def get_mock_response(url, *args, **kwargs):
 
 
 def test_download_issue_with_specific_comment():
-    handler = IssueHandler('owner', 'repo', 'token')
+    llm_config = LLMConfig(model='test', api_key='test')
+    handler = IssueHandler('owner', 'repo', 'token', llm_config)
 
     # Define the specific comment_id to filter
     specific_comment_id = 101
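Taken together, the production-side change is that `llm_config` is threaded through `issue_handler_factory` once at startup rather than into every `guess_success` call. A sketch of the updated call site — owner, repo, token, and model values are placeholders:

```python
# Sketch: the factory forwards llm_config to the handler constructors, so
# downstream code can call guess_success(issue, history) with no LLM plumbing.
from openhands.core.config import LLMConfig
from openhands.resolver.resolve_issue import issue_handler_factory

llm_config = LLMConfig(model='gpt-4o', api_key='<api-key>')  # placeholders
handler = issue_handler_factory('pr', 'some-owner', 'some-repo', '<token>', llm_config)
```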