feat(resolver): Add support for extracting and processing text file attachments in GitHub issues

This change introduces methods to:
- Extract text file attachments from issue bodies
- Download and retrieve contents of attached text files
- Incorporate attachment contents into issue context for processing
- Enhance issue handling with additional context from file attachments

The implementation includes new methods for parsing Markdown links, downloading files, and integrating their contents into the issue processing workflow.
All-Hands-AI · Jan 24, 2025 · 27dc150 · 27dc150
1 parent 0b74fd7
commit 27dc150
Show file tree

Hide file tree

Showing 2 changed files with 105 additions and 1 deletion.
diff --git a/openhands/resolver/issue_definitions.py b/openhands/resolver/issue_definitions.py
@@ -154,6 +154,40 @@ def _get_issue_comments(
 
         return all_comments if all_comments else None
 
+    def _extract_attachments(self, issue_body: str) -> list[tuple[str, str]]:
+        """Extract text file attachments linked from an issue body.
+
+        Scans for Markdown links ``[name](url)`` whose http(s) URL ends in one
+        of the recognised text extensions (txt, md, log, py, js, json, yaml,
+        yml, xml, csv). The match is case-insensitive, so ``error.LOG`` is
+        picked up as well as ``error.log``.
+
+        Args:
+            issue_body: Raw Markdown text of the issue.
+
+        Returns:
+            A list of ``(file_url, file_name)`` tuples in order of first
+            appearance. Exact duplicates are removed so a file linked twice
+            is reported (and later downloaded) only once. An empty body
+            yields an empty list.
+        """
+        if not issue_body:
+            return []
+
+        text_file_pattern = r'\[([^\]]+)\]\((https?://[^\s)]+\.(?:txt|md|log|py|js|json|yaml|yml|xml|csv))\)'
+        matches = re.findall(text_file_pattern, issue_body, flags=re.IGNORECASE)
+        # dict.fromkeys preserves insertion order while dropping repeated pairs.
+        return list(dict.fromkeys((url, name) for name, url in matches))
+
+    def _download_attachment(self, file_url: str) -> str | None:
+        """Download a text file attachment and return its contents.
+
+        Args:
+            file_url: Absolute http(s) URL of the attachment, as found in an
+                issue body.
+
+        Returns:
+            The response body as text, or None if the request failed. A
+            failure is logged as a warning and never raised to the caller.
+        """
+        from urllib.parse import urlparse
+
+        headers = {'Accept': 'application/vnd.github.v3.raw'}
+
+        # The URL comes from user-written issue text, so only hand the token
+        # to GitHub-owned hosts; any other server gets an anonymous request.
+        try:
+            host = (urlparse(file_url).hostname or '').lower()
+        except ValueError:
+            # Malformed URL: send no credentials and let requests reject it.
+            host = ''
+        if host in ('github.com', 'githubusercontent.com') or host.endswith(
+            ('.github.com', '.githubusercontent.com')
+        ):
+            headers['Authorization'] = f'token {self.token}'
+
+        try:
+            # requests waits indefinitely unless a timeout is given; bound the
+            # wait so one unresponsive server cannot stall issue processing.
+            response = requests.get(file_url, headers=headers, timeout=30)
+            response.raise_for_status()
+            return response.text
+        except requests.exceptions.RequestException as e:
+            logger.warning(f'Failed to download attachment {file_url}: {str(e)}')
+            return None
+
+    def _get_attachments_context(self, issue_body: str) -> str:
+        """Build one text block from every downloadable attachment in the issue.
+
+        Each link reported by ``_extract_attachments`` is fetched with
+        ``_download_attachment``; links whose download returns None or an
+        empty string are left out. The remaining sections are joined with a
+        blank line, so the result is ``''`` when nothing could be included.
+        """
+        rendered_sections = [
+            f"Content of attached file '{label}':\n{text}"
+            for link, label in self._extract_attachments(issue_body)
+            if (text := self._download_attachment(link))
+        ]
+        return '\n\n'.join(rendered_sections)
+
     def get_converted_issues(
         self, issue_numbers: list[int] | None = None, comment_id: int | None = None
     ) -> list[GithubIssue]:
@@ -239,10 +273,29 @@ def get_instruction(
         images.extend(self._extract_image_urls(issue.body))
         images.extend(self._extract_image_urls(thread_context))
 
+        # Get context from attachments
+        attachments_context = self._get_attachments_context(issue.body)
+
+        # Format issues string with both the issue body and attachments
+        issues_str = issue.body
+        if attachments_context:
+            issues_str += f"\n\nAttached Files:\n{attachments_context}"
+
+        # Add attachments from closing issues if any
+        if issue.closing_issues:
+            closing_issues_str = '\n\n'.join(
+                [f'Referenced Issue:\n{issue}' for issue in issue.closing_issues]
+            )
+            for closing_issue in issue.closing_issues:
+                closing_attachments = self._get_attachments_context(closing_issue)
+                if closing_attachments:
+                    closing_issues_str += f"\n\nAttached Files from Referenced Issue:\n{closing_attachments}"
+            issues_str += f'\n\n{closing_issues_str}'
+
         template = jinja2.Template(prompt_template)
         return (
             template.render(
-                body=issue.title + '\n\n' + issue.body + thread_context,
+                issues=issues_str,
                 repo_instruction=repo_instruction,
             ),
             images,

diff --git a/tests/unit/resolver/test_issue_handler.py b/tests/unit/resolver/test_issue_handler.py
@@ -624,3 +624,54 @@ def test_pr_handler_get_converted_issues_with_duplicate_issue_refs():
                 'External context #1.',
                 'External context #2.',
             ]
+
+
+def test_issue_handler_with_text_attachments():
+    """Attachment contents linked from an issue body end up in the instruction.
+
+    ``requests.get`` is patched to return, in order: the issue listing, the
+    body of ``error.log``, and the body of ``config.yaml``.
+    """
+
+    def _text_reply(payload):
+        # Canned download response: only ``.text`` and ``raise_for_status``
+        # are configured explicitly.
+        reply = MagicMock()
+        reply.text = payload
+        reply.raise_for_status = MagicMock()
+        return reply
+
+    with patch('requests.get') as mock_get:
+        issue_listing = MagicMock()
+        issue_listing.json.return_value = [
+            {
+                'number': 1,
+                'title': 'Test Issue',
+                'body': 'Test Body with attachment [error.log](https://example.com/error.log) and [config.yaml](https://example.com/config.yaml)',
+            }
+        ]
+
+        # Responses are consumed strictly in call order.
+        mock_get.side_effect = [
+            issue_listing,  # issue listing
+            _text_reply('Error: Something went wrong\nStack trace: ...'),  # error.log
+            _text_reply('key: value\nsetting: enabled'),  # config.yaml
+        ]
+
+        handler = IssueHandler(
+            'test-owner',
+            'test-repo',
+            'test-token',
+            LLMConfig(model='test', api_key='test'),
+        )
+
+        converted = handler.get_converted_issues(issue_numbers=[1])
+
+        # Exactly one issue comes back from the stubbed listing.
+        assert len(converted) == 1
+
+        instruction, _ = handler.get_instruction(
+            converted[0],
+            'Test template: {{ issues }}',
+            repo_instruction=None,
+        )
+
+        # Both attachment headers and their bodies must appear in the prompt.
+        assert "Content of attached file 'error.log'" in instruction
+        assert 'Error: Something went wrong' in instruction
+        assert "Content of attached file 'config.yaml'" in instruction
+        assert 'key: value' in instruction