Skip to content

Commit

Permalink
feat(resolver): Add support for extracting and processing text file attachments in GitHub issues
Browse files Browse the repository at this point in the history

This change introduces methods to:
- Extract text file attachments from issue bodies
- Download and retrieve contents of attached text files
- Incorporate attachment contents into issue context for processing
- Enhance issue handling with additional context from file attachments

The implementation includes new methods for parsing Markdown links, downloading files, and integrating their contents into the issue processing workflow.
  • Loading branch information
oconnorjoseph committed Jan 24, 2025
1 parent 0b74fd7 commit 27dc150
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 1 deletion.
55 changes: 54 additions & 1 deletion openhands/resolver/issue_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,40 @@ def _get_issue_comments(

return all_comments if all_comments else None

def _extract_attachments(self, issue_body: str) -> list[tuple[str, str]]:
"""Extract text file attachments from issue body.
Returns a list of tuples (file_url, file_name)."""
# Match Markdown links that end in .txt, .md, or other text file extensions
text_file_pattern = r'\[([^\]]+)\]\((https?://[^\s)]+\.(?:txt|md|log|py|js|json|yaml|yml|xml|csv))\)'
matches = re.findall(text_file_pattern, issue_body)
return [(url, name) for name, url in matches]

def _download_attachment(self, file_url: str, timeout: float = 30.0) -> str | None:
    """Download a text file attachment and return its contents.

    The URL comes from the issue body, i.e. from user-controlled text, so the
    API token is only attached when the request goes over HTTPS to a GitHub
    host (``github.com`` or a subdomain of ``github.com`` /
    ``githubusercontent.com``). Any other URL is still fetched, but without
    credentials, so the token cannot leak to a third-party server.

    NOTE(review): GitHub Enterprise hosts are not in the allow-list; extend it
    if this handler is ever pointed at one.

    Args:
        file_url: Absolute URL of the attachment.
        timeout: Seconds to wait for the server before giving up, so a
            stalled host cannot block issue processing indefinitely.

    Returns:
        The response body as text, or ``None`` if the request failed (the
        failure is logged as a warning).
    """
    # Local import keeps this method self-contained regardless of what the
    # module already imports at the top of the file.
    from urllib.parse import urlparse

    try:
        parsed = urlparse(file_url)
        scheme = (parsed.scheme or '').lower()
        host = (parsed.hostname or '').lower()
    except ValueError:
        # Malformed URL (e.g. broken IPv6 literal): treat as untrusted and
        # let requests report the failure below.
        scheme, host = '', ''

    is_github_host = host == 'github.com' or host.endswith(
        ('.github.com', '.githubusercontent.com')
    )

    headers = {'Accept': 'application/vnd.github.v3.raw'}
    if scheme == 'https' and is_github_host:
        headers['Authorization'] = f'token {self.token}'

    try:
        response = requests.get(file_url, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as e:
        logger.warning(f'Failed to download attachment {file_url}: {str(e)}')
        return None

def _get_attachments_context(self, issue_body: str) -> str:
"""Get context from text file attachments in the issue."""
attachments = self._extract_attachments(issue_body)
attachment_contexts = []

for file_url, file_name in attachments:
content = self._download_attachment(file_url)
if content:
attachment_contexts.append(f"Content of attached file '{file_name}':\n{content}")

return '\n\n'.join(attachment_contexts)

def get_converted_issues(
self, issue_numbers: list[int] | None = None, comment_id: int | None = None
) -> list[GithubIssue]:
Expand Down Expand Up @@ -239,10 +273,29 @@ def get_instruction(
images.extend(self._extract_image_urls(issue.body))
images.extend(self._extract_image_urls(thread_context))

# Get context from attachments
attachments_context = self._get_attachments_context(issue.body)

# Format issues string with both the issue body and attachments
issues_str = issue.body
if attachments_context:
issues_str += f"\n\nAttached Files:\n{attachments_context}"

# Add attachments from closing issues if any
if issue.closing_issues:
closing_issues_str = '\n\n'.join(
[f'Referenced Issue:\n{issue}' for issue in issue.closing_issues]
)
for closing_issue in issue.closing_issues:
closing_attachments = self._get_attachments_context(closing_issue)
if closing_attachments:
closing_issues_str += f"\n\nAttached Files from Referenced Issue:\n{closing_attachments}"
issues_str += f'\n\n{closing_issues_str}'

template = jinja2.Template(prompt_template)
return (
template.render(
body=issue.title + '\n\n' + issue.body + thread_context,
issues=issues_str,
repo_instruction=repo_instruction,
),
images,
Expand Down
51 changes: 51 additions & 0 deletions tests/unit/resolver/test_issue_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -624,3 +624,54 @@ def test_pr_handler_get_converted_issues_with_duplicate_issue_refs():
'External context #1.',
'External context #2.',
]


def test_issue_handler_with_text_attachments():
    """Attachment contents linked from an issue body end up in the instruction.

    ``requests.get`` is patched with three canned responses that are consumed
    in call order: the issue listing first, then one download per attachment
    link in the issue body (``error.log`` followed by ``config.yaml``).
    """
    # Canned reply for the issue listing.
    issues_reply = MagicMock()
    issues_reply.json.return_value = [
        {
            'number': 1,
            'title': 'Test Issue',
            'body': 'Test Body with attachment [error.log](https://example.com/error.log) and [config.yaml](https://example.com/config.yaml)',
        }
    ]

    # Canned replies for the two attachment downloads.
    error_log_reply = MagicMock(
        text='Error: Something went wrong\nStack trace: ...',
        raise_for_status=MagicMock(),
    )
    config_yaml_reply = MagicMock(
        text='key: value\nsetting: enabled',
        raise_for_status=MagicMock(),
    )

    with patch('requests.get') as fake_get:
        # Order matters: side_effect hands these out one per call.
        fake_get.side_effect = [issues_reply, error_log_reply, config_yaml_reply]

        handler = IssueHandler(
            'test-owner',
            'test-repo',
            'test-token',
            LLMConfig(model='test', api_key='test'),
        )

        converted = handler.get_converted_issues(issue_numbers=[1])

        # Exactly one issue should come back.
        assert len(converted) == 1

        # Render the instruction so the attachment handling can be inspected.
        rendered, _ = handler.get_instruction(
            converted[0],
            'Test template: {{ issues }}',
            repo_instruction=None,
        )

    # Both attachments must be quoted, header and content alike.
    expected_fragments = (
        "Content of attached file 'error.log'",
        'Error: Something went wrong',
        "Content of attached file 'config.yaml'",
        'key: value',
    )
    for fragment in expected_fragments:
        assert fragment in rendered

0 comments on commit 27dc150

Please sign in to comment.