From c333938384e1056bec0c8575e7f57d1300b9b3d4 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Thu, 19 Dec 2024 19:39:43 -0500 Subject: [PATCH 1/3] feat(eval): add standard error to swebench summarize outputs (#5700) Co-authored-by: openhands --- .../scripts/eval/summarize_outputs.py | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/evaluation/benchmarks/swe_bench/scripts/eval/summarize_outputs.py b/evaluation/benchmarks/swe_bench/scripts/eval/summarize_outputs.py index b376a70aea28..f08ee73eee8a 100755 --- a/evaluation/benchmarks/swe_bench/scripts/eval/summarize_outputs.py +++ b/evaluation/benchmarks/swe_bench/scripts/eval/summarize_outputs.py @@ -6,6 +6,8 @@ from collections import Counter import pandas as pd +import random +import numpy as np from openhands.events.serialization import event_from_dict from openhands.events.utils import get_pairs_from_events @@ -18,6 +20,18 @@ ] +def get_bootstrap_accuracy_error_bars(values: float | int | bool, num_samples: int = 1000, p_value=0.05) -> tuple[float, float]: + sorted_vals = np.sort( + [ + np.mean(random.sample(values, len(values) // 2)) + for _ in range(num_samples) + ] + ) + bottom_idx = int(num_samples * p_value / 2) + top_idx = int(num_samples * (1.0 - p_value / 2)) + return (sorted_vals[bottom_idx], sorted_vals[top_idx]) + + def process_file(file_path): with open(file_path, 'r') as file: lines = file.readlines() @@ -26,6 +40,7 @@ def process_file(file_path): num_error_lines = 0 num_agent_stuck_in_loop = 0 num_resolved = 0 + resolved_arr = [] num_empty_patch = 0 num_unfinished_runs = 0 error_counter = Counter() @@ -74,6 +89,9 @@ def process_file(file_path): resolved = report.get('resolved', False) if resolved: num_resolved += 1 + resolved_arr.append(1) + else: + resolved_arr.append(0) # Error error = _d.get('error', None) @@ -100,6 +118,7 @@ def process_file(file_path): 'resolved': { 'count': num_resolved, 'percentage': (num_resolved / num_lines * 100) if num_lines > 0 else 0, + 'ci': tuple(x * 100 for x in get_bootstrap_accuracy_error_bars(resolved_arr)), }, 'empty_patches': { 'count': num_empty_patch, @@ -174,6 +193,7 @@ def aggregate_directory(input_path) -> pd.DataFrame: ) df['resolve_rate'] = df['resolved'].apply(lambda x: x['percentage']) + df['resolve_rate_ci'] = df['resolved'].apply(lambda x: x['ci']) df['empty_patch_rate'] = df['empty_patches'].apply(lambda x: x['percentage']) df['unfinished_rate'] = df['unfinished_runs'].apply(lambda x: x['percentage']) df['avg_turns'] = df['statistics'].apply(lambda x: x['avg_turns']) @@ -242,7 +262,7 @@ def aggregate_directory(input_path) -> pd.DataFrame: # Print detailed results for single file print(f'\nResults for {args.input_path}:') print( - f"Number of resolved: {result['resolved']['count']} / {result['total_instances']} ({result['resolved']['percentage']:.2f}%)" + f"Number of resolved: {result['resolved']['count']} / {result['total_instances']} ({result['resolved']['percentage']:.2f}% [{result['resolved']['ci'][0]:.2f}%, {result['resolved']['ci'][1]:.2f}%])" ) print( f"Number of empty patch: {result['empty_patches']['count']} / {result['total_instances']} ({result['empty_patches']['percentage']:.2f}%)" From 5ad361623d9733e5ee54191b0917067aa883695c Mon Sep 17 00:00:00 2001 From: d-walsh Date: Thu, 19 Dec 2024 23:00:00 -0500 Subject: [PATCH 2/3] feat: add support for custom PR titles (#5706) Co-authored-by: David Walsh --- openhands/resolver/send_pull_request.py | 24 ++++++++++++++++--- tests/unit/resolver/test_send_pull_request.py | 24 ++++++++++++------- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/openhands/resolver/send_pull_request.py b/openhands/resolver/send_pull_request.py index d888691e5857..8bc314f6de78 100644 --- a/openhands/resolver/send_pull_request.py +++ b/openhands/resolver/send_pull_request.py @@ -239,6 +239,7 @@ def send_pull_request( additional_message: str | None = None, target_branch: str | None = None, reviewer: str | None = None, + pr_title: str | None = None, ) -> str: """Send a pull request to a GitHub repository. @@ -251,6 +252,8 @@ def send_pull_request( fork_owner: The owner of the fork to push changes to (if different from the original repo owner) additional_message: The additional messages to post as a comment on the PR in json list format target_branch: The target branch to create the pull request against (defaults to repository default branch) + reviewer: The GitHub username of the reviewer to assign + pr_title: Custom title for the pull request (optional) """ if pr_type not in ['branch', 'draft', 'ready']: raise ValueError(f'Invalid pr_type: {pr_type}') @@ -321,7 +324,11 @@ def send_pull_request( raise RuntimeError('Failed to push changes to the remote repository') # Prepare the PR data: title and body - pr_title = f'Fix issue #{github_issue.number}: {github_issue.title}' + final_pr_title = ( + pr_title + if pr_title + else f'Fix issue #{github_issue.number}: {github_issue.title}' + ) pr_body = f'This pull request fixes #{github_issue.number}.' if additional_message: pr_body += f'\n\n{additional_message}' @@ -334,7 +341,7 @@ def send_pull_request( else: # Prepare the PR for the GitHub API data = { - 'title': pr_title, # No need to escape title for GitHub API + 'title': final_pr_title, # No need to escape title for GitHub API 'body': pr_body, 'head': branch_name, 'base': base_branch, @@ -366,7 +373,9 @@ def send_pull_request( url = pr_data['html_url'] - print(f'{pr_type} created: {url}\n\n--- Title: {pr_title}\n\n--- Body:\n{pr_body}') + print( + f'{pr_type} created: {url}\n\n--- Title: {final_pr_title}\n\n--- Body:\n{pr_body}' + ) return url @@ -535,6 +544,7 @@ def process_single_issue( send_on_failure: bool, target_branch: str | None = None, reviewer: str | None = None, + pr_title: str | None = None, ) -> None: if not resolver_output.success and not send_on_failure: print( @@ -585,6 +595,7 @@ def process_single_issue( additional_message=resolver_output.success_explanation, target_branch=target_branch, reviewer=reviewer, + pr_title=pr_title, ) @@ -687,6 +698,12 @@ def main(): help='GitHub username of the person to request review from', default=None, ) + parser.add_argument( + '--pr-title', + type=str, + help='Custom title for the pull request', + default=None, + ) my_args = parser.parse_args() github_token = ( @@ -741,6 +758,7 @@ def main(): my_args.send_on_failure, my_args.target_branch, my_args.reviewer, + my_args.pr_title, ) diff --git a/tests/unit/resolver/test_send_pull_request.py b/tests/unit/resolver/test_send_pull_request.py index c83b8a892c58..16c7d47cf796 100644 --- a/tests/unit/resolver/test_send_pull_request.py +++ b/tests/unit/resolver/test_send_pull_request.py @@ -332,14 +332,16 @@ def test_update_existing_pull_request( @pytest.mark.parametrize( - 'pr_type,target_branch', + 'pr_type,target_branch,pr_title', [ - ('branch', None), - ('draft', None), - ('ready', None), - ('branch', 'feature'), - ('draft', 'develop'), - ('ready', 'staging'), + ('branch', None, None), + ('draft', None, None), + ('ready', None, None), + ('branch', 'feature', None), + ('draft', 'develop', None), + ('ready', 'staging', None), + ('ready', None, 'Custom PR Title'), + ('draft', 'develop', 'Another Custom Title'), ], ) @patch('subprocess.run') @@ -353,6 +355,7 @@ def test_send_pull_request( mock_output_dir, pr_type, target_branch, + pr_title, ): repo_path = os.path.join(mock_output_dir, 'repo') @@ -386,6 +389,7 @@ def test_send_pull_request( patch_dir=repo_path, pr_type=pr_type, target_branch=target_branch, + pr_title=pr_title, ) # Assert API calls @@ -425,7 +429,8 @@ def test_send_pull_request( assert result == 'https://github.com/test-owner/test-repo/pull/1' mock_post.assert_called_once() post_data = mock_post.call_args[1]['json'] - assert post_data['title'] == 'Fix issue #42: Test Issue' + expected_title = pr_title if pr_title else 'Fix issue #42: Test Issue' + assert post_data['title'] == expected_title assert post_data['body'].startswith('This pull request fixes #42.') assert post_data['head'] == 'openhands-fix-issue-42' assert post_data['base'] == (target_branch if target_branch else 'main') @@ -828,6 +833,7 @@ def test_process_single_issue( additional_message=resolver_output.success_explanation, target_branch=None, reviewer=None, + pr_title=None, ) @@ -1096,6 +1102,7 @@ def test_main( mock_args.llm_api_key = 'mock_key' mock_args.target_branch = None mock_args.reviewer = None + mock_args.pr_title = None mock_parser.return_value.parse_args.return_value = mock_args # Setup environment variables @@ -1131,6 +1138,7 @@ def test_main( False, mock_args.target_branch, mock_args.reviewer, + mock_args.pr_title, ) # Other assertions From 0dd919bacf10677f8885e78aef43259251334202 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 20 Dec 2024 16:43:12 +0400 Subject: [PATCH 3/3] Bump prism-react-renderer from 2.4.0 to 2.4.1 in /docs in the version-all group (#5668) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- docs/package-lock.json | 8 ++++---- docs/package.json | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/package-lock.json b/docs/package-lock.json index d83acb021786..60d351b0ddb8 100644 --- a/docs/package-lock.json +++ b/docs/package-lock.json @@ -14,7 +14,7 @@ "@docusaurus/theme-mermaid": "^3.6.3", "@mdx-js/react": "^3.1.0", "clsx": "^2.0.0", - "prism-react-renderer": "^2.4.0", + "prism-react-renderer": "^2.4.1", "react": "^18.3.1", "react-dom": "^18.3.1", "react-icons": "^5.4.0", @@ -14781,9 +14781,9 @@ } }, "node_modules/prism-react-renderer": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/prism-react-renderer/-/prism-react-renderer-2.4.0.tgz", - "integrity": "sha512-327BsVCD/unU4CNLZTWVHyUHKnsqcvj2qbPlQ8MiBE2eq2rgctjigPA1Gp9HLF83kZ20zNN6jgizHJeEsyFYOw==", + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/prism-react-renderer/-/prism-react-renderer-2.4.1.tgz", + "integrity": "sha512-ey8Ls/+Di31eqzUxC46h8MksNuGx/n0AAC8uKpwFau4RPDYLuE3EXTp8N8G2vX2N7UC/+IXeNUnlWBGGcAG+Ig==", "dependencies": { "@types/prismjs": "^1.26.0", "clsx": "^2.0.0" diff --git a/docs/package.json b/docs/package.json index 232898bd88b8..aeed2f5674eb 100644 --- a/docs/package.json +++ b/docs/package.json @@ -21,7 +21,7 @@ "@docusaurus/theme-mermaid": "^3.6.3", "@mdx-js/react": "^3.1.0", "clsx": "^2.0.0", - "prism-react-renderer": "^2.4.0", + "prism-react-renderer": "^2.4.1", "react": "^18.3.1", "react-dom": "^18.3.1", "react-icons": "^5.4.0",