Skip to content

Commit

Permalink
linter: only lint on updated lines in the new file (#4409)
Browse files Browse the repository at this point in the history
  • Loading branch information
xingyaoww authored Oct 17, 2024
1 parent 642e01b commit ec3152b
Show file tree
Hide file tree
Showing 6 changed files with 571 additions and 4 deletions.
25 changes: 23 additions & 2 deletions openhands/linter/languages/python.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import List

from openhands.core.logger import openhands_logger as logger
from openhands.linter.base import BaseLinter, LintResult
from openhands.linter.utils import run_cmd

Expand Down Expand Up @@ -39,11 +40,31 @@ def flake_lint(filepath: str) -> list[LintResult]:
_msg = parts[3].strip()
if len(parts) > 4:
_msg += ': ' + parts[4].strip()

try:
line_num = int(parts[1])
except ValueError as e:
logger.warning(
f'Error parsing flake8 output for line: {e}. Parsed parts: {parts}. Skipping...'
)
continue

try:
column_num = int(parts[2])
except ValueError as e:
column_num = 1
_msg = (
parts[2].strip() + ' ' + _msg
) # add the unparsed message to the original message
logger.warning(
f'Error parsing flake8 output for column: {e}. Parsed parts: {parts}. Using default column 1.'
)

results.append(
LintResult(
file=filepath,
line=int(parts[1]),
column=int(parts[2]),
line=line_num,
column=column_num,
message=_msg,
)
)
Expand Down
87 changes: 87 additions & 0 deletions openhands/linter/linter.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
from collections import defaultdict
from difflib import SequenceMatcher

from openhands.linter.base import BaseLinter, LinterException, LintResult
from openhands.linter.languages.python import PythonLinter
Expand Down Expand Up @@ -33,3 +34,89 @@ def lint(self, file_path: str) -> list[LintResult]:
if res:
return res
return []

def lint_file_diff(
    self, original_file_path: str, updated_file_path: str
) -> list[LintResult]:
    """Return only the lint errors that the updated file introduces.

    Both files are linted with ``self.lint``; their lines are then aligned
    with ``difflib.SequenceMatcher``. An error in the updated file is
    suppressed only when it sits on an unchanged line and the original file
    already had an error with the same message and column on that line.

    Args:
        original_file_path: Path to the file before the change.
        updated_file_path: Path to the file after the change.

    Returns:
        The newly introduced lint errors, sorted by (line, column).
    """
    # 1. Lint the original and updated file
    original_lint_errors: list[LintResult] = self.lint(original_file_path)
    updated_lint_errors: list[LintResult] = self.lint(updated_file_path)

    # 2. Load the original and updated file content
    with open(original_file_path, 'r') as f:
        old_lines = f.readlines()
    with open(updated_file_path, 'r') as f:
        new_lines = f.readlines()

    # 3. Classify lines of the updated file (line numbers are 1-based).
    # Unchanged ('equal') lines get an old -> new line-number mapping;
    # replaced/inserted lines are collected as "touched by the diff".
    old_to_new_line_no_mapping: dict[int, int] = {}
    replace_or_inserted_lines: set[int] = set()
    matcher = SequenceMatcher(isjunk=None, a=old_lines, b=new_lines)
    for (
        tag,
        old_idx_start,
        old_idx_end,
        new_idx_start,
        new_idx_end,
    ) in matcher.get_opcodes():
        if tag == 'equal':
            for offset in range(old_idx_end - old_idx_start):
                old_to_new_line_no_mapping[old_idx_start + offset + 1] = (
                    new_idx_start + offset + 1
                )
        elif tag in ('replace', 'insert'):
            # Walk the NEW range: for 'insert' the old range is empty, and
            # for 'replace' the two ranges may differ in length. Walking the
            # old range would miss inserted lines or spill into the
            # following unchanged block.
            replace_or_inserted_lines.update(
                range(new_idx_start + 1, new_idx_end + 1)
            )
        # 'delete' contributes no lines to the updated file, so it is ignored.

    # 4. Index pre-existing errors on unchanged lines by their line number
    # in the updated file, so they can be recognised and not re-reported.
    new_line_no_to_original_errors: dict[int, list[LintResult]] = defaultdict(list)
    for error in original_lint_errors:
        if error.line in old_to_new_line_no_mapping:
            new_line_no_to_original_errors[
                old_to_new_line_no_mapping[error.line]
            ].append(error)

    # 5. Select errors from the updated file's lint results to report
    selected_errors = []
    for error in updated_lint_errors:
        # 5.1. Error sits on a replaced/inserted line: always new.
        if error.line in replace_or_inserted_lines:
            selected_errors.append(error)
        # 5.2. Error sits on an unchanged line that HAD errors before:
        # report it only if no original error has the same message and column.
        elif error.line in new_line_no_to_original_errors:
            if not any(
                original_error.message == error.message
                and original_error.column == error.column
                for original_error in new_line_no_to_original_errors[error.line]
            ):
                selected_errors.append(error)
        # 5.3. Error sits on an unchanged line that had NO errors before:
        # it must be a knock-on effect of the change, so report it.
        else:
            selected_errors.append(error)

    # 6. Sort errors by line and column
    selected_errors.sort(key=lambda x: (x.line, x.column))
    return selected_errors
41 changes: 41 additions & 0 deletions openhands/utils/diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import difflib

import whatthepatch


def get_diff(old_contents: str, new_contents: str, filepath: str = 'file') -> str:
    """Produce a unified diff, without context lines, between two texts.

    Args:
        old_contents: The text before the change.
        new_contents: The text after the change.
        filepath: Name used for both the "from" and "to" file headers.

    Returns:
        The diff as a single string, with trailing whitespace removed from
        every diff line; an empty string when the contents are identical.
    """
    before = old_contents.split('\n')
    after = new_contents.split('\n')
    # n=0: leave unchanged lines out of the diff,
    # because they can cause `parse_diff` to fail.
    diff_lines = difflib.unified_diff(
        before,
        after,
        fromfile=filepath,
        tofile=filepath,
        n=0,
    )
    return '\n'.join(line.rstrip() for line in diff_lines)


def parse_diff(diff_patch: str) -> list[whatthepatch.patch.Change]:
    """Parse a single-file diff into its list of changes.

    Args:
        diff_patch: The diff text; blank or whitespace-only input is allowed.

    Returns:
        The changes of the single parsed patch whose ``old`` and ``new``
        values differ; an empty list for a blank patch.

    Raises:
        AssertionError: If the patch does not parse to exactly one file diff.
    """
    # A blank patch carries no changes; skip parsing entirely.
    if not diff_patch.strip():
        return []

    patch_list = list(whatthepatch.parse_patch(diff_patch))
    assert len(patch_list) == 1, (
        'parse_diff only supports single file diff. But got:\nPATCH:\n'
        + diff_patch
        + '\nPATCH LIST:\n'
        + str(patch_list)
    )

    # Keep only entries whose old and new values differ.
    return [entry for entry in patch_list[0].changes if entry.old != entry.new]
4 changes: 2 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ python-pptx = "*"
pylatexenc = "*"
tornado = "*"
python-dotenv = "*"
whatthepatch = "^1.0.6"
protobuf = "^4.21.6,<5.0.0" # chromadb currently fails on 5.0+
opentelemetry-api = "1.25.0"
opentelemetry-exporter-otlp-proto-grpc = "1.25.0"
Expand Down
Loading

0 comments on commit ec3152b

Please sign in to comment.