Skip to content

Commit

Permalink
feat: add diff object to FixIssue & ModifyCode steps (#1192)
Browse files Browse the repository at this point in the history
* feat: add diff object to FixIssue and ModifyCode step outputs

- Add diff field to ModifiedFile and ModifiedCodeFile types
- Include file content diffs in FixIssue output
- Add before/after diff information to ModifyCode output
- Keep changes minimal while maintaining existing functionality

Co-Authored-By: Patched <[email protected]>

* refactor: improve diff generation in FixIssue and ModifyCode steps

Co-Authored-By: Patched <[email protected]>

* fix: address diff generation, security, and file handling issues for PR #1192

Co-Authored-By: Patched <[email protected]>

* refactor: remove git dependency in FixIssue, switch to in-memory diff in FixIssue & ModifyCode

Co-Authored-By: Patched <[email protected]>

* feat: use git diff when in git repo, empty string otherwise

Co-Authored-By: Patched <[email protected]>

* refactor: replace print statements with logger.warning calls

Co-Authored-By: Patched <[email protected]>

* Some logic changes and more succinct code

* bump patchwork version

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Patched <[email protected]>
Co-authored-by: TIANYOU CHEN <[email protected]>
  • Loading branch information
3 people authored Jan 15, 2025
1 parent 3e81f25 commit 497cbac
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 22 deletions.
72 changes: 62 additions & 10 deletions patchwork/steps/FixIssue/FixIssue.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import difflib
import re
from pathlib import Path
from typing import Any, Optional

from git import Repo
from git import Repo, InvalidGitRepositoryError
from patchwork.logger import logger
from openai.types.chat import ChatCompletionMessageParam

from patchwork.common.client.llm.aio import AioLlmClient
Expand Down Expand Up @@ -97,11 +99,31 @@ def is_stop(self, messages: list[ChatCompletionMessageParam]) -> bool:

class FixIssue(Step, input_class=FixIssueInputs, output_class=FixIssueOutputs):
def __init__(self, inputs):
"""Initialize the FixIssue step.
Args:
inputs: Dictionary containing input parameters including:
- base_path: Optional path to the repository root
- Other LLM-related parameters
"""
super().__init__(inputs)
self.base_path = inputs.get("base_path")
if self.base_path is None:
repo = Repo(Path.cwd(), search_parent_directories=True)
self.base_path = repo.working_tree_dir
cwd = str(Path.cwd())
original_base_path = inputs.get("base_path")

if original_base_path is not None:
original_base_path = str(Path(str(original_base_path)).resolve())

# Check if we're in a git repository
try:
self.repo = Repo(original_base_path or cwd, search_parent_directories=True)
except (InvalidGitRepositoryError, Exception):
self.repo = None

repo_working_dir = None
if self.repo is not None:
repo_working_dir = self.repo.working_dir

self.base_path = original_base_path or repo_working_dir or cwd

llm_client = AioLlmClient.create_aio_client(inputs)
if llm_client is None:
Expand All @@ -122,10 +144,40 @@ def __init__(self, inputs):
)

def run(self):
"""Execute the FixIssue step.
This method:
1. Executes the multi-turn LLM conversation to analyze and fix the issue
2. Tracks file modifications made by the CodeEditTool
3. Generates in-memory diffs for all modified files
Returns:
dict: Dictionary containing list of modified files with their diffs
"""
self.multiturn_llm_call.execute(limit=100)

modified_files = []
cwd = Path.cwd()
for tool in self.multiturn_llm_call.tool_set.values():
if isinstance(tool, CodeEditTool):
cwd = Path.cwd()
modified_files = [file_path.relative_to(cwd) for file_path in tool.tool_records["modified_files"]]
return dict(modified_files=[{"path": str(file)} for file in modified_files])
return dict()
if not isinstance(tool, CodeEditTool):
continue
tool_modified_files = [
dict(path=str(file_path.relative_to(cwd)), diff="")
for file_path in tool.tool_records["modified_files"]
]
modified_files.extend(tool_modified_files)

# Generate diffs for modified files
# Only try to generate git diff if we're in a git repository
if self.repo is not None:
for modified_file in modified_files:
file = modified_file["path"]
try:
# Try to get the diff using git
diff = self.repo.git.diff('HEAD', file)
modified_file["diff"] = diff or ""
except Exception as e:
# Git-specific errors (untracked files, etc) - keep empty diff
logger.warning(f"Could not get git diff for {file}: {str(e)}")

return dict(modified_files=modified_files)
18 changes: 17 additions & 1 deletion patchwork/steps/FixIssue/typed.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,21 @@ class FixIssueInputs(__FixIssueRequiredInputs, total=False):
]


class ModifiedFile(TypedDict):
    """Describe one file modified by the FixIssue step.

    Attributes:
        path: Path of the modified file, expressed relative to the current
            working directory of the process running the step.
        diff: Output of ``git diff HEAD`` for the file when the step runs
            inside a git repository. Empty string when no repository is
            found, when git cannot produce a diff for the file (a warning
            is logged in that case), or when git reports no changes.
    """

    path: str
    diff: str

class FixIssueOutputs(TypedDict):
modified_files: List[Dict]
modified_files: List[ModifiedFile]
71 changes: 61 additions & 10 deletions patchwork/steps/ModifyCode/ModifyCode.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
from __future__ import annotations

import difflib
from pathlib import Path

from patchwork.logger import logger
from patchwork.step import Step, StepStatus


def save_file_contents(file_path, content):
"""Utility function to save content to a file."""
with open(file_path, "w") as file:
def save_file_contents(file_path: str | Path, content: str) -> None:
"""Utility function to save content to a file.
Args:
file_path: Path to the file to save content to (str or Path)
content: Content to write to the file
"""
path = Path(file_path)
with path.open("w") as file:
file.write(content)


Expand All @@ -33,20 +41,26 @@ def handle_indent(src: list[str], target: list[str], start: int, end: int) -> li


def replace_code_in_file(
file_path: str,
file_path: str | Path,
start_line: int | None,
end_line: int | None,
new_code: str,
) -> None:
"""Replace code in a file at the specified line range.
Args:
file_path: Path to the file to modify (str or Path)
start_line: Starting line number (1-based)
end_line: Ending line number (1-based)
new_code: New code to insert
"""
path = Path(file_path)
new_code_lines = new_code.splitlines(keepends=True)
if len(new_code_lines) > 0 and not new_code_lines[-1].endswith("\n"):
new_code_lines[-1] += "\n"

if path.exists() and start_line is not None and end_line is not None:
"""Replaces specified lines in a file with new code."""
text = path.read_text()

lines = text.splitlines(keepends=True)

# Insert the new code at the start line after converting it into a list of lines
Expand All @@ -55,7 +69,7 @@ def replace_code_in_file(
lines = new_code_lines

# Save the modified contents back to the file
save_file_contents(file_path, "".join(lines))
save_file_contents(path, "".join(lines))


class ModifyCode(Step):
Expand All @@ -81,16 +95,53 @@ def run(self) -> dict:
return dict(modified_code_files=[])

for code_snippet, extracted_response in sorted_list:
uri = code_snippet.get("uri")
# Use Path for consistent path handling
file_path = Path(code_snippet.get("uri", ""))
start_line = code_snippet.get("startLine")
end_line = code_snippet.get("endLine")
new_code = extracted_response.get("patch")

if new_code is None:
continue

replace_code_in_file(uri, start_line, end_line, new_code)
modified_code_file = dict(path=uri, start_line=start_line, end_line=end_line, **extracted_response)
# Get the original content for diffing
diff = ""
try:
# Store original content in memory
original_content = file_path.read_text() if file_path.exists() else ""

# Apply the changes
replace_code_in_file(file_path, start_line, end_line, new_code)

# Read modified content
current_content = file_path.read_text() if file_path.exists() else ""

# Generate unified diff
fromfile = f"a/{file_path}"
tofile = f"b/{file_path}"
diff = "".join(difflib.unified_diff(
original_content.splitlines(keepends=True),
current_content.splitlines(keepends=True),
fromfile=fromfile,
tofile=tofile
))

if not diff and new_code: # If no diff but we have new code (new file)
diff = f"+++ {file_path}\n{new_code}"
except (OSError, IOError) as e:
logger.warning(f"Failed to generate diff for {file_path}: {str(e)}")
# Still proceed with the modification even if diff generation fails
replace_code_in_file(file_path, start_line, end_line, new_code)
diff = f"+++ {file_path}\n{new_code}" # Use new code as diff on error

# Create the modified code file dictionary
modified_code_file = dict(
path=str(file_path),
start_line=start_line,
end_line=end_line,
diff=diff,
**extracted_response
)
modified_code_files.append(modified_code_file)

return dict(modified_code_files=modified_code_files)
16 changes: 16 additions & 0 deletions patchwork/steps/ModifyCode/typed.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,22 @@ class ModifyCodeOutputs(TypedDict):


class ModifiedCodeFile(TypedDict, total=False):
    """Describe one code modification applied by the ModifyCode step.

    All keys are optional (``total=False``); the step may also add further
    keys copied from the extracted response it was given.

    Attributes:
        path: The path to the modified file, as a string.
        start_line: The starting line number of the replaced range, taken
            from the code snippet's ``startLine`` value.
        end_line: The ending line number of the replaced range, taken
            from the code snippet's ``endLine`` value.
        diff: A unified diff produced with ``difflib.unified_diff`` from
            the file contents read before and after the modification,
            labelled ``a/<path>`` and ``b/<path>``.

    Note:
        ``diff`` is not always a well-formed unified diff. When the
        comparison yields no output but new code was supplied, or when
        reading the file fails, the step stores the fallback text
        ``"+++ <path>`` followed by a newline and the new code instead.
    """

    path: str
    start_line: int
    end_line: int
    diff: str
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "patchwork-cli"
version = "0.0.90"
version = "0.0.91"
description = ""
authors = ["patched.codes"]
license = "AGPL"
Expand Down

0 comments on commit 497cbac

Please sign in to comment.