Skip to content

fix(apply-patch): skip initial new-file hunk header emitted by some AI models #695

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions codex-cli/src/utils/agent/apply-patch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,9 @@ class Parser {

private parse_add_file(): PatchAction {
const lines: Array<string> = [];
// Only skip the first new-file hunk header emitted by some AI models (e.g., Gemini 2.0 Flash)
const NEW_FILE_HUNK = /^@@\s*-0,0\s*\+1(?:,[1-9]\d*)?\s*@@(?: .*)?$/;
let isFirst = true;
while (
!this.is_done([
PATCH_SUFFIX,
Expand All @@ -316,6 +319,12 @@ class Parser {
])
) {
const s = this.read_str();
// Only skip the initial new-file hunk header once
if (isFirst && NEW_FILE_HUNK.test(s.trim())) {
isFirst = false;
continue;
}
isFirst = false;
if (!s.startsWith(HUNK_ADD_LINE_PREFIX)) {
throw new DiffError(`Invalid Add File Line: ${s}`);
}
Expand Down
36 changes: 36 additions & 0 deletions codex-cli/tests/apply-patch.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,42 @@ test("process_patch - add file", () => {
expect(fs.removals).toEqual([]);
});

test("process_patch - add file skips full hunk headers", () => {
const patch = `*** Begin Patch
*** Add File: z.txt
@@ -0,0 +1,2 @@ SectionName
+foo
+bar
*** End Patch`;
const fs = createInMemoryFS({});
process_patch(patch, fs.openFn, fs.writeFn, fs.removeFn);
expect(fs.writes).toEqual({ "z.txt": "foo\nbar" });
expect(fs.removals).toEqual([]);
});

// Regex self-test for new-file hunk headers
test("NEW_FILE_HUNK regex matches expected patterns and rejects others", () => {
const re = /^@@\s*-0,0\s*\+1(?:,[1-9]\d*)?\s*@@(?: .*)?$/;
const matching = [
"@@ -0,0 +1@@",
"@@ -0,0 +1 @@",
"@@ -0,0 +1,10 @@",
"@@ -0,0 +1,10@@",
"@@-0,0+1@@",
"@@-0,0 +1@@",
"@@ -0,0+1,2@@",
"@@ -0,0 +1,2@@ foo",
"@@ -0,0 +1,2 @@ foo",
];
const nonmatching = [
"@@ -1,1 +2,2 @@",
"@@ -0,0 +2,2 @@",
"@@ -0,0 +1,2 @@@@",
];
matching.forEach((s) => expect(re.test(s)).toBe(true));
nonmatching.forEach((s) => expect(re.test(s)).toBe(false));
});

test("process_patch - delete file", () => {
const patch = `*** Begin Patch
*** Delete File: c.txt
Expand Down
15 changes: 15 additions & 0 deletions codex-rs/apply-patch/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,21 @@ PATCH"#,
let contents = fs::read_to_string(path).unwrap();
assert_eq!(contents, "ab\ncd\n");
}

#[test]
fn test_add_file_skip_full_hunk_header_with_section() {
let dir = tempdir().unwrap();
let path = dir.path().join("z.txt");
let patch = wrap_patch(&format!(
"*** Add File: {}\n@@ -0,0 +1,2 @@ SectionName\n+foo\n+bar",
path.display()
));
let mut stdout = Vec::new();
let mut stderr = Vec::new();
apply_patch(&patch, &mut stdout, &mut stderr).unwrap();
let contents = fs::read_to_string(&path).unwrap();
assert_eq!(contents, "foo\nbar\n");
}

#[test]
fn test_delete_file_hunk_removes_file() {
Expand Down
41 changes: 40 additions & 1 deletion codex-rs/apply-patch/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
use std::path::PathBuf;

use thiserror::Error;
use regex::Regex;

const BEGIN_PATCH_MARKER: &str = "*** Begin Patch";
const END_PATCH_MARKER: &str = "*** End Patch";
Expand Down Expand Up @@ -113,10 +114,20 @@ fn parse_one_hunk(lines: &[&str], line_number: usize) -> Result<(Hunk, usize), P
// Be tolerant of case mismatches and extra padding around marker strings.
let first_line = lines[0].trim();
if let Some(path) = first_line.strip_prefix(ADD_FILE_MARKER) {
// Add File
// Add File: only skip the first new-file hunk header emitted by some AI models (e.g., Gemini 2.0 Flash)
let mut contents = String::new();
let mut parsed_lines = 1;
// Regex to match new-file hunk header with optional section name
let new_file_re = Regex::new(r"^@@\s*-0,0\s*\+1(?:,[1-9]\d*)?\s*@@(?: .*)?$").unwrap();
let mut first = true;
for add_line in &lines[1..] {
// Only skip the initial new-file hunk header once
if first && new_file_re.is_match(add_line.trim()) {
parsed_lines += 1;
first = false;
continue;
}
first = false;
if let Some(line_to_add) = add_line.strip_prefix('+') {
contents.push_str(line_to_add);
contents.push('\n');
Expand Down Expand Up @@ -497,3 +508,31 @@ fn test_update_file_chunk() {
))
);
}

// Regex self-test for new-file hunk headers
#[test]
fn test_new_file_hunk_regex() {
let re = Regex::new(r"^@@\s*-0,0\s*\+1(?:,[1-9]\d*)?\s*@@(?: .*)?$").unwrap();
let matching = vec![
"@@ -0,0 +1@@",
"@@ -0,0 +1 @@",
"@@ -0,0 +1,10 @@",
"@@ -0,0 +1,10@@",
"@@-0,0+1@@",
"@@-0,0 +1@@",
"@@ -0,0+1,2@@",
"@@ -0,0 +1,2@@ foo",
"@@ -0,0 +1,2 @@ foo",
];
let nonmatching = vec![
"@@ -1,1 +2,2 @@",
"@@ -0,0 +2,2 @@",
"@@ -0,0 +1,2 @@@@",
];
for s in matching {
assert!(re.is_match(s), "should match: {}", s);
}
for s in nonmatching {
assert!(!re.is_match(s), "should not match: {}", s);
}
}