Skip to content

Commit cba93ab

Browse files
committed
feat: Implement blame continuation from an existing checkpoint
In this commit, we introduce the ability to run a blame operation starting from a previously generated checkpoint. This makes incremental blame operations more efficient because information that was already computed is reused. The refactoring performed in commit d22965f made it straightforward to integrate this feature into the existing blame processing algorithm. The fundamental difference between a regular blame and a blame from a checkpoint is that in the latter we can pre-fill the `BlameState` with some `BlameEntry` instances, thereby reducing the number of `UnblamedHunk` instances we need to process. The algorithm that incorporates the modifications detected since the checkpoint into the blame entries is encapsulated in the `update_checkpoint_blames_with_changes` function. The newly introduced `BlameCheckpoint` type is a public type that users can use to supply a checkpoint state for their blame operations.
1 parent d22965f commit cba93ab

File tree

8 files changed

+757
-24
lines changed

8 files changed

+757
-24
lines changed

gix-blame/src/file/checkpoint.rs

+228
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
use crate::types::{Change, UnblamedHunk};
2+
use crate::{BlameCheckpoint, BlameEntry};
3+
use gix_hash::ObjectId;
4+
use std::num::NonZeroU32;
5+
use std::ops::Range;
6+
7+
/// Updates blame entries from a checkpoint with newly detected changes.
8+
///
9+
/// # Arguments
10+
/// * `checkpoint` - Previous checkpoint with blame entries
11+
/// * `changes` - Changes since the checkpoint
12+
/// * `suspect` - Commit ID being investigated
13+
///
14+
/// # Returns
15+
/// Updated blame entries and new hunks to blame
16+
pub(crate) fn update_checkpoint_blames_with_changes(
17+
checkpoint: BlameCheckpoint,
18+
changes: Vec<Change>,
19+
suspect: ObjectId,
20+
) -> (Vec<BlameEntry>, Vec<UnblamedHunk>) {
21+
fn blame_fully_contained_by_change(
22+
blame_lines: &BlameLines,
23+
blame: &BlameEntry,
24+
change_lines: &ChangeLines,
25+
change: &Change,
26+
) -> bool {
27+
blame_lines.get_remaining(blame) < change_lines.get_remaining(change)
28+
}
29+
30+
let mut updated_blames = Vec::new();
31+
let mut new_hunks_to_blame = Vec::new();
32+
33+
let mut blame_iter = checkpoint.entries.into_iter().peekable();
34+
35+
// This nested loop iterates through changes and blame entries in parallel, tracking how many
36+
// lines have been processed in each. For each change type:
37+
// - Unchanged: Keep the original blame but adjust line numbers
38+
// - Deleted: Remove blame entries for deleted lines
39+
// - Added/Replaced: Create new hunks to be blamed later
40+
// The tracking ensures we correctly handle partial overlaps between changes and blame entries.
41+
'change: for change in changes {
42+
let mut change_assigned = ChangeLines::default();
43+
while let Some(blame) = blame_iter.peek_mut() {
44+
let mut blame_assigned = BlameLines::default();
45+
46+
// For each of the three cases we have to check if the blame is fully contained by the change.
47+
// If so we can update the blame with the remaining length of the blame.
48+
// If not we have to update the blame with the remaining length of the change.
49+
match change {
50+
Change::Unchanged(ref range) => {
51+
match blame_fully_contained_by_change(&blame_assigned, blame, &change_assigned, &change) {
52+
true => {
53+
updated_blames.push(BlameEntry {
54+
start_in_blamed_file: range.start + change_assigned.assigned.get_assigned(),
55+
start_in_source_file: blame.start_in_source_file,
56+
len: blame.len,
57+
commit_id: blame.commit_id,
58+
});
59+
60+
change_assigned.assigned.add_assigned(blame.len.get());
61+
blame_assigned.assigned.add_assigned(blame.len.get());
62+
}
63+
false => {
64+
updated_blames.push(BlameEntry {
65+
start_in_blamed_file: range.start + change_assigned.assigned.get_assigned(),
66+
start_in_source_file: blame.start_in_source_file,
67+
len: NonZeroU32::new(change_assigned.get_remaining(&change)).unwrap(),
68+
commit_id: blame.commit_id,
69+
});
70+
71+
blame_assigned
72+
.assigned
73+
.add_assigned(change_assigned.get_remaining(&change));
74+
change_assigned
75+
.assigned
76+
.add_assigned(change_assigned.get_remaining(&change));
77+
}
78+
}
79+
}
80+
Change::Deleted(_start_deletion, _lines_deleted) => {
81+
match blame_fully_contained_by_change(&blame_assigned, blame, &change_assigned, &change) {
82+
true => {
83+
blame_assigned.assigned.add_assigned(blame.len.get());
84+
change_assigned.assigned.add_assigned(blame.len.get());
85+
}
86+
false => {
87+
blame_assigned
88+
.assigned
89+
.add_assigned(change_assigned.get_remaining(&change));
90+
change_assigned
91+
.assigned
92+
.add_assigned(change_assigned.get_remaining(&change));
93+
}
94+
}
95+
}
96+
Change::AddedOrReplaced(ref range, lines_deleted) => {
97+
let new_unblamed_hunk = |range: &Range<u32>, suspect: ObjectId| UnblamedHunk {
98+
range_in_blamed_file: range.clone(),
99+
suspects: [(suspect, range.clone())].into(),
100+
};
101+
match blame_fully_contained_by_change(&blame_assigned, blame, &change_assigned, &change) {
102+
true => {
103+
if lines_deleted == 0 {
104+
new_hunks_to_blame.push(new_unblamed_hunk(range, suspect));
105+
}
106+
107+
change_assigned.assigned.add_assigned(blame.len.get());
108+
blame_assigned.assigned.add_assigned(blame.len.get());
109+
}
110+
false => {
111+
new_hunks_to_blame.push(new_unblamed_hunk(range, suspect));
112+
113+
blame_assigned
114+
.assigned
115+
.add_assigned(change_assigned.get_remaining(&change));
116+
change_assigned
117+
.assigned
118+
.add_assigned(change_assigned.get_remaining(&change));
119+
}
120+
}
121+
}
122+
}
123+
124+
// Check if the blame or the change is fully assigned.
125+
// If the blame is fully assigned we can continue with the next blame.
126+
// If the change is fully assigned we can continue with the next change.
127+
// Since we have a mutable reference to the blame we can update it and reset the assigned blame lines.
128+
// If both are fully assigned we can continue with the next blame and change.
129+
match (
130+
blame_assigned.has_remaining(blame),
131+
change_assigned.has_remaining(&change),
132+
) {
133+
(true, true) => {
134+
// Both have remaining
135+
blame.update_blame(&blame_assigned.assigned);
136+
}
137+
(true, false) => {
138+
// Change is fully assigned
139+
blame.update_blame(&blame_assigned.assigned);
140+
continue 'change;
141+
}
142+
(false, true) => {
143+
// Blame is fully assigned
144+
blame_iter.next();
145+
}
146+
(false, false) => {
147+
// Both are fully assigned
148+
blame_iter.next();
149+
continue 'change;
150+
}
151+
}
152+
}
153+
}
154+
(updated_blames, new_hunks_to_blame)
155+
}
156+
157+
impl BlameEntry {
158+
/// Updates the blame entry's line numbers by the given offset.
159+
///
160+
/// This is used when processing changes to adjust the line numbers in the blame entry
161+
/// to account for lines that have already been processed. It updates:
162+
/// * The starting line in the blamed file
163+
/// * The starting line in the source file
164+
/// * The length of the blame entry
165+
pub(crate) fn update_blame(&mut self, offset: &LinesAssigned) {
166+
self.start_in_blamed_file += offset.get_assigned();
167+
self.start_in_source_file += offset.get_assigned();
168+
self.len = NonZeroU32::new(u32::from(self.len) - offset.get_assigned()).unwrap();
169+
}
170+
}
171+
172+
/// Running total of the lines that were already handed out during a blame update.
#[derive(Debug, Default)]
pub(crate) struct LinesAssigned {
    /// Lines counted so far; zero for a default-constructed value.
    lines_assigned: u32,
}

impl LinesAssigned {
    /// Increases the running total by `lines`.
    fn add_assigned(&mut self, lines: u32) {
        self.lines_assigned += lines;
    }

    /// Returns the running total.
    fn get_assigned(&self) -> u32 {
        let Self { lines_assigned } = self;
        *lines_assigned
    }
}
189+
190+
/// Tracks line assignments for blame entries
191+
#[derive(Debug, Default)]
192+
struct BlameLines {
193+
assigned: LinesAssigned,
194+
}
195+
196+
impl BlameLines {
197+
/// Calculate remaining lines in a blame entry
198+
fn get_remaining(&self, blame: &BlameEntry) -> u32 {
199+
blame.len.get() - self.assigned.get_assigned()
200+
}
201+
202+
/// Check if any lines remain
203+
fn has_remaining(&self, blame: &BlameEntry) -> bool {
204+
self.get_remaining(blame) > 0
205+
}
206+
}
207+
208+
/// Tracks line assignments for changes
209+
#[derive(Debug, Default)]
210+
struct ChangeLines {
211+
assigned: LinesAssigned,
212+
}
213+
214+
impl ChangeLines {
215+
/// Calculate remaining lines in a change
216+
fn get_remaining(&self, change: &Change) -> u32 {
217+
match &change {
218+
Change::Unchanged(range) => range.len() as u32 - self.assigned.get_assigned(),
219+
Change::AddedOrReplaced(_, deleted_in_before) => *deleted_in_before - self.assigned.get_assigned(),
220+
Change::Deleted(_, deleted_in_before) => *deleted_in_before - self.assigned.get_assigned(),
221+
}
222+
}
223+
224+
/// Check if any lines remain
225+
fn has_remaining(&self, change: &Change) -> bool {
226+
self.get_remaining(change) > 0
227+
}
228+
}

0 commit comments

Comments
 (0)