Skip to content

Commit 35415c5

Browse files
committed
Merge branch 'rename-tracking'
2 parents dfe2402 + 2e7989f commit 35415c5

File tree

22 files changed

+1657
-289
lines changed

22 files changed

+1657
-289
lines changed

.github/workflows/ci.yml

+2-4
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@ on:
1111
- 'src/**'
1212
- 'tests/**'
1313
- 'cargo-*/**'
14-
- 'git*/**'
15-
- 'experiments/**'
14+
- 'gix*/**'
1615
- '*.toml'
1716
- Makefile
1817
pull_request:
@@ -24,8 +23,7 @@ on:
2423
- 'src/**'
2524
- 'tests/**'
2625
- 'cargo-*/**'
27-
- 'git*/**'
28-
- 'experiments/**'
26+
- 'gix*/**'
2927
- '*.toml'
3028
- Makefile
3129

crate-status.md

+13-1
Original file line numberDiff line numberDiff line change
@@ -604,7 +604,19 @@ See its [README.md](https://github.com/Byron/gitoxide/blob/main/gix-lock/README.
604604
* **diffs/changes**
605605
* [x] tree with other tree
606606
* [ ] respect case-sensitivity of host filesystem.
607-
* [ ] a way to access various diff related settings or use them
607+
* [x] a way to access various diff related settings or use them
608+
* **rewrite tracking**
609+
* **deviation** - git keeps up to four candidates whereas we use the first-found candidate that matches the similarity percentage.
610+
This can lead to different sources being found. As such, we also don't consider the filename at all.
611+
* [ ] handle binary files correctly, and apply filters for that matter
612+
* [x] computation limit with observable reduction of precision when it is hit, for copies and renames separately
613+
* **by identity**
614+
* [x] renames (sym-links are only ever compared by identity)
615+
* [x] copies
616+
* **by similarity** - similarity factor controllable separately from renames
617+
* [x] renames
618+
* [x] copies
619+
* [x] 'find-copies-harder' - find copies with the source being the entire tree.
608620
* [ ] tree with working tree
609621
* [x] diffs between modified blobs with various algorithms
610622
* [ ] tree with index

gitoxide-core/src/hours/mod.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -174,14 +174,14 @@ where
174174
};
175175
from.changes()?
176176
.track_filename()
177-
.track_renames(None)
177+
.track_rewrites(None)
178178
.for_each_to_obtain_tree(&to, |change| {
179179
use gix::object::tree::diff::change::Event::*;
180180
if let Some(c) = change_counter.as_ref() {
181181
c.fetch_add(1, Ordering::SeqCst);
182182
}
183183
match change.event {
184-
Copy { .. } | Rename { .. } => {
184+
Rewrite { .. } => {
185185
unreachable!("we turned that off")
186186
}
187187
Addition { entry_mode, id } => {

gix-diff/src/tree/mod.rs

+11-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::collections::VecDeque;
22

33
use gix_hash::ObjectId;
4+
use gix_object::bstr::BString;
45
use gix_object::TreeRefIter;
56

67
/// The state required to visit [Changes] to be instantiated with `State::default()`.
@@ -41,7 +42,15 @@ pub mod visit;
4142
#[doc(inline)]
4243
pub use visit::Visit;
4344

45+
/// A [Visit][visit::Visit] implementation to record every observed change and keep track of the changed paths.
46+
#[derive(Clone, Debug)]
47+
pub struct Recorder {
48+
path_deque: VecDeque<BString>,
49+
path: BString,
50+
location: Option<recorder::Location>,
51+
/// The observed changes.
52+
pub records: Vec<recorder::Change>,
53+
}
54+
4455
/// Useful for use as delegate implementing [`Visit`] to keep track of all seen changes. Useful for debugging or printing primarily.
4556
pub mod recorder;
46-
#[doc(inline)]
47-
pub use recorder::Recorder;

gix-diff/src/tree/recorder.rs

+63-19
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,21 @@
1-
use std::collections::VecDeque;
2-
31
use gix_hash::ObjectId;
42
use gix_object::{
53
bstr::{BStr, BString, ByteSlice, ByteVec},
64
tree,
75
};
86

9-
use crate::tree::visit;
7+
use crate::tree::{visit, Recorder};
8+
9+
/// Describe how to track the location of a change.
10+
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
11+
pub enum Location {
12+
/// Track the entire path, relative to the repository.
13+
Path,
14+
/// Keep only the file-name as location, which may be enough for some calculations.
15+
///
16+
/// This is less expensive than tracking the entire `Path`.
17+
FileName,
18+
}
1019

1120
/// A Change as observed by a call to [`visit(…)`][visit::Visit::visit()], enhanced with the path affected by the change.
1221
/// It's similar to [visit::Change] but includes the path that changed.
@@ -34,13 +43,37 @@ pub enum Change {
3443
},
3544
}
3645

37-
/// A [Visit][visit::Visit] implementation to record every observed change and keep track of the changed paths.
38-
#[derive(Clone, Debug, Default)]
39-
pub struct Recorder {
40-
path_deque: VecDeque<BString>,
41-
path: BString,
42-
/// The observed changes.
43-
pub records: Vec<Change>,
46+
impl Default for Recorder {
47+
fn default() -> Self {
48+
Recorder {
49+
path_deque: Default::default(),
50+
path: Default::default(),
51+
location: Some(Location::Path),
52+
records: vec![],
53+
}
54+
}
55+
}
56+
57+
/// Builder
58+
impl Recorder {
59+
/// Set how the location of each change is tracked, or `None` to not track the location at all.
60+
pub fn track_location(mut self, location: Option<Location>) -> Self {
61+
self.location = location;
62+
self
63+
}
64+
}
65+
66+
/// Access
67+
impl Recorder {
68+
/// Obtain a copy of the currently tracked, full path of the entry.
69+
pub fn path_clone(&self) -> BString {
70+
self.path.clone()
71+
}
72+
73+
/// Return the currently set path.
74+
pub fn path(&self) -> &BStr {
75+
self.path.as_ref()
76+
}
4477
}
4578

4679
impl Recorder {
@@ -58,28 +91,39 @@ impl Recorder {
5891
}
5992
self.path.push_str(name);
6093
}
61-
62-
fn path_clone(&self) -> BString {
63-
self.path.clone()
64-
}
6594
}
6695

6796
impl visit::Visit for Recorder {
6897
fn pop_front_tracked_path_and_set_current(&mut self) {
69-
self.path = self.path_deque.pop_front().expect("every parent is set only once");
98+
if let Some(Location::Path) = self.location {
99+
self.path = self.path_deque.pop_front().expect("every parent is set only once");
100+
}
70101
}
71102

72103
fn push_back_tracked_path_component(&mut self, component: &BStr) {
73-
self.push_element(component);
74-
self.path_deque.push_back(self.path.clone());
104+
if let Some(Location::Path) = self.location {
105+
self.push_element(component);
106+
self.path_deque.push_back(self.path.clone());
107+
}
75108
}
76109

77110
fn push_path_component(&mut self, component: &BStr) {
78-
self.push_element(component);
111+
match self.location {
112+
None => {}
113+
Some(Location::Path) => {
114+
self.push_element(component);
115+
}
116+
Some(Location::FileName) => {
117+
self.path.clear();
118+
self.path.extend_from_slice(component);
119+
}
120+
}
79121
}
80122

81123
fn pop_path_component(&mut self) {
82-
self.pop_element();
124+
if let Some(Location::Path) = self.location {
125+
self.pop_element();
126+
}
83127
}
84128

85129
fn visit(&mut self, change: visit::Change) -> visit::Action {

gix-diff/src/tree/visit.rs

+26
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use gix_hash::ObjectId;
2+
use gix_object::tree::EntryMode;
23
use gix_object::{bstr::BStr, tree};
34

45
/// Represents any possible change in order to turn one tree into another.
@@ -33,6 +34,31 @@ pub enum Change {
3334
},
3435
}
3536

37+
impl Change {
38+
/// Return the current object id.
39+
pub fn oid(&self) -> &gix_hash::oid {
40+
match self {
41+
Change::Addition { oid, .. } | Change::Deletion { oid, .. } | Change::Modification { oid, .. } => oid,
42+
}
43+
}
44+
/// Return the current tree entry mode.
45+
pub fn entry_mode(&self) -> EntryMode {
46+
match self {
47+
Change::Addition { entry_mode, .. }
48+
| Change::Deletion { entry_mode, .. }
49+
| Change::Modification { entry_mode, .. } => *entry_mode,
50+
}
51+
}
52+
/// Return the current object id and tree entry mode of a change.
53+
pub fn oid_and_entry_mode(&self) -> (&gix_hash::oid, EntryMode) {
54+
match self {
55+
Change::Addition { oid, entry_mode }
56+
| Change::Deletion { oid, entry_mode }
57+
| Change::Modification { oid, entry_mode, .. } => (oid, *entry_mode),
58+
}
59+
}
60+
}
61+
3662
/// What to do after a [Change] was [recorded][Visit::visit()].
3763
#[derive(Clone, Copy, PartialOrd, PartialEq, Ord, Eq, Hash)]
3864
pub enum Action {

gix-diff/tests/tree/mod.rs

+22-9
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
mod changes {
22
mod to_obtain_tree {
3+
use gix_diff::tree::recorder::Location;
34
use gix_diff::tree::{recorder, recorder::Change::*};
45
use gix_hash::{oid, ObjectId};
56
use gix_object::{bstr::ByteSlice, tree::EntryMode, TreeRefIter};
@@ -40,14 +41,19 @@ mod changes {
4041
.expect("id to be a tree"))
4142
}
4243

43-
fn diff_commits(db: &gix_odb::Handle, lhs: impl Into<Option<ObjectId>>, rhs: &oid) -> crate::Result<Changes> {
44+
fn diff_commits(
45+
db: &gix_odb::Handle,
46+
lhs: impl Into<Option<ObjectId>>,
47+
rhs: &oid,
48+
location: Option<Location>,
49+
) -> crate::Result<Changes> {
4450
let mut buf = Vec::new();
4551
let lhs_tree = lhs
4652
.into()
4753
.and_then(|lhs| locate_tree_by_commit(db, &lhs, &mut buf).ok());
4854
let mut buf2 = Vec::new();
4955
let rhs_tree = locate_tree_by_commit(db, rhs, &mut buf2)?;
50-
let mut recorder = gix_diff::tree::Recorder::default();
56+
let mut recorder = gix_diff::tree::Recorder::default().track_location(location);
5157
gix_diff::tree::Changes::from(lhs_tree).needed_to_obtain(
5258
rhs_tree,
5359
gix_diff::tree::State::default(),
@@ -512,31 +518,33 @@ mod changes {
512518
diff_commits(
513519
&db,
514520
all_commits[0].to_owned(),
515-
all_commits.last().expect("we have many commits")
521+
all_commits.last().expect("we have many commits"),
522+
None
516523
)?,
517524
vec![
518525
Addition {
519526
entry_mode: EntryMode::Blob,
520527
oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"),
521-
path: "b".into()
528+
path: "".into()
522529
},
523530
Addition {
524531
entry_mode: EntryMode::Tree,
525532
oid: hex_to_id("496d6428b9cf92981dc9495211e6e1120fb6f2ba"),
526-
path: "g".into()
533+
path: "".into()
527534
},
528535
Addition {
529536
entry_mode: EntryMode::Blob,
530537
oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"),
531-
path: "g/a".into()
538+
path: "".into()
532539
}
533540
]
534541
);
535542
assert_eq!(
536543
diff_commits(
537544
&db,
538545
all_commits.last().expect("we have many commits").to_owned(),
539-
&all_commits[0]
546+
&all_commits[0],
547+
Location::FileName.into()
540548
)?,
541549
vec![
542550
Deletion {
@@ -552,7 +560,7 @@ mod changes {
552560
Deletion {
553561
entry_mode: EntryMode::Blob,
554562
oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"),
555-
path: "g/a".into()
563+
path: "a".into()
556564
}
557565
]
558566
);
@@ -565,7 +573,12 @@ mod changes {
565573
let all_commits = all_commits(&db);
566574

567575
assert_eq!(
568-
diff_commits(&db, None::<ObjectId>, &all_commits[all_commits.len() - 6])?,
576+
diff_commits(
577+
&db,
578+
None::<ObjectId>,
579+
&all_commits[all_commits.len() - 6],
580+
Some(Location::Path)
581+
)?,
569582
vec![
570583
Addition {
571584
entry_mode: EntryMode::Tree,

gix-object/src/tree/mod.rs

+5
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@ impl EntryMode {
4444
matches!(self, EntryMode::Blob | EntryMode::BlobExecutable)
4545
}
4646

47+
/// Return true if the entry is any kind of blob or symlink.
48+
pub fn is_blob_or_symlink(&self) -> bool {
49+
matches!(self, EntryMode::Blob | EntryMode::BlobExecutable | EntryMode::Link)
50+
}
51+
4752
/// Represent the mode as descriptive string.
4853
pub fn as_str(&self) -> &'static str {
4954
use EntryMode::*;

gix-traverse/src/tree/mod.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,14 @@ pub trait Visit {
2828
}
2929

3030
/// A [Visit][Visit] implementation to record every observed change and keep track of the changed paths.
31-
#[derive(Clone, Debug, Default)]
31+
///
32+
/// Recorders can also be instructed to track the filename only, or no location at all.
33+
#[derive(Clone, Debug)]
3234
pub struct Recorder {
3335
path_deque: VecDeque<BString>,
3436
path: BString,
37+
/// How to track the location.
38+
location: Option<recorder::Location>,
3539
/// The observed entries.
3640
pub records: Vec<recorder::Entry>,
3741
}

0 commit comments

Comments
 (0)