Skip to content

Commit ed87f4c

Browse files
Byron and pascalkuthe committed
feat!: object::tree::diff::Platform::track_rewrites(...)
Invoking `object::tree::diff::Platform::track_rewrites(Rewrites { percentage: None, ..Default::default() })` now explicitly configures exact (identity-based) rename tracking that does not use a similarity percentage. By setting `percentage = Some(<fraction>)` one can set how similar two files have to be in order to be considered related. The same can be configured for copy-tracking, which also includes something like `--find-copies-harder`. Note that by default, renames are considered if a file looks 50% similar, and copy tracking uses the same convention. Co-authored-by: Pascal Kuthe <[email protected]>
1 parent 2ad0e8b commit ed87f4c

File tree

12 files changed

+1403
-242
lines changed

12 files changed

+1403
-242
lines changed

crate-status.md

+13-1
Original file line numberDiff line numberDiff line change
@@ -604,7 +604,19 @@ See its [README.md](https://github.com/Byron/gitoxide/blob/main/gix-lock/README.
604604
* **diffs/changes**
605605
* [x] tree with other tree
606606
* [ ] respect case-sensitivity of host filesystem.
607-
* [ ] a way to access various diff related settings or use them
607+
* [x] a way to access various diff related settings or use them
608+
* **rewrite tracking**
609+
* **deviation** - git keeps up to four candidates whereas we use the first-found candidate that matches the similarity percentage.
610+
This can lead to different sources being found. As such, we also don't consider the filename at all.
611+
* [ ] handle binary files correctly, and apply filters for that matter
612+
* [x] computation limit with observable reduction of precision when it is hit, for copies and renames separately
613+
* **by identity**
614+
* [x] renames (sym-links are only ever compared by identity)
615+
* [x] copies
616+
* **by similarity** - similarity factor controllable separately from renames
617+
* [x] renames
618+
* [x] copies
619+
* [x] 'find-copies-harder' - find copies with the source being the entire tree.
608620
* [ ] tree with working tree
609621
* [x] diffs between modified blobs with various algorithms
610622
* [ ] tree with index

gix/src/config/cache/access.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,10 @@ impl Cache {
7171

7272
pub(crate) fn diff_renames(
7373
&self,
74-
) -> Result<Option<crate::object::tree::diff::Renames>, crate::object::tree::diff::renames::Error> {
74+
) -> Result<Option<crate::object::tree::diff::Rewrites>, crate::object::tree::diff::rewrites::Error> {
7575
self.diff_renames
7676
.get_or_try_init(|| {
77-
crate::object::tree::diff::Renames::try_from_config(&self.resolved, self.lenient_config)
77+
crate::object::tree::diff::Rewrites::try_from_config(&self.resolved, self.lenient_config)
7878
})
7979
.copied()
8080
}

gix/src/config/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,7 @@ pub(crate) struct Cache {
426426
/// A lazily loaded rewrite list for remote urls
427427
pub(crate) url_rewrite: OnceCell<crate::remote::url::Rewrite>,
428428
/// The lazy-loaded rename information for diffs.
429-
pub(crate) diff_renames: OnceCell<Option<crate::object::tree::diff::Renames>>,
429+
pub(crate) diff_renames: OnceCell<Option<crate::object::tree::diff::Rewrites>>,
430430
/// A lazily loaded mapping to know which url schemes to allow
431431
#[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))]
432432
pub(crate) url_scheme: OnceCell<crate::remote::url::SchemePermission>,

gix/src/object/tree/diff/change.rs

+35-37
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,20 @@
11
use crate::bstr::BStr;
2-
use gix_object::tree::EntryMode;
32

43
use crate::Id;
54

5+
/// Information about the diff performed to detect similarity of a [Rewrite][Event::Rewrite].
6+
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)]
7+
pub struct DiffLineStats {
8+
/// The amount of lines to remove from the source to get to the destination.
9+
pub removals: u32,
10+
/// The amount of lines to add to the source to get to the destination.
11+
pub insertions: u32,
12+
/// The amount of lines of the previous state, in the source.
13+
pub before: u32,
14+
/// The amount of lines of the new state, in the destination.
15+
pub after: u32,
16+
}
17+
618
/// An event emitted when finding differences between two trees.
719
#[derive(Debug, Clone, Copy)]
820
pub enum Event<'a, 'old, 'new> {
@@ -33,12 +45,17 @@ pub enum Event<'a, 'old, 'new> {
3345
/// The object id after the modification.
3446
id: Id<'new>,
3547
},
36-
/// Entries are considered renamed if they are not trees and they, according to some understanding of identity, appeared
37-
/// as [`Deletion`][Event::Deletion] in case of the previous source of the rename as well as [`Addition`][Event::Addition]
38-
/// acting as destination all the while [rename tracking][super::Platform::track_renames()] is enabled.
48+
/// Entries are considered rewritten if they are not trees and they, according to some understanding of identity, were renamed
49+
/// or copied.
50+
/// In case of renames, this means they originally appeared as [`Deletion`][Event::Deletion] signalling their source as well as an
51+
/// [`Addition`][Event::Addition] acting as destination.
52+
///
53+
/// In case of copies, the `copy` flag is true, which typically means that a perfect copy of a source was made.
54+
///
55+
/// This variant can only be encountered if [rewrite tracking][super::Platform::track_rewrites()] is enabled.
3956
///
4057
/// Note that mode changes may have occurred as well, i.e. changes from executable to non-executable or vice-versa.
41-
Rename {
58+
Rewrite {
4259
/// The location of the source of the rename operation.
4360
///
4461
/// It may be empty if neither [file names][super::Platform::track_filename()] nor [file paths][super::Platform::track_path()]
@@ -48,40 +65,20 @@ pub enum Event<'a, 'old, 'new> {
4865
source_entry_mode: gix_object::tree::EntryMode,
4966
/// The object id of the entry before the rename.
5067
///
51-
/// Note that this is the same as `id` if we require the [similarity to be 100%][super::Renames::percentage], but may
68+
/// Note that this is the same as `id` if we require the [similarity to be 100%][super::Rewrites::percentage], but may
5269
/// be different otherwise.
5370
source_id: Id<'old>,
54-
71+
/// Information about the diff we performed to detect similarity and match the `source_id` with the current state at `id`.
72+
/// It's `None` if `source_id` is equal to `id`, as identity made an actual diff computation unnecessary.
73+
diff: Option<DiffLineStats>,
5574
/// The mode of the entry after the rename.
5675
/// It could differ but still be considered a rename as we are concerned only about content.
5776
entry_mode: gix_object::tree::EntryMode,
5877
/// The object id after the rename.
5978
id: Id<'new>,
60-
},
61-
/// This entry is considered to be a copy of another, according to some understanding of identity, as its source still exists.
62-
/// If the source wouldn't exist, it would be considered a [rename][Event::Rename].
63-
///
64-
/// This variant may only occur if [rename tracking][super::Platform::track_renames()] is enabled, otherwise copies appear to be
65-
/// plain [additions][Event::Addition].
66-
Copy {
67-
/// The location of the source of the copy operation.
68-
///
69-
/// It may be empty if neither [file names][super::Platform::track_filename()] nor [file paths][super::Platform::track_path()]
70-
/// are tracked.
71-
source_location: &'a BStr,
72-
/// The mode of the entry that is considered the source.
73-
source_entry_mode: gix_object::tree::EntryMode,
74-
/// The object id of the source of the copy.
75-
///
76-
/// Note that this is the same as `id` if we require the [similarity to be 100%][super::Renames::percentage], but may
77-
/// be different otherwise.
78-
source_id: Id<'old>,
79-
80-
/// The mode of the entry after the copy, or the destination of it.
81-
/// It could differ but still be considered a copy as we are concerned only about content.
82-
entry_mode: gix_object::tree::EntryMode,
83-
/// The object id after the copy, or the destination of it.
84-
id: Id<'new>,
79+
/// If true, this rewrite is created by copy, and `source_id` is pointing to its source. Otherwise it's a rename, and `source_id`
80+
/// points to a deleted object, as renames are tracked as deletions and additions of the same or similar content.
81+
copy: bool,
8582
},
8683
}
8784

@@ -93,11 +90,13 @@ impl<'a, 'old, 'new> Event<'a, 'old, 'new> {
9390
) -> Option<Result<crate::object::blob::diff::Platform<'old, 'new>, crate::object::blob::diff::init::Error>> {
9491
match self {
9592
Event::Modification {
96-
previous_entry_mode: EntryMode::BlobExecutable | EntryMode::Blob,
93+
previous_entry_mode,
9794
previous_id,
98-
entry_mode: EntryMode::BlobExecutable | EntryMode::Blob,
95+
entry_mode,
9996
id,
100-
} => Some(crate::object::blob::diff::Platform::from_ids(previous_id, id)),
97+
} if entry_mode.is_blob() && previous_entry_mode.is_blob() => {
98+
Some(crate::object::blob::diff::Platform::from_ids(previous_id, id))
99+
}
101100
_ => None,
102101
}
103102
}
@@ -108,8 +107,7 @@ impl<'a, 'old, 'new> Event<'a, 'old, 'new> {
108107
Event::Addition { entry_mode, .. }
109108
| Event::Deletion { entry_mode, .. }
110109
| Event::Modification { entry_mode, .. }
111-
| Event::Rename { entry_mode, .. } => *entry_mode,
112-
Event::Copy { entry_mode, .. } => *entry_mode,
110+
| Event::Rewrite { entry_mode, .. } => *entry_mode,
113111
}
114112
}
115113
}

0 commit comments

Comments
 (0)