Skip to content

Commit 3ff54e5

Browse files
committed
feat: use stable hash from rustc-stable-hash
This helps `-Ztrim-paths` build a stable cross-platform path for the registry and git sources. Source files can then be found from the same path when debugging. See #13171 (comment) A few caveats: * This will invalidate the current downloaded caches. Need to put this in the Cargo CHANGELOG. * As a consequence of changing how `SourceId` is hashed, the global cache tracker is also affected because Cargo writes source identifiers (e.g. `index.crates.io-6f17d22bba15001f`) to SQLite. * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/core/global_cache_tracker.rs#L388-L391 * The performance of rustc-stable-hash is slightly worse than the old SipHasher in std on short things like `SourceId`, but better on long stuff like fingerprints. See appendix. StableHasher is used in several places (some might not be needed?): * Rebuild detection (fingerprints) * Rustc version, including all the CLI args running `rustc -vV`. * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/util/rustc.rs#L326 * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/util/rustc.rs#L381 * Build caches * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/core/compiler/fingerprint/mod.rs#L1456 * Compute rustc `-C metadata` * stable hash for SourceId * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/core/package_id.rs#L207 * Also read and hash contents from custom target JSON file. * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/core/compiler/compile_kind.rs#L81-L91 * `UnitInner::dep_hash` * This is to distinguish same units having different features set between normal and build dependencies.
* https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/ops/cargo_compile/mod.rs#L627 * Hash file contents for `cargo package` to verify if files were modified before and after the build. * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/ops/cargo_package.rs#L999 * Rustc diagnostics deduplication * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/core/compiler/job_queue/mod.rs#L311 * Places using `SourceId` identifier like `registry/src` path, and `-Zscript` target directories. Appendix -------- Benchmark on x86_64-unknown-linux-gnu ``` bench_hasher/RustcStableHasher/URL time: [33.843 ps 33.844 ps 33.845 ps] change: [-0.0167% -0.0049% +0.0072%] (p = 0.44 > 0.05) No change in performance detected. Found 10 outliers among 100 measurements (10.00%) 5 (5.00%) low severe 3 (3.00%) high mild 2 (2.00%) high severe bench_hasher/SipHasher/URL time: [18.954 ns 18.954 ns 18.955 ns] change: [-0.1281% -0.0951% -0.0644%] (p = 0.00 < 0.05) Change within noise threshold. Found 14 outliers among 100 measurements (14.00%) 3 (3.00%) low severe 4 (4.00%) low mild 3 (3.00%) high mild 4 (4.00%) high severe bench_hasher/RustcStableHasher/lorem ipsum time: [659.18 ns 659.20 ns 659.22 ns] change: [-0.0192% -0.0062% +0.0068%] (p = 0.34 > 0.05) No change in performance detected. Found 12 outliers among 100 measurements (12.00%) 4 (4.00%) low severe 3 (3.00%) low mild 3 (3.00%) high mild 2 (2.00%) high severe bench_hasher/SipHasher/lorem ipsum time: [1.2006 µs 1.2008 µs 1.2010 µs] change: [+0.0117% +0.0467% +0.0808%] (p = 0.01 < 0.05) Change within noise threshold. Found 1 outliers among 100 measurements (1.00%) 1 (1.00%) high mild ```
1 parent 6e23650 commit 3ff54e5

File tree

8 files changed

+73
-41
lines changed

8 files changed

+73
-41
lines changed

Cargo.lock

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ pulldown-cmark = { version = "0.11.0", default-features = false, features = ["ht
7878
rand = "0.8.5"
7979
regex = "1.10.4"
8080
rusqlite = { version = "0.31.0", features = ["bundled"] }
81+
rustc-stable-hash = { git = "https://github.com/rust-lang/rustc-stable-hash.git", rev = "cb8e141b08fb839606a5f79f9b56087cd54b764d" }
8182
rustfix = { version = "0.8.2", path = "crates/rustfix" }
8283
same-file = "1.0.6"
8384
security-framework = "2.10.0"
@@ -182,6 +183,7 @@ pathdiff.workspace = true
182183
rand.workspace = true
183184
regex.workspace = true
184185
rusqlite.workspace = true
186+
rustc-stable-hash.workspace = true
185187
rustfix.workspace = true
186188
same-file.workspace = true
187189
semver.workspace = true

src/cargo/core/compiler/build_runner/compilation_files.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
use std::collections::HashMap;
44
use std::fmt;
5-
use std::hash::{Hash, Hasher};
5+
use std::hash::Hash;
66
use std::path::{Path, PathBuf};
77
use std::sync::Arc;
88

src/cargo/core/compiler/compile_kind.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use anyhow::Context as _;
88
use serde::Serialize;
99
use std::collections::BTreeSet;
1010
use std::fs;
11-
use std::hash::{Hash, Hasher};
11+
use std::hash::Hash;
1212
use std::path::Path;
1313

1414
/// Indicator for how a unit is being compiled.

src/cargo/core/source_id.rs

Lines changed: 50 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -786,70 +786,93 @@ mod tests {
786786
// Otherwise please just leave a comment in your PR as to why the hash value is
787787
// changing and why the old value can't be easily preserved.
788788
//
789-
// The hash value depends on endianness and bit-width, so we only run this test on
790-
// little-endian 64-bit CPUs (such as x86-64 and ARM64) where it matches the
791-
// well-known value.
789+
// The hash value should be stable across platforms, and doesn't depend on
790+
// endianness and bit-width. One caveat is that absolute paths are inherently
791+
// different on Windows than on Unix-like platforms. Unless we omit or strip
792+
// the prefix components (e.g. `C:`), there is no way to have a
793+
// cross-platform stable hash for absolute paths.
792794
#[test]
793-
#[cfg(all(target_endian = "little", target_pointer_width = "64"))]
794795
fn test_cratesio_hash() {
795796
let gctx = GlobalContext::default().unwrap();
796797
let crates_io = SourceId::crates_io(&gctx).unwrap();
797-
assert_eq!(crate::util::hex::short_hash(&crates_io), "1ecc6299db9ec823");
798+
assert_eq!(crate::util::hex::short_hash(&crates_io), "83d63c3e13aca8cc");
798799
}
799800

800801
// See the comment in `test_cratesio_hash`.
801802
//
802803
// Paths on Windows get different hashes, so the path-based cases below assert platform-specific values where they differ.
803804
#[test]
804-
#[cfg(all(target_endian = "little", target_pointer_width = "64", not(windows)))]
805805
fn test_stable_hash() {
806-
use std::hash::Hasher;
806+
use crate::util::StableHasher;
807807
use std::path::Path;
808808

809+
#[cfg(not(windows))]
810+
let ws_root = Path::new("/tmp/ws");
811+
#[cfg(windows)]
812+
let ws_root = Path::new(r"C:\\tmp\ws");
813+
809814
let gen_hash = |source_id: SourceId| {
810-
let mut hasher = std::collections::hash_map::DefaultHasher::new();
811-
source_id.stable_hash(Path::new("/tmp/ws"), &mut hasher);
815+
let mut hasher = StableHasher::new();
816+
source_id.stable_hash(ws_root, &mut hasher);
812817
hasher.finish()
813818
};
814819

815820
let url = "https://my-crates.io".into_url().unwrap();
816821
let source_id = SourceId::for_registry(&url).unwrap();
817-
assert_eq!(gen_hash(source_id), 18108075011063494626);
818-
assert_eq!(crate::util::hex::short_hash(&source_id), "fb60813d6cb8df79");
822+
assert_eq!(gen_hash(source_id), 2056262832525457700);
823+
assert_eq!(crate::util::hex::short_hash(&source_id), "24b984d12650891c");
819824

820825
let url = "https://your-crates.io".into_url().unwrap();
821826
let source_id = SourceId::for_alt_registry(&url, "alt").unwrap();
822-
assert_eq!(gen_hash(source_id), 12862859764592646184);
823-
assert_eq!(crate::util::hex::short_hash(&source_id), "09c10fd0cbd74bce");
827+
assert_eq!(gen_hash(source_id), 7851411715584162426);
828+
assert_eq!(crate::util::hex::short_hash(&source_id), "7afabb545bd1f56c");
824829

825830
let url = "sparse+https://my-crates.io".into_url().unwrap();
826831
let source_id = SourceId::for_registry(&url).unwrap();
827-
assert_eq!(gen_hash(source_id), 8763561830438022424);
828-
assert_eq!(crate::util::hex::short_hash(&source_id), "d1ea0d96f6f759b5");
832+
assert_eq!(gen_hash(source_id), 15233380663065439616);
833+
assert_eq!(crate::util::hex::short_hash(&source_id), "80ed51ce00d767d3");
829834

830835
let url = "sparse+https://your-crates.io".into_url().unwrap();
831836
let source_id = SourceId::for_alt_registry(&url, "alt").unwrap();
832-
assert_eq!(gen_hash(source_id), 5159702466575482972);
833-
assert_eq!(crate::util::hex::short_hash(&source_id), "135d23074253cb78");
837+
assert_eq!(gen_hash(source_id), 12749290624384351691);
838+
assert_eq!(crate::util::hex::short_hash(&source_id), "cbbda5344694eeb0");
834839

835840
let url = "file:///tmp/ws/crate".into_url().unwrap();
836841
let source_id = SourceId::for_git(&url, GitReference::DefaultBranch).unwrap();
837-
assert_eq!(gen_hash(source_id), 15332537265078583985);
838-
assert_eq!(crate::util::hex::short_hash(&source_id), "73a808694abda756");
839-
840-
let path = Path::new("/tmp/ws/crate");
842+
assert_eq!(gen_hash(source_id), 3109465066469481245);
843+
assert_eq!(crate::util::hex::short_hash(&source_id), "1d5b66d8000a272b");
841844

845+
let path = &ws_root.join("crate");
842846
let source_id = SourceId::for_local_registry(path).unwrap();
843-
assert_eq!(gen_hash(source_id), 18446533307730842837);
844-
assert_eq!(crate::util::hex::short_hash(&source_id), "52a84cc73f6fd48b");
847+
#[cfg(not(windows))]
848+
{
849+
assert_eq!(gen_hash(source_id), 17171351456028149232);
850+
assert_eq!(crate::util::hex::short_hash(&source_id), "f0c5f1e92be54cee");
851+
}
852+
#[cfg(windows)]
853+
{
854+
assert_eq!(gen_hash(source_id), 10712195329887934127);
855+
assert_eq!(crate::util::hex::short_hash(&source_id), "af96919ae55ca994");
856+
}
845857

846858
let source_id = SourceId::for_path(path).unwrap();
847-
assert_eq!(gen_hash(source_id), 8764714075439899829);
848-
assert_eq!(crate::util::hex::short_hash(&source_id), "e1ddd48578620fc1");
859+
assert_eq!(gen_hash(source_id), 13241112980875747369);
860+
#[cfg(not(windows))]
861+
assert_eq!(crate::util::hex::short_hash(&source_id), "e5ba2edec163e65a");
862+
#[cfg(windows)]
863+
assert_eq!(crate::util::hex::short_hash(&source_id), "429dd6f2283a9b5c");
849864

850865
let source_id = SourceId::for_directory(path).unwrap();
851-
assert_eq!(gen_hash(source_id), 17459999773908528552);
852-
assert_eq!(crate::util::hex::short_hash(&source_id), "6568fe2c2fab5bfe");
866+
#[cfg(not(windows))]
867+
{
868+
assert_eq!(gen_hash(source_id), 12461124588148212881);
869+
assert_eq!(crate::util::hex::short_hash(&source_id), "91c47582caceeeac");
870+
}
871+
#[cfg(windows)]
872+
{
873+
assert_eq!(gen_hash(source_id), 17000469607053345884);
874+
assert_eq!(crate::util::hex::short_hash(&source_id), "5c443d0709cdedeb");
875+
}
853876
}
854877

855878
#[test]

src/cargo/ops/cargo_compile/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
//! ["Cargo Target"]: https://doc.rust-lang.org/nightly/cargo/reference/cargo-targets.html
3737
3838
use std::collections::{HashMap, HashSet};
39-
use std::hash::{Hash, Hasher};
39+
use std::hash::Hash;
4040
use std::sync::Arc;
4141

4242
use crate::core::compiler::unit_dependencies::build_unit_dependencies;

src/cargo/util/hasher.rs

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,24 @@
1-
//! Implementation of a hasher that produces the same values across releases.
1+
//! A hasher that produces the same values across releases and platforms.
22
//!
3-
//! The hasher should be fast and have a low chance of collisions (but is not
4-
//! sufficient for cryptographic purposes).
5-
#![allow(deprecated)]
3+
//! This is a wrapper around [`rustc_stable_hash::StableHasher`].
64
7-
use std::hash::{Hasher, SipHasher};
8-
9-
pub struct StableHasher(SipHasher);
5+
pub struct StableHasher(rustc_stable_hash::StableHasher);
106

117
impl StableHasher {
128
pub fn new() -> StableHasher {
13-
StableHasher(SipHasher::new())
9+
StableHasher(rustc_stable_hash::StableHasher::new())
10+
}
11+
12+
pub fn finish(self) -> u64 {
13+
self.0.finalize().0
1414
}
1515
}
1616

17-
impl Hasher for StableHasher {
17+
impl std::hash::Hasher for StableHasher {
1818
fn finish(&self) -> u64 {
19-
self.0.finish()
19+
panic!("call StableHasher::finish instead");
2020
}
21+
2122
fn write(&mut self, bytes: &[u8]) {
2223
self.0.write(bytes)
2324
}

src/cargo/util/rustc.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use std::collections::hash_map::HashMap;
22
use std::env;
3-
use std::hash::{Hash, Hasher};
3+
use std::hash::Hash;
44
use std::path::{Path, PathBuf};
55
use std::sync::Mutex;
66

0 commit comments

Comments
 (0)