Skip to content

Commit fa94b61

Browse files
committed
Support multiple hasher
1 parent c2b3deb commit fa94b61

File tree

6 files changed

+166
-55
lines changed

6 files changed

+166
-55
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# Unreleased
22

3+
- Introduce multi hasher support (#8)
34
- `StableHasher::finish` now returns a small hash instead of being fatal (#6)
45
- Remove `StableHasher::finalize` (#4)
56
- Import stable hasher implementation from rustc ([db8aca48129](https://github.com/rust-lang/rust/blob/db8aca48129d86b2623e3ac8cbcf2902d4d313ad/compiler/rustc_data_structures/src/))

src/lib.rs

+8
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,11 @@ pub use stable_hasher::StableHasher;
1111

1212
#[doc(inline)]
1313
pub use stable_hasher::StableHasherResult;
14+
15+
#[doc(inline)]
16+
pub use stable_hasher::ExtendedHasher;
17+
18+
pub use sip128::SipHasher128; // TODO: Should SipHasher128 be exposed?
19+
20+
/// Stable Sip Hasher 128
21+
pub type StableSipHasher128 = StableHasher<SipHasher128>;

src/sip128.rs

+44-29
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance.
44
// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727
55
use crate::int_overflow::{DebugStrictAdd, DebugStrictSub};
6+
use crate::ExtendedHasher;
67

78
use std::hash::Hasher;
89
use std::mem::{self, MaybeUninit};
@@ -214,28 +215,6 @@ impl SipHasher128 {
214215
hasher
215216
}
216217

217-
#[inline]
218-
pub fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
219-
let nbuf = self.nbuf;
220-
debug_assert!(LEN <= 8);
221-
debug_assert!(nbuf < BUFFER_SIZE);
222-
debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE);
223-
224-
if nbuf.debug_strict_add(LEN) < BUFFER_SIZE {
225-
unsafe {
226-
// The memcpy call is optimized away because the size is known.
227-
let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
228-
ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN);
229-
}
230-
231-
self.nbuf = nbuf.debug_strict_add(LEN);
232-
233-
return;
234-
}
235-
236-
unsafe { self.short_write_process_buffer(bytes) }
237-
}
238-
239218
// A specialized write function for values with size <= 8 that should only
240219
// be called when the write would cause the buffer to fill.
241220
//
@@ -378,13 +357,8 @@ impl SipHasher128 {
378357
}
379358
}
380359

381-
#[inline(always)]
382-
pub fn finish128(mut self) -> [u64; 2] {
383-
SipHasher128::finish128_inner(self.nbuf, &mut self.buf, self.state, self.processed)
384-
}
385-
386360
#[inline]
387-
fn finish128_inner(
361+
unsafe fn finish128_inner(
388362
nbuf: usize,
389363
buf: &mut [MaybeUninit<u64>; BUFFER_WITH_SPILL_CAPACITY],
390364
mut state: State,
@@ -437,6 +411,45 @@ impl SipHasher128 {
437411
}
438412
}
439413

414+
impl Default for SipHasher128 {
415+
fn default() -> SipHasher128 {
416+
SipHasher128::new_with_keys(0, 0)
417+
}
418+
}
419+
420+
impl ExtendedHasher for SipHasher128 {
421+
type Hash = [u64; 2];
422+
423+
#[inline]
424+
fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
425+
let nbuf = self.nbuf;
426+
debug_assert!(LEN <= 8);
427+
debug_assert!(nbuf < BUFFER_SIZE);
428+
debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE);
429+
430+
if nbuf.debug_strict_add(LEN) < BUFFER_SIZE {
431+
unsafe {
432+
// The memcpy call is optimized away because the size is known.
433+
let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
434+
ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN);
435+
}
436+
437+
self.nbuf = nbuf.debug_strict_add(LEN);
438+
439+
return;
440+
}
441+
442+
unsafe { self.short_write_process_buffer(bytes) }
443+
}
444+
445+
#[inline(always)]
446+
fn finish(mut self) -> [u64; 2] {
447+
unsafe {
448+
SipHasher128::finish128_inner(self.nbuf, &mut self.buf, self.state, self.processed)
449+
}
450+
}
451+
}
452+
440453
impl Hasher for SipHasher128 {
441454
#[inline]
442455
fn write_u8(&mut self, i: u8) {
@@ -504,7 +517,9 @@ impl Hasher for SipHasher128 {
504517

505518
fn finish(&self) -> u64 {
506519
let mut buf = self.buf.clone();
507-
let [a, b] = SipHasher128::finish128_inner(self.nbuf, &mut buf, self.state, self.processed);
520+
let [a, b] = unsafe {
521+
SipHasher128::finish128_inner(self.nbuf, &mut buf, self.state, self.processed)
522+
};
508523

509524
// Combining the two halves makes sure we get a good quality hash.
510525
a.wrapping_mul(3).wrapping_add(b).to_le()

src/sip128/tests.rs

+10-6
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ impl<'a> Hash for Bytes<'a> {
1616

1717
fn hash_with<T: Hash>(mut st: SipHasher128, x: &T) -> [u64; 2] {
1818
x.hash(&mut st);
19-
st.finish128()
19+
st.finish()
2020
}
2121

2222
fn hash<T: Hash>(x: &T) -> [u64; 2] {
@@ -253,8 +253,8 @@ fn test_short_write_works() {
253253
h2.write(&test_i128.to_ne_bytes());
254254
h2.write(&test_isize.to_ne_bytes());
255255

256-
let h1_hash = h1.finish128();
257-
let h2_hash = h2.finish128();
256+
let h1_hash = h1.finish();
257+
let h2_hash = h2.finish();
258258

259259
assert_eq!(h1_hash, h2_hash);
260260
}
@@ -279,8 +279,8 @@ macro_rules! test_fill_buffer {
279279
h2.write(s);
280280
h2.write(x_bytes);
281281

282-
let h1_hash = h1.finish128();
283-
let h2_hash = h2.finish128();
282+
let h1_hash = h1.finish();
283+
let h2_hash = h2.finish();
284284

285285
assert_eq!(h1_hash, h2_hash);
286286
}
@@ -306,10 +306,14 @@ fn test_fill_buffer() {
306306

307307
#[test]
308308
fn test_finish() {
309+
fn hash<H: Hasher>(h: &H) -> u64 {
310+
h.finish()
311+
}
312+
309313
let mut hasher = SipHasher128::new_with_keys(0, 0);
310314

311315
hasher.write_isize(0xF0);
312316
hasher.write_isize(0xF0010);
313317

314-
assert_eq!(hasher.finish(), hasher.finish());
318+
assert_eq!(hash(&hasher), hash(&hasher));
315319
}

src/stable_hasher.rs

+95-15
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,61 @@
11
//! Stable hasher adapted for cross-platform independent hash.
22
3-
use crate::sip128::SipHasher128;
4-
53
use std::fmt;
64
use std::hash::Hasher;
75

86
#[cfg(test)]
97
mod tests;
108

9+
/// Extended [`Hasher`] trait for use with [`StableHasher`].
10+
///
11+
/// It permits returning an arbitrary type as the [`Self::Hash`] type
12+
/// contrary to the [`Hasher`] trait which can only return `u64`. This
13+
/// is useful when the hasher uses a different representation.
14+
///
15+
/// # Example
16+
///
17+
/// ```
18+
/// use std::hash::Hasher;
19+
/// use rustc_stable_hash::ExtendedHasher;
20+
///
21+
/// struct DumbHasher(u128);
22+
///
23+
/// impl Hasher for DumbHasher {
24+
/// fn write(&mut self, a: &[u8]) {
25+
/// # self.0 = a.iter().fold(0u128, |acc, a| acc + (*a as u128)) + self.0;
26+
/// // ...
27+
/// }
28+
///
29+
/// fn finish(&self) -> u64 {
30+
/// self.0 as u64 // really dumb
31+
/// }
32+
/// }
33+
///
34+
/// impl ExtendedHasher for DumbHasher {
35+
/// type Hash = u128;
36+
///
37+
/// fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
38+
/// self.write(&bytes)
39+
/// }
40+
///
41+
/// fn finish(self) -> Self::Hash {
42+
/// self.0
43+
/// }
44+
/// }
45+
/// ```
46+
pub trait ExtendedHasher: Hasher {
47+
/// Type returned by the hasher.
48+
type Hash;
49+
50+
/// Optimized version of [`Hasher::write`] for small, fixed-size writes.
51+
fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
52+
self.write(&bytes);
53+
}
54+
55+
/// Finalizes the hasher, consuming it and returning the [`Self::Hash`] value.
56+
fn finish(self) -> Self::Hash;
57+
}
58+
1159
/// A Stable Hasher adapted for cross-platform independent hash.
1260
///
1361
/// When hashing something that ends up affecting properties like symbol names,
@@ -21,24 +69,26 @@ mod tests;
2169
/// # Example
2270
///
2371
/// ```
24-
/// use rustc_stable_hash::{StableHasher, StableHasherResult};
72+
/// use rustc_stable_hash::{StableHasher, StableHasherResult, StableSipHasher128};
2573
/// use std::hash::Hasher;
2674
///
2775
/// struct Hash128([u64; 2]);
2876
/// impl StableHasherResult for Hash128 {
77+
/// type Hash = [u64; 2];
78+
///
2979
/// fn finish(hash: [u64; 2]) -> Hash128 {
3080
/// Hash128(hash)
3181
/// }
3282
/// }
3383
///
34-
/// let mut hasher = StableHasher::new();
84+
/// let mut hasher = StableSipHasher128::new();
3585
/// hasher.write_usize(0xFA);
3686
///
3787
/// let hash: Hash128 = hasher.finish();
3888
/// ```
3989
#[must_use]
40-
pub struct StableHasher {
41-
state: SipHasher128,
90+
pub struct StableHasher<H: ExtendedHasher> {
91+
state: H,
4292
}
4393

4494
/// Trait for retrieving the result of the stable hashing operation.
@@ -51,6 +101,8 @@ pub struct StableHasher {
51101
/// struct Hash128(u128);
52102
///
53103
/// impl StableHasherResult for Hash128 {
104+
/// type Hash = [u64; 2];
105+
///
54106
/// fn finish(hash: [u64; 2]) -> Hash128 {
55107
/// let upper: u128 = hash[0] as u128;
56108
/// let lower: u128 = hash[1] as u128;
@@ -60,22 +112,50 @@ pub struct StableHasher {
60112
/// }
61113
/// ```
62114
pub trait StableHasherResult: Sized {
115+
type Hash;
116+
63117
/// Retrieves the finalized state of the [`StableHasher`] and constructs
64118
/// a [`Self`] containing the hash.
65-
fn finish(hasher: [u64; 2]) -> Self;
119+
fn finish(hash: Self::Hash) -> Self;
66120
}
67121

68-
impl StableHasher {
122+
impl<H: ExtendedHasher + Default> StableHasher<H> {
69123
/// Creates a new [`StableHasher`].
70124
///
71125
/// To be used with the [`Hasher`] implementation and [`StableHasher::finish`].
72126
#[inline]
73127
#[must_use]
74128
pub fn new() -> Self {
129+
Default::default()
130+
}
131+
}
132+
133+
impl<H: ExtendedHasher + Default> Default for StableHasher<H> {
134+
/// Creates a new [`StableHasher`].
135+
///
136+
/// To be used with the [`Hasher`] implementation and [`StableHasher::finish`].
137+
#[inline]
138+
#[must_use]
139+
fn default() -> Self {
75140
StableHasher {
76-
state: SipHasher128::new_with_keys(0, 0),
141+
state: Default::default(),
77142
}
78143
}
144+
}
145+
146+
impl<H: ExtendedHasher> StableHasher<H> {
147+
/// Creates a new [`StableHasher`] from an already created [`ExtendedHasher`].
148+
///
149+
/// Useful when wanting to initialize a hasher with different parameters/keys.
150+
///
151+
/// **Important**: Any use of the hasher before being given to a [`StableHasher`]
152+
/// is not covered by this crate's guarantees and will make the resulting hash
153+
/// NOT cross-platform independent.
154+
#[inline]
155+
#[must_use]
156+
pub fn with_hasher(state: H) -> Self {
157+
StableHasher { state }
158+
}
79159

80160
/// Returns the typed-hash value for the values written.
81161
///
@@ -85,23 +165,23 @@ impl StableHasher {
85165
/// To be used in-place of [`Hasher::finish`].
86166
#[inline]
87167
#[must_use]
88-
pub fn finish<W: StableHasherResult>(self) -> W {
89-
W::finish(self.state.finish128())
168+
pub fn finish<W: StableHasherResult<Hash = H::Hash>>(self) -> W {
169+
W::finish(self.state.finish())
90170
}
91171
}
92172

93-
impl fmt::Debug for StableHasher {
173+
impl<H: ExtendedHasher + fmt::Debug> fmt::Debug for StableHasher<H> {
94174
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
95175
write!(f, "{:?}", self.state)
96176
}
97177
}
98178

99-
impl Hasher for StableHasher {
179+
impl<H: ExtendedHasher> Hasher for StableHasher<H> {
100180
/// Returns a combined hash.
101181
///
102182
/// For greater precision, use [`StableHasher::finish`] instead.
103183
fn finish(&self) -> u64 {
104-
self.state.finish()
184+
Hasher::finish(&self.state)
105185
}
106186

107187
#[inline]
@@ -192,7 +272,7 @@ impl Hasher for StableHasher {
192272
// Cold path
193273
#[cold]
194274
#[inline(never)]
195-
fn hash_value(state: &mut SipHasher128, value: u64) {
275+
fn hash_value<H: ExtendedHasher>(state: &mut H, value: u64) {
196276
state.write_u8(0xFF);
197277
state.short_write(value.to_le_bytes());
198278
}

0 commit comments

Comments
 (0)