Skip to content

Commit 38bfa7a

Browse files
committed
Support multiple hasher
1 parent c2b3deb commit 38bfa7a

File tree

6 files changed

+149
-55
lines changed

6 files changed

+149
-55
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# Unreleased
22

3+
- Introduce multi hasher support (#8)
34
- `StableHasher::finish` now returns a small hash instead of being fatal (#6)
45
- Remove `StableHasher::finalize` (#4)
56
- Import stable hasher implementation from rustc ([db8aca48129](https://github.com/rust-lang/rust/blob/db8aca48129d86b2623e3ac8cbcf2902d4d313ad/compiler/rustc_data_structures/src/))

src/lib.rs

+8
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,11 @@ pub use stable_hasher::StableHasher;
1111

1212
#[doc(inline)]
1313
pub use stable_hasher::StableHasherResult;
14+
15+
#[doc(inline)]
16+
pub use stable_hasher::ExtendedHasher;
17+
18+
pub use sip128::SipHasher128; // TODO: Should SipHasher128 be exposed?
19+
20+
/// A [`StableHasher`] backed by [`SipHasher128`].
21+
pub type StableSipHasher128 = StableHasher<SipHasher128>;

src/sip128.rs

+44-29
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance.
44
// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727
55
use crate::int_overflow::{DebugStrictAdd, DebugStrictSub};
6+
use crate::ExtendedHasher;
67

78
use std::hash::Hasher;
89
use std::mem::{self, MaybeUninit};
@@ -214,28 +215,6 @@ impl SipHasher128 {
214215
hasher
215216
}
216217

217-
#[inline]
218-
pub fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
219-
let nbuf = self.nbuf;
220-
debug_assert!(LEN <= 8);
221-
debug_assert!(nbuf < BUFFER_SIZE);
222-
debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE);
223-
224-
if nbuf.debug_strict_add(LEN) < BUFFER_SIZE {
225-
unsafe {
226-
// The memcpy call is optimized away because the size is known.
227-
let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
228-
ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN);
229-
}
230-
231-
self.nbuf = nbuf.debug_strict_add(LEN);
232-
233-
return;
234-
}
235-
236-
unsafe { self.short_write_process_buffer(bytes) }
237-
}
238-
239218
// A specialized write function for values with size <= 8 that should only
240219
// be called when the write would cause the buffer to fill.
241220
//
@@ -378,13 +357,8 @@ impl SipHasher128 {
378357
}
379358
}
380359

381-
#[inline(always)]
382-
pub fn finish128(mut self) -> [u64; 2] {
383-
SipHasher128::finish128_inner(self.nbuf, &mut self.buf, self.state, self.processed)
384-
}
385-
386360
#[inline]
387-
fn finish128_inner(
361+
unsafe fn finish128_inner(
388362
nbuf: usize,
389363
buf: &mut [MaybeUninit<u64>; BUFFER_WITH_SPILL_CAPACITY],
390364
mut state: State,
@@ -437,6 +411,45 @@ impl SipHasher128 {
437411
}
438412
}
439413

414+
impl Default for SipHasher128 {
415+
fn default() -> SipHasher128 {
416+
SipHasher128::new_with_keys(0, 0)
417+
}
418+
}
419+
420+
impl ExtendedHasher for SipHasher128 {
421+
type Hash = [u64; 2];
422+
423+
#[inline]
424+
fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
425+
let nbuf = self.nbuf;
426+
debug_assert!(LEN <= 8);
427+
debug_assert!(nbuf < BUFFER_SIZE);
428+
debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE);
429+
430+
if nbuf.debug_strict_add(LEN) < BUFFER_SIZE {
431+
unsafe {
432+
// The memcpy call is optimized away because the size is known.
433+
let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
434+
ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN);
435+
}
436+
437+
self.nbuf = nbuf.debug_strict_add(LEN);
438+
439+
return;
440+
}
441+
442+
unsafe { self.short_write_process_buffer(bytes) }
443+
}
444+
445+
#[inline(always)]
446+
fn finish(mut self) -> [u64; 2] {
447+
unsafe {
448+
SipHasher128::finish128_inner(self.nbuf, &mut self.buf, self.state, self.processed)
449+
}
450+
}
451+
}
452+
440453
impl Hasher for SipHasher128 {
441454
#[inline]
442455
fn write_u8(&mut self, i: u8) {
@@ -504,7 +517,9 @@ impl Hasher for SipHasher128 {
504517

505518
fn finish(&self) -> u64 {
506519
let mut buf = self.buf.clone();
507-
let [a, b] = SipHasher128::finish128_inner(self.nbuf, &mut buf, self.state, self.processed);
520+
let [a, b] = unsafe {
521+
SipHasher128::finish128_inner(self.nbuf, &mut buf, self.state, self.processed)
522+
};
508523

509524
// Combining the two halves makes sure we get a good quality hash.
510525
a.wrapping_mul(3).wrapping_add(b).to_le()

src/sip128/tests.rs

+10-6
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ impl<'a> Hash for Bytes<'a> {
1616

1717
fn hash_with<T: Hash>(mut st: SipHasher128, x: &T) -> [u64; 2] {
1818
x.hash(&mut st);
19-
st.finish128()
19+
st.finish()
2020
}
2121

2222
fn hash<T: Hash>(x: &T) -> [u64; 2] {
@@ -253,8 +253,8 @@ fn test_short_write_works() {
253253
h2.write(&test_i128.to_ne_bytes());
254254
h2.write(&test_isize.to_ne_bytes());
255255

256-
let h1_hash = h1.finish128();
257-
let h2_hash = h2.finish128();
256+
let h1_hash = h1.finish();
257+
let h2_hash = h2.finish();
258258

259259
assert_eq!(h1_hash, h2_hash);
260260
}
@@ -279,8 +279,8 @@ macro_rules! test_fill_buffer {
279279
h2.write(s);
280280
h2.write(x_bytes);
281281

282-
let h1_hash = h1.finish128();
283-
let h2_hash = h2.finish128();
282+
let h1_hash = h1.finish();
283+
let h2_hash = h2.finish();
284284

285285
assert_eq!(h1_hash, h2_hash);
286286
}
@@ -306,10 +306,14 @@ fn test_fill_buffer() {
306306

307307
#[test]
308308
fn test_finish() {
309+
fn hash<H: Hasher>(h: &H) -> u64 {
310+
h.finish()
311+
}
312+
309313
let mut hasher = SipHasher128::new_with_keys(0, 0);
310314

311315
hasher.write_isize(0xF0);
312316
hasher.write_isize(0xF0010);
313317

314-
assert_eq!(hasher.finish(), hasher.finish());
318+
assert_eq!(hash(&hasher), hash(&hasher));
315319
}

src/stable_hasher.rs

+78-15
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,59 @@
11
//! Stable hasher adapted for cross-platform independent hash.
22
3-
use crate::sip128::SipHasher128;
4-
53
use std::fmt;
64
use std::hash::Hasher;
75

86
#[cfg(test)]
97
mod tests;
108

9+
/// Extension of the [`Hasher`] trait for use with [`StableHasher`].
10+
///
11+
/// It permits returning an arbitrary type as the [`Self::Hash`] type
12+
/// contrary to the [`Hasher`] trait which can only return `u64`. This
13+
/// is useful when the hasher uses a different representation.
14+
///
15+
/// # Example
16+
///
17+
/// ```
18+
/// use std::hash::Hasher;
19+
/// use rustc_stable_hash::ExtendedHasher;
20+
///
21+
/// struct DumbHasher(u128);
22+
///
23+
/// impl Hasher for DumbHasher {
24+
/// fn write(&mut self, a: &[u8]) {
25+
/// # self.0 = a.iter().fold(0u128, |acc, a| acc + (*a as u128)) + self.0;
26+
/// // ...
27+
/// }
28+
///
29+
/// fn finish(&self) -> u64 {
30+
/// self.0 as u64 // really dumb
31+
/// }
32+
/// }
33+
///
34+
/// impl ExtendedHasher for DumbHasher {
35+
/// type Hash = u128;
36+
///
37+
/// fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
38+
/// self.write(&bytes)
39+
/// }
40+
///
41+
/// fn finish(self) -> Self::Hash {
42+
/// self.0
43+
/// }
44+
/// }
45+
/// ```
46+
pub trait ExtendedHasher: Hasher {
47+
/// Type returned by the hasher.
48+
type Hash;
49+
50+
/// Optimized version of [`Hasher::write`] for small writes.
51+
fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]);
52+
53+
/// Finalization method of the hasher, returning the [`Self::Hash`].
54+
fn finish(self) -> Self::Hash;
55+
}
56+
1157
/// A Stable Hasher adapted for cross-platform independent hash.
1258
///
1359
/// When hashing something that ends up affecting properties like symbol names,
@@ -21,24 +67,26 @@ mod tests;
2167
/// # Example
2268
///
2369
/// ```
24-
/// use rustc_stable_hash::{StableHasher, StableHasherResult};
70+
/// use rustc_stable_hash::{StableHasher, StableHasherResult, StableSipHasher128};
2571
/// use std::hash::Hasher;
2672
///
2773
/// struct Hash128([u64; 2]);
2874
/// impl StableHasherResult for Hash128 {
75+
/// type Hash = [u64; 2];
76+
///
2977
/// fn finish(hash: [u64; 2]) -> Hash128 {
3078
/// Hash128(hash)
3179
/// }
3280
/// }
3381
///
34-
/// let mut hasher = StableHasher::new();
82+
/// let mut hasher = StableSipHasher128::new();
3583
/// hasher.write_usize(0xFA);
3684
///
3785
/// let hash: Hash128 = hasher.finish();
3886
/// ```
3987
#[must_use]
40-
pub struct StableHasher {
41-
state: SipHasher128,
88+
pub struct StableHasher<H: ExtendedHasher> {
89+
state: H,
4290
}
4391

4492
/// Trait for retrieving the result of the stable hashing operation.
@@ -51,6 +99,8 @@ pub struct StableHasher {
5199
/// struct Hash128(u128);
52100
///
53101
/// impl StableHasherResult for Hash128 {
102+
/// type Hash = [u64; 2];
103+
///
54104
/// fn finish(hash: [u64; 2]) -> Hash128 {
55105
/// let upper: u128 = hash[0] as u128;
56106
/// let lower: u128 = hash[1] as u128;
@@ -60,23 +110,36 @@ pub struct StableHasher {
60110
/// }
61111
/// ```
62112
pub trait StableHasherResult: Sized {
113+
type Hash;
114+
63115
/// Retrieves the finalized state of the [`StableHasher`] and constructs
64116
/// a [`Self`] containing the hash.
65-
fn finish(hasher: [u64; 2]) -> Self;
117+
fn finish(hash: Self::Hash) -> Self;
66118
}
67119

68-
impl StableHasher {
120+
impl<H: ExtendedHasher + Default> StableHasher<H> {
69121
/// Creates a new [`StableHasher`].
70122
///
71123
/// To be used with the [`Hasher`] implementation and [`StableHasher::finish`].
124+
// TODO: Should this exist as is? or maybe it should take an `ExtendedHasher` as input?
72125
#[inline]
73126
#[must_use]
74127
pub fn new() -> Self {
128+
Default::default()
129+
}
130+
}
131+
132+
impl<H: ExtendedHasher + Default> Default for StableHasher<H> {
133+
#[inline]
134+
#[must_use]
135+
fn default() -> Self {
75136
StableHasher {
76-
state: SipHasher128::new_with_keys(0, 0),
137+
state: Default::default(),
77138
}
78139
}
140+
}
79141

142+
impl<H: ExtendedHasher> StableHasher<H> {
80143
/// Returns the typed-hash value for the values written.
81144
///
82145
/// The resulting typed-hash value is constructed from an
@@ -85,23 +148,23 @@ impl StableHasher {
85148
/// To be used in-place of [`Hasher::finish`].
86149
#[inline]
87150
#[must_use]
88-
pub fn finish<W: StableHasherResult>(self) -> W {
89-
W::finish(self.state.finish128())
151+
pub fn finish<W: StableHasherResult<Hash = H::Hash>>(self) -> W {
152+
W::finish(self.state.finish())
90153
}
91154
}
92155

93-
impl fmt::Debug for StableHasher {
156+
impl<H: ExtendedHasher + fmt::Debug> fmt::Debug for StableHasher<H> {
94157
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
95158
write!(f, "{:?}", self.state)
96159
}
97160
}
98161

99-
impl Hasher for StableHasher {
162+
impl<H: ExtendedHasher> Hasher for StableHasher<H> {
100163
/// Returns a combined hash.
101164
///
102165
/// For greater precision use instead [`StableHasher::finish`].
103166
fn finish(&self) -> u64 {
104-
self.state.finish()
167+
Hasher::finish(&self.state)
105168
}
106169

107170
#[inline]
@@ -192,7 +255,7 @@ impl Hasher for StableHasher {
192255
// Cold path
193256
#[cold]
194257
#[inline(never)]
195-
fn hash_value(state: &mut SipHasher128, value: u64) {
258+
fn hash_value<H: ExtendedHasher>(state: &mut H, value: u64) {
196259
state.write_u8(0xFF);
197260
state.short_write(value.to_le_bytes());
198261
}

0 commit comments

Comments
 (0)