Skip to content

Commit

Permalink
Improve SmallRng initialization performance
Browse files Browse the repository at this point in the history
  • Loading branch information
arthurprs committed Sep 11, 2024
1 parent ef052ec commit 5410c5d
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 16 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ You may also find the [Upgrade Guide](https://rust-random.github.io/book/update.
- Add `UniformUsize` and use to make `Uniform` for `usize` portable (#1487)
- Remove support for generating `isize` and `usize` values with `Standard`, `Uniform` and `Fill` and usage as a `WeightedAliasIndex` weight (#1487)
- Require `Clone` and `AsRef` bound for `SeedableRng::Seed`. (#1491)
- Improve SmallRng initialization performance

## [0.9.0-alpha.1] - 2024-03-18
- Add the `Slice::num_choices` method to the Slice distribution (#1402)
Expand Down
58 changes: 57 additions & 1 deletion benches/benches/generators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use rand_pcg::{Pcg32, Pcg64, Pcg64Dxsm, Pcg64Mcg};
criterion_group!(
name = benches;
config = Criterion::default();
targets = gen_bytes, gen_u32, gen_u64, init_gen, reseeding_bytes
targets = gen_bytes, gen_u32, gen_u64, init_gen, init_from_u64, init_from_seed, reseeding_bytes
);
criterion_main!(benches);

Expand Down Expand Up @@ -133,6 +133,62 @@ pub fn init_gen(c: &mut Criterion) {
bench::<ChaCha12Rng>(&mut g, "chacha12");
bench::<ChaCha20Rng>(&mut g, "chacha20");
bench::<StdRng>(&mut g, "std");
bench::<SmallRng>(&mut g, "small");

g.finish()
}

/// Benchmarks `SeedableRng::seed_from_u64` for each generator listed below.
///
/// Every generator gets its own entry in the `init_from_u64` benchmark group.
pub fn init_from_u64(c: &mut Criterion) {
    /// Registers one benchmark that repeatedly builds an `R` from a fixed `u64`.
    fn run<R>(group: &mut BenchmarkGroup<WallTime>, label: &str)
    where
        R: SeedableRng,
    {
        group.bench_function(label, |bencher| {
            // Draw the seed once, outside the timed closure, so only
            // `seed_from_u64` itself is measured.
            let mut seeder = Pcg32::from_os_rng();
            let seed: u64 = seeder.random();
            bencher.iter(|| R::seed_from_u64(black_box(seed)));
        });
    }

    let mut group = c.benchmark_group("init_from_u64");
    group
        .warm_up_time(Duration::from_millis(500))
        .measurement_time(Duration::from_millis(1000));

    run::<Pcg32>(&mut group, "pcg32");
    run::<Pcg64>(&mut group, "pcg64");
    run::<Pcg64Mcg>(&mut group, "pcg64mcg");
    run::<Pcg64Dxsm>(&mut group, "pcg64dxsm");
    run::<ChaCha8Rng>(&mut group, "chacha8");
    run::<ChaCha12Rng>(&mut group, "chacha12");
    run::<ChaCha20Rng>(&mut group, "chacha20");
    run::<StdRng>(&mut group, "std");
    run::<SmallRng>(&mut group, "small");

    group.finish();
}

/// Benchmarks `SeedableRng::from_seed` for each generator listed below.
///
/// Every generator gets its own entry in the `init_from_seed` benchmark group.
pub fn init_from_seed(c: &mut Criterion) {
    /// Registers one benchmark that repeatedly builds an `R` from a fixed seed.
    ///
    /// The `Standard` bound is needed so a random `R::Seed` can be drawn.
    fn run<R>(group: &mut BenchmarkGroup<WallTime>, label: &str)
    where
        R: SeedableRng,
        rand::distr::Standard: Distribution<R::Seed>,
    {
        group.bench_function(label, |bencher| {
            // Draw the seed once, outside the timed closure; each iteration
            // clones it because `from_seed` takes the seed by value.
            let mut seeder = Pcg32::from_os_rng();
            let seed: R::Seed = seeder.random();
            bencher.iter(|| R::from_seed(black_box(seed.clone())));
        });
    }

    let mut group = c.benchmark_group("init_from_seed");
    group
        .warm_up_time(Duration::from_millis(500))
        .measurement_time(Duration::from_millis(1000));

    run::<Pcg32>(&mut group, "pcg32");
    run::<Pcg64>(&mut group, "pcg64");
    run::<Pcg64Mcg>(&mut group, "pcg64mcg");
    run::<Pcg64Dxsm>(&mut group, "pcg64dxsm");
    run::<ChaCha8Rng>(&mut group, "chacha8");
    run::<ChaCha12Rng>(&mut group, "chacha12");
    run::<ChaCha20Rng>(&mut group, "chacha20");
    run::<StdRng>(&mut group, "std");
    run::<SmallRng>(&mut group, "small");

    group.finish();
}
Expand Down
3 changes: 2 additions & 1 deletion src/rngs/small.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ impl SeedableRng for SmallRng {

#[inline(always)]
fn from_seed(seed: Self::Seed) -> Self {
// With MSRV >= 1.77: let seed = *seed.first_chunk().unwrap();
// This is for compatibility with 32-bit platforms where Rng::Seed has a different seed size
// With MSRV >= 1.77: let seed = *seed.first_chunk().unwrap()
const LEN: usize = core::mem::size_of::<<Rng as SeedableRng>::Seed>();
let seed = (&seed[..LEN]).try_into().unwrap();
SmallRng(Rng::from_seed(seed))
Expand Down
21 changes: 14 additions & 7 deletions src/rngs/xoshiro128plusplus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,29 +33,36 @@ impl SeedableRng for Xoshiro128PlusPlus {
/// mapped to a different seed.
#[inline]
fn from_seed(seed: [u8; 16]) -> Xoshiro128PlusPlus {
    // Fill the four state words from the 16 seed bytes.
    let mut state = [0; 4];
    read_u32_into(&seed, &mut state);
    // Check for zero on aligned integers for better code generation.
    // Furthermore, seed_from_u64(0) will expand to a constant when optimized.
    // A single word-wise check suffices; a separate byte-wise scan of `seed`
    // beforehand would only repeat the same test.
    if state.iter().all(|&x| x == 0) {
        // An all-zero state is replaced by the one derived from a `u64` seed of 0.
        return Self::seed_from_u64(0);
    }
    Xoshiro128PlusPlus { s: state }
}

/// Create a new `Xoshiro128PlusPlus` from a `u64` seed.
///
/// This uses the SplitMix64 generator internally.
#[inline]
fn seed_from_u64(mut state: u64) -> Self {
const PHI: u64 = 0x9e3779b97f4a7c15;
let mut seed = Self::Seed::default();
for chunk in seed.as_mut().chunks_mut(8) {
let mut s = [0; 4];
for i in s.chunks_exact_mut(2) {
state = state.wrapping_add(PHI);
let mut z = state;
z = (z ^ (z >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
z = (z ^ (z >> 27)).wrapping_mul(0x94d049bb133111eb);
z = z ^ (z >> 31);
chunk.copy_from_slice(&z.to_le_bytes());
i[0] = z.to_le() as u32;
i[1] = (z.to_le() >> 32) as u32;
}
Self::from_seed(seed)
// By using a non-zero PHI we are guaranteed to generate a non-zero state
// Thus preventing a recursion between from_seed and seed_from_u64.
debug_assert_ne!(s, [0; 4]);
Xoshiro128PlusPlus { s }
}
}

Expand Down
20 changes: 13 additions & 7 deletions src/rngs/xoshiro256plusplus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,29 +33,35 @@ impl SeedableRng for Xoshiro256PlusPlus {
/// mapped to a different seed.
#[inline]
fn from_seed(seed: [u8; 32]) -> Xoshiro256PlusPlus {
    // Fill the four state words from the 32 seed bytes.
    let mut state = [0; 4];
    read_u64_into(&seed, &mut state);
    // Check for zero on aligned integers for better code generation.
    // Furthermore, seed_from_u64(0) will expand to a constant when optimized.
    // A single word-wise check suffices; a separate byte-wise scan of `seed`
    // beforehand would only repeat the same test.
    if state.iter().all(|&x| x == 0) {
        // An all-zero state is replaced by the one derived from a `u64` seed of 0.
        return Self::seed_from_u64(0);
    }
    Xoshiro256PlusPlus { s: state }
}

/// Create a new `Xoshiro256PlusPlus` from a `u64` seed.
///
/// This uses the SplitMix64 generator internally.
#[inline]
fn seed_from_u64(mut state: u64) -> Self {
const PHI: u64 = 0x9e3779b97f4a7c15;
let mut seed = Self::Seed::default();
for chunk in seed.as_mut().chunks_mut(8) {
let mut s = [0; 4];
for i in s.iter_mut() {
state = state.wrapping_add(PHI);
let mut z = state;
z = (z ^ (z >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
z = (z ^ (z >> 27)).wrapping_mul(0x94d049bb133111eb);
z = z ^ (z >> 31);
chunk.copy_from_slice(&z.to_le_bytes());
*i = z.to_le();
}
Self::from_seed(seed)
// By using a non-zero PHI we are guaranteed to generate a non-zero state
// Thus preventing a recursion between from_seed and seed_from_u64.
debug_assert_ne!(s, [0; 4]);
Xoshiro256PlusPlus { s }
}
}

Expand Down

0 comments on commit 5410c5d

Please sign in to comment.