From e1ffc7dc100db578cee4c45c061a4fff9268328a Mon Sep 17 00:00:00 2001 From: jamaltas Date: Mon, 12 Aug 2024 02:26:04 -0400 Subject: [PATCH 1/5] Update binomial.rs Align BINV algorithm with NUMPY to relax i32::MAX condition to enter BINV algorithm. --- rand_distr/src/binomial.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/rand_distr/src/binomial.rs b/rand_distr/src/binomial.rs index fa061b0333..2ad8f45f1e 100644 --- a/rand_distr/src/binomial.rs +++ b/rand_distr/src/binomial.rs @@ -16,6 +16,8 @@ use core::fmt; use num_traits::Float; use rand::Rng; +use std::println; + /// The [binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution) `Binomial(n, p)`. /// /// The binomial distribution is a discrete probability distribution @@ -90,7 +92,8 @@ impl Binomial { /// Convert a `f64` to an `i64`, panicking on overflow. fn f64_to_i64(x: f64) -> i64 { assert!(x < (i64::MAX as f64)); - x as i64 + x.floor() as i64 + //x as i64 } impl Distribution for Binomial { @@ -129,13 +132,11 @@ impl Distribution for Binomial { // When n*p < 10, so is n*p*q which is the variance, so a result > 110 would be 100 / sqrt(10) = 31 standard deviations away. const BINV_MAX_X: u64 = 110; - if (self.n as f64) * p < BINV_THRESHOLD && self.n <= (i32::MAX as u64) { + if (self.n as f64) * p < BINV_THRESHOLD { // Use the BINV algorithm. 
- let s = p / q; - let a = ((self.n + 1) as f64) * s; result = 'outer: loop { - let mut r = q.powi(self.n as i32); + let mut r = (q.ln() * (self.n as f64)).exp(); let mut u: f64 = rng.random(); let mut x = 0; @@ -145,7 +146,7 @@ impl Distribution for Binomial { if x > BINV_MAX_X { continue 'outer; } - r *= a / (x as f64) - s; + r = (((self.n - x + 1) as f64) * p * r) / (x as f64 * q); } break x; } From bbeffe9314c76b3ab44813e9080b7eddf5fa2896 Mon Sep 17 00:00:00 2001 From: jamaltas Date: Mon, 12 Aug 2024 02:28:03 -0400 Subject: [PATCH 2/5] Update binomial.rs --- rand_distr/src/binomial.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/rand_distr/src/binomial.rs b/rand_distr/src/binomial.rs index 2ad8f45f1e..88eb42c7bb 100644 --- a/rand_distr/src/binomial.rs +++ b/rand_distr/src/binomial.rs @@ -16,8 +16,6 @@ use core::fmt; use num_traits::Float; use rand::Rng; -use std::println; - /// The [binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution) `Binomial(n, p)`. /// /// The binomial distribution is a discrete probability distribution @@ -92,8 +90,7 @@ impl Binomial { /// Convert a `f64` to an `i64`, panicking on overflow. fn f64_to_i64(x: f64) -> i64 { assert!(x < (i64::MAX as f64)); - x.floor() as i64 - //x as i64 + x as i64 } impl Distribution for Binomial { From 4b9c75dc9c7c09bfaa8d185cc5110423e95223ca Mon Sep 17 00:00:00 2001 From: jamaltas Date: Mon, 12 Aug 2024 02:42:40 -0400 Subject: [PATCH 3/5] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 205cf4c14b..783b00579a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ You may also find the [Upgrade Guide](https://rust-random.github.io/book/update. 
- Fix portability of `rand::distributions::Slice` (#1469) - Rename `rand::distributions` to `rand::distr` (#1470) - The `serde1` feature has been renamed `serde` (#1477) +- Refactor the inverse `Binomial` algorithm to permit values of `n > i32::MAX`. ## [0.9.0-alpha.1] - 2024-03-18 - Add the `Slice::num_choices` method to the Slice distribution (#1402) From 79cc79d48baa6df743a6aec5ef4516bf66642033 Mon Sep 17 00:00:00 2001 From: jamaltas Date: Tue, 13 Aug 2024 02:42:12 -0400 Subject: [PATCH 4/5] Update binomial.rs Added n = 1 case. Added np < 1e-10 case. Optimized all BINV branches. Significant performance gains across most of the parameter space. No performance losses for any benchmarks. --- rand_distr/src/binomial.rs | 60 +++++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/rand_distr/src/binomial.rs b/rand_distr/src/binomial.rs index 88eb42c7bb..dc3e1d68bc 100644 --- a/rand_distr/src/binomial.rs +++ b/rand_distr/src/binomial.rs @@ -110,6 +110,8 @@ impl Distribution for Binomial { let result; let q = 1. - p; + let np = (self.n as f64) * p; + // For small n * min(p, 1 - p), the BINV algorithm based on the inverse // transformation of the binomial distribution is efficient. Otherwise, @@ -123,17 +125,67 @@ impl Distribution for Binomial { // Ranlib uses 30, and GSL uses 14. const BINV_THRESHOLD: f64 = 10.; + // This threshold is when powi outperforms the .exp() .ln() method. + // However it's constrained by i32::MAX from powi and performs worse above this threshold. + // This value can likely be more finely optimized, but should be done across multiple hardware and in a more controlled setting. + // It's also such an edge case that very few people are likely to benefit from it. + const SMALL_NP_THRESHOLD: f64 = 1e-10; + // Same value as in GSL. // It is possible for BINV to get stuck, so we break if x > BINV_MAX_X and try again. // It would be safer to set BINV_MAX_X to self.n, but it is extremely unlikely to be relevant. 
// When n*p < 10, so is n*p*q which is the variance, so a result > 110 would be 100 / sqrt(10) = 31 standard deviations away. - const BINV_MAX_X: u64 = 110; + const BINV_MAX_X: u64 = 110; + + let mut r: f64; + if self.n == 1 { + // Use the BINV algorithm for special case n = 1 (simplify r calculations). + let s: f64 = p/q; + + result = 'outer: loop { + r = q; + let mut u: f64 = rng.random(); + let mut x = 0; + + while u > r { + u -= r; + x += 1; + if x > BINV_MAX_X { + continue 'outer; + } + r *= (((2 - x) as f64) * s) / (x as f64); + } + break x; + } + } + else if np < SMALL_NP_THRESHOLD && self.n <= (i32::MAX as u64) { + // For very small n*p the powi is superior. + // Use the BINV algorithm. + let s: f64 = p/q; + + result = 'outer: loop { + r = q.powi(self.n as i32); + let mut u: f64 = rng.random(); + let mut x = 0; - if (self.n as f64) * p < BINV_THRESHOLD { + while u > r { + u -= r; + x += 1; + if x > BINV_MAX_X { + continue 'outer; + } + r *= (((self.n - x + 1) as f64) * s) / (x as f64); + } + break x; + } + } + else if np < BINV_THRESHOLD { + // For everything else r = (q.ln() * (self.n as f64)).exp() is superior. // Use the BINV algorithm. 
+ let s: f64 = p/q; result = 'outer: loop { - let mut r = (q.ln() * (self.n as f64)).exp(); + r = (q.ln() * (self.n as f64)).exp(); let mut u: f64 = rng.random(); let mut x = 0; @@ -143,7 +195,7 @@ impl Distribution for Binomial { if x > BINV_MAX_X { continue 'outer; } - r = (((self.n - x + 1) as f64) * p * r) / (x as f64 * q); + r *= (((self.n - x + 1) as f64) * s) / (x as f64); } break x; } From 683e3f038fe2b8f446509813af61663593bf5c64 Mon Sep 17 00:00:00 2001 From: jamaltas Date: Tue, 13 Aug 2024 14:19:43 -0400 Subject: [PATCH 5/5] Update binomial.rs rustfmt loose ends --- rand_distr/src/binomial.rs | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/rand_distr/src/binomial.rs b/rand_distr/src/binomial.rs index 438fb86287..58575398c4 100644 --- a/rand_distr/src/binomial.rs +++ b/rand_distr/src/binomial.rs @@ -125,7 +125,6 @@ impl Distribution for Binomial { let q = 1. - p; let np = (self.n as f64) * p; - // For small n * min(p, 1 - p), the BINV algorithm based on the inverse // transformation of the binomial distribution is efficient. Otherwise, // the BTPE algorithm is used. @@ -148,12 +147,12 @@ impl Distribution for Binomial { // It is possible for BINV to get stuck, so we break if x > BINV_MAX_X and try again. // It would be safer to set BINV_MAX_X to self.n, but it is extremely unlikely to be relevant. // When n*p < 10, so is n*p*q which is the variance, so a result > 110 would be 100 / sqrt(10) = 31 standard deviations away. - const BINV_MAX_X: u64 = 110; - + const BINV_MAX_X: u64 = 110; + let mut r: f64; if self.n == 1 { // Use the BINV algorithm for special case n = 1 (simplify r calculations). - let s: f64 = p/q; + let s: f64 = p / q; result = 'outer: loop { r = q; @@ -170,11 +169,10 @@ impl Distribution for Binomial { } break x; } - } - else if np < SMALL_NP_THRESHOLD && self.n <= (i32::MAX as u64) { + } else if np < SMALL_NP_THRESHOLD && self.n <= (i32::MAX as u64) { // For very small n*p the powi is superior. 
// Use the BINV algorithm. - let s: f64 = p/q; + let s: f64 = p / q; result = 'outer: loop { r = q.powi(self.n as i32); @@ -191,11 +189,10 @@ impl Distribution for Binomial { } break x; } - } - else if np < BINV_THRESHOLD { + } else if np < BINV_THRESHOLD { // For everything else r = (q.ln() * (self.n as f64)).exp() is superior. // Use the BINV algorithm. - let s: f64 = p/q; + let s: f64 = p / q; result = 'outer: loop { r = (q.ln() * (self.n as f64)).exp();