From 8c229a4e3efc1da0d2359f7a1d96e96104156b56 Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Mon, 5 Aug 2024 18:17:16 +0200
Subject: [PATCH 01/24] Make sure BTPE is not entered when np < 10

---
 rand_distr/src/binomial.rs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/rand_distr/src/binomial.rs b/rand_distr/src/binomial.rs
index fa061b0333..ad761f05ed 100644
--- a/rand_distr/src/binomial.rs
+++ b/rand_distr/src/binomial.rs
@@ -129,13 +129,13 @@ impl Distribution<u64> for Binomial {
         // When n*p < 10, so is n*p*q which is the variance, so a result > 110 would be 100 / sqrt(10) = 31 standard deviations away.
         const BINV_MAX_X: u64 = 110;
 
-        if (self.n as f64) * p < BINV_THRESHOLD && self.n <= (i32::MAX as u64) {
+        if (self.n as f64) * p < BINV_THRESHOLD {
             // Use the BINV algorithm.
             let s = p / q;
-            let a = ((self.n + 1) as f64) * s;
+            let a = (self.n as f64 + 1.0) * s;
 
             result = 'outer: loop {
-                let mut r = q.powi(self.n as i32);
+                let mut r = q.powf(self.n as f64);
                 let mut u: f64 = rng.random();
                 let mut x = 0;
 
@@ -358,6 +358,7 @@ mod test {
         test_binomial_mean_and_variance(40, 0.5, &mut rng);
         test_binomial_mean_and_variance(20, 0.7, &mut rng);
         test_binomial_mean_and_variance(20, 0.5, &mut rng);
+        test_binomial_mean_and_variance(1 << 61, 1e-17, &mut rng);
     }
 
     #[test]

From a1c52295c5611597b31758130f7e87b7a59bc9dd Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Mon, 5 Aug 2024 18:48:19 +0200
Subject: [PATCH 02/24] Use Poisson when BINV would fail because of q == 1.0

---
 rand_distr/src/binomial.rs | 39 ++++++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/rand_distr/src/binomial.rs b/rand_distr/src/binomial.rs
index ad761f05ed..2545b36360 100644
--- a/rand_distr/src/binomial.rs
+++ b/rand_distr/src/binomial.rs
@@ -9,7 +9,7 @@
 
 //! The binomial distribution `Binomial(n, p)`.
 
-use crate::{Distribution, Uniform};
+use crate::{Distribution, Poisson, Uniform};
 use core::cmp::Ordering;
 use core::fmt;
 #[allow(unused_imports)]
@@ -131,23 +131,29 @@ impl Distribution<u64> for Binomial {
 
         if (self.n as f64) * p < BINV_THRESHOLD {
             // Use the BINV algorithm.
-            let s = p / q;
-            let a = (self.n as f64 + 1.0) * s;
-
-            result = 'outer: loop {
-                let mut r = q.powf(self.n as f64);
-                let mut u: f64 = rng.random();
-                let mut x = 0;
-
-                while u > r {
-                    u -= r;
-                    x += 1;
-                    if x > BINV_MAX_X {
-                        continue 'outer;
+
+            if q == 1.0 {
+                // p was to small for BINV, we use the Poisson approximation which is very precise for this case
+                result = Poisson::new(self.n as f64 * p).unwrap().sample(rng) as u64;
+            } else {
+                let s = p / q;
+                let a = (self.n as f64 + 1.0) * s;
+
+                result = 'outer: loop {
+                    let mut r = q.powf(self.n as f64);
+                    let mut u: f64 = rng.random();
+                    let mut x = 0;
+
+                    while u > r {
+                        u -= r;
+                        x += 1;
+                        if x > BINV_MAX_X {
+                            continue 'outer;
+                        }
+                        r *= a / (x as f64) - s;
                     }
-                    r *= a / (x as f64) - s;
+                    break x;
                 }
-                break x;
             }
         } else {
             // Use the BTPE algorithm.
@@ -359,6 +365,7 @@ mod test {
         test_binomial_mean_and_variance(20, 0.7, &mut rng);
         test_binomial_mean_and_variance(20, 0.5, &mut rng);
         test_binomial_mean_and_variance(1 << 61, 1e-17, &mut rng);
+        test_binomial_mean_and_variance(u64::MAX, 1e-19, &mut rng);
     }
 
     #[test]

From 1928b8ca2f3e6cdeaf04ccde705e4ceade211e1a Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Thu, 15 Aug 2024 16:32:30 +0200
Subject: [PATCH 03/24] binomial enable precomputation of values

---
 rand_distr/src/binomial.rs | 455 ++++++++++++++++++++-----------------
 1 file changed, 248 insertions(+), 207 deletions(-)

diff --git a/rand_distr/src/binomial.rs b/rand_distr/src/binomial.rs
index 2545b36360..9132f1769e 100644
--- a/rand_distr/src/binomial.rs
+++ b/rand_distr/src/binomial.rs
@@ -46,10 +46,31 @@ use rand::Rng;
 #[derive(Clone, Copy, Debug, PartialEq)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub struct Binomial {
-    /// Number of trials.
+    inner: Inner,
+    flipped: bool,
+    n: u64,
+}
+
+#[derive(Clone, Copy, Debug, PartialEq)]
+enum Inner {
+    Binv(Binv),
+    Btpe(Btpe),
+    Poisson(Poisson<f64>),
+    Constant(u64),
+}
+
+#[derive(Clone, Copy, Debug, PartialEq)]
+struct Binv {
+    r: f64,
+    s: f64,
+    a: f64,
+}
+
+#[derive(Clone, Copy, Debug, PartialEq)]
+struct Btpe {
     n: u64,
-    /// Probability of success.
     p: f64,
+    // TODO precompute the other constants
 }
 
 /// Error type returned from [`Binomial::new`].
@@ -83,33 +104,26 @@ impl Binomial {
         if !(p <= 1.0) {
             return Err(Error::ProbabilityTooLarge);
         }
-        Ok(Binomial { n, p })
-    }
-}
-
-/// Convert a `f64` to an `i64`, panicking on overflow.
-fn f64_to_i64(x: f64) -> i64 {
-    assert!(x < (i64::MAX as f64));
-    x as i64
-}
 
-impl Distribution<u64> for Binomial {
-    #[allow(clippy::many_single_char_names)] // Same names as in the reference.
-    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u64 {
-        // Handle these values directly.
-        if self.p == 0.0 {
-            return 0;
-        } else if self.p == 1.0 {
-            return self.n;
+        if p == 0.0 {
+            return Ok(Binomial {
+                inner: Inner::Constant(0),
+                flipped: false,
+                n,
+            });
         }
 
-        // The binomial distribution is symmetrical with respect to p -> 1-p,
-        // k -> n-k switch p so that it is less than 0.5 - this allows for lower
-        // expected values we will just invert the result at the end
-        let p = if self.p <= 0.5 { self.p } else { 1.0 - self.p };
+        if p == 1.0 {
+            return Ok(Binomial {
+                inner: Inner::Constant(n),
+                flipped: false,
+                n,
+            });
+        }
 
-        let result;
-        let q = 1. - p;
+        // The binomial distribution is symmetrical with respect to p -> 1-p
+        let flipped = p > 0.5;
+        let p = if flipped { 1.0 - p } else { p };
 
         // For small n * min(p, 1 - p), the BINV algorithm based on the inverse
         // transformation of the binomial distribution is efficient. Otherwise,
@@ -123,207 +137,234 @@ impl Distribution<u64> for Binomial {
         // Ranlib uses 30, and GSL uses 14.
         const BINV_THRESHOLD: f64 = 10.;
 
-        // Same value as in GSL.
-        // It is possible for BINV to get stuck, so we break if x > BINV_MAX_X and try again.
-        // It would be safer to set BINV_MAX_X to self.n, but it is extremely unlikely to be relevant.
-        // When n*p < 10, so is n*p*q which is the variance, so a result > 110 would be 100 / sqrt(10) = 31 standard deviations away.
-        const BINV_MAX_X: u64 = 110;
-
-        if (self.n as f64) * p < BINV_THRESHOLD {
-            // Use the BINV algorithm.
-
+        let np = n as f64 * p;
+        let inner = if np < BINV_THRESHOLD {
+            let q = 1.0 - p;
             if q == 1.0 {
-                // p was to small for BINV, we use the Poisson approximation which is very precise for this case
-                result = Poisson::new(self.n as f64 * p).unwrap().sample(rng) as u64;
+                Inner::Poisson(Poisson::new(np).unwrap())
             } else {
                 let s = p / q;
-                let a = (self.n as f64 + 1.0) * s;
-
-                result = 'outer: loop {
-                    let mut r = q.powf(self.n as f64);
-                    let mut u: f64 = rng.random();
-                    let mut x = 0;
-
-                    while u > r {
-                        u -= r;
-                        x += 1;
-                        if x > BINV_MAX_X {
-                            continue 'outer;
-                        }
-                        r *= a / (x as f64) - s;
-                    }
-                    break x;
-                }
+                Inner::Binv(Binv {
+                    r: q.powf(n as f64),
+                    s,
+                    a: (n as f64 + 1.0) * s,
+                })
             }
         } else {
-            // Use the BTPE algorithm.
-
-            // Threshold for using the squeeze algorithm. This can be freely
-            // chosen based on performance. Ranlib and GSL use 20.
-            const SQUEEZE_THRESHOLD: i64 = 20;
-
-            // Step 0: Calculate constants as functions of `n` and `p`.
-            let n = self.n as f64;
-            let np = n * p;
-            let npq = np * q;
-            let f_m = np + p;
-            let m = f64_to_i64(f_m);
-            // radius of triangle region, since height=1 also area of region
-            let p1 = (2.195 * npq.sqrt() - 4.6 * q).floor() + 0.5;
-            // tip of triangle
-            let x_m = (m as f64) + 0.5;
-            // left edge of triangle
-            let x_l = x_m - p1;
-            // right edge of triangle
-            let x_r = x_m + p1;
-            let c = 0.134 + 20.5 / (15.3 + (m as f64));
-            // p1 + area of parallelogram region
-            let p2 = p1 * (1. + 2. * c);
-
-            fn lambda(a: f64) -> f64 {
-                a * (1. + 0.5 * a)
+            Inner::Btpe(Btpe { n, p })
+        };
+        Ok(Binomial { flipped, inner, n })
+    }
+}
+
+/// Convert a `f64` to an `i64`, panicking on overflow.
+fn f64_to_i64(x: f64) -> i64 {
+    assert!(x < (i64::MAX as f64));
+    x as i64
+}
+
+fn binv<R: Rng + ?Sized>(binv: Binv, rng: &mut R) -> u64 {
+    // Same value as in GSL.
+    // It is possible for BINV to get stuck, so we break if x > BINV_MAX_X and try again.
+    // It would be safer to set BINV_MAX_X to self.n, but it is extremely unlikely to be relevant.
+    // When n*p < 10, so is n*p*q which is the variance, so a result > 110 would be 100 / sqrt(10) = 31 standard deviations away.
+    const BINV_MAX_X: u64 = 110;
+
+    'outer: loop {
+        let mut r = binv.r;
+        let mut u: f64 = rng.random();
+        let mut x = 0;
+
+        while u > r {
+            u -= r;
+            x += 1;
+            if x > BINV_MAX_X {
+                continue 'outer;
             }
+            r *= binv.a / (x as f64) - binv.s;
+        }
+        break x;
+    }
+}
 
-            let lambda_l = lambda((f_m - x_l) / (f_m - x_l * p));
-            let lambda_r = lambda((x_r - f_m) / (x_r * q));
-            // p1 + area of left tail
-            let p3 = p2 + c / lambda_l;
-            // p1 + area of right tail
-            let p4 = p3 + c / lambda_r;
-
-            // return value
-            let mut y: i64;
-
-            let gen_u = Uniform::new(0., p4).unwrap();
-            let gen_v = Uniform::new(0., 1.).unwrap();
-
-            loop {
-                // Step 1: Generate `u` for selecting the region. If region 1 is
-                // selected, generate a triangularly distributed variate.
-                let u = gen_u.sample(rng);
-                let mut v = gen_v.sample(rng);
-                if !(u > p1) {
-                    y = f64_to_i64(x_m - p1 * v + u);
-                    break;
-                }
+#[allow(clippy::many_single_char_names)] // Same names as in the reference.
+fn btpe<R: Rng + ?Sized>(btpe: Btpe, rng: &mut R) -> u64 {
+    // Threshold for using the squeeze algorithm. This can be freely
+    // chosen based on performance. Ranlib and GSL use 20.
+    const SQUEEZE_THRESHOLD: i64 = 20;
+
+    // Step 0: Calculate constants as functions of `n` and `p`.
+    let n = btpe.n as f64;
+    let np = n * btpe.p;
+    let q = 1. - btpe.p;
+    let npq = np * q;
+    let f_m = np + btpe.p;
+    let m = f64_to_i64(f_m);
+    // radius of triangle region, since height=1 also area of region
+    let p1 = (2.195 * npq.sqrt() - 4.6 * q).floor() + 0.5;
+    // tip of triangle
+    let x_m = (m as f64) + 0.5;
+    // left edge of triangle
+    let x_l = x_m - p1;
+    // right edge of triangle
+    let x_r = x_m + p1;
+    let c = 0.134 + 20.5 / (15.3 + (m as f64));
+    // p1 + area of parallelogram region
+    let p2 = p1 * (1. + 2. * c);
+
+    fn lambda(a: f64) -> f64 {
+        a * (1. + 0.5 * a)
+    }
 
-                if !(u > p2) {
-                    // Step 2: Region 2, parallelograms. Check if region 2 is
-                    // used. If so, generate `y`.
-                    let x = x_l + (u - p1) / c;
-                    v = v * c + 1.0 - (x - x_m).abs() / p1;
-                    if v > 1. {
-                        continue;
-                    } else {
-                        y = f64_to_i64(x);
-                    }
-                } else if !(u > p3) {
-                    // Step 3: Region 3, left exponential tail.
-                    y = f64_to_i64(x_l + v.ln() / lambda_l);
-                    if y < 0 {
-                        continue;
-                    } else {
-                        v *= (u - p2) * lambda_l;
-                    }
-                } else {
-                    // Step 4: Region 4, right exponential tail.
-                    y = f64_to_i64(x_r - v.ln() / lambda_r);
-                    if y > 0 && (y as u64) > self.n {
-                        continue;
-                    } else {
-                        v *= (u - p3) * lambda_r;
-                    }
-                }
+    let lambda_l = lambda((f_m - x_l) / (f_m - x_l * btpe.p));
+    let lambda_r = lambda((x_r - f_m) / (x_r * q));
+    // p1 + area of left tail
+    let p3 = p2 + c / lambda_l;
+    // p1 + area of right tail
+    let p4 = p3 + c / lambda_r;
+
+    // return value
+    let mut y: i64;
+
+    let gen_u = Uniform::new(0., p4).unwrap();
+    let gen_v = Uniform::new(0., 1.).unwrap();
+
+    loop {
+        // Step 1: Generate `u` for selecting the region. If region 1 is
+        // selected, generate a triangularly distributed variate.
+        let u = gen_u.sample(rng);
+        let mut v = gen_v.sample(rng);
+        if !(u > p1) {
+            y = f64_to_i64(x_m - p1 * v + u);
+            break;
+        }
 
-                // Step 5: Acceptance/rejection comparison.
-
-                // Step 5.0: Test for appropriate method of evaluating f(y).
-                let k = (y - m).abs();
-                if !(k > SQUEEZE_THRESHOLD && (k as f64) < 0.5 * npq - 1.) {
-                    // Step 5.1: Evaluate f(y) via the recursive relationship. Start the
-                    // search from the mode.
-                    let s = p / q;
-                    let a = s * (n + 1.);
-                    let mut f = 1.0;
-                    match m.cmp(&y) {
-                        Ordering::Less => {
-                            let mut i = m;
-                            loop {
-                                i += 1;
-                                f *= a / (i as f64) - s;
-                                if i == y {
-                                    break;
-                                }
-                            }
-                        }
-                        Ordering::Greater => {
-                            let mut i = y;
-                            loop {
-                                i += 1;
-                                f /= a / (i as f64) - s;
-                                if i == m {
-                                    break;
-                                }
-                            }
+        if !(u > p2) {
+            // Step 2: Region 2, parallelograms. Check if region 2 is
+            // used. If so, generate `y`.
+            let x = x_l + (u - p1) / c;
+            v = v * c + 1.0 - (x - x_m).abs() / p1;
+            if v > 1. {
+                continue;
+            } else {
+                y = f64_to_i64(x);
+            }
+        } else if !(u > p3) {
+            // Step 3: Region 3, left exponential tail.
+            y = f64_to_i64(x_l + v.ln() / lambda_l);
+            if y < 0 {
+                continue;
+            } else {
+                v *= (u - p2) * lambda_l;
+            }
+        } else {
+            // Step 4: Region 4, right exponential tail.
+            y = f64_to_i64(x_r - v.ln() / lambda_r);
+            if y > 0 && (y as u64) > btpe.n {
+                continue;
+            } else {
+                v *= (u - p3) * lambda_r;
+            }
+        }
+
+        // Step 5: Acceptance/rejection comparison.
+
+        // Step 5.0: Test for appropriate method of evaluating f(y).
+        let k = (y - m).abs();
+        if !(k > SQUEEZE_THRESHOLD && (k as f64) < 0.5 * npq - 1.) {
+            // Step 5.1: Evaluate f(y) via the recursive relationship. Start the
+            // search from the mode.
+            let s = btpe.p / q;
+            let a = s * (n + 1.);
+            let mut f = 1.0;
+            match m.cmp(&y) {
+                Ordering::Less => {
+                    let mut i = m;
+                    loop {
+                        i += 1;
+                        f *= a / (i as f64) - s;
+                        if i == y {
+                            break;
                         }
-                        Ordering::Equal => {}
-                    }
-                    if v > f {
-                        continue;
-                    } else {
-                        break;
                     }
                 }
-
-                // Step 5.2: Squeezing. Check the value of ln(v) against upper and
-                // lower bound of ln(f(y)).
-                let k = k as f64;
-                let rho = (k / npq) * ((k * (k / 3. + 0.625) + 1. / 6.) / npq + 0.5);
-                let t = -0.5 * k * k / npq;
-                let alpha = v.ln();
-                if alpha < t - rho {
-                    break;
-                }
-                if alpha > t + rho {
-                    continue;
+                Ordering::Greater => {
+                    let mut i = y;
+                    loop {
+                        i += 1;
+                        f /= a / (i as f64) - s;
+                        if i == m {
+                            break;
+                        }
+                    }
                 }
+                Ordering::Equal => {}
+            }
+            if v > f {
+                continue;
+            } else {
+                break;
+            }
+        }
 
-                // Step 5.3: Final acceptance/rejection test.
-                let x1 = (y + 1) as f64;
-                let f1 = (m + 1) as f64;
-                let z = (f64_to_i64(n) + 1 - m) as f64;
-                let w = (f64_to_i64(n) - y + 1) as f64;
+        // Step 5.2: Squeezing. Check the value of ln(v) against upper and
+        // lower bound of ln(f(y)).
+        let k = k as f64;
+        let rho = (k / npq) * ((k * (k / 3. + 0.625) + 1. / 6.) / npq + 0.5);
+        let t = -0.5 * k * k / npq;
+        let alpha = v.ln();
+        if alpha < t - rho {
+            break;
+        }
+        if alpha > t + rho {
+            continue;
+        }
 
-                fn stirling(a: f64) -> f64 {
-                    let a2 = a * a;
-                    (13860. - (462. - (132. - (99. - 140. / a2) / a2) / a2) / a2) / a / 166320.
-                }
+        // Step 5.3: Final acceptance/rejection test.
+        let x1 = (y + 1) as f64;
+        let f1 = (m + 1) as f64;
+        let z = (f64_to_i64(n) + 1 - m) as f64;
+        let w = (f64_to_i64(n) - y + 1) as f64;
 
-                if alpha
-                    > x_m * (f1 / x1).ln()
-                        + (n - (m as f64) + 0.5) * (z / w).ln()
-                        + ((y - m) as f64) * (w * p / (x1 * q)).ln()
-                        // We use the signs from the GSL implementation, which are
-                        // different than the ones in the reference. According to
-                        // the GSL authors, the new signs were verified to be
-                        // correct by one of the original designers of the
-                        // algorithm.
-                        + stirling(f1)
-                        + stirling(z)
-                        - stirling(x1)
-                        - stirling(w)
-                {
-                    continue;
-                }
+        fn stirling(a: f64) -> f64 {
+            let a2 = a * a;
+            (13860. - (462. - (132. - (99. - 140. / a2) / a2) / a2) / a2) / a / 166320.
+        }
 
-                break;
-            }
-            assert!(y >= 0);
-            result = y as u64;
+        if alpha
+            > x_m * (f1 / x1).ln()
+                + (n - (m as f64) + 0.5) * (z / w).ln()
+                + ((y - m) as f64) * (w * btpe.p / (x1 * q)).ln()
+                // We use the signs from the GSL implementation, which are
+                // different than the ones in the reference. According to
+                // the GSL authors, the new signs were verified to be
+                // correct by one of the original designers of the
+                // algorithm.
+                + stirling(f1)
+                + stirling(z)
+                - stirling(x1)
+                - stirling(w)
+        {
+            continue;
         }
 
-        // Invert the result for p < 0.5.
-        if p != self.p {
+        break;
+    }
+    assert!(y >= 0);
+    y as u64
+}
+
+impl Distribution<u64> for Binomial {
+    #[allow(clippy::many_single_char_names)] // Same names as in the reference.
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u64 {
+        let result = match self.inner {
+            Inner::Binv(binv_para) => binv(binv_para, rng),
+            Inner::Btpe(btpe_para) => btpe(btpe_para, rng),
+            Inner::Poisson(poisson) => poisson.sample(rng) as u64,
+            Inner::Constant(c) => c,
+        };
+
+        if self.flipped {
             self.n - result
         } else {
             result

From 9354f9ba601b3f5a15e9d6bbbf2a0e75d92faad4 Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Thu, 15 Aug 2024 16:34:11 +0200
Subject: [PATCH 04/24] serde impls

---
 rand_distr/src/binomial.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/rand_distr/src/binomial.rs b/rand_distr/src/binomial.rs
index 9132f1769e..e2fc2c1278 100644
--- a/rand_distr/src/binomial.rs
+++ b/rand_distr/src/binomial.rs
@@ -52,6 +52,7 @@ pub struct Binomial {
 }
 
 #[derive(Clone, Copy, Debug, PartialEq)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 enum Inner {
     Binv(Binv),
     Btpe(Btpe),
@@ -60,6 +61,7 @@ enum Inner {
 }
 
 #[derive(Clone, Copy, Debug, PartialEq)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 struct Binv {
     r: f64,
     s: f64,
@@ -67,6 +69,7 @@ struct Binv {
 }
 
 #[derive(Clone, Copy, Debug, PartialEq)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 struct Btpe {
     n: u64,
     p: f64,

From afb619d67ce111896fe93833be986cbfcee855c1 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Fri, 13 Sep 2024 08:53:41 +0100
Subject: [PATCH 05/24] benches/distr: use g.finish()

---
 benches/benches/distr.rs | 42 ++++++++++++++--------------------------
 1 file changed, 14 insertions(+), 28 deletions(-)

diff --git a/benches/benches/distr.rs b/benches/benches/distr.rs
index 3097f3a929..c5d63e0603 100644
--- a/benches/benches/distr.rs
+++ b/benches/benches/distr.rs
@@ -111,14 +111,12 @@ macro_rules! sample_binomial {
 }
 
 fn bench(c: &mut Criterion<CyclesPerByte>) {
-    {
     let mut g = c.benchmark_group("exp");
     distr_float!(g, "exp", f64, Exp::new(1.23 * 4.56).unwrap());
     distr_float!(g, "exp1_specialized", f64, Exp1);
     distr_float!(g, "exp1_general", f64, Exp::new(1.).unwrap());
-    }
+    g.finish();
 
-    {
     let mut g = c.benchmark_group("normal");
     distr_float!(g, "normal", f64, Normal::new(-1.23, 4.56).unwrap());
     distr_float!(g, "standardnormal_specialized", f64, StandardNormal);
@@ -139,16 +137,14 @@ fn bench(c: &mut Criterion<CyclesPerByte>) {
             accum
         });
     });
-    }
+    g.finish();
 
-    {
     let mut g = c.benchmark_group("skew_normal");
     distr_float!(g, "shape_zero", f64, SkewNormal::new(0.0, 1.0, 0.0).unwrap());
     distr_float!(g, "shape_positive", f64, SkewNormal::new(0.0, 1.0, 100.0).unwrap());
     distr_float!(g, "shape_negative", f64, SkewNormal::new(0.0, 1.0, -100.0).unwrap());
-    }
+    g.finish();
 
-    {
     let mut g = c.benchmark_group("gamma");
     distr_float!(g, "gamma_large_shape", f64, Gamma::new(10., 1.0).unwrap());
     distr_float!(g, "gamma_small_shape", f64, Gamma::new(0.1, 1.0).unwrap());
@@ -156,25 +152,21 @@ fn bench(c: &mut Criterion<CyclesPerByte>) {
     distr_float!(g, "beta_large_param_similar", f64, Beta::new(101., 95.).unwrap());
     distr_float!(g, "beta_large_param_different", f64, Beta::new(10., 1000.).unwrap());
     distr_float!(g, "beta_mixed_param", f64, Beta::new(0.5, 100.).unwrap());
-    }
+    g.finish();
 
-    {
     let mut g = c.benchmark_group("cauchy");
     distr_float!(g, "cauchy", f64, Cauchy::new(4.2, 6.9).unwrap());
-    }
+    g.finish();
 
-    {
     let mut g = c.benchmark_group("triangular");
     distr_float!(g, "triangular", f64, Triangular::new(0., 1., 0.9).unwrap());
-    }
+    g.finish();
 
-    {
     let mut g = c.benchmark_group("geometric");
     distr_int!(g, "geometric", u64, Geometric::new(0.5).unwrap());
     distr_int!(g, "standard_geometric", u64, StandardGeometric);
-    }
+    g.finish();
 
-    {
     let mut g = c.benchmark_group("weighted");
     distr_int!(g, "weighted_i8", usize, WeightedIndex::new([1i8, 2, 3, 4, 12, 0, 2, 1]).unwrap());
     distr_int!(g, "weighted_u32", usize, WeightedIndex::new([1u32, 2, 3, 4, 12, 0, 2, 1]).unwrap());
@@ -184,9 +176,8 @@ fn bench(c: &mut Criterion<CyclesPerByte>) {
     distr_int!(g, "weighted_alias_method_u32", usize, WeightedAliasIndex::new(vec![1u32, 2, 3, 4, 12, 0, 2, 1]).unwrap());
     distr_int!(g, "weighted_alias_method_f64", usize, WeightedAliasIndex::new(vec![1.0f64, 0.001, 1.0/3.0, 4.01, 0.0, 3.3, 22.0, 0.001]).unwrap());
     distr_int!(g, "weighted_alias_method_large_set", usize, WeightedAliasIndex::new((0..10000).rev().chain(1..10001).collect()).unwrap());
-    }
+    g.finish();
 
-    {
     let mut g = c.benchmark_group("binomial");
     sample_binomial!(g, "binomial", 20, 0.7);
     sample_binomial!(g, "binomial_small", 1_000_000, 1e-30);
@@ -195,33 +186,28 @@ fn bench(c: &mut Criterion<CyclesPerByte>) {
     sample_binomial!(g, "binomial_100", 100, 0.99);
     sample_binomial!(g, "binomial_1000", 1000, 0.01);
     sample_binomial!(g, "binomial_1e12", 1_000_000_000_000, 0.2);
-    }
+    g.finish();
 
-    {
     let mut g = c.benchmark_group("poisson");
     distr_float!(g, "poisson", f64, Poisson::new(4.0).unwrap());
-    }
+    g.finish();
 
-    {
     let mut g = c.benchmark_group("zipf");
     distr_float!(g, "zipf", f64, Zipf::new(10, 1.5).unwrap());
     distr_float!(g, "zeta", f64, Zeta::new(1.5).unwrap());
-    }
+    g.finish();
 
-    {
     let mut g = c.benchmark_group("bernoulli");
     distr!(g, "bernoulli", bool, Bernoulli::new(0.18).unwrap());
-    }
+    g.finish();
 
-    {
     let mut g = c.benchmark_group("circle");
     distr_arr!(g, "circle", [f64; 2], UnitCircle);
-    }
+    g.finish();
 
-    {
     let mut g = c.benchmark_group("sphere");
     distr_arr!(g, "sphere", [f64; 3], UnitSphere);
-    }
+    g.finish();
 }
 
 criterion_group!(

From f91933b2c2cc9addc838b4fe690ea88584507019 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Fri, 13 Sep 2024 09:12:21 +0100
Subject: [PATCH 06/24] benches/distr: revise groups/names

---
 benches/benches/distr.rs | 49 ++++++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 25 deletions(-)

diff --git a/benches/benches/distr.rs b/benches/benches/distr.rs
index c5d63e0603..272899b7ab 100644
--- a/benches/benches/distr.rs
+++ b/benches/benches/distr.rs
@@ -146,12 +146,15 @@ fn bench(c: &mut Criterion<CyclesPerByte>) {
     g.finish();
 
     let mut g = c.benchmark_group("gamma");
-    distr_float!(g, "gamma_large_shape", f64, Gamma::new(10., 1.0).unwrap());
-    distr_float!(g, "gamma_small_shape", f64, Gamma::new(0.1, 1.0).unwrap());
-    distr_float!(g, "beta_small_param", f64, Beta::new(0.1, 0.1).unwrap());
-    distr_float!(g, "beta_large_param_similar", f64, Beta::new(101., 95.).unwrap());
-    distr_float!(g, "beta_large_param_different", f64, Beta::new(10., 1000.).unwrap());
-    distr_float!(g, "beta_mixed_param", f64, Beta::new(0.5, 100.).unwrap());
+    distr_float!(g, "large_shape", f64, Gamma::new(10., 1.0).unwrap());
+    distr_float!(g, "small_shape", f64, Gamma::new(0.1, 1.0).unwrap());
+    g.finish();
+
+    let mut g = c.benchmark_group("beta");
+    distr_float!(g, "small_param", f64, Beta::new(0.1, 0.1).unwrap());
+    distr_float!(g, "large_param_similar", f64, Beta::new(101., 95.).unwrap());
+    distr_float!(g, "large_param_different", f64, Beta::new(10., 1000.).unwrap());
+    distr_float!(g, "mixed_param", f64, Beta::new(0.5, 100.).unwrap());
     g.finish();
 
     let mut g = c.benchmark_group("cauchy");
@@ -168,24 +171,23 @@ fn bench(c: &mut Criterion<CyclesPerByte>) {
     g.finish();
 
     let mut g = c.benchmark_group("weighted");
-    distr_int!(g, "weighted_i8", usize, WeightedIndex::new([1i8, 2, 3, 4, 12, 0, 2, 1]).unwrap());
-    distr_int!(g, "weighted_u32", usize, WeightedIndex::new([1u32, 2, 3, 4, 12, 0, 2, 1]).unwrap());
-    distr_int!(g, "weighted_f64", usize, WeightedIndex::new([1.0f64, 0.001, 1.0/3.0, 4.01, 0.0, 3.3, 22.0, 0.001]).unwrap());
-    distr_int!(g, "weighted_large_set", usize, WeightedIndex::new((0..10000).rev().chain(1..10001)).unwrap());
-    distr_int!(g, "weighted_alias_method_i8", usize, WeightedAliasIndex::new(vec![1i8, 2, 3, 4, 12, 0, 2, 1]).unwrap());
-    distr_int!(g, "weighted_alias_method_u32", usize, WeightedAliasIndex::new(vec![1u32, 2, 3, 4, 12, 0, 2, 1]).unwrap());
-    distr_int!(g, "weighted_alias_method_f64", usize, WeightedAliasIndex::new(vec![1.0f64, 0.001, 1.0/3.0, 4.01, 0.0, 3.3, 22.0, 0.001]).unwrap());
-    distr_int!(g, "weighted_alias_method_large_set", usize, WeightedAliasIndex::new((0..10000).rev().chain(1..10001).collect()).unwrap());
+    distr_int!(g, "i8", usize, WeightedIndex::new([1i8, 2, 3, 4, 12, 0, 2, 1]).unwrap());
+    distr_int!(g, "u32", usize, WeightedIndex::new([1u32, 2, 3, 4, 12, 0, 2, 1]).unwrap());
+    distr_int!(g, "f64", usize, WeightedIndex::new([1.0f64, 0.001, 1.0/3.0, 4.01, 0.0, 3.3, 22.0, 0.001]).unwrap());
+    distr_int!(g, "large_set", usize, WeightedIndex::new((0..10000).rev().chain(1..10001)).unwrap());
+    distr_int!(g, "alias_method_i8", usize, WeightedAliasIndex::new(vec![1i8, 2, 3, 4, 12, 0, 2, 1]).unwrap());
+    distr_int!(g, "alias_method_u32", usize, WeightedAliasIndex::new(vec![1u32, 2, 3, 4, 12, 0, 2, 1]).unwrap());
+    distr_int!(g, "alias_method_f64", usize, WeightedAliasIndex::new(vec![1.0f64, 0.001, 1.0/3.0, 4.01, 0.0, 3.3, 22.0, 0.001]).unwrap());
+    distr_int!(g, "alias_method_large_set", usize, WeightedAliasIndex::new((0..10000).rev().chain(1..10001).collect()).unwrap());
     g.finish();
 
     let mut g = c.benchmark_group("binomial");
-    sample_binomial!(g, "binomial", 20, 0.7);
-    sample_binomial!(g, "binomial_small", 1_000_000, 1e-30);
-    sample_binomial!(g, "binomial_1", 1, 0.9);
-    sample_binomial!(g, "binomial_10", 10, 0.9);
-    sample_binomial!(g, "binomial_100", 100, 0.99);
-    sample_binomial!(g, "binomial_1000", 1000, 0.01);
-    sample_binomial!(g, "binomial_1e12", 1_000_000_000_000, 0.2);
+    sample_binomial!(g, "small", 1_000_000, 1e-30);
+    sample_binomial!(g, "1", 1, 0.9);
+    sample_binomial!(g, "10", 10, 0.9);
+    sample_binomial!(g, "100", 100, 0.99);
+    sample_binomial!(g, "1000", 1000, 0.01);
+    sample_binomial!(g, "1e12", 1_000_000_000_000, 0.2);
     g.finish();
 
     let mut g = c.benchmark_group("poisson");
@@ -201,11 +203,8 @@ fn bench(c: &mut Criterion<CyclesPerByte>) {
     distr!(g, "bernoulli", bool, Bernoulli::new(0.18).unwrap());
     g.finish();
 
-    let mut g = c.benchmark_group("circle");
+    let mut g = c.benchmark_group("unit");
     distr_arr!(g, "circle", [f64; 2], UnitCircle);
-    g.finish();
-
-    let mut g = c.benchmark_group("sphere");
     distr_arr!(g, "sphere", [f64; 3], UnitSphere);
     g.finish();
 }

From 0105cbe4ee96c8ae3b47dad4c9938d7e30590128 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Fri, 13 Sep 2024 09:43:22 +0100
Subject: [PATCH 07/24] benches/distr: reduce warm-up and measurement time

---
 benches/benches/distr.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/benches/benches/distr.rs b/benches/benches/distr.rs
index 272899b7ab..cfbd54084a 100644
--- a/benches/benches/distr.rs
+++ b/benches/benches/distr.rs
@@ -211,7 +211,9 @@ fn bench(c: &mut Criterion<CyclesPerByte>) {
 
 criterion_group!(
     name = benches;
-    config = Criterion::default().with_measurement(CyclesPerByte);
+    config = Criterion::default().with_measurement(CyclesPerByte)
+        .warm_up_time(core::time::Duration::from_secs(1))
+        .measurement_time(core::time::Duration::from_secs(2));
     targets = bench
 );
 criterion_main!(benches);

From 307c4a96b404f5d45970a692193ffd4ad01cb2b6 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Fri, 13 Sep 2024 09:27:30 +0100
Subject: [PATCH 08/24] benches/distr: use elements, not bytes; remove most
 sample loops

---
 benches/benches/distr.rs | 61 +++++++---------------------------------
 1 file changed, 10 insertions(+), 51 deletions(-)

diff --git a/benches/benches/distr.rs b/benches/benches/distr.rs
index cfbd54084a..b82589ee36 100644
--- a/benches/benches/distr.rs
+++ b/benches/benches/distr.rs
@@ -11,95 +11,57 @@
 // Rustfmt splits macro invocations to shorten lines; in this case longer-lines are more readable
 #![rustfmt::skip]
 
-const RAND_BENCH_N: u64 = 1000;
-
 use criterion::{criterion_group, criterion_main, Criterion, Throughput};
 use criterion_cycles_per_byte::CyclesPerByte;
 
-use core::mem::size_of;
-
 use rand::prelude::*;
 use rand_distr::*;
 
 // At this time, distributions are optimised for 64-bit platforms.
 use rand_pcg::Pcg64Mcg;
 
+const ITER_ELTS: u64 = 100;
+
 macro_rules! distr_int {
     ($group:ident, $fnn:expr, $ty:ty, $distr:expr) => {
-        $group.throughput(Throughput::Bytes(
-            size_of::<$ty>() as u64 * RAND_BENCH_N));
         $group.bench_function($fnn, |c| {
             let mut rng = Pcg64Mcg::from_os_rng();
             let distr = $distr;
 
-            c.iter(|| {
-                let mut accum: $ty = 0;
-                for _ in 0..RAND_BENCH_N {
-                    let x: $ty = distr.sample(&mut rng);
-                    accum = accum.wrapping_add(x);
-                }
-                accum
-            });
+            c.iter(|| distr.sample(&mut rng));
         });
     };
 }
 
 macro_rules! distr_float {
     ($group:ident, $fnn:expr, $ty:ty, $distr:expr) => {
-        $group.throughput(Throughput::Bytes(
-            size_of::<$ty>() as u64 * RAND_BENCH_N));
         $group.bench_function($fnn, |c| {
             let mut rng = Pcg64Mcg::from_os_rng();
             let distr = $distr;
 
-            c.iter(|| {
-                let mut accum = 0.;
-                for _ in 0..RAND_BENCH_N {
-                    let x: $ty = distr.sample(&mut rng);
-                    accum += x;
-                }
-                accum
-            });
+            c.iter(|| Distribution::<$ty>::sample(&distr, &mut rng));
         });
     };
 }
 
 macro_rules! distr {
     ($group:ident, $fnn:expr, $ty:ty, $distr:expr) => {
-        $group.throughput(Throughput::Bytes(
-            size_of::<$ty>() as u64 * RAND_BENCH_N));
         $group.bench_function($fnn, |c| {
             let mut rng = Pcg64Mcg::from_os_rng();
             let distr = $distr;
 
-            c.iter(|| {
-                let mut accum: u32 = 0;
-                for _ in 0..RAND_BENCH_N {
-                    let x: $ty = distr.sample(&mut rng);
-                    accum = accum.wrapping_add(x as u32);
-                }
-                accum
-            });
+            c.iter(|| distr.sample(&mut rng));
         });
     };
 }
 
 macro_rules! distr_arr {
     ($group:ident, $fnn:expr, $ty:ty, $distr:expr) => {
-        $group.throughput(Throughput::Bytes(
-            size_of::<$ty>() as u64 * RAND_BENCH_N));
         $group.bench_function($fnn, |c| {
             let mut rng = Pcg64Mcg::from_os_rng();
             let distr = $distr;
 
-            c.iter(|| {
-                let mut accum: u32 = 0;
-                for _ in 0..RAND_BENCH_N {
-                    let x: $ty = distr.sample(&mut rng);
-                    accum = accum.wrapping_add(x[0] as u32);
-                }
-                accum
-            });
+            c.iter(|| Distribution::<$ty>::sample(&distr, &mut rng));
         });
     };
 }
@@ -122,19 +84,16 @@ fn bench(c: &mut Criterion<CyclesPerByte>) {
     distr_float!(g, "standardnormal_specialized", f64, StandardNormal);
     distr_float!(g, "standardnormal_general", f64, Normal::new(0., 1.).unwrap());
     distr_float!(g, "log_normal", f64, LogNormal::new(-1.23, 4.56).unwrap());
-    g.throughput(Throughput::Bytes(size_of::<f64>() as u64 * RAND_BENCH_N));
+    g.throughput(Throughput::Elements(ITER_ELTS));
     g.bench_function("iter", |c| {
         use core::f64::consts::{E, PI};
         let mut rng = Pcg64Mcg::from_os_rng();
         let distr = Normal::new(-E, PI).unwrap();
-        let mut iter = distr.sample_iter(&mut rng);
 
         c.iter(|| {
-            let mut accum = 0.0;
-            for _ in 0..RAND_BENCH_N {
-                accum += iter.next().unwrap();
-            }
-            accum
+            distr.sample_iter(&mut rng)
+                .take(ITER_ELTS as usize)
+                .fold(0.0, |a, r| a + r)
         });
     });
     g.finish();

From 5ca3b18d5b789a3999d98421fa2035e293ffb4e9 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Fri, 13 Sep 2024 10:03:11 +0100
Subject: [PATCH 09/24] benches/distr: inline distr!

---
 benches/benches/distr.rs | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/benches/benches/distr.rs b/benches/benches/distr.rs
index b82589ee36..a5a9634049 100644
--- a/benches/benches/distr.rs
+++ b/benches/benches/distr.rs
@@ -44,17 +44,6 @@ macro_rules! distr_float {
     };
 }
 
-macro_rules! distr {
-    ($group:ident, $fnn:expr, $ty:ty, $distr:expr) => {
-        $group.bench_function($fnn, |c| {
-            let mut rng = Pcg64Mcg::from_os_rng();
-            let distr = $distr;
-
-            c.iter(|| distr.sample(&mut rng));
-        });
-    };
-}
-
 macro_rules! distr_arr {
     ($group:ident, $fnn:expr, $ty:ty, $distr:expr) => {
         $group.bench_function($fnn, |c| {
@@ -159,7 +148,11 @@ fn bench(c: &mut Criterion<CyclesPerByte>) {
     g.finish();
 
     let mut g = c.benchmark_group("bernoulli");
-    distr!(g, "bernoulli", bool, Bernoulli::new(0.18).unwrap());
+    g.bench_function("bernoulli", |c| {
+        let mut rng = Pcg64Mcg::from_os_rng();
+        let distr = Bernoulli::new(0.18).unwrap();
+        c.iter(|| distr.sample(&mut rng))
+    });
     g.finish();
 
     let mut g = c.benchmark_group("unit");

From 3bb85ca999b489855e95da6f6d711e0649c0f7ae Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Fri, 13 Sep 2024 10:06:16 +0100
Subject: [PATCH 10/24] benches/distr: expand Poisson benchmarks

---
 benches/benches/distr.rs | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/benches/benches/distr.rs b/benches/benches/distr.rs
index a5a9634049..63d6a1034e 100644
--- a/benches/benches/distr.rs
+++ b/benches/benches/distr.rs
@@ -139,7 +139,23 @@ fn bench(c: &mut Criterion<CyclesPerByte>) {
     g.finish();
 
     let mut g = c.benchmark_group("poisson");
-    distr_float!(g, "poisson", f64, Poisson::new(4.0).unwrap());
+    for lambda in [1f64, 4.0, 10.0, 100.0].into_iter() {
+        let name = format!("{lambda}");
+        distr_float!(g, name, f64, Poisson::new(lambda).unwrap());
+    }
+    g.throughput(Throughput::Elements(ITER_ELTS));
+    g.bench_function("variable", |c| {
+        let mut rng = Pcg64Mcg::from_os_rng();
+        let ldistr = Uniform::new(0.1, 10.0).unwrap();
+
+        c.iter(|| {
+            let l = rng.sample(ldistr);
+            let distr = Poisson::new(l * l).unwrap();
+            Distribution::<f64>::sample_iter(&distr, &mut rng)
+                .take(ITER_ELTS as usize)
+                .fold(0.0, |a, r| a + r)
+        })
+    });
     g.finish();
 
     let mut g = c.benchmark_group("zipf");

From 8c4b34ef4662189eb4313b257c8f794e45fca467 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Fri, 13 Sep 2024 10:58:13 +0100
Subject: [PATCH 11/24] Poisson: split repr into two enum variants

With #[inline] on all sample methods there is no effect on benchmarks;
as presented there is <2% on most, +8% time on variable benchmark.
---
 rand_distr/src/poisson.rs | 160 +++++++++++++++++++++++---------------
 1 file changed, 98 insertions(+), 62 deletions(-)

diff --git a/rand_distr/src/poisson.rs b/rand_distr/src/poisson.rs
index 09eae374db..54ee0df5ed 100644
--- a/rand_distr/src/poisson.rs
+++ b/rand_distr/src/poisson.rs
@@ -51,11 +51,7 @@ where
     Standard: Distribution<F>,
 {
     lambda: F,
-    // precalculated values
-    exp_lambda: F,
-    log_lambda: F,
-    sqrt_2lambda: F,
-    magic_val: F,
+    method: Method<F>,
 }
 
 /// Error type returned from [`Poisson::new`].
@@ -81,6 +77,22 @@ impl fmt::Display for Error {
 #[cfg(feature = "std")]
 impl std::error::Error for Error {}
 
+#[derive(Clone, Copy, Debug, PartialEq)]
+struct KnuthMethod<F> {
+    exp_lambda: F,
+}
+#[derive(Clone, Copy, Debug, PartialEq)]
+struct RejectionMethod<F> {
+    log_lambda: F,
+    sqrt_2lambda: F,
+    magic_val: F,
+}
+#[derive(Clone, Copy, Debug, PartialEq)]
+enum Method<F> {
+    Knuth(KnuthMethod<F>),
+    Rejection(RejectionMethod<F>),
+}
+
 impl<F> Poisson<F>
 where
     F: Float + FloatConst,
@@ -104,77 +116,101 @@ where
         if !(lambda > F::zero()) {
             return Err(Error::ShapeTooSmall);
         }
-        let log_lambda = lambda.ln();
-        Ok(Poisson {
-            lambda,
-            exp_lambda: (-lambda).exp(),
-            log_lambda,
-            sqrt_2lambda: (F::from(2.0).unwrap() * lambda).sqrt(),
-            magic_val: lambda * log_lambda - crate::utils::log_gamma(F::one() + lambda),
-        })
+
+        // Use the Knuth method only for low expected values
+        let method = if lambda < F::from(12.0).unwrap() {
+            Method::Knuth(KnuthMethod {
+                exp_lambda: (-lambda).exp(),
+            })
+        } else {
+            let log_lambda = lambda.ln();
+            let sqrt_2lambda = (F::from(2.0).unwrap() * lambda).sqrt();
+            let magic_val = lambda * log_lambda - crate::utils::log_gamma(F::one() + lambda);
+            Method::Rejection(RejectionMethod {
+                log_lambda,
+                sqrt_2lambda,
+                magic_val,
+            })
+        };
+
+        Ok(Poisson { lambda, method })
     }
 }
 
-impl<F> Distribution<F> for Poisson<F>
+impl<F> Distribution<F> for KnuthMethod<F>
 where
     F: Float + FloatConst,
     Standard: Distribution<F>,
 {
-    #[inline]
     fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> F {
-        // using the algorithm from Numerical Recipes in C
-
-        // for low expected values use the Knuth method
-        if self.lambda < F::from(12.0).unwrap() {
-            let mut result = F::one();
-            let mut p = rng.random::<F>();
-            while p > self.exp_lambda {
-                p = p * rng.random::<F>();
-                result = result + F::one();
-            }
-            result - F::one()
+        let mut result = F::one();
+        let mut p = rng.random::<F>();
+        while p > self.exp_lambda {
+            p = p * rng.random::<F>();
+            result = result + F::one();
         }
-        // high expected values - rejection method
-        else {
-            // we use the Cauchy distribution as the comparison distribution
-            // f(x) ~ 1/(1+x^2)
-            let cauchy = Cauchy::new(F::zero(), F::one()).unwrap();
-            let mut result;
+        result - F::one()
+    }
+}
+impl<F> RejectionMethod<F>
+where
+    F: Float + FloatConst,
+    Standard: Distribution<F>,
+{
+    fn sample<R: Rng + ?Sized>(&self, lambda: F, rng: &mut R) -> F {
+        // The algorithm from Numerical Recipes in C
+
+        // we use the Cauchy distribution as the comparison distribution
+        // f(x) ~ 1/(1+x^2)
+        let cauchy = Cauchy::new(F::zero(), F::one()).unwrap();
+        let mut result;
+
+        loop {
+            let mut comp_dev;
 
             loop {
-                let mut comp_dev;
-
-                loop {
-                    // draw from the Cauchy distribution
-                    comp_dev = rng.sample(cauchy);
-                    // shift the peak of the comparison distribution
-                    result = self.sqrt_2lambda * comp_dev + self.lambda;
-                    // repeat the drawing until we are in the range of possible values
-                    if result >= F::zero() {
-                        break;
-                    }
-                }
-                // now the result is a random variable greater than 0 with Cauchy distribution
-                // the result should be an integer value
-                result = result.floor();
-
-                // this is the ratio of the Poisson distribution to the comparison distribution
-                // the magic value scales the distribution function to a range of approximately 0-1
-                // since it is not exact, we multiply the ratio by 0.9 to avoid ratios greater than 1
-                // this doesn't change the resulting distribution, only increases the rate of failed drawings
-                let check = F::from(0.9).unwrap()
-                    * (F::one() + comp_dev * comp_dev)
-                    * (result * self.log_lambda
-                        - crate::utils::log_gamma(F::one() + result)
-                        - self.magic_val)
-                        .exp();
-
-                // check with uniform random value - if below the threshold, we are within the target distribution
-                if rng.random::<F>() <= check {
+                // draw from the Cauchy distribution
+                comp_dev = rng.sample(cauchy);
+                // shift the peak of the comparison distribution
+                result = self.sqrt_2lambda * comp_dev + lambda;
+                // repeat the drawing until we are in the range of possible values
+                if result >= F::zero() {
                     break;
                 }
             }
-            result
+            // now the result is a random variable greater than 0 with Cauchy distribution
+            // the result should be an integer value
+            result = result.floor();
+
+            // this is the ratio of the Poisson distribution to the comparison distribution
+            // the magic value scales the distribution function to a range of approximately 0-1
+            // since it is not exact, we multiply the ratio by 0.9 to avoid ratios greater than 1
+            // this doesn't change the resulting distribution, only increases the rate of failed drawings
+            let check = F::from(0.9).unwrap()
+                * (F::one() + comp_dev * comp_dev)
+                * (result * self.log_lambda
+                    - crate::utils::log_gamma(F::one() + result)
+                    - self.magic_val)
+                    .exp();
+
+            // check with uniform random value - if below the threshold, we are within the target distribution
+            if rng.random::<F>() <= check {
+                break;
+            }
+        }
+        result
+    }
+}
+impl<F> Distribution<F> for Poisson<F>
+where
+    F: Float + FloatConst,
+    Standard: Distribution<F>,
+{
+    #[inline]
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> F {
+        match &self.method {
+            Method::Knuth(method) => method.sample(rng),
+            Method::Rejection(method) => method.sample(self.lambda, rng),
         }
     }
 }

From 2dee225d0947148a1c11c85ac102478bfed2d050 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Fri, 13 Sep 2024 11:26:03 +0100
Subject: [PATCH 12/24] Make poisson::KnuthMethod pub(crate)

---
 rand_distr/src/lib.rs     |  2 +-
 rand_distr/src/poisson.rs | 14 ++++++++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/rand_distr/src/lib.rs b/rand_distr/src/lib.rs
index 90a534ff8c..03fad85c91 100644
--- a/rand_distr/src/lib.rs
+++ b/rand_distr/src/lib.rs
@@ -211,7 +211,7 @@ mod normal;
 mod normal_inverse_gaussian;
 mod pareto;
 mod pert;
-mod poisson;
+pub(crate) mod poisson;
 mod skew_normal;
 mod student_t;
 mod triangular;
diff --git a/rand_distr/src/poisson.rs b/rand_distr/src/poisson.rs
index 54ee0df5ed..5f07259761 100644
--- a/rand_distr/src/poisson.rs
+++ b/rand_distr/src/poisson.rs
@@ -78,9 +78,17 @@ impl fmt::Display for Error {
 impl std::error::Error for Error {}
 
 #[derive(Clone, Copy, Debug, PartialEq)]
-struct KnuthMethod<F> {
+pub(crate) struct KnuthMethod<F> {
     exp_lambda: F,
 }
+impl<F: Float> KnuthMethod<F> {
+    pub(crate) fn new(lambda: F) -> Self {
+        KnuthMethod {
+            exp_lambda: (-lambda).exp(),
+        }
+    }
+}
+
 #[derive(Clone, Copy, Debug, PartialEq)]
 struct RejectionMethod<F> {
     log_lambda: F,
@@ -119,9 +127,7 @@ where
 
         // Use the Knuth method only for low expected values
         let method = if lambda < F::from(12.0).unwrap() {
-            Method::Knuth(KnuthMethod {
-                exp_lambda: (-lambda).exp(),
-            })
+            Method::Knuth(KnuthMethod::new(lambda))
         } else {
             let log_lambda = lambda.ln();
             let sqrt_2lambda = (F::from(2.0).unwrap() * lambda).sqrt();

From aaa08df11bb52d834463268d06ec13eb83a964bf Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Fri, 13 Sep 2024 12:39:40 +0200
Subject: [PATCH 13/24] Use Knuth Method in Binomial to save space in the
 Binomial struct

---
 rand_distr/src/binomial.rs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/rand_distr/src/binomial.rs b/rand_distr/src/binomial.rs
index b7cd840800..2552729d08 100644
--- a/rand_distr/src/binomial.rs
+++ b/rand_distr/src/binomial.rs
@@ -9,7 +9,7 @@
 
 //! The binomial distribution `Binomial(n, p)`.
 
-use crate::{Distribution, Poisson, Uniform};
+use crate::{Distribution, Uniform};
 use core::cmp::Ordering;
 use core::fmt;
 #[allow(unused_imports)]
@@ -60,7 +60,7 @@ pub struct Binomial {
 enum Inner {
     Binv(Binv),
     Btpe(Btpe),
-    Poisson(Poisson<f64>),
+    Poisson(crate::poisson::KnuthMethod<f64>),
     Constant(u64),
 }
 
@@ -157,7 +157,8 @@ impl Binomial {
         let inner = if np < BINV_THRESHOLD {
             let q = 1.0 - p;
             if q == 1.0 {
-                Inner::Poisson(Poisson::new(np).unwrap())
+                assert!(np <= 12.0, "This is required for Knuth method");
+                Inner::Poisson(crate::poisson::KnuthMethod::new(np))
             } else {
                 let s = p / q;
                 Inner::Binv(Binv {

From a8bfec1558ad084545ecc151e995eacd5be5f02b Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Fri, 13 Sep 2024 12:42:43 +0200
Subject: [PATCH 14/24] serde impls

---
 rand_distr/src/poisson.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/rand_distr/src/poisson.rs b/rand_distr/src/poisson.rs
index 5f07259761..332d1fc06c 100644
--- a/rand_distr/src/poisson.rs
+++ b/rand_distr/src/poisson.rs
@@ -78,6 +78,7 @@ impl fmt::Display for Error {
 impl std::error::Error for Error {}
 
 #[derive(Clone, Copy, Debug, PartialEq)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub(crate) struct KnuthMethod<F> {
     exp_lambda: F,
 }
@@ -90,12 +91,14 @@ impl<F: Float> KnuthMethod<F> {
 }
 
 #[derive(Clone, Copy, Debug, PartialEq)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 struct RejectionMethod<F> {
     log_lambda: F,
     sqrt_2lambda: F,
     magic_val: F,
 }
 #[derive(Clone, Copy, Debug, PartialEq)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 enum Method<F> {
     Knuth(KnuthMethod<F>),
     Rejection(RejectionMethod<F>),

From a007613ae90c0a2636bb3421c83302ca70fd2f2d Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Fri, 13 Sep 2024 13:01:18 +0200
Subject: [PATCH 15/24] rename inner to method

---
 rand_distr/src/binomial.rs | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/rand_distr/src/binomial.rs b/rand_distr/src/binomial.rs
index 2552729d08..d7d5d8cb76 100644
--- a/rand_distr/src/binomial.rs
+++ b/rand_distr/src/binomial.rs
@@ -50,14 +50,14 @@ use rand::Rng;
 #[derive(Clone, Copy, Debug, PartialEq)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub struct Binomial {
-    inner: Inner,
+    method: Method,
     flipped: bool,
     n: u64,
 }
 
 #[derive(Clone, Copy, Debug, PartialEq)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-enum Inner {
+enum Method {
     Binv(Binv),
     Btpe(Btpe),
     Poisson(crate::poisson::KnuthMethod<f64>),
@@ -123,7 +123,7 @@ impl Binomial {
 
         if p == 0.0 {
             return Ok(Binomial {
-                inner: Inner::Constant(0),
+                method: Method::Constant(0),
                 flipped: false,
                 n,
             });
@@ -131,7 +131,7 @@ impl Binomial {
 
         if p == 1.0 {
             return Ok(Binomial {
-                inner: Inner::Constant(n),
+                method: Method::Constant(n),
                 flipped: false,
                 n,
             });
@@ -158,19 +158,19 @@ impl Binomial {
             let q = 1.0 - p;
             if q == 1.0 {
                 assert!(np <= 12.0, "This is required for Knuth method");
-                Inner::Poisson(crate::poisson::KnuthMethod::new(np))
+                Method::Poisson(crate::poisson::KnuthMethod::new(np))
             } else {
                 let s = p / q;
-                Inner::Binv(Binv {
+                Method::Binv(Binv {
                     r: q.powf(n as f64),
                     s,
                     a: (n as f64 + 1.0) * s,
                 })
             }
         } else {
-            Inner::Btpe(Btpe { n, p })
+            Method::Btpe(Btpe { n, p })
         };
-        Ok(Binomial { flipped, inner, n })
+        Ok(Binomial { flipped, method: inner, n })
     }
 }
 
@@ -374,11 +374,11 @@ fn btpe<R: Rng + ?Sized>(btpe: Btpe, rng: &mut R) -> u64 {
 impl Distribution<u64> for Binomial {
     #[allow(clippy::many_single_char_names)] // Same names as in the reference.
     fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u64 {
-        let result = match self.inner {
-            Inner::Binv(binv_para) => binv(binv_para, rng),
-            Inner::Btpe(btpe_para) => btpe(btpe_para, rng),
-            Inner::Poisson(poisson) => poisson.sample(rng) as u64,
-            Inner::Constant(c) => c,
+        let result = match self.method {
+            Method::Binv(binv_para) => binv(binv_para, rng),
+            Method::Btpe(btpe_para) => btpe(btpe_para, rng),
+            Method::Poisson(poisson) => poisson.sample(rng) as u64,
+            Method::Constant(c) => c,
         };
 
         if self.flipped {

From b0021aa98a7b12ab467a3641710041b376a0a20c Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Fri, 13 Sep 2024 13:01:56 +0200
Subject: [PATCH 16/24] remove the known issue from doc

---
 rand_distr/src/binomial.rs | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/rand_distr/src/binomial.rs b/rand_distr/src/binomial.rs
index d7d5d8cb76..33f3fa0469 100644
--- a/rand_distr/src/binomial.rs
+++ b/rand_distr/src/binomial.rs
@@ -26,10 +26,6 @@ use rand::Rng;
 ///
 /// `f(k) = n!/(k! (n-k)!) p^k (1-p)^(n-k)` for `k >= 0`.
 ///
-/// # Known issues
-///
-/// See documentation of [`Binomial::new`].
-///
 /// # Plot
 ///
 /// The following plot of the binomial distribution illustrates the
@@ -106,13 +102,6 @@ impl std::error::Error for Error {}
 impl Binomial {
     /// Construct a new `Binomial` with the given shape parameters `n` (number
     /// of trials) and `p` (probability of success).
-    ///
-    /// # Known issues
-    ///
-    /// Although this method should return an [`Error`] on invalid parameters,
-    /// some (extreme) parameter combinations are known to return a [`Binomial`]
-    /// object which panics when [sampled](Distribution::sample).
-    /// See [#1378](https://github.com/rust-random/rand/issues/1378).
     pub fn new(n: u64, p: f64) -> Result<Binomial, Error> {
         if !(p >= 0.0) {
             return Err(Error::ProbabilityTooSmall);

From 2f6cef551752adadfc0bcec513de40582bbe6663 Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Fri, 13 Sep 2024 13:09:48 +0200
Subject: [PATCH 17/24] rustfmt and changelog

---
 rand_core/CHANGELOG.md     | 2 ++
 rand_distr/src/binomial.rs | 8 ++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/rand_core/CHANGELOG.md b/rand_core/CHANGELOG.md
index d422fe30bd..cd6b75fc87 100644
--- a/rand_core/CHANGELOG.md
+++ b/rand_core/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 - Bump the MSRV to 1.61.0
 - The `serde1` feature has been renamed `serde` (#1477)
+- Fix panic in Binomial (#1378)
+- Move some of the computations in Binomail from `sample` to `new`
 
 ## [0.9.0-alpha.1] - 2024-03-18
 
diff --git a/rand_distr/src/binomial.rs b/rand_distr/src/binomial.rs
index 33f3fa0469..97e655744c 100644
--- a/rand_distr/src/binomial.rs
+++ b/rand_distr/src/binomial.rs
@@ -73,7 +73,7 @@ struct Binv {
 struct Btpe {
     n: u64,
     p: f64,
-    // TODO precompute the other constants
+    // TODO consider precomputing more constants, there is a size/speed tradeoff to make here.
 }
 
 /// Error type returned from [`Binomial::new`].
@@ -159,7 +159,11 @@ impl Binomial {
         } else {
             Method::Btpe(Btpe { n, p })
         };
-        Ok(Binomial { flipped, method: inner, n })
+        Ok(Binomial {
+            flipped,
+            method: inner,
+            n,
+        })
     }
 }
 

From 333f302367c04dfc3a1f99374cb523cc6be3a9d0 Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Fri, 13 Sep 2024 13:24:13 +0200
Subject: [PATCH 18/24] code comments and inline attr

---
 rand_distr/src/binomial.rs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/rand_distr/src/binomial.rs b/rand_distr/src/binomial.rs
index 97e655744c..9f6cfd455d 100644
--- a/rand_distr/src/binomial.rs
+++ b/rand_distr/src/binomial.rs
@@ -146,6 +146,7 @@ impl Binomial {
         let inner = if np < BINV_THRESHOLD {
             let q = 1.0 - p;
             if q == 1.0 {
+                // p is so small that this is extremly close to a Poisson distribution.
                 assert!(np <= 12.0, "This is required for Knuth method");
                 Method::Poisson(crate::poisson::KnuthMethod::new(np))
             } else {
@@ -228,9 +229,9 @@ fn btpe<R: Rng + ?Sized>(btpe: Btpe, rng: &mut R) -> u64 {
 
     let lambda_l = lambda((f_m - x_l) / (f_m - x_l * btpe.p));
     let lambda_r = lambda((x_r - f_m) / (x_r * q));
-    // p1 + area of left tail
+    
     let p3 = p2 + c / lambda_l;
-    // p1 + area of right tail
+    
     let p4 = p3 + c / lambda_r;
 
     // return value
@@ -365,7 +366,7 @@ fn btpe<R: Rng + ?Sized>(btpe: Btpe, rng: &mut R) -> u64 {
 }
 
 impl Distribution<u64> for Binomial {
-    #[allow(clippy::many_single_char_names)] // Same names as in the reference.
+    #[inline]
     fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u64 {
         let result = match self.method {
             Method::Binv(binv_para) => binv(binv_para, rng),

From 487177cb4a51be5ed7b38dceefd0b05a42379a6f Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Fri, 13 Sep 2024 13:26:08 +0200
Subject: [PATCH 19/24] rustfmt

---
 rand_distr/src/binomial.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/rand_distr/src/binomial.rs b/rand_distr/src/binomial.rs
index 9f6cfd455d..d63967b13a 100644
--- a/rand_distr/src/binomial.rs
+++ b/rand_distr/src/binomial.rs
@@ -229,9 +229,9 @@ fn btpe<R: Rng + ?Sized>(btpe: Btpe, rng: &mut R) -> u64 {
 
     let lambda_l = lambda((f_m - x_l) / (f_m - x_l * btpe.p));
     let lambda_r = lambda((x_r - f_m) / (x_r * q));
-    
+
     let p3 = p2 + c / lambda_l;
-    
+
     let p4 = p3 + c / lambda_r;
 
     // return value

From 32a66556ad280c3df0b1c831acd11e85a856d5ff Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Tue, 24 Sep 2024 21:21:04 +0200
Subject: [PATCH 20/24] remove inline

---
 rand_distr/src/binomial.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/rand_distr/src/binomial.rs b/rand_distr/src/binomial.rs
index d63967b13a..0a213cba4d 100644
--- a/rand_distr/src/binomial.rs
+++ b/rand_distr/src/binomial.rs
@@ -366,7 +366,6 @@ fn btpe<R: Rng + ?Sized>(btpe: Btpe, rng: &mut R) -> u64 {
 }
 
 impl Distribution<u64> for Binomial {
-    #[inline]
     fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u64 {
         let result = match self.method {
             Method::Binv(binv_para) => binv(binv_para, rng),

From 7a66486a28c05f611cc4362b8273647ba8d74b59 Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Mon, 30 Sep 2024 23:07:21 +0200
Subject: [PATCH 21/24] move flipped and n into variants

---
 rand_distr/src/binomial.rs | 65 ++++++++++++++++++++++----------------
 1 file changed, 38 insertions(+), 27 deletions(-)

diff --git a/rand_distr/src/binomial.rs b/rand_distr/src/binomial.rs
index 0a213cba4d..1c7d020aca 100644
--- a/rand_distr/src/binomial.rs
+++ b/rand_distr/src/binomial.rs
@@ -47,8 +47,6 @@ use rand::Rng;
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub struct Binomial {
     method: Method,
-    flipped: bool,
-    n: u64,
 }
 
 #[derive(Clone, Copy, Debug, PartialEq)]
@@ -66,6 +64,8 @@ struct Binv {
     r: f64,
     s: f64,
     a: f64,
+    n: u64,
+    flipped: bool,
 }
 
 #[derive(Clone, Copy, Debug, PartialEq)]
@@ -73,7 +73,10 @@ struct Binv {
 struct Btpe {
     n: u64,
     p: f64,
-    // TODO consider precomputing more constants, there is a size/speed tradeoff to make here.
+    npq: f64,
+    p1: f64,
+    flipped: bool,
+    // This has the same size as `Binv` one could consider precomputing more.
 }
 
 /// Error type returned from [`Binomial::new`].
@@ -113,16 +116,12 @@ impl Binomial {
         if p == 0.0 {
             return Ok(Binomial {
                 method: Method::Constant(0),
-                flipped: false,
-                n,
             });
         }
 
         if p == 1.0 {
             return Ok(Binomial {
                 method: Method::Constant(n),
-                flipped: false,
-                n,
             });
         }
 
@@ -143,11 +142,10 @@ impl Binomial {
         const BINV_THRESHOLD: f64 = 10.;
 
         let np = n as f64 * p;
-        let inner = if np < BINV_THRESHOLD {
+        let method = if np < BINV_THRESHOLD {
             let q = 1.0 - p;
             if q == 1.0 {
-                // p is so small that this is extremly close to a Poisson distribution.
-                assert!(np <= 12.0, "This is required for Knuth method");
+                assert!(!flipped);
                 Method::Poisson(crate::poisson::KnuthMethod::new(np))
             } else {
                 let s = p / q;
@@ -155,16 +153,23 @@ impl Binomial {
                     r: q.powf(n as f64),
                     s,
                     a: (n as f64 + 1.0) * s,
+                    n,
+                    flipped,
                 })
             }
         } else {
-            Method::Btpe(Btpe { n, p })
+            let q = 1.0 - p;
+            let npq = np * q;
+            let p1 = (2.195 * npq.sqrt() - 4.6 * q).floor() + 0.5;
+            Method::Btpe(Btpe {
+                n,
+                p,
+                npq,
+                p1,
+                flipped,
+            })
         };
-        Ok(Binomial {
-            flipped,
-            method: inner,
-            n,
-        })
+        Ok(Binomial { method })
     }
 }
 
@@ -181,7 +186,7 @@ fn binv<R: Rng + ?Sized>(binv: Binv, rng: &mut R) -> u64 {
     // When n*p < 10, so is n*p*q which is the variance, so a result > 110 would be 100 / sqrt(10) = 31 standard deviations away.
     const BINV_MAX_X: u64 = 110;
 
-    'outer: loop {
+    let sample = 'outer: loop {
         let mut r = binv.r;
         let mut u: f64 = rng.random();
         let mut x = 0;
@@ -195,6 +200,12 @@ fn binv<R: Rng + ?Sized>(binv: Binv, rng: &mut R) -> u64 {
             r *= binv.a / (x as f64) - binv.s;
         }
         break x;
+    };
+
+    if binv.flipped {
+        binv.n - sample
+    } else {
+        sample
     }
 }
 
@@ -208,11 +219,11 @@ fn btpe<R: Rng + ?Sized>(btpe: Btpe, rng: &mut R) -> u64 {
     let n = btpe.n as f64;
     let np = n * btpe.p;
     let q = 1. - btpe.p;
-    let npq = np * q;
+    let npq = btpe.npq;
     let f_m = np + btpe.p;
     let m = f64_to_i64(f_m);
     // radius of triangle region, since height=1 also area of region
-    let p1 = (2.195 * npq.sqrt() - 4.6 * q).floor() + 0.5;
+    let p1 = btpe.p1;
     // tip of triangle
     let x_m = (m as f64) + 0.5;
     // left edge of triangle
@@ -362,22 +373,22 @@ fn btpe<R: Rng + ?Sized>(btpe: Btpe, rng: &mut R) -> u64 {
         break;
     }
     assert!(y >= 0);
-    y as u64
+    let y = y as u64;
+
+    if btpe.flipped {
+        btpe.n - y
+    } else {
+        y
+    }
 }
 
 impl Distribution<u64> for Binomial {
     fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u64 {
-        let result = match self.method {
+        match self.method {
             Method::Binv(binv_para) => binv(binv_para, rng),
             Method::Btpe(btpe_para) => btpe(btpe_para, rng),
             Method::Poisson(poisson) => poisson.sample(rng) as u64,
             Method::Constant(c) => c,
-        };
-
-        if self.flipped {
-            self.n - result
-        } else {
-            result
         }
     }
 }

From 0318978dda449772cd8f8c99887a6b997954f54e Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Tue, 1 Oct 2024 14:09:38 +0200
Subject: [PATCH 22/24] correct CHANGELOG

---
 rand_core/CHANGELOG.md  | 2 --
 rand_distr/CHANGELOG.md | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/rand_core/CHANGELOG.md b/rand_core/CHANGELOG.md
index cd6b75fc87..d422fe30bd 100644
--- a/rand_core/CHANGELOG.md
+++ b/rand_core/CHANGELOG.md
@@ -7,8 +7,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 - Bump the MSRV to 1.61.0
 - The `serde1` feature has been renamed `serde` (#1477)
-- Fix panic in Binomial (#1378)
-- Move some of the computations in Binomail from `sample` to `new`
 
 ## [0.9.0-alpha.1] - 2024-03-18
 
diff --git a/rand_distr/CHANGELOG.md b/rand_distr/CHANGELOG.md
index 51bde39e86..93756eb705 100644
--- a/rand_distr/CHANGELOG.md
+++ b/rand_distr/CHANGELOG.md
@@ -6,6 +6,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 - The `serde1` feature has been renamed `serde` (#1477)
+- Fix panic in Binomial (#1484)
+- Move some of the computations in Binomial from `sample` to `new` (#1484)
 
 ### Added
 - Add plots for `rand_distr` distributions to documentation (#1434)

From fd9d62035363c3f8d307cc5f853f8a88f6a458c9 Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Tue, 1 Oct 2024 14:27:44 +0200
Subject: [PATCH 23/24] put flipped directly into the enum variants

---
 rand_distr/src/binomial.rs | 44 ++++++++++++++++----------------------
 1 file changed, 19 insertions(+), 25 deletions(-)

diff --git a/rand_distr/src/binomial.rs b/rand_distr/src/binomial.rs
index 1c7d020aca..0f29fe97e7 100644
--- a/rand_distr/src/binomial.rs
+++ b/rand_distr/src/binomial.rs
@@ -52,8 +52,8 @@ pub struct Binomial {
 #[derive(Clone, Copy, Debug, PartialEq)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 enum Method {
-    Binv(Binv),
-    Btpe(Btpe),
+    Binv(Binv, bool),
+    Btpe(Btpe, bool),
     Poisson(crate::poisson::KnuthMethod<f64>),
     Constant(u64),
 }
@@ -65,7 +65,6 @@ struct Binv {
     s: f64,
     a: f64,
     n: u64,
-    flipped: bool,
 }
 
 #[derive(Clone, Copy, Debug, PartialEq)]
@@ -75,8 +74,6 @@ struct Btpe {
     p: f64,
     npq: f64,
     p1: f64,
-    flipped: bool,
-    // This has the same size as `Binv` one could consider precomputing more.
 }
 
 /// Error type returned from [`Binomial::new`].
@@ -145,29 +142,26 @@ impl Binomial {
         let method = if np < BINV_THRESHOLD {
             let q = 1.0 - p;
             if q == 1.0 {
-                assert!(!flipped);
+                // p is so small that this is extremely close to a Poisson distribution.
+                // The flipped case cannot occur here.
                 Method::Poisson(crate::poisson::KnuthMethod::new(np))
             } else {
                 let s = p / q;
-                Method::Binv(Binv {
-                    r: q.powf(n as f64),
-                    s,
-                    a: (n as f64 + 1.0) * s,
-                    n,
+                Method::Binv(
+                    Binv {
+                        r: q.powf(n as f64),
+                        s,
+                        a: (n as f64 + 1.0) * s,
+                        n,
+                    },
                     flipped,
-                })
+                )
             }
         } else {
             let q = 1.0 - p;
             let npq = np * q;
             let p1 = (2.195 * npq.sqrt() - 4.6 * q).floor() + 0.5;
-            Method::Btpe(Btpe {
-                n,
-                p,
-                npq,
-                p1,
-                flipped,
-            })
+            Method::Btpe(Btpe { n, p, npq, p1 }, flipped)
         };
         Ok(Binomial { method })
     }
@@ -179,7 +173,7 @@ fn f64_to_i64(x: f64) -> i64 {
     x as i64
 }
 
-fn binv<R: Rng + ?Sized>(binv: Binv, rng: &mut R) -> u64 {
+fn binv<R: Rng + ?Sized>(binv: Binv, flipped: bool, rng: &mut R) -> u64 {
     // Same value as in GSL.
     // It is possible for BINV to get stuck, so we break if x > BINV_MAX_X and try again.
     // It would be safer to set BINV_MAX_X to self.n, but it is extremely unlikely to be relevant.
@@ -202,7 +196,7 @@ fn binv<R: Rng + ?Sized>(binv: Binv, rng: &mut R) -> u64 {
         break x;
     };
 
-    if binv.flipped {
+    if flipped {
         binv.n - sample
     } else {
         sample
@@ -210,7 +204,7 @@ fn binv<R: Rng + ?Sized>(binv: Binv, rng: &mut R) -> u64 {
 }
 
 #[allow(clippy::many_single_char_names)] // Same names as in the reference.
-fn btpe<R: Rng + ?Sized>(btpe: Btpe, rng: &mut R) -> u64 {
+fn btpe<R: Rng + ?Sized>(btpe: Btpe, flipped: bool, rng: &mut R) -> u64 {
     // Threshold for using the squeeze algorithm. This can be freely
     // chosen based on performance. Ranlib and GSL use 20.
     const SQUEEZE_THRESHOLD: i64 = 20;
@@ -375,7 +369,7 @@ fn btpe<R: Rng + ?Sized>(btpe: Btpe, rng: &mut R) -> u64 {
     assert!(y >= 0);
     let y = y as u64;
 
-    if btpe.flipped {
+    if flipped {
         btpe.n - y
     } else {
         y
@@ -385,8 +379,8 @@ fn btpe<R: Rng + ?Sized>(btpe: Btpe, rng: &mut R) -> u64 {
 impl Distribution<u64> for Binomial {
     fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u64 {
         match self.method {
-            Method::Binv(binv_para) => binv(binv_para, rng),
-            Method::Btpe(btpe_para) => btpe(btpe_para, rng),
+            Method::Binv(binv_para, flipped) => binv(binv_para, flipped, rng),
+            Method::Btpe(btpe_para, flipped) => btpe(btpe_para, flipped, rng),
             Method::Poisson(poisson) => poisson.sample(rng) as u64,
             Method::Constant(c) => c,
         }

From 9fb68a0b91f6a0e21cf37523c4f83e576967fe60 Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Thu, 3 Oct 2024 15:39:38 +0200
Subject: [PATCH 24/24] replace npq with m

---
 rand_distr/src/binomial.rs | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/rand_distr/src/binomial.rs b/rand_distr/src/binomial.rs
index 0f29fe97e7..3ee0f447b4 100644
--- a/rand_distr/src/binomial.rs
+++ b/rand_distr/src/binomial.rs
@@ -72,7 +72,7 @@ struct Binv {
 struct Btpe {
     n: u64,
     p: f64,
-    npq: f64,
+    m: i64,
     p1: f64,
 }
 
@@ -161,7 +161,9 @@ impl Binomial {
             let q = 1.0 - p;
             let npq = np * q;
             let p1 = (2.195 * npq.sqrt() - 4.6 * q).floor() + 0.5;
-            Method::Btpe(Btpe { n, p, npq, p1 }, flipped)
+            let f_m = np + p;
+            let m = f64_to_i64(f_m);
+            Method::Btpe(Btpe { n, p, m, p1 }, flipped)
         };
         Ok(Binomial { method })
     }
@@ -213,9 +215,9 @@ fn btpe<R: Rng + ?Sized>(btpe: Btpe, flipped: bool, rng: &mut R) -> u64 {
     let n = btpe.n as f64;
     let np = n * btpe.p;
     let q = 1. - btpe.p;
-    let npq = btpe.npq;
+    let npq = np * q;
     let f_m = np + btpe.p;
-    let m = f64_to_i64(f_m);
+    let m = btpe.m;
     // radius of triangle region, since height=1 also area of region
     let p1 = btpe.p1;
     // tip of triangle