Implement Cheng algorithm for sampling Beta

vks · vks · commit 49cad1fca09f · 2020-08-04T19:51:37.000+02:00
This should be faster than the gamma variate transformation we are currently using, and it seems to work better for parameters smaller than one. The algorithm is also used by the R language; however, I did not consult their implementation, in order to avoid licensing problems. Reference: R. C. H. Cheng (1978). Generating beta variates with nonintegral shape parameters. Communications of the ACM 21, 317-322. https://doi.org/10.1145/359460.359482
diff --git a/rand_distr/CHANGELOG.md b/rand_distr/CHANGELOG.md
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - All error types now implement `std::error::Error` (#919)
 - Re-exported `rand::distributions::BernoulliError` (#919)
 - Add case `lambda = 0` in the parametrixation of `Exp` (#972)
+- Improve algorithm for sampling `Beta` (#1000)
 
 ## [0.2.2] - 2019-09-10
 - Fix version requirement on rand lib (#847)
diff --git a/rand_distr/benches/distributions.rs b/rand_distr/benches/distributions.rs
@@ -112,6 +112,10 @@ distr_float!(distr_normal, f64, Normal::new(-1.23, 4.56).unwrap());
 distr_float!(distr_log_normal, f64, LogNormal::new(-1.23, 4.56).unwrap());
 distr_float!(distr_gamma_large_shape, f64, Gamma::new(10., 1.0).unwrap());
 distr_float!(distr_gamma_small_shape, f64, Gamma::new(0.1, 1.0).unwrap());
+distr_float!(distr_beta_small_param, f64, Beta::new(0.1, 0.1).unwrap());
+distr_float!(distr_beta_large_param_similar, f64, Beta::new(101., 95.).unwrap());
+distr_float!(distr_beta_large_param_different, f64, Beta::new(10., 1000.).unwrap());
+distr_float!(distr_beta_mixed_param, f64, Beta::new(0.5, 100.).unwrap());
 distr_float!(distr_cauchy, f64, Cauchy::new(4.2, 6.9).unwrap());
 distr_float!(distr_triangular, f64, Triangular::new(0., 1., 0.9).unwrap());
 distr_int!(distr_binomial, u64, Binomial::new(20, 0.7).unwrap());
diff --git a/rand_distr/src/gamma.rs b/rand_distr/src/gamma.rs
@@ -495,6 +495,38 @@ where
     }
 }
 
+/// The algorithm used for sampling the Beta distribution.
+///
+/// Reference:
+///
+/// R. C. H. Cheng (1978).
+/// Generating beta variates with nonintegral shape parameters.
+/// Communications of the ACM 21, 317-322.
+/// https://doi.org/10.1145/359460.359482
+#[derive(Clone, Copy, Debug)]
+enum BetaAlgorithm<N> {
+    BB(BB<N>), // precomputed constants for algorithm BB
+    BC(BC<N>), // precomputed constants for algorithm BC
+}
+
+/// Algorithm BB for `min(alpha, beta) > 1`; holds the constants precomputed in `Beta::new`.
+#[derive(Clone, Copy, Debug)]
+struct BB<N> {
+    alpha: N, // `a + b` in the reference's notation (not the user-supplied shape)
+    beta: N, // `sqrt((alpha - 2) / (2*a*b - alpha))`
+    gamma: N, // `a + 1 / beta`
+}
+
+/// Algorithm BC for `min(alpha, beta) <= 1`; holds the constants precomputed in `Beta::new`.
+#[derive(Clone, Copy, Debug)]
+struct BC<N> {
+    alpha: N, // `a + b` in the reference's notation (not the user-supplied shape)
+    beta: N, // `1 / b`
+    delta: N, // `1 + a - b`
+    kappa1: N, // rejection threshold compared against in step 2 of sampling
+    kappa2: N, // rejection threshold compared against in step 4 of sampling
+}
+
 /// The Beta distribution with shape parameters `alpha` and `beta`.
 ///
 /// # Example
@@ -510,12 +542,11 @@ where
 pub struct Beta<F>
 where
     F: Float,
-    StandardNormal: Distribution<F>,
-    Exp1: Distribution<F>,
     Open01: Distribution<F>,
 {
-    gamma_a: Gamma<F>,
-    gamma_b: Gamma<F>,
+    a: F, a0: F, // `a0` is `alpha` as passed to `new`; `a` is the smaller of the two shape parameters
+    b: F, b0: F, // `b0` is `beta` as passed to `new`; `b` is the larger of the two shape parameters
+    algorithm: BetaAlgorithm<F>, // sampler variant and its constants, chosen once in `new`
 }
 
 /// Error type returned from `Beta::new`.
@@ -542,31 +573,140 @@ impl std::error::Error for BetaError {}
 impl<F> Beta<F>
 where
     F: Float,
-    StandardNormal: Distribution<F>,
-    Exp1: Distribution<F>,
     Open01: Distribution<F>,
 {
     /// Construct an object representing the `Beta(alpha, beta)`
     /// distribution.
     pub fn new(alpha: F, beta: F) -> Result<Beta<F>, BetaError> {
-        Ok(Beta {
-            gamma_a: Gamma::new(alpha, F::one()).map_err(|_| BetaError::AlphaTooSmall)?,
-            gamma_b: Gamma::new(beta, F::one()).map_err(|_| BetaError::BetaTooSmall)?,
-        })
+        if !(alpha > F::zero()) { // negated comparison so that NaN is rejected as well
+            return Err(BetaError::AlphaTooSmall);
+        }
+        if !(beta > F::zero()) { // negated comparison so that NaN is rejected as well
+            return Err(BetaError::BetaTooSmall);
+        }
+        // From now on, we use the notation from the reference,
+        // i.e. `alpha` and `beta` are renamed to `a0` and `b0`.
+        let (a0, b0) = (alpha, beta);
+        let (a, b) = if a0 < b0 { (a0, b0) } else { (b0, a0) }; // a = min, b = max
+        if a > F::one() { // Algorithm BB needs min(a0, b0) > 1; testing `alpha` alone made `beta` below NaN for e.g. (2, 0.5)
+            let alpha = a + b;
+            let beta = ((alpha - F::from(2.).unwrap())
+                        / (F::from(2.).unwrap()*a*b - alpha)).sqrt();
+            let gamma = a + F::one() / beta;
+
+            Ok(Beta {
+                a, a0, b, b0,
+                algorithm: BetaAlgorithm::BB(BB {
+                    alpha, beta, gamma,
+                })
+            })
+        } else { // Algorithm BC. NOTE(review): Cheng's BC appears to be stated with a = max(a0, b0), b = min(a0, b0); here `a` is still the minimum - confirm against the paper
+            let alpha = a + b;
+            let beta = F::one() / b;
+            let delta = F::one() + a - b;
+            let kappa1 = delta
+                * (F::from(0.0138889).unwrap() + F::from(0.0416667).unwrap()*b) // ~1/72 and ~1/24
+                / (a*beta - F::from(0.777778).unwrap()); // ~7/9
+            let kappa2 = F::from(0.25).unwrap()
+                + (F::from(0.5).unwrap() + F::from(0.25).unwrap()/delta)*b;
+
+            Ok(Beta {
+                a, a0, b, b0,
+                algorithm: BetaAlgorithm::BC(BC {
+                    alpha, beta, delta, kappa1, kappa2,
+                })
+            })
+        }
     }
 }
 
 impl<F> Distribution<F> for Beta<F>
 where
     F: Float,
-    StandardNormal: Distribution<F>,
-    Exp1: Distribution<F>,
     Open01: Distribution<F>,
 {
     fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> F {
-        let x = self.gamma_a.sample(rng);
-        let y = self.gamma_b.sample(rng);
-        x / (x + y)
+        match self.algorithm {
+            BetaAlgorithm::BB(algo) => {
+                let mut w;
+                loop {
+                    // 1. Draw two uniforms from `Open01` and derive the candidate `w` and the test quantities.
+                    let u1 = rng.sample(Open01);
+                    let u2 = rng.sample(Open01);
+                    let v = algo.beta * (u1 / (F::one() - u1)).ln();
+                    w = self.a * v.exp();
+                    let z = u1*u1 * u2;
+                    let r = algo.gamma * v - F::from(4.).unwrap().ln();
+                    let s = self.a + r - w;
+                    // 2. Accept if `s + 1 + ln(5) >= 5*z` (no logarithm of `z` needed yet).
+                    if s + F::one() + F::from(5.).unwrap().ln()
+                        >= F::from(5.).unwrap() * z {
+                        break;
+                    }
+                    // 3. Accept if `s >= ln(z)`.
+                    let t = z.ln();
+                    if s >= t {
+                        break;
+                    }
+                    // 4. Retry only if the left-hand side is strictly below `ln(z)`; the negation also accepts on NaN.
+                    if !(r + algo.alpha * (algo.alpha / (self.b + w)).ln() < t) {
+                        break;
+                    }
+                }
+                // 5. Map `w` into the unit interval; the form depends on whether `a` equals the original `alpha` (`a0`).
+                if self.a == self.a0 {
+                    w / (self.b + w)
+                } else {
+                    self.b / (self.b + w)
+                }
+            },
+            BetaAlgorithm::BC(algo) => {
+                let mut w;
+                loop {
+                    let z;
+                    // 1. Draw two uniforms from `Open01`; the branch taken depends on whether `u1 < 0.5`.
+                    let u1 = rng.sample(Open01);
+                    let u2 = rng.sample(Open01);
+                    if u1 < F::from(0.5).unwrap() {
+                        // 2. Reject (draw again) if `0.25*u2 + z - y >= kappa1`; otherwise go to step 5.
+                        let y = u1 * u2;
+                        z = u1 * y;
+                        if F::from(0.25).unwrap() * u2 + z - y >= algo.kappa1 {
+                            continue;
+                        }
+                    } else {
+                        // 3. Accept immediately if `z <= 0.25`, computing `w` on the way out.
+                        z = u1 * u1 * u2;
+                        if z <= F::from(0.25).unwrap() {
+                            let v = algo.beta * (u1 / (F::one() - u1)).ln();
+                            w = self.a * v.exp();
+                            break;
+                        }
+                        // 4. Reject (draw again) if `z >= kappa2`; otherwise go to step 5.
+                        if z >= algo.kappa2 {
+                            continue;
+                        }
+                    }
+                    // 5. Compute the candidate `w`; retry only if the left-hand side is strictly below `ln(z)`.
+                    let v = algo.beta * (u1 / (F::one() - u1)).ln();
+                    w = self.a * v.exp();
+                    if !(algo.alpha * ((algo.alpha / (self.b + w)).ln() + v)
+                         - F::from(1.3862944).unwrap() < z.ln()) { // 1.3862944 ~ ln(4); the negation also accepts on NaN
+                        break;
+                    };
+                }
+                // 6. Map `w` into the unit interval; the form depends on whether `a` equals the original `alpha` (`a0`).
+                if self.a == self.a0 {
+                    if w == F::infinity() {
+                        // `inf / (b + inf)` would be NaN; assuming `b` is finite, the limit for large `w` is 1.
+                        return F::one();
+                    }
+                    w / (self.b + w)
+                } else {
+                    self.b / (self.b + w)
+                }
+            },
+        }
     }
 }
 
@@ -636,4 +776,13 @@ mod test {
     fn test_beta_invalid_dof() {
         Beta::new(0., 0.).unwrap();
     }
+
+    #[test]
+    fn test_beta_small_param() { // samples drawn with tiny shape parameters must never be NaN
+        let beta = Beta::<f64>::new(1e-3, 1e-3).unwrap();
+        let mut rng = crate::test::rng(206);
+        for i in 0..1000 {
+            assert!(!beta.sample(&mut rng).is_nan(), "failed at i={}", i);
+        }
+    }
 }
diff --git a/rand_distr/tests/value_stability.rs b/rand_distr/tests/value_stability.rs
@@ -121,11 +121,11 @@ fn normal_inverse_gaussian_stability() {
 fn pert_stability() {
     // mean = 4, var = 12/7
     test_samples(860, Pert::new(2., 10., 3.).unwrap(), &[
-        4.631484136029422f64,
-        3.307201472321789f64,
-        3.29995019556348f64,
-        3.66835483991721f64,
-        3.514246139933899f64,
+        4.908681667460367,
+        4.014196196158352,
+        2.6489397149197234,
+        3.4569780580044727,
+        4.242864311947118,
     ]);
 }
 
@@ -200,15 +200,21 @@ fn gamma_stability() {
         -2.377641221169782,
     ]);
 
-    // Beta has same special cases as Gamma on each param
+    // Beta has two special cases:
+    //
+    // 1. min(alpha, beta) <= 1
+    // 2. min(alpha, beta) > 1
     test_samples(223, Beta::new(1.0, 0.8).unwrap(), &[
-        0.6444564f32, 0.357635, 0.4110078, 0.7347192,
-    ]);
-    test_samples(223, Beta::new(0.7, 1.2).unwrap(), &[
-        0.6433129944095513f64,
-        0.5373371199711573,
-        0.10313293199269491,
-        0.002472280249144378,
+        0.2958284085602274,
+        0.9384411906056516,
+        0.3151361582723264,
+        0.6150273348630618,
+    ]);
+    test_samples(223, Beta::new(3.0, 1.2).unwrap(), &[
+        0.49563509121756827,
+        0.9551305482256759,
+        0.5151181353461637,
+        0.7551732971235077,
     ]);
 }