ChiSym · sritchie · Mar 12, 2024 · Feb 25, 2024 · Feb 29, 2024 · Mar 8, 2024
diff --git a/.gitignore b/.gitignore
@@ -19,4 +19,5 @@ pom.xml
 !template/pom.xml
 pom.xml.asc
 node_modules
-**.shadow-cljs
+**.shadow-cljs
+.#*
diff --git a/package-lock.json b/package-lock.json
diff --git a/src/gen/distribution/commons_math.clj b/src/gen/distribution/commons_math.clj
@@ -68,6 +68,10 @@
                      0 false
                      1 true)))))
 
+(defn binomial-distribution
+  "Builds a `BinomialDistribution` of `n` trials with success probability `p`, using `(rng)` as its random source."
+  [n ^double p] (BinomialDistribution. (rng) n p))
+
 (defn beta-distribution
   ([] (beta-distribution 1.0 1.0))
   ([^double alpha ^double beta]
@@ -129,6 +133,9 @@
 (def bernoulli
   (d/->GenerativeFn bernoulli-distribution 1))
 
+(def binomial  ;; binomial-distribution wrapped as a generative function; the 2 is presumably its argument count (n, p) — confirm against d/->GenerativeFn
+  (d/->GenerativeFn binomial-distribution 2))
+
 (def beta
   (d/->GenerativeFn beta-distribution 2))
 

diff --git a/src/gen/distribution/java_util.clj b/src/gen/distribution/java_util.clj
@@ -25,6 +25,15 @@
   (d/logpdf [_ v]
     (ll/bernoulli p v)))
 
+(defrecord Binomial [^SplittableRandom rnd n p]  ;; binomial over n trials with success probability p, drawing from rnd
+  d/Sample
+  (sample [_]  ;; SplittableRandom has no nextBinomial method: count successes over n independent draws, each succeeding when a uniform [0,1) draw is below p
+    (reduce (fn [successes _] (if (< (.nextDouble rnd) p) (inc successes) successes)) 0 (range n)))
+
+  d/LogPDF
+  (d/logpdf [_ v]  ;; log-probability of observing v successes
+    (ll/binomial n p v)))
+
 (defrecord Gaussian [^SplittableRandom rnd mu sigma]
   d/Sample
   (sample [_]
@@ -43,6 +52,10 @@
   ([] (bernoulli-distribution 0.5))
   ([p] (->Bernoulli (rng) p)))
 
+(defn binomial-distribution  ;; returns a Binomial record for n trials with success probability p, using (rng) as its random source
+  [n p]
+  (->Binomial (rng) n p))
+
 (defn uniform-distribution
   ([] (uniform-distribution 0.0 1.0))
   ([lo hi] (->Uniform (rng) lo hi)))
@@ -57,6 +70,9 @@
 (def bernoulli
   (d/->GenerativeFn bernoulli-distribution 1))
 
+(def binomial  ;; binomial-distribution wrapped as a generative function; the 2 is presumably its argument count (n, p) — confirm against d/->GenerativeFn
+  (d/->GenerativeFn binomial-distribution 2))
+
 (def uniform
   (d/->GenerativeFn uniform-distribution 2))
 

diff --git a/src/gen/distribution/kixi.cljc b/src/gen/distribution/kixi.cljc
@@ -4,6 +4,7 @@
             [kixi.stats.distribution :as k])
   #?(:clj
      (:import (kixi.stats.distribution Bernoulli Cauchy
+                                       Binomial
                                        Exponential Beta
                                        Gamma Normal Uniform T))))
 
@@ -22,6 +23,14 @@
   (logpdf [this v]
     (ll/bernoulli (.-p this) v)))
 
+(extend-type #?(:clj Binomial :cljs k/Binomial)
+  d/Sample
+  (sample [dist] (k/draw dist))  ;; sampling is delegated to kixi's draw
+
+  d/LogPDF
+  (logpdf [dist v]  ;; log-probability of v, computed from the instance's n and p fields
+    (let [trials (.-n dist), prob (.-p dist)] (ll/binomial trials prob v))))
+
 (extend-type #?(:clj Beta :cljs k/Beta)
   d/Sample
   (sample [this] (k/draw this))
@@ -106,6 +115,10 @@
   ([] (bernoulli-distribution 0.5))
   ([p] (k/bernoulli {:p p})))
 
+(defn binomial-distribution
+  "Returns the result of `k/binomial` for `n` trials with success probability `p`."
+  [n p] (k/binomial {:n n :p p}))
+
 (defn beta-distribution
   ([] (beta-distribution 1.0 1.0))
   ([alpha beta]
@@ -143,6 +156,9 @@
 (def bernoulli
   (d/->GenerativeFn bernoulli-distribution 1))
 
+(def binomial  ;; binomial-distribution wrapped as a generative function; the 2 is presumably its argument count (n, p) — confirm against d/->GenerativeFn
+  (d/->GenerativeFn binomial-distribution 2))
+
 (def beta
   (d/->GenerativeFn beta-distribution 2))
 

diff --git a/src/gen/distribution/math/log_likelihood.cljc b/src/gen/distribution/math/log_likelihood.cljc
@@ -97,6 +97,46 @@
   {:pre [(<= 0 p 1)]}
   (Math/log (if v p (- 1.0 p))))
 
+(defn log-fact
+  "Natural logarithm of `x` factorial, obtained by applying `log-gamma-fn` to `x + 1`; `x` must be non-negative."
+  [x]
+  {:pre [(>= x 0)]}
+  (log-gamma-fn (+ x 1)))
+
+(defn log-bico
+  "Natural logarithm of the binomial coefficient `n` choose `k`, for integers with 0 <= k <= n."
+  [n k]
+  {:pre [(integer? n)
+         (integer? k)
+         (>= k 0)
+         (>= n k)]}
+  (let [others (- n k)]  ;; number of items left unchosen
+    (if (or (zero? k) (zero? others)) 0  ;; choosing none or all: log 1
+        (- (log-fact n) (log-fact k) (log-fact others)))))
+
+(defn binomial
+  "Returns the log-likelihood of a [Binomial
+  distribution](https://en.wikipedia.org/wiki/Binomial_distribution)
+  parameterized by `n` (number of trials) and `p` (probability of success in
+  each trial) at the value `v` (number of successes). A degenerate `p` of 0
+  or 1 (integer or floating point) yields 0.0 at the single supported value
+  and `##-Inf` elsewhere."
+  [n p v]
+  {:pre [(integer? n)
+         (integer? v)
+         (>= v 0)
+         (>= n v)
+         (<= 0 p 1)]}
+  (cond
+    ;; `zero?`/`==`, not `=`: on the JVM (= 0.0 0) is false, which would send a
+    ;; double p of 0.0 or 1.0 to the general branch and yield 0 * log(0) = NaN.
+    (zero? p) (if (zero? v) 0.0 ##-Inf) ;; all mass on v = 0: log(1) or log(0)
+    (== p 1)  (if (= v n) 0.0 ##-Inf)   ;; all mass on v = n: log(1) or log(0)
+    :else
+    (+ (log-bico n v)
+       (* v (Math/log p))
+       (* (- n v) (Math/log (- 1 p))))))
+
 (defn cauchy
   "Returns the log-likelihood of a [Cauchy
   distribution](https://en.wikipedia.org/wiki/Cauchy_distribution) parameterized

diff --git a/test/gen/distribution/commons_math_test.clj b/test/gen/distribution/commons_math_test.clj
@@ -7,6 +7,9 @@
   (dt/bernoulli-tests commons/bernoulli-distribution)
   (dt/bernoulli-gfi-tests commons/bernoulli))
 
+(deftest binomial-tests  ;; runs the shared binomial checks against the commons-math constructor
+  (dt/binomial-tests commons/binomial-distribution))
+
 (deftest beta-tests
   (dt/beta-tests commons/beta-distribution))
 

diff --git a/test/gen/distribution/java_util_test.clj b/test/gen/distribution/java_util_test.clj
@@ -7,6 +7,9 @@
   (dt/bernoulli-tests java-util/bernoulli-distribution)
   (dt/bernoulli-gfi-tests java-util/bernoulli))
 
+(deftest binomial-tests  ;; runs the shared binomial checks against the java-util constructor
+  (dt/binomial-tests java-util/binomial-distribution))
+
 (deftest uniform-tests
   (dt/uniform-tests java-util/uniform-distribution))
 

diff --git a/test/gen/distribution/kixi_test.cljc b/test/gen/distribution/kixi_test.cljc
@@ -7,6 +7,9 @@
   (dt/bernoulli-tests kixi/bernoulli-distribution)
   (dt/bernoulli-gfi-tests kixi/bernoulli))
 
+(deftest binomial-tests  ;; runs the shared binomial checks against the kixi constructor
+  (dt/binomial-tests kixi/binomial-distribution))
+
 (deftest beta-tests
   (dt/beta-tests kixi/beta-distribution))
 

diff --git a/test/gen/distribution/math/log_likelihood_test.cljc b/test/gen/distribution/math/log_likelihood_test.cljc
@@ -46,6 +46,9 @@
 (deftest bernoulli-tests
   (dt/bernoulli-tests (->logpdf ll/bernoulli)))
 
+(deftest binomial-tests  ;; runs the shared binomial checks against ll/binomial wrapped by ->logpdf
+  (dt/binomial-tests (->logpdf ll/binomial)))
+
 (deftest cauchy-tests
   (dt/cauchy-tests (->logpdf ll/cauchy)))
 

diff --git a/test/gen/distribution_test.cljc b/test/gen/distribution_test.cljc
@@ -59,6 +59,87 @@
                          (Math/exp (dist/logpdf (->bernoulli p) (not v)))))
                 "All options sum to 1")))
 
+(defn binomial-tests [->binomial]  ;; shared checks; `->binomial` is called with n and p and must return a value `dist/logpdf` accepts
+  ;; boundaries... each expectation is wrapped in a predicate, since `(is x msg)` passes for any truthy x
+  (testing "when p = 0 and v = 0, probability is 1, log(1) = 0"
+    (is (zero? (dist/logpdf (->binomial 10 0) 0))))
+
+  (testing "when p = 0 and v > 0, probability is 0, log(0) = -Inf"
+    (is (= ##-Inf (dist/logpdf (->binomial 10 0) 1))))
+
+  (testing "when p = 1 and v = n, probability is 1, log(1) = 0"
+    (is (zero? (dist/logpdf (->binomial 10 1) 10))))
+
+  (testing "when p = 1 and v < n, probability is 0, log(0) = -Inf"
+    (is (= ##-Inf (dist/logpdf (->binomial 10 1) 9))))
+
+  ;; properties...
+  (testing "sum of probabilities equals 1"
+    (with-comparator (within 1e-9)
+      (let [n 100
+            p 0.5
+            log-probs (map (fn [k] (dist/logpdf (->binomial n p) k)) (range 0 (inc n)))
+            probs (map (fn [x] (Math/exp x)) log-probs)
+            sum-probs (reduce + probs)]
+        (is (ish? 1.0 sum-probs)))))
+
+  (testing "symmetric when p = 0.5 such that binomial(k) = binomial(n -k)"
+    ;; compared approximately via `ish?`; a bare second argument to `is` would only be a message
+    (with-comparator (within 1e-9)
+      (let [n 100, p 0.5, v 10]
+        (is (ish? (dist/logpdf (->binomial n p) v)
+                  (dist/logpdf (->binomial n p) (- n v)))))))
+
+  (testing "mean and variance consistency where mu = n * p and variance = mu(1 - p)"
+    (with-comparator (within 1e-9)
+      (let [n 100
+            p 0.3
+            ks (range 0 (inc n))
+            log-probs (map (fn [k] (dist/logpdf (->binomial n p) k)) ks)
+            probs (map (fn [x] (Math/exp x)) log-probs)
+            mu (reduce + (map * probs ks))
+            variance (reduce + (map (fn [k p] (* p (Math/pow (- k mu) 2))) ks probs))
+            theoretical-mu (* n p)
+            theoretical-variance (* n p (- 1 p))]
+        (is (ish? theoretical-mu mu))
+        (is (ish? theoretical-variance variance)))))
+
+  (testing "spot check against scipy.stats.binom.logpmf (v1.12.0)"
+    (with-comparator (within 1e-9)
+      (is (ish? -7.13354688230902 (dist/logpdf (->binomial 1000000 0.5) 500000)))
+
+      ;; TODO: failing test (off by 1.9e-9)
+      ;; expected: (ish? -3.222306954272568 (dist/logpdf (->binomial 1000000 0.0001) 100))
+      ;; actual: (not (ish? -3.222306954272568 -3.2223069561241857))
+      (is (ish? -3.222306954272568 (dist/logpdf (->binomial 1000000 0.0001) 100)))
+
+      (is (ish? -8.047189562170502 (dist/logpdf (->binomial 5 0.2) 5)))
+      (is (ish? -1.1856136373815076 (dist/logpdf (->binomial 50 0.99) 49)))
+      (is (ish? -1.185613637381508 (dist/logpdf (->binomial 50 0.01) 1)))
+      (is (ish? -693133.3650493873 (dist/logpdf (->binomial 1000000 0.5) 999999)))
+      (is (ish? 0 (dist/logpdf (->binomial 10 0) 0)))
+      (is (ish? 0 (dist/logpdf (->binomial 10 1) 10)))
+      (is (ish? -2.02597397686619 (dist/logpdf (->binomial 100 0.9) 90)))
+      (is (ish? -52.680257828913156 (dist/logpdf (->binomial 500 0.1) 0)))))
+
+  (testing "spot check against gen logpdf (v0.4.6)"
+    (with-comparator (within 1e-9)
+      (is (ish? -7.133546882067904 (dist/logpdf (->binomial 1000000 0.5) 500000)))
+
+      ;; TODO: failing test (off by 1.9e-9)
+      ;; expected: (ish? -3.222306954262436 (dist/logpdf (->binomial 1000000 0.0001) 100))
+      ;; actual: (not (ish? -3.222306954262436 -3.2223069561241857))
+      (is (ish? -3.222306954262436 (dist/logpdf (->binomial 1000000 0.0001) 100)))
+
+      (is (ish? -8.047189562170502 (dist/logpdf (->binomial 5 0.2) 5)))
+      (is (ish? -1.185613637381516 (dist/logpdf (->binomial 50 0.99) 49)))
+      (is (ish? -1.1856136373815152 (dist/logpdf (->binomial 50 0.01) 1)))
+      (is (ish? -693133.3650493873 (dist/logpdf (->binomial 1000000 0.5) 999999)))
+      (is (ish? 0 (dist/logpdf (->binomial 10 0) 0)))
+      (is (ish? 0 (dist/logpdf (->binomial 10 1) 10)))
+      (is (ish? -2.025973976866184 (dist/logpdf (->binomial 100 0.9) 90)))
+      (is (ish? -52.680257828913156 (dist/logpdf (->binomial 500 0.1) 0))))))
+
 (defn categorical-tests [->cat]
   (checking "map => categorical properties"
             [p (gen-double 0 1)]