diff --git a/.gitignore b/.gitignore
index 96248de..3af6366 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,4 +19,5 @@ pom.xml
 !template/pom.xml
 pom.xml.asc
 node_modules
-**.shadow-cljs
\ No newline at end of file
+**.shadow-cljs
+.#*
\ No newline at end of file
diff --git a/package-lock.json b/package-lock.json
index ec4e8e5..a6b8cfd 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,5 +1,5 @@
 {
-  "name": "gen.clj",
+  "name": "Gen.clj",
   "lockfileVersion": 2,
   "requires": true,
   "packages": {
diff --git a/src/gen/distribution/commons_math.clj b/src/gen/distribution/commons_math.clj
index 4fc3b5b..5aca4ac 100644
--- a/src/gen/distribution/commons_math.clj
+++ b/src/gen/distribution/commons_math.clj
@@ -68,6 +68,12 @@
            0 false
            1 true)))))
 
+(defn binomial-distribution
+  "Constructs a `BinomialDistribution` for `n` trials with per-trial success
+  probability `p`, passing the result of `(rng)` as the first constructor argument."
+  [n ^double p]
+  (BinomialDistribution. (rng) n p))
+
 (defn beta-distribution
   ([] (beta-distribution 1.0 1.0))
   ([^double alpha ^double beta]
@@ -129,6 +135,9 @@
 (def bernoulli
   (d/->GenerativeFn bernoulli-distribution 1))
 
+(def binomial
+  (d/->GenerativeFn binomial-distribution 2))
+
 (def beta
   (d/->GenerativeFn beta-distribution 2))
 
diff --git a/src/gen/distribution/kixi.cljc b/src/gen/distribution/kixi.cljc
index d31a79f..1815956 100644
--- a/src/gen/distribution/kixi.cljc
+++ b/src/gen/distribution/kixi.cljc
@@ -4,6 +4,7 @@
             [kixi.stats.distribution :as k])
   #?(:clj
      (:import (kixi.stats.distribution Bernoulli Cauchy
+                                       Binomial
                                        Exponential Beta Gamma
                                        Normal Uniform T))))
 
@@ -22,6 +23,14 @@
   (logpdf [this v]
     (ll/bernoulli (.-p this) v)))
 
+(extend-type #?(:clj Binomial :cljs k/Binomial)
+  d/Sample
+  (sample [dist] (k/draw dist))
+
+  d/LogPDF
+  (logpdf [dist v]
+    (ll/binomial (.-n dist) (.-p dist) v)))
+
 (extend-type #?(:clj Beta :cljs k/Beta)
   d/Sample
   (sample [this] (k/draw this))
@@ -106,6 +115,10 @@
   ([] (bernoulli-distribution 0.5))
   ([p] (k/bernoulli {:p p})))
 
+(defn binomial-distribution
+  "Returns a kixi binomial distribution over `n` trials with success probability `p`."
+  [n p] (k/binomial {:n n :p p}))
+
 (defn beta-distribution
   ([] (beta-distribution 1.0 1.0))
   ([alpha beta]
@@ -143,6 +156,9 @@
 (def bernoulli
   (d/->GenerativeFn bernoulli-distribution 1))
 
+(def binomial
+  (d/->GenerativeFn binomial-distribution 2))
+
 (def beta
   (d/->GenerativeFn beta-distribution 2))
 
diff --git a/src/gen/distribution/math/log_likelihood.cljc b/src/gen/distribution/math/log_likelihood.cljc
index a98142a..8d77e5c 100644
--- a/src/gen/distribution/math/log_likelihood.cljc
+++ b/src/gen/distribution/math/log_likelihood.cljc
@@ -1,9 +1,6 @@
 (ns gen.distribution.math.log-likelihood
-  "Log-likelihood implementations for various primitive distributions.")
-
-;; ## Helpful constants
-;;
-;; These come in handy in the implementations below and are worth caching.
+  "Log-likelihood implementations for various primitive distributions."
+  (:require [kixi.stats.math :as k]))
 
 (def ^:no-doc log-pi
   (Math/log Math/PI))
@@ -14,39 +11,13 @@
 (def ^:no-doc sqrt-2pi
   (Math/sqrt (* 2 Math/PI)))
 
-;; ## Log-likelihood implementations
-
-(def ^:no-doc gamma-coefficients
-  "Coefficients for the Lanczos approximation to the natural log of the Gamma
-  function described in [section 6.1 of Numerical
-  Recipes](http://phys.uri.edu/nigh/NumRec/bookfpdf/f6-1.pdf)."
-  [76.18009172947146
-   -86.50532032941677
-   24.01409824083091
-   -1.231739572450155
-   0.1208650973866179e-2
-   -0.5395239384953e-5])
-
 (defn ^:no-doc log-gamma-fn
   "Returns the natural log of the value of the [Gamma
-  function](https://en.wikipedia.org/wiki/Gamma_function) evaluated at `x`
-
-  This function implements the Lanczos approximation described in [section 6.1
-  of Numerical Recipes](http://phys.uri.edu/nigh/NumRec/bookfpdf/f6-1.pdf)."
+  function](https://en.wikipedia.org/wiki/Gamma_function) evaluated at `x`.
+
+  Delegates to `kixi.stats.math/log-gamma`."
   [x]
-  (let [tmp (+ x 5.5)
-        tmp (- (* (+ x 0.5) (Math/log tmp)) tmp)
-        n   (dec (count gamma-coefficients))
-        ser (loop [i   0
-                   x+1 (inc x)
-                   acc 1.000000000190015]
-              (if (> i n)
-                acc
-                (let [coef (nth gamma-coefficients i nil)]
-                  (recur (inc i)
-                         (inc x+1)
-                         (+ acc (/ coef x+1))))))]
-    (+ tmp (Math/log (* sqrt-2pi (/ ser x))))))
+  (k/log-gamma x))
 
 (defn gamma
   "Returns the log-likelihood of the [Gamma
@@ -97,6 +68,43 @@
   {:pre [(<= 0 p 1)]}
   (Math/log (if v p (- 1.0 p))))
 
+(defn binomial
+  "Returns the log-likelihood of a [Binomial
+  distribution](https://en.wikipedia.org/wiki/Binomial_distribution)
+  parameterized by `n` (number of trials) and `p` (probability of success in
+  each trial) at the value `v` (number of successes).
+
+  `n` and `v` must be integers with `0 <= v <= n`, and `p` must lie in `[0, 1]`.
+  At the boundaries `p = 0` and `p = 1` the result is `0.0` for the only
+  possible outcome and `##-Inf` for every other `v`."
+  [n p v]
+  {:pre [(integer? n)
+         (integer? v)
+         (>= v 0)
+         (>= n v)
+         (<= 0 p 1)]}
+  (letfn [(log-fact
+            [x]
+            (log-gamma-fn (inc x)))
+          (log-bico
+            [n k]
+            (if (or (zero? k) (= k n))
+              0 ;; log(1)
+              (- (log-fact n) (log-fact k) (log-fact (- n k)))))]
+    ;; `case` compares constants without numeric coercion, so on the JVM a
+    ;; double `p` of 0.0 or 1.0 would never match `0`/`1` and the general formula
+    ;; would compute 0 * log(0) = NaN. Test numerically instead.
+    (cond
+      (zero? p) (if (zero? v)
+                  0.0     ;; log(1)
+                  ##-Inf) ;; log(0)
+      (== 1 p)  (if (= v n)
+                  0.0     ;; log(1)
+                  ##-Inf) ;; log(0)
+      :else     (+ (log-bico n v)
+                   (* v (Math/log p))
+                   (* (- n v) (Math/log (- 1 p)))))))
+
 (defn cauchy
   "Returns the log-likelihood of a [Cauchy
   distribution](https://en.wikipedia.org/wiki/Cauchy_distribution) parameterized
diff --git a/test/gen/distribution/commons_math_test.clj b/test/gen/distribution/commons_math_test.clj
index 8068278..f39759f 100644
--- a/test/gen/distribution/commons_math_test.clj
+++ b/test/gen/distribution/commons_math_test.clj
@@ -7,6 +7,10 @@
   (dt/bernoulli-tests commons/bernoulli-distribution)
   (dt/bernoulli-gfi-tests commons/bernoulli))
 
+(deftest binomial-tests
+  (dt/binomial-tests commons/binomial-distribution)
+  (dt/binomial-gf-tests commons/binomial))
+
 (deftest beta-tests
   (dt/beta-tests commons/beta-distribution))
 
diff --git a/test/gen/distribution/kixi_test.cljc
b/test/gen/distribution/kixi_test.cljc
index 534b164..109b0c1 100644
--- a/test/gen/distribution/kixi_test.cljc
+++ b/test/gen/distribution/kixi_test.cljc
@@ -7,6 +7,10 @@
   (dt/bernoulli-tests kixi/bernoulli-distribution)
   (dt/bernoulli-gfi-tests kixi/bernoulli))
 
+(deftest binomial-tests
+  (dt/binomial-tests kixi/binomial-distribution)
+  (dt/binomial-gf-tests kixi/binomial))
+
 (deftest beta-tests
   (dt/beta-tests kixi/beta-distribution))
 
diff --git a/test/gen/distribution/math/log_likelihood_test.cljc b/test/gen/distribution/math/log_likelihood_test.cljc
index 1b4b72b..fd57407 100644
--- a/test/gen/distribution/math/log_likelihood_test.cljc
+++ b/test/gen/distribution/math/log_likelihood_test.cljc
@@ -46,6 +46,9 @@
 (deftest bernoulli-tests
   (dt/bernoulli-tests (->logpdf ll/bernoulli)))
 
+(deftest binomial-tests
+  (dt/binomial-tests (->logpdf ll/binomial)))
+
 (deftest cauchy-tests
   (dt/cauchy-tests (->logpdf ll/cauchy)))
 
diff --git a/test/gen/distribution_test.cljc b/test/gen/distribution_test.cljc
index 280ffd6..6dbccc1 100644
--- a/test/gen/distribution_test.cljc
+++ b/test/gen/distribution_test.cljc
@@ -59,6 +59,87 @@
                (Math/exp (dist/logpdf (->bernoulli p) (not v)))))
         "All options sum to 1")))
 
+(defn binomial-gf-tests
+  "Checks that values simulated from the generative function `->binomial-gf`
+  with arguments `[n p]` fall within `[0, n]`."
+  [->binomial-gf]
+  (checking "spot check gf score implementations"
+            [n (gen/choose 0 10000)
+             p (gen-double 0.11111 0.99999)]
+            (let [trace  (gf/simulate ->binomial-gf [n p])
+                  sample (trace/get-retval trace)]
+              (is (<= 0 sample n)))))
+
+(defn binomial-tests
+  "Shared checks for any constructor `->binomial` taking `n` and `p` and
+  returning something `dist/logpdf` can score at an integer `v`."
+  [->binomial]
+  ;; boundaries...
+  ;;
+  ;; NOTE: `is` asserts on a single form; `(is 0 x)` would treat `x` as the
+  ;; failure message and always pass, so every expectation is an explicit `==`.
+  (testing "when p = 0 and v = 0, probability is 1, log(1) = 0"
+    (is (== 0 (dist/logpdf (->binomial 10 0) 0))))
+
+  (testing "when p = 0 and v > 0, probability is 0, log(0) = -Inf"
+    (is (== ##-Inf (dist/logpdf (->binomial 10 0) 1))))
+
+  (testing "when p = 1 and v = n, probability is 1, log(1) = 0"
+    (is (== 0 (dist/logpdf (->binomial 10 1) 10))))
+
+  (testing "when p = 1 and v < n, probability is 0, log(0) = -Inf"
+    (is (== ##-Inf (dist/logpdf (->binomial 10 1) 9))))
+
+  ;; properties...
+  (checking "sum of probabilities equals 1"
+            [n (gen/choose 0 10000)
+             p (gen-double 0.11111 0.99999)]
+            (let [log-probs (map (fn [k] (dist/logpdf (->binomial n p) k)) (range 0 (inc n)))
+                  probs     (map (fn [x] (Math/exp x)) log-probs)
+                  sum-probs (reduce + probs)]
+              (with-comparator (within 1e-9)
+                (is (ish? 1.0 sum-probs)))))
+
+  ;; A binomial distribution is symmetrical if the probability of observing $k$
+  ;; successes in $n$ trials is the same as observing $n - k$ successes, which
+  ;; should be true when $p = 0.5$.
+  (checking "symmetrical shape when $p = 0.5$"
+            [n (gen/choose 0 10000)]
+            (with-comparator (within 1e-9)
+              (let [p   0.5
+                    ks  (range 0 (inc n))
+                    k   (map (fn [k] (dist/logpdf (->binomial n p) k)) ks)
+                    n-k (map (fn [k] (dist/logpdf (->binomial n p) (- n k))) ks)]
+                (is (ish? k n-k)))))
+
+  (testing "spot check against scipy.stats.binom.logpmf (v1.12.0)"
+    (with-comparator (within 1e-12)
+      (let [scipy-data [[5 0.2 5 -8.047189562170502]
+                        [50 0.99 49 -1.1856136373815076]
+                        [50 0.01 1 -1.185613637381508]
+                        [10 0 0 0]
+                        [10 1 10 0]
+                        [100 0.9 90 -2.02597397686619]
+                        [500 0.1 0 -52.680257828913156]]]
+        (doseq [[n p v expected] scipy-data]
+          (let [actual (dist/logpdf (->binomial n p) v)]
+            (is (ish? expected actual)
+                (str "n=" n ", p=" p ", v=" v)))))))
+
+  (testing "spot check against gen.jl logpdf (v0.4.6)"
+    (with-comparator (within 1e-12)
+      (let [gen-data [[5 0.2 5 -8.047189562170502]
+                      [50 0.99 49 -1.185613637381516]
+                      [50 0.01 1 -1.1856136373815152]
+                      [10 0 0 0]
+                      [10 1 10 0]
+                      [100 0.9 90 -2.025973976866184]
+                      [500 0.1 0 -52.680257828913156]]]
+        (doseq [[n p v expected] gen-data]
+          (let [actual (dist/logpdf (->binomial n p) v)]
+            (is (ish? expected actual)
+                (str "n=" n ", p=" p ", v=" v))))))))
+
 (defn categorical-tests [->cat]
   (checking "map => categorical properties"
             [p (gen-double 0 1)]