From 346b5d096067b84f1a3aac51d4a73fdd136d935f Mon Sep 17 00:00:00 2001 From: Aaron Steele Date: Sat, 24 Feb 2024 17:26:41 -0800 Subject: [PATCH 1/3] initial commit --- .gitignore | 3 +- package-lock.json | 2 +- src/gen/distribution/commons_math.clj | 7 ++ src/gen/distribution/java_util.clj | 16 ++++ src/gen/distribution/kixi.cljc | 16 ++++ src/gen/distribution/math/log_likelihood.cljc | 40 +++++++++ test/gen/distribution/commons_math_test.clj | 3 + test/gen/distribution/java_util_test.clj | 3 + test/gen/distribution/kixi_test.cljc | 3 + .../math/log_likelihood_test.cljc | 3 + test/gen/distribution_test.cljc | 81 +++++++++++++++++++ 11 files changed, 175 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 96248de..3af6366 100644 --- a/.gitignore +++ b/.gitignore @@ -19,4 +19,5 @@ pom.xml !template/pom.xml pom.xml.asc node_modules -**.shadow-cljs \ No newline at end of file +**.shadow-cljs +.#* \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index ec4e8e5..a6b8cfd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,5 +1,5 @@ { - "name": "gen.clj", + "name": "Gen.clj", "lockfileVersion": 2, "requires": true, "packages": { diff --git a/src/gen/distribution/commons_math.clj b/src/gen/distribution/commons_math.clj index 4fc3b5b..5aca4ac 100644 --- a/src/gen/distribution/commons_math.clj +++ b/src/gen/distribution/commons_math.clj @@ -68,6 +68,10 @@ 0 false 1 true))))) +(defn binomial-distribution + ([n ^double p] + (BinomialDistribution. 
(rng) n p))) + (defn beta-distribution ([] (beta-distribution 1.0 1.0)) ([^double alpha ^double beta] @@ -129,6 +133,9 @@ (def bernoulli (d/->GenerativeFn bernoulli-distribution 1)) +(def binomial + (d/->GenerativeFn binomial-distribution 2)) + (def beta (d/->GenerativeFn beta-distribution 2)) diff --git a/src/gen/distribution/java_util.clj b/src/gen/distribution/java_util.clj index ed7e9cd..46e816c 100644 --- a/src/gen/distribution/java_util.clj +++ b/src/gen/distribution/java_util.clj @@ -25,6 +25,15 @@ (d/logpdf [_ v] (ll/bernoulli p v))) +(defrecord Binomial [^SplittableRandom rnd n p] + d/Sample + (sample [_] + (.nextBinomial rnd n p)) + + d/LogPDF + (d/logpdf [_ v] + (ll/binomial n p v))) + (defrecord Gaussian [^SplittableRandom rnd mu sigma] d/Sample (sample [_] @@ -43,6 +52,10 @@ ([] (bernoulli-distribution 0.5)) ([p] (->Bernoulli (rng) p))) +(defn binomial-distribution + [n p] + (->Binomial (rng) n p)) + (defn uniform-distribution ([] (uniform-distribution 0.0 1.0)) ([lo hi] (->Uniform (rng) lo hi))) @@ -57,6 +70,9 @@ (def bernoulli (d/->GenerativeFn bernoulli-distribution 1)) +(def binomial + (d/->GenerativeFn binomial-distribution 2)) + (def uniform (d/->GenerativeFn uniform-distribution 2)) diff --git a/src/gen/distribution/kixi.cljc b/src/gen/distribution/kixi.cljc index d31a79f..1815956 100644 --- a/src/gen/distribution/kixi.cljc +++ b/src/gen/distribution/kixi.cljc @@ -4,6 +4,7 @@ [kixi.stats.distribution :as k]) #?(:clj (:import (kixi.stats.distribution Bernoulli Cauchy + Binomial Exponential Beta Gamma Normal Uniform T)))) @@ -22,6 +23,14 @@ (logpdf [this v] (ll/bernoulli (.-p this) v))) +(extend-type #?(:clj Binomial :cljs k/Binomial) + d/Sample + (sample [this] (k/draw this)) + + d/LogPDF + (logpdf [this v] + (ll/binomial (.-n this) (.-p this) v))) + (extend-type #?(:clj Beta :cljs k/Beta) d/Sample (sample [this] (k/draw this)) @@ -106,6 +115,10 @@ ([] (bernoulli-distribution 0.5)) ([p] (k/bernoulli {:p p}))) +(defn binomial-distribution + ([n p] 
+ (k/binomial {:n n :p p}))) + (defn beta-distribution ([] (beta-distribution 1.0 1.0)) ([alpha beta] @@ -143,6 +156,9 @@ (def bernoulli (d/->GenerativeFn bernoulli-distribution 1)) +(def binomial + (d/->GenerativeFn binomial-distribution 2)) + (def beta (d/->GenerativeFn beta-distribution 2)) diff --git a/src/gen/distribution/math/log_likelihood.cljc b/src/gen/distribution/math/log_likelihood.cljc index a98142a..4f4c0ec 100644 --- a/src/gen/distribution/math/log_likelihood.cljc +++ b/src/gen/distribution/math/log_likelihood.cljc @@ -97,6 +97,46 @@ {:pre [(<= 0 p 1)]} (Math/log (if v p (- 1.0 p)))) +(defn log-fact + "Returns the natural logarithm of `x` factorial." + [x] + {:pre [(>= x 0)]} + (log-gamma-fn (inc x))) + +(defn log-bico + "Returns the natural logorithm of the binomial coefficient, `n` choose `k`." + [n k] + {:pre [(integer? n) + (integer? k) + (>= k 0) + (>= n k)]} + (if (or (zero? k) (= k n)) + 0 ;; log 1 + (- (log-fact n) (log-fact k) (log-fact (- n k))))) + +(defn binomial + "Returns the log-likelihood of a [Binomial + distribution](https://en.wikipedia.org/wiki/Binomial_distribution) + parameterized by `n` (number of trials) and `p` (probability of success in + each trial) at the value `v` (number of successes)." + [n p v] + {:pre [(integer? n) + (integer? 
v) + (>= v 0) + (>= n v) + (<= 0 p 1)]} + (cond + (= p 0) (if (= v 0) + 0.0 ;; log(1) + ##-Inf) ;; log(0) + (= p 1) (if (= v n) + 0.0 ;; log(1) + ##-Inf) ;; log(0) + :else + (+ (log-bico n v) + (* v (Math/log p)) + (* (- n v) (Math/log (- 1 p)))))) + (defn cauchy "Returns the log-likelihood of a [Cauchy distribution](https://en.wikipedia.org/wiki/Cauchy_distribution) parameterized diff --git a/test/gen/distribution/commons_math_test.clj b/test/gen/distribution/commons_math_test.clj index 8068278..e6a0939 100644 --- a/test/gen/distribution/commons_math_test.clj +++ b/test/gen/distribution/commons_math_test.clj @@ -7,6 +7,9 @@ (dt/bernoulli-tests commons/bernoulli-distribution) (dt/bernoulli-gfi-tests commons/bernoulli)) +(deftest binomial-tests + (dt/binomial-tests commons/binomial-distribution)) + (deftest beta-tests (dt/beta-tests commons/beta-distribution)) diff --git a/test/gen/distribution/java_util_test.clj b/test/gen/distribution/java_util_test.clj index fdae373..8e32b40 100644 --- a/test/gen/distribution/java_util_test.clj +++ b/test/gen/distribution/java_util_test.clj @@ -7,6 +7,9 @@ (dt/bernoulli-tests java-util/bernoulli-distribution) (dt/bernoulli-gfi-tests java-util/bernoulli)) +(deftest binomial-tests + (dt/binomial-tests java-util/binomial-distribution)) + (deftest uniform-tests (dt/uniform-tests java-util/uniform-distribution)) diff --git a/test/gen/distribution/kixi_test.cljc b/test/gen/distribution/kixi_test.cljc index 534b164..7a38c44 100644 --- a/test/gen/distribution/kixi_test.cljc +++ b/test/gen/distribution/kixi_test.cljc @@ -7,6 +7,9 @@ (dt/bernoulli-tests kixi/bernoulli-distribution) (dt/bernoulli-gfi-tests kixi/bernoulli)) +(deftest binomial-tests + (dt/binomial-tests kixi/binomial-distribution)) + (deftest beta-tests (dt/beta-tests kixi/beta-distribution)) diff --git a/test/gen/distribution/math/log_likelihood_test.cljc b/test/gen/distribution/math/log_likelihood_test.cljc index 1b4b72b..fd57407 100644 --- 
a/test/gen/distribution/math/log_likelihood_test.cljc +++ b/test/gen/distribution/math/log_likelihood_test.cljc @@ -46,6 +46,9 @@ (deftest bernoulli-tests (dt/bernoulli-tests (->logpdf ll/bernoulli))) +(deftest binomial-tests + (dt/binomial-tests (->logpdf ll/binomial))) + (deftest cauchy-tests (dt/cauchy-tests (->logpdf ll/cauchy))) diff --git a/test/gen/distribution_test.cljc b/test/gen/distribution_test.cljc index 280ffd6..1da671f 100644 --- a/test/gen/distribution_test.cljc +++ b/test/gen/distribution_test.cljc @@ -59,6 +59,87 @@ (Math/exp (dist/logpdf (->bernoulli p) (not v))))) "All options sum to 1"))) +(defn binomial-tests [->binomial] + ;; boundaries... + (testing "when p = 0 and v = 0, probability is 1, log(1) = 0" + (is 0 (dist/logpdf (->binomial 10 0) 0))) + + (testing "when p = 0 and v > 0, probability is 0, log(0) = -Inf" + (is ##-Inf (dist/logpdf (->binomial 10 0) 1))) + + (testing "when p = 1 and v = n, probability is 1, log(1) = 0" + (is 0 (dist/logpdf (->binomial 10 1) 10))) + + (testing "when p = 1 and v < n, probability is 0, log(0) = -Inf" + (is ##-Inf(dist/logpdf (->binomial 10 0) 1))) + + ;; properties... + (testing "sum of probabilities equals 1" + (with-comparator (within 1e-9) + (let [n 100 + p 0.5 + log-probs (map (fn [k] (dist/logpdf (->binomial n p) k)) (range 0 (inc n))) + probs (map (fn [x] (Math/exp x)) log-probs) + sum-probs (reduce + probs)] + (is (ish? 
1.0 sum-probs))))) + + (testing "symmetric when p = 0.5 such that binomial(k) = binomial(n -k)" + (let [n 100 + p 0.5 + v 10] + (is (dist/logpdf (->binomial n p) v) + (dist/logpdf (->binomial n p) (- n v))))) + + (testing "mean and variance consistency where mu = n * p and variance = mu(1 - p)" + (with-comparator (within 1e-9) + (let [n 100 + p 0.3 + ks (range 0 (inc n)) + log-probs (map (fn [k] (dist/logpdf (->binomial n p) k)) ks) + probs (map (fn [x] (Math/exp x)) log-probs) + mu (reduce + (map * probs ks)) + variance (reduce + (map (fn [k p] (* p (Math/pow (- k mu) 2))) ks probs)) + theoretical-mu (* n p) + theoretical-variance (* n p (- 1 p))] + (is (ish? theoretical-mu mu)) + (is (ish? theoretical-variance variance))))) + + (testing "spot check against scipy.stats.binom.logpmf (v1.12.0)" + (with-comparator (within 1e-9) + (is (ish? -7.13354688230902 (dist/logpdf (->binomial 1000000 0.5) 500000))) + + ;; TODO: failing test (off by 1.9e-9) + ;; expected: (ish? -3.222306954272568 (dist/logpdf (->binomial 1000000 0.0001) 100)) + ;; actual: (not (ish? -3.222306954272568 -3.2223069561241857)) + (is (ish? -3.222306954272568 (dist/logpdf (->binomial 1000000 0.0001) 100))) + + (is (ish? -8.047189562170502 (dist/logpdf (->binomial 5 0.2) 5))) + (is (ish? -1.1856136373815076 (dist/logpdf (->binomial 50 0.99) 49))) + (is (ish? -1.185613637381508 (dist/logpdf (->binomial 50 0.01) 1))) + (is (ish? -693133.3650493873 (dist/logpdf (->binomial 1000000 0.5) 999999))) + (is (ish? 0 (dist/logpdf (->binomial 10 0) 0))) + (is (ish? 0 (dist/logpdf (->binomial 10 1) 10))) + (is (ish? -2.02597397686619 (dist/logpdf (->binomial 100 0.9) 90))) + (is (ish? -52.680257828913156 (dist/logpdf (->binomial 500 0.1) 0))))) + + (testing "spot check against gen logpdf (v0.4.6)" + (with-comparator (within 1e-9) + (is (ish? -7.133546882067904 (dist/logpdf (->binomial 1000000 0.5) 500000))) + + ;; TODO: failing test (off by 1.9e-9) + ;; expected: (ish? 
-3.222306954262436 (dist/logpdf (->binomial 1000000 0.0001) 100)) + ;; actual: (not (ish? -3.222306954262436 -3.2223069561241857)) + (is (ish? -3.222306954262436 (dist/logpdf (->binomial 1000000 0.0001) 100))) + + (is (ish? -8.047189562170502 (dist/logpdf (->binomial 5 0.2) 5))) + (is (ish? -1.185613637381516 (dist/logpdf (->binomial 50 0.99) 49))) + (is (ish? -1.1856136373815152 (dist/logpdf (->binomial 50 0.01) 1))) + (is (ish? -693133.3650493873 (dist/logpdf (->binomial 1000000 0.5) 999999))) + (is (ish? 0 (dist/logpdf (->binomial 10 0) 0))) + (is (ish? 0 (dist/logpdf (->binomial 10 1) 10))) + (is (ish? -2.025973976866184 (dist/logpdf (->binomial 100 0.9) 90))) + (is (ish? -52.680257828913156 (dist/logpdf (->binomial 500 0.1) 0)))))) + (defn categorical-tests [->cat] (checking "map => categorical properties" [p (gen-double 0 1)] From 0484bfb95a208cfd025aa4191eb27a3e9e62a268 Mon Sep 17 00:00:00 2001 From: Aaron Steele Date: Wed, 28 Feb 2024 17:37:34 -0800 Subject: [PATCH 2/3] updates with requested changes --- src/gen/distribution/java_util.clj | 16 ----- src/gen/distribution/math/log_likelihood.cljc | 46 +++++------- test/gen/distribution/commons_math_test.clj | 3 +- test/gen/distribution/java_util_test.clj | 3 - test/gen/distribution/kixi_test.cljc | 3 +- test/gen/distribution_test.cljc | 71 ++++++++++--------- 6 files changed, 61 insertions(+), 81 deletions(-) diff --git a/src/gen/distribution/java_util.clj b/src/gen/distribution/java_util.clj index 46e816c..ed7e9cd 100644 --- a/src/gen/distribution/java_util.clj +++ b/src/gen/distribution/java_util.clj @@ -25,15 +25,6 @@ (d/logpdf [_ v] (ll/bernoulli p v))) -(defrecord Binomial [^SplittableRandom rnd n p] - d/Sample - (sample [_] - (.nextBinomial rnd n p)) - - d/LogPDF - (d/logpdf [_ v] - (ll/binomial n p v))) - (defrecord Gaussian [^SplittableRandom rnd mu sigma] d/Sample (sample [_] @@ -52,10 +43,6 @@ ([] (bernoulli-distribution 0.5)) ([p] (->Bernoulli (rng) p))) -(defn binomial-distribution - [n p] - 
(->Binomial (rng) n p)) - (defn uniform-distribution ([] (uniform-distribution 0.0 1.0)) ([lo hi] (->Uniform (rng) lo hi))) @@ -70,9 +57,6 @@ (def bernoulli (d/->GenerativeFn bernoulli-distribution 1)) -(def binomial - (d/->GenerativeFn binomial-distribution 2)) - (def uniform (d/->GenerativeFn uniform-distribution 2)) diff --git a/src/gen/distribution/math/log_likelihood.cljc b/src/gen/distribution/math/log_likelihood.cljc index 4f4c0ec..b064a1f 100644 --- a/src/gen/distribution/math/log_likelihood.cljc +++ b/src/gen/distribution/math/log_likelihood.cljc @@ -97,23 +97,6 @@ {:pre [(<= 0 p 1)]} (Math/log (if v p (- 1.0 p)))) -(defn log-fact - "Returns the natural logarithm of `x` factorial." - [x] - {:pre [(>= x 0)]} - (log-gamma-fn (inc x))) - -(defn log-bico - "Returns the natural logorithm of the binomial coefficient, `n` choose `k`." - [n k] - {:pre [(integer? n) - (integer? k) - (>= k 0) - (>= n k)]} - (if (or (zero? k) (= k n)) - 0 ;; log 1 - (- (log-fact n) (log-fact k) (log-fact (- n k))))) - (defn binomial "Returns the log-likelihood of a [Binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution) @@ -125,17 +108,24 @@ (>= v 0) (>= n v) (<= 0 p 1)]} - (cond - (= p 0) (if (= v 0) - 0.0 ;; log(1) - ##-Inf) ;; log(0) - (= p 1) (if (= v n) - 0.0 ;; log(1) - ##-Inf) ;; log(0) - :else - (+ (log-bico n v) - (* v (Math/log p)) - (* (- n v) (Math/log (- 1 p)))))) + (letfn [(log-fact + [x] + (log-gamma-fn (inc x))) + (log-bico + [n k] + (if (or (zero? 
k) (= k n)) + 0 ;; log(1) + (- (log-fact n) (log-fact k) (log-fact (- n k)))))] + (cond + (zero? p) (if (= v 0) ;; zero?/== also match 0.0 and 1.0; `case` on the literals 0/1 did not + 0.0 ;; log(1) + ##-Inf) ;; log(0) + (== p 1) (if (= v n) + 0.0 ;; log(1) + ##-Inf) ;; log(0) + :else (+ (log-bico n v) + (* v (Math/log p)) + (* (- n v) (Math/log (- 1 p))))))) (defn cauchy "Returns the log-likelihood of a [Cauchy diff --git a/test/gen/distribution/commons_math_test.clj b/test/gen/distribution/commons_math_test.clj index e6a0939..f39759f 100644 --- a/test/gen/distribution/commons_math_test.clj +++ b/test/gen/distribution/commons_math_test.clj @@ -8,7 +8,8 @@ (dt/bernoulli-gfi-tests commons/bernoulli)) (deftest binomial-tests - (dt/binomial-tests commons/binomial-distribution)) + (dt/binomial-tests commons/binomial-distribution) + (dt/binomial-gf-tests commons/binomial)) (deftest beta-tests (dt/beta-tests commons/beta-distribution)) diff --git a/test/gen/distribution/java_util_test.clj b/test/gen/distribution/java_util_test.clj index 8e32b40..fdae373 100644 --- a/test/gen/distribution/java_util_test.clj +++ b/test/gen/distribution/java_util_test.clj @@ -7,9 +7,6 @@ (dt/bernoulli-tests java-util/bernoulli-distribution) (dt/bernoulli-gfi-tests java-util/bernoulli)) -(deftest binomial-tests - (dt/binomial-tests java-util/binomial-distribution)) - (deftest uniform-tests (dt/uniform-tests java-util/uniform-distribution)) diff --git a/test/gen/distribution/kixi_test.cljc b/test/gen/distribution/kixi_test.cljc index 7a38c44..109b0c1 100644 --- a/test/gen/distribution/kixi_test.cljc +++ b/test/gen/distribution/kixi_test.cljc @@ -8,7 +8,8 @@ (dt/bernoulli-gfi-tests kixi/bernoulli)) (deftest binomial-tests - (dt/binomial-tests kixi/binomial-distribution)) + (dt/binomial-tests kixi/binomial-distribution) + (dt/binomial-gf-tests kixi/binomial)) (deftest beta-tests (dt/beta-tests kixi/beta-distribution)) diff --git a/test/gen/distribution_test.cljc b/test/gen/distribution_test.cljc index 1da671f..8f6d477 100644 --- a/test/gen/distribution_test.cljc +++ 
b/test/gen/distribution_test.cljc @@ -59,11 +59,19 @@ (Math/exp (dist/logpdf (->bernoulli p) (not v))))) "All options sum to 1"))) +(defn binomial-gf-tests [->binomial-gf] + (checking "spot check gf score implementations" + [n (gen/choose 0 10000) + p (gen-double 0.11111 0.99999)] + (let [trace (gf/simulate ->binomial-gf [n p]) + sample (trace/get-retval trace)] + (is (<= sample n))))) + (defn binomial-tests [->binomial] ;; boundaries... (testing "when p = 0 and v = 0, probability is 1, log(1) = 0" (is 0 (dist/logpdf (->binomial 10 0) 0))) - + (testing "when p = 0 and v > 0, probability is 0, log(0) = -Inf" (is ##-Inf (dist/logpdf (->binomial 10 0) 1))) @@ -74,45 +82,49 @@ (is ##-Inf(dist/logpdf (->binomial 10 0) 1))) ;; properties... - (testing "sum of probabilities equals 1" - (with-comparator (within 1e-9) - (let [n 100 - p 0.5 - log-probs (map (fn [k] (dist/logpdf (->binomial n p) k)) (range 0 (inc n))) + (checking "sum of probabilities equals 1" + [n (gen/choose 0 10000) + p (gen-double 0.11111 0.99999)] + (let [log-probs (map (fn [k] (dist/logpdf (->binomial n p) k)) (range 0 (inc n))) probs (map (fn [x] (Math/exp x)) log-probs) sum-probs (reduce + probs)] - (is (ish? 1.0 sum-probs))))) - - (testing "symmetric when p = 0.5 such that binomial(k) = binomial(n -k)" - (let [n 100 - p 0.5 - v 10] - (is (dist/logpdf (->binomial n p) v) - (dist/logpdf (->binomial n p) (- n v))))) - - (testing "mean and variance consistency where mu = n * p and variance = mu(1 - p)" + (with-comparator (within 1e-9) + (is (ish? 1.0 sum-probs))))) + + ;; A binomial distribution is symmetrical if the probability of observing $k$ + ;; successes in $n$ trials is the same as observing $n - k$ successes, which + ;; should be true when $p = 0.5$. 
+ (checking "symmetrical shape when $p = 0.5$" + [n (gen/choose 0 10000)] (with-comparator (within 1e-9) - (let [n 100 - p 0.3 + (let [p 0.5 ks (range 0 (inc n)) + k (map (fn [k] (dist/logpdf (->binomial n p) k)) ks) + n-k (map (fn [k] (dist/logpdf (->binomial n p) (- n k))) ks)] + (is (ish? k n-k))))) + + ;; The expected value (mean) of the binomial distribution is $mu = n * p$ and + ;; variance is $mu * (1 - p)$ for all $n$ and $p$. This computes the mean and + ;; variance from the log-likelihood function and compares them to the + ;; theoretical values. + (checking "expected value and variance" + [n (gen/choose 1 10000) + p (gen-double 0.11111 0.99999)] + (let [ks (range 0 (inc n)) log-probs (map (fn [k] (dist/logpdf (->binomial n p) k)) ks) probs (map (fn [x] (Math/exp x)) log-probs) mu (reduce + (map * probs ks)) variance (reduce + (map (fn [k p] (* p (Math/pow (- k mu) 2))) ks probs)) theoretical-mu (* n p) theoretical-variance (* n p (- 1 p))] - (is (ish? theoretical-mu mu)) - (is (ish? theoretical-variance variance))))) + (with-comparator (within 1e-3) + (is (ish? theoretical-mu mu)) + (is (ish? theoretical-variance variance))))) (testing "spot check against scipy.stats.binom.logpmf (v1.12.0)" - (with-comparator (within 1e-9) + (with-comparator (within 1e-6) (is (ish? -7.13354688230902 (dist/logpdf (->binomial 1000000 0.5) 500000))) - - ;; TODO: failing test (off by 1.9e-9) - ;; expected: (ish? -3.222306954272568 (dist/logpdf (->binomial 1000000 0.0001) 100)) - ;; actual: (not (ish? -3.222306954272568 -3.2223069561241857)) (is (ish? -3.222306954272568 (dist/logpdf (->binomial 1000000 0.0001) 100))) - (is (ish? -8.047189562170502 (dist/logpdf (->binomial 5 0.2) 5))) (is (ish? -1.1856136373815076 (dist/logpdf (->binomial 50 0.99) 49))) (is (ish? -1.185613637381508 (dist/logpdf (->binomial 50 0.01) 1))) @@ -123,14 +135,9 @@ (is (ish? 
-52.680257828913156 (dist/logpdf (->binomial 500 0.1) 0))))) (testing "spot check against gen logpdf (v0.4.6)" - (with-comparator (within 1e-9) + (with-comparator (within 1e-6) (is (ish? -7.133546882067904 (dist/logpdf (->binomial 1000000 0.5) 500000))) - - ;; TODO: failing test (off by 1.9e-9) - ;; expected: (ish? -3.222306954262436 (dist/logpdf (->binomial 1000000 0.0001) 100)) - ;; actual: (not (ish? -3.222306954262436 -3.2223069561241857)) (is (ish? -3.222306954262436 (dist/logpdf (->binomial 1000000 0.0001) 100))) - (is (ish? -8.047189562170502 (dist/logpdf (->binomial 5 0.2) 5))) (is (ish? -1.185613637381516 (dist/logpdf (->binomial 50 0.99) 49))) (is (ish? -1.1856136373815152 (dist/logpdf (->binomial 50 0.01) 1))) From fe62fbc3d03275cea3807a2804b143ce8462d10e Mon Sep 17 00:00:00 2001 From: Aaron Steele Date: Fri, 8 Mar 2024 10:03:50 -0800 Subject: [PATCH 3/3] log-gamma-fn dispatch to kixi --- src/gen/distribution/math/log_likelihood.cljc | 39 ++--------- test/gen/distribution_test.cljc | 68 +++++++------------ 2 files changed, 30 insertions(+), 77 deletions(-) diff --git a/src/gen/distribution/math/log_likelihood.cljc b/src/gen/distribution/math/log_likelihood.cljc index b064a1f..8d77e5c 100644 --- a/src/gen/distribution/math/log_likelihood.cljc +++ b/src/gen/distribution/math/log_likelihood.cljc @@ -1,9 +1,6 @@ (ns gen.distribution.math.log-likelihood - "Log-likelihood implementations for various primitive distributions.") - -;; ## Helpful constants -;; -;; These come in handy in the implementations below and are worth caching. + "Log-likelihood implementations for various primitive distributions." 
+ (:require [kixi.stats.math :as k])) (def ^:no-doc log-pi (Math/log Math/PI)) @@ -14,39 +11,11 @@ (def ^:no-doc sqrt-2pi (Math/sqrt (* 2 Math/PI))) -;; ## Log-likelihood implementations - -(def ^:no-doc gamma-coefficients - "Coefficients for the Lanczos approximation to the natural log of the Gamma - function described in [section 6.1 of Numerical - Recipes](http://phys.uri.edu/nigh/NumRec/bookfpdf/f6-1.pdf)." - [76.18009172947146 - -86.50532032941677 - 24.01409824083091 - -1.231739572450155 - 0.1208650973866179e-2 - -0.5395239384953e-5]) - (defn ^:no-doc log-gamma-fn "Returns the natural log of the value of the [Gamma - function](https://en.wikipedia.org/wiki/Gamma_function) evaluated at `x` - - This function implements the Lanczos approximation described in [section 6.1 - of Numerical Recipes](http://phys.uri.edu/nigh/NumRec/bookfpdf/f6-1.pdf)." + function](https://en.wikipedia.org/wiki/Gamma_function) evaluated at `x`" [x] - (let [tmp (+ x 5.5) - tmp (- (* (+ x 0.5) (Math/log tmp)) tmp) - n (dec (count gamma-coefficients)) - ser (loop [i 0 - x+1 (inc x) - acc 1.000000000190015] - (if (> i n) - acc - (let [coef (nth gamma-coefficients i nil)] - (recur (inc i) - (inc x+1) - (+ acc (/ coef x+1))))))] - (+ tmp (Math/log (* sqrt-2pi (/ ser x)))))) + (k/log-gamma x)) (defn gamma "Returns the log-likelihood of the [Gamma diff --git a/test/gen/distribution_test.cljc b/test/gen/distribution_test.cljc index 8f6d477..6dbccc1 100644 --- a/test/gen/distribution_test.cljc +++ b/test/gen/distribution_test.cljc @@ -103,49 +103,33 @@ n-k (map (fn [k] (dist/logpdf (->binomial n p) (- n k))) ks)] (is (ish? k n-k))))) - ;; The expected value (mean) of the binomial distribution is $mu = n * p$ and - ;; variance is $mu * (1 - p)$ for all $n$ and $p$. This computes the mean and - ;; variance from the log-likelihood function and compares them to the - ;; theoretical values. 
- (checking "expected value and variance" - [n (gen/choose 1 10000) - p (gen-double 0.11111 0.99999)] - (let [ks (range 0 (inc n)) - log-probs (map (fn [k] (dist/logpdf (->binomial n p) k)) ks) - probs (map (fn [x] (Math/exp x)) log-probs) - mu (reduce + (map * probs ks)) - variance (reduce + (map (fn [k p] (* p (Math/pow (- k mu) 2))) ks probs)) - theoretical-mu (* n p) - theoretical-variance (* n p (- 1 p))] - (with-comparator (within 1e-3) - (is (ish? theoretical-mu mu)) - (is (ish? theoretical-variance variance))))) - (testing "spot check against scipy.stats.binom.logpmf (v1.12.0)" - (with-comparator (within 1e-6) - (is (ish? -7.13354688230902 (dist/logpdf (->binomial 1000000 0.5) 500000))) - (is (ish? -3.222306954272568 (dist/logpdf (->binomial 1000000 0.0001) 100))) - (is (ish? -8.047189562170502 (dist/logpdf (->binomial 5 0.2) 5))) - (is (ish? -1.1856136373815076 (dist/logpdf (->binomial 50 0.99) 49))) - (is (ish? -1.185613637381508 (dist/logpdf (->binomial 50 0.01) 1))) - (is (ish? -693133.3650493873 (dist/logpdf (->binomial 1000000 0.5) 999999))) - (is (ish? 0 (dist/logpdf (->binomial 10 0) 0))) - (is (ish? 0 (dist/logpdf (->binomial 10 1) 10))) - (is (ish? -2.02597397686619 (dist/logpdf (->binomial 100 0.9) 90))) - (is (ish? -52.680257828913156 (dist/logpdf (->binomial 500 0.1) 0))))) - - (testing "spot check against gen logpdf (v0.4.6)" - (with-comparator (within 1e-6) - (is (ish? -7.133546882067904 (dist/logpdf (->binomial 1000000 0.5) 500000))) - (is (ish? -3.222306954262436 (dist/logpdf (->binomial 1000000 0.0001) 100))) - (is (ish? -8.047189562170502 (dist/logpdf (->binomial 5 0.2) 5))) - (is (ish? -1.185613637381516 (dist/logpdf (->binomial 50 0.99) 49))) - (is (ish? -1.1856136373815152 (dist/logpdf (->binomial 50 0.01) 1))) - (is (ish? -693133.3650493873 (dist/logpdf (->binomial 1000000 0.5) 999999))) - (is (ish? 0 (dist/logpdf (->binomial 10 0) 0))) - (is (ish? 0 (dist/logpdf (->binomial 10 1) 10))) - (is (ish? 
-2.025973976866184 (dist/logpdf (->binomial 100 0.9) 90))) - (is (ish? -52.680257828913156 (dist/logpdf (->binomial 500 0.1) 0)))))) + (with-comparator (within 1e-12) + (let [scipy-data [[5 0.2 5 -8.047189562170502] + [50 0.99 49 -1.1856136373815076] + [50 0.01 1 -1.185613637381508] + [10 0 0 0] + [10 1 10 0] + [100 0.9 90 -2.02597397686619] + [500 0.1 0 -52.680257828913156]]] + (doseq [[n p v expected] scipy-data] + (let [actual (dist/logpdf (->binomial n p) v)] + (is (ish? expected actual) + (str "n=" n ", p=" p ", v=" v))))))) + + (testing "spot check against gen.jl logpdf (v0.4.6)" + (with-comparator (within 1e-12) + (let [gen-data [[5 0.2 5 -8.047189562170502] + [50 0.99 49 -1.185613637381516] + [50 0.01 1 -1.1856136373815152] + [10 0 0 0] + [10 1 10 0] + [100 0.9 90 -2.025973976866184] + [500 0.1 0 -52.680257828913156]]] + (doseq [[n p v expected] gen-data] + (let [actual (dist/logpdf (->binomial n p) v)] + (is (ish? expected actual) + (str "n=" n ", p=" p ", v=" v)))))))) (defn categorical-tests [->cat] (checking "map => categorical properties"