Separated out numerical mean and variance from analytic

Added tests; rewrote equations in vignette
bertcarnell · Feb 4, 2024 · 78ab7cd · 78ab7cd
1 parent 52cf2d5
commit 78ab7cd
Show file tree

Hide file tree

Showing 5 changed files with 290 additions and 102 deletions.
diff --git a/R/mle.R b/R/mle.R
@@ -365,16 +365,34 @@ f_rth_order_stat <- function(x, n, r, a, b, c)
 #'
 #' @return the expected value
 #'
-#' @importFrom stats integrate
 #' @examples
 #' mean_rth_order_stat(10, 5, 0, 1, 0.5)
 mean_rth_order_stat <- function(n, r, a, b, c)
 {
-  integrand <- function(x, n, r, a, b, c) {x * f_rth_order_stat(x, n, r, a, b, c)}
-  stats::integrate(integrand, lower = a, upper = b, n = n, r = r, a = a, b = b, c = c)
+  # Expected value of the rth of n order statistics as a plain numeric.
+  # Small n uses the closed-form sums below; for n > 10 the work is handed
+  # to mean_rth_order_stat_numeric().
+  # NOTE(review): the cutoff of 10 is not explained here -- presumably the
+  # alternating sums lose precision as n grows; confirm and document.
+  if (n > 10) {
+    return(mean_rth_order_stat_numeric(n, r, a, b, c))
+  }
+  # this sum is only added to the result when c > a
+  lower_terms <- vapply(0:(n-r), function(k) {
+    choose(n-r, k) * (b-a)^(k-n) * (c-a)^(n-k) * (-1)^(n-r-k) *
+      (c/(n-k) - (c-a)/(n-k)/(2*n-2*k+1))
+  }, numeric(1))
+  # this sum is only added to the result when c < b
+  upper_terms <- vapply(0:(r-1), function(k) {
+    choose(r-1, k) * (b-a)^(k-n) * (c-b)^(n-k) *
+      (-c/(n-k) + (c-b)/(n-k)/(2*n-2*k+1))
+  }, numeric(1))
+  multiplier <- r*choose(n, r)
+  (if (c > a) multiplier*sum(lower_terms) else 0) +
+    (if (c < b) multiplier*(-1)^(n-r)*sum(upper_terms) else 0)
 }
 
-#' Variance of the rth order statistic
+#' Expected value of the rth order statistic using numeric integration
 #'
 #' @noRd
 #'
@@ -388,13 +406,67 @@ mean_rth_order_stat <- function(n, r, a, b, c)
 #'
 #' @importFrom stats integrate
 #' @examples
+#' mean_rth_order_stat_numeric(10, 5, 0, 1, 0.5)
+mean_rth_order_stat_numeric <- function(n, r, a, b, c)
+{
+  weighted_density <- function(x) x * f_rth_order_stat(x, n, r, a, b, c)
+  stats::integrate(weighted_density, lower = a, upper = b)$value  # E[X]
+}
+
+#' Variance of the rth order statistic
+#'
+#' @noRd
+#'
+#' @param n sample size, i.e. the number of order statistics
+#' @param r the order statistic number
+#' @param a the minimum support of the triangle distribution \code{a < b}, \code{a <= min(z)}
+#' @param b the maximum support of the triangle distribution \code{b >= max(z)}
+#' @param c the mode of the triangle distribution
+#'
+#' @return the variance: closed-form sums for \code{n <= 10}, numeric integration otherwise
+#'
+#' @examples
 #' variance_rth_order_stat(10, 5, 0, 1, 0.5)
 variance_rth_order_stat <- function(n, r, a, b, c)
+{
+  # NOTE(review): cutoff of 10 unexplained; presumably a precision limit -- confirm
+  if (n > 10) return(variance_rth_order_stat_numeric(n, r, a, b, c))
+  lower_terms <- vapply(0:(n-r), function(k) {
+    choose(n-r, k) * (b-a)^(k-n) * (c-a)^(n-k) * (-1)^(n-r-k) / (n-k) *
+      (c^2 - 2*c*(c-a)/(2*n-2*k+1) + 2*(c-a)^2/(2*n-2*k+1)/(2*n-2*k+2))
+  }, numeric(1))
+  upper_terms <- vapply(0:(r-1), function(k) {
+    choose(r-1, k) * (b-a)^(k-n) * (c-b)^(n-k)/(n-k) *
+      (-c^2 + 2*c*(c-b)/(2*n-2*k+1) - 2*(c-b)^2/(2*n-2*k+1)/(2*n-2*k+2))
+  }, numeric(1))
+  multiplier <- r*choose(n, r)
+  # second moment: first sum applies only when c > a, second only when c < b
+  second_moment <- (if (c > a) multiplier*sum(lower_terms) else 0) +
+    (if (c < b) multiplier*(-1)^(n-r)*sum(upper_terms) else 0)
+  second_moment - mean_rth_order_stat(n, r, a, b, c)^2
+}
+
+#' Numerically integrated variance of the rth order statistic
+#'
+#' @noRd
+#'
+#' @param n number of order statistics
+#' @param r index of the order statistic
+#' @param a lower bound of the triangle distribution's support \code{a < b}, \code{a <= min(z)}
+#' @param b upper bound of the triangle distribution's support \code{b >= max(z)}
+#' @param c mode of the triangle distribution
+#'
+#' @return the variance, computed as the integrated second moment minus the squared mean
+#'
+#' @importFrom stats integrate
+#' @examples
+#' variance_rth_order_stat_numeric(10, 5, 0, 1, 0.5)
+variance_rth_order_stat_numeric <- function(n, r, a, b, c)
 {
   integrand <- function(x, n, r, a, b, c) {x * x * f_rth_order_stat(x, n, r, a, b, c)}
   E_x2 <- stats::integrate(integrand, lower = a, upper = b, n = n, r = r, a = a, b = b, c = c)
-  E_x <- mean_rth_order_stat(n, r, a, b, c)
-  E_x2$value - E_x$value * E_x$value
+  first_moment <- mean_rth_order_stat_numeric(n, r, a, b, c)
+  E_x2$value - first_moment * first_moment
 }
 
 #' Maximum likelihood estimate of the triangle distribution parameters
@@ -450,6 +522,14 @@ triangle_mle <- function(x, debug = FALSE, maxiter = 100)
   }
 
   var_chat <- variance_rth_order_stat(length(x), mle_c2$r_hat, mle_ab$a, mle_ab$b, mle_c2$c_hat)
+  if (any(var_chat < 0) | debug) {
+    cat("\nNegative Varince in c hat\n")
+    cat("n=", length(x), "\n")
+    cat("r=", mle_c2$r_hat, "\n")
+    cat("a=", mle_ab$a, "\n")
+    cat("b=", mle_ab$b, "\n")
+    cat("c=", mle_c2$c_hat, "\n")
+  }
   vcov <- rbind(cbind(solve(mle_ab$hessian_ab), c(0, 0)), c(0, 0, var_chat))
   dimnames(vcov) <- list(c("a", "b", "c"), c("a", "b", "c"))
 

diff --git a/tests/testthat/helper-mle-utils.R b/tests/testthat/helper-mle-utils.R
@@ -0,0 +1,4 @@
+# Shared fixtures: the tests in test-mle-utils.R use xtest and mle1 from here
+set.seed(39854)
+xtest <- rtriangle(200, 0, 1, 0.5)
+mle1 <- triangle_mle(xtest)
diff --git a/tests/testthat/test-mle-utils.R b/tests/testthat/test-mle-utils.R
@@ -1,8 +1,4 @@
 test_that("summary works", {
-  set.seed(39854)
-  xtest <- rtriangle(200, 0, 1, 0.5)
-
-  mle1 <- triangle_mle(xtest)
   summ <- summary(mle1)
 
   expect_equal(summ@coef[,1], coef(mle1))
@@ -13,28 +9,16 @@ test_that("summary works", {
 })
 
 test_that("print works", {
-  set.seed(39854)
-  xtest <- rtriangle(200, 0, 1, 0.5)
-
-  mle1 <- triangle_mle(xtest)
   expect_output(print(mle1))
 })
 
 test_that("coef works", {
-  set.seed(39854)
-  xtest <- rtriangle(200, 0, 1, 0.5)
-
-  mle1 <- triangle_mle(xtest)
   cf <- coef(mle1)
 
   expect_equal(mle1$coef, cf)
 })
 
 test_that("logLik works", {
-  set.seed(39854)
-  xtest <- rtriangle(200, 0, 1, 0.5)
-
-  mle1 <- triangle_mle(xtest)
   ll <- logLik(mle1)
 
   expect_equivalent(-1*nLL_triangle(xtest, mle1$coef[1], mle1$coef[2], mle1$coef[3]),
@@ -44,61 +28,41 @@ test_that("logLik works", {
 })
 
 test_that("AIC works", {
-  set.seed(39854)
-  xtest <- rtriangle(200, 0, 1, 0.5)
-
-  mle1 <- triangle_mle(xtest)
   aic <- AIC(mle1)
 
   expect_equivalent(2 * nLL_triangle(xtest, mle1$coef[1], mle1$coef[2], mle1$coef[3]) + 2 * 3,
                     aic)
 })
 
 test_that("BIC works", {
-  set.seed(39854)
-  xtest <- rtriangle(200, 0, 1, 0.5)
-
-  mle1 <- triangle_mle(xtest)
   bic <- BIC(mle1)
 
   expect_equivalent(2 * nLL_triangle(xtest, mle1$coef[1], mle1$coef[2], mle1$coef[3]) + log(length(xtest)) * 3,
                     bic)
 })
 
 test_that("vcov works", {
-  set.seed(39854)
-  xtest <- rtriangle(200, 0, 1, 0.5)
-
-  mle1 <- triangle_mle(xtest)
   v <- vcov(mle1)
 
   expect_equivalent(mle1$vcov, v)
 })
 
 test_that("profile works", {
-  set.seed(39854)
-  xtest <- rtriangle(200, 0, 1, 0.5)
-
-  mle1 <- triangle_mle(xtest)
   prof <- profile(mle1)
 
   expect_equivalent("profile.mle", class(prof))
   expect_equal(3, length(prof@profile))
   expect_equal(3, ncol(prof@profile$a$par.vals))
 
-  mle1 <- standard_triangle_mle(xtest)
-  prof <- profile(mle1)
+  mle2 <- standard_triangle_mle(xtest)  # own name; mle1 is the shared fixture
+  prof <- profile(mle2)
 
   expect_equivalent("profile.mle", class(prof))
   expect_equal(3, length(prof@profile))
   expect_equal(3, ncol(prof@profile$a$par.vals))
 })
 
 test_that("confint works", {
-  set.seed(39854)
-  xtest <- rtriangle(200, 0, 1, 0.5)
-
-  mle1 <- triangle_mle(xtest)
   cfi <- confint(mle1, level = 0.95)
 
   expect_equal(c(3,2), dim(cfi))

diff --git a/tests/testthat/test-mle.R b/tests/testthat/test-mle.R
@@ -95,15 +95,31 @@ test_that("f_rth_order_stat works", {
 
 test_that("mean_rth_order_stat works", {
   temp <- mean_rth_order_stat(10, 5, 0, 1, 0.5)
-  expect_true(temp$value >= 0 & temp$value <= 1)
-  expect_true(temp$value < 0.5)
+  expect_true(temp >= 0 & temp <= 1)
+  expect_true(temp < 0.5)
+
   temp <- mean_rth_order_stat(10, 6, 0, 1, 0.5)
-  expect_true(temp$value > 0.5)
+  expect_true(temp > 0.5)
 
   temp <- mean_rth_order_stat(10, 1, 0, 1, 0.5)
-  expect_true(temp$value >= 0 & temp$value <= 1)
+  expect_true(temp >= 0 & temp <= 1)
+
   temp <- mean_rth_order_stat(10, 10, 0, 1, 0.5)
-  expect_true(temp$value >= 0 & temp$value <= 1)
+  expect_true(temp >= 0 & temp <= 1)
+
+  expect_equal(mean_rth_order_stat_numeric(10, 5, 1, 3, 2),
+               mean_rth_order_stat(10, 5, 1, 3, 2), tolerance = 1E-6)
+  expect_equal(mean_rth_order_stat_numeric(10, 5, 1, 3, 1),
+               mean_rth_order_stat(10, 5, 1, 3, 1), tolerance = 1E-6)
+  expect_equal(mean_rth_order_stat_numeric(10, 5, 1, 3, 3),
+               mean_rth_order_stat(10, 5, 1, 3, 3), tolerance = 1E-6)
+  expect_equal(mean_rth_order_stat_numeric(10, 1, 1, 3, 2),
+               mean_rth_order_stat(10, 1, 1, 3, 2), tolerance = 1E-6)
+  expect_equal(mean_rth_order_stat_numeric(10, 10, 1, 3, 2),
+               mean_rth_order_stat(10, 10, 1, 3, 2), tolerance = 1E-4) # NOTE(review): r = n needs a looser tolerance -- why?
+
+  large_n_mean <- mean_rth_order_stat(200, 100, 0, 1, 0.5) # n > 10: numeric path
+  expect_true(large_n_mean > 0 & large_n_mean < 1)
 })
 
 test_that("variance_rth_order_stat works", {
@@ -113,6 +129,17 @@ test_that("variance_rth_order_stat works", {
   expect_true(variance_rth_order_stat(10, 8, 0, 1, 0.5) >= 0)
   expect_true(variance_rth_order_stat(10, 9, 0, 1, 0.5) >= 0)
   expect_true(variance_rth_order_stat(10, 10, 0, 1, 0.5) >= 0)
+
+  expect_equal(variance_rth_order_stat_numeric(10, 5, 1, 3, 2),
+               variance_rth_order_stat(10, 5, 1, 3, 2), tolerance = 1E-6)
+  expect_equal(variance_rth_order_stat_numeric(10, 5, 1, 3, 1),
+               variance_rth_order_stat(10, 5, 1, 3, 1), tolerance = 1E-6)
+  expect_equal(variance_rth_order_stat_numeric(10, 5, 1, 3, 3),
+               variance_rth_order_stat(10, 5, 1, 3, 3), tolerance = 1E-6)
+  expect_equal(variance_rth_order_stat_numeric(10, 1, 1, 3, 2),
+               variance_rth_order_stat(10, 1, 1, 3, 2), tolerance = 1E-6)
+  expect_equal(variance_rth_order_stat_numeric(10, 10, 1, 3, 2),
+               variance_rth_order_stat(10, 10, 1, 3, 2), tolerance = 1E-3) ####
 })
 
 test_that("triangle_mle works", {